diff --git a/evaluation/benchmarks/swe_bench/README.md b/evaluation/benchmarks/swe_bench/README.md
index a1bc26fb7f..f51c56462e 100644
--- a/evaluation/benchmarks/swe_bench/README.md
+++ b/evaluation/benchmarks/swe_bench/README.md
@@ -2,6 +2,8 @@

 This folder contains the evaluation harness that we built on top of the original [SWE-Bench benchmark](https://www.swebench.com/) ([paper](https://arxiv.org/abs/2310.06770)).

+**UPDATE (8/12/2025): We now support running SWE-rebench evaluation (see the paper [here](https://arxiv.org/abs/2505.20411))! For how to run it, checkout [this README](./SWE-rebench.md).**
+
 **UPDATE (6/15/2025): We now support running SWE-bench-Live evaluation (see the paper [here](https://arxiv.org/abs/2505.23419))! For how to run it, checkout [this README](./SWE-bench-Live.md).**

 **UPDATE (5/26/2025): We now support running interactive SWE-Bench evaluation (see the paper [here](https://arxiv.org/abs/2502.13069))! For how to run it, checkout [this README](./SWE-Interact.md).**
diff --git a/evaluation/benchmarks/swe_bench/SWE-rebench.md b/evaluation/benchmarks/swe_bench/SWE-rebench.md
new file mode 100644
index 0000000000..9e510e681b
--- /dev/null
+++ b/evaluation/benchmarks/swe_bench/SWE-rebench.md
@@ -0,0 +1,84 @@
+# SWE-rebench
+
+[📃 Paper](https://arxiv.org/abs/2505.20411) • [🤗 HuggingFace](https://huggingface.co/datasets/nebius/SWE-rebench) • [📊 Leaderboard](https://swe-rebench.com/leaderboard)
+
+SWE-rebench is a large-scale dataset of verifiable software engineering tasks.
+It is published as **two datasets**:
+
+* **[`nebius/SWE-rebench-leaderboard`](https://huggingface.co/datasets/nebius/SWE-rebench-leaderboard)** – continuously updated benchmark used for [leaderboard evaluation](https://swe-rebench.com/leaderboard).
+* **[`nebius/SWE-rebench`](https://huggingface.co/datasets/nebius/SWE-rebench)** – full dataset with **21,302 tasks**, suitable for training or large-scale offline evaluation.
+
+This document explains how to run OpenHands on SWE-rebench, using the leaderboard split as the main example.
+To run on the full dataset, simply replace the dataset name.
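+
+For a quick look at what the harness will consume, you can load the leaderboard dataset with the `datasets` library. This is a minimal sketch; it assumes the standard SWE-bench-style `instance_id` column, so adjust the field name if the schema differs:
+
+```python
+from datasets import load_dataset
+
+# `test` is the split used for leaderboard submission (see "Running Inference" below).
+ds = load_dataset('nebius/SWE-rebench-leaderboard', split='test')
+print(len(ds), 'task instances')
+print(ds[0]['instance_id'])  # assumed SWE-bench-style identifier, e.g. <repo>__<name>
+```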
+
+
+## Setting Up
+
+Set up your development environment and configure your LLM provider by following the [SWE-bench README](README.md) in this directory.
+
+
+## Running Inference
+
+Use the existing SWE-bench inference script, changing the dataset to `nebius/SWE-rebench-leaderboard` and selecting the split (`test` for leaderboard submission):
+
+```bash
+./evaluation/benchmarks/swe_bench/scripts/run_infer.sh \
+    llm.your_llm HEAD CodeActAgent 30 50 1 nebius/SWE-rebench-leaderboard test
+```
+
+Arguments:
+
+* `llm.your_llm` – your model configuration key
+* `HEAD` – commit reference for reproducibility
+* `CodeActAgent` – agent type
+* `30` – number of instances to evaluate
+* `50` – maximum iterations per task (increase if needed)
+* `1` – number of workers
+* `nebius/SWE-rebench-leaderboard` – Hugging Face dataset name
+* `test` – dataset split
+
+**Tip:** To run on the **full 21k dataset**, replace `nebius/SWE-rebench-leaderboard` with `nebius/SWE-rebench`.
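+
+Once inference finishes, `run_infer.sh` writes its results to an `output.jsonl` file. Before converting it (next section), a quick count can confirm that every instance produced a result. This is a minimal sketch that only assumes each JSONL line carries an `instance_id` field; the path below is the same placeholder used in the conversion step.
+
+```python
+import json
+
+# Placeholder path – use the output.jsonl location reported by run_infer.sh.
+output_file = 'path/to/evaluation/output.jsonl'
+with open(output_file) as f:
+    ids = [json.loads(line)['instance_id'] for line in f if line.strip()]
+print(f'{len(ids)} results, {len(set(ids))} unique instance ids')
+```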
+
+
+## Evaluating Results
+
+After inference completes, evaluate using the [SWE-bench-fork evaluation harness](https://github.com/SWE-rebench/SWE-bench-fork).
+
+1. Convert the OpenHands output to SWE-bench evaluation format (a quick sanity check of the converted file is sketched after these steps):
+
+```bash
+python evaluation/benchmarks/swe_bench/scripts/live/convert.py \
+    --output_jsonl path/to/evaluation/output.jsonl > preds.jsonl
+```
+
+2. Clone the [SWE-bench-fork repository](https://github.com/SWE-rebench/SWE-bench-fork) and follow its README to install dependencies.
+
+3. Run the evaluation using the fork:
+
+```bash
+python -m swebench.harness.run_evaluation \
+    --dataset_name nebius/SWE-rebench-leaderboard \
+    --split test \
+    --predictions_path preds.jsonl \
+    --max_workers 10 \
+    --run_id openhands
+```
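+
+Between steps 1 and 3, it can help to sanity-check `preds.jsonl` before launching the harness. A minimal sketch, assuming the converted file follows the standard SWE-bench prediction format (`instance_id`, `model_name_or_path`, `model_patch`):
+
+```python
+import json
+
+with open('preds.jsonl') as f:
+    preds = [json.loads(line) for line in f if line.strip()]
+
+# Instances with an empty patch cannot be resolved by the harness.
+missing = [p['instance_id'] for p in preds if not p.get('model_patch')]
+print(f'{len(preds)} predictions, {len(missing)} with an empty patch')
+```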
+
+
+## Citation
+
+```bibtex
+@article{badertdinov2025swerebench,
+  title={SWE-rebench: An Automated Pipeline for Task Collection and Decontaminated Evaluation of Software Engineering Agents},
+  author={Badertdinov, Ibragim and Golubev, Alexander and Nekrashevich, Maksim and Shevtsov, Anton and Karasik, Simon and Andriushchenko, Andrei and Trofimova, Maria and Litvintseva, Daria and Yangel, Boris},
+  journal={arXiv preprint arXiv:2505.20411},
+  year={2025}
+}
+```
diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py
index 5272c58a19..715e84a354 100644
--- a/evaluation/benchmarks/swe_bench/run_infer.py
+++ b/evaluation/benchmarks/swe_bench/run_infer.py
@@ -80,6 +80,8 @@ def set_dataset_type(dataset_name: str) -> str:
         DATASET_TYPE = 'SWE-Gym'
     elif 'swe-bench-live' in name_lower:
         DATASET_TYPE = 'SWE-bench-Live'
+    elif 'swe-rebench' in name_lower:
+        DATASET_TYPE = 'SWE-rebench'
     elif 'multimodal' in name_lower:
         DATASET_TYPE = 'Multimodal'
     else:
@@ -178,6 +180,8 @@ def get_instance_docker_image(
         docker_image_prefix = 'docker.io/starryzhang/'
     elif DATASET_TYPE == 'SWE-bench':
         docker_image_prefix = 'docker.io/swebench/'
+    elif DATASET_TYPE == 'SWE-rebench':
+        docker_image_prefix = 'docker.io/swerebench/'
     repo, name = instance_id.split('__')
     image_name = f'{docker_image_prefix.rstrip("/")}/sweb.eval.x86_64.{repo}_1776_{name}:latest'.lower()
     logger.debug(f'Using official SWE-Bench image: {image_name}')
@@ -318,6 +322,8 @@ def initialize_runtime(
     # inject the instance swe entry
     if DATASET_TYPE == 'SWE-bench-Live':
         entry_script_path = 'instance_swe_entry_live.sh'
+    elif DATASET_TYPE == 'SWE-rebench':
+        entry_script_path = 'instance_swe_entry_rebench.sh'
     else:
         entry_script_path = 'instance_swe_entry.sh'
     runtime.copy_to(
diff --git a/evaluation/benchmarks/swe_bench/scripts/setup/instance_swe_entry_rebench.sh b/evaluation/benchmarks/swe_bench/scripts/setup/instance_swe_entry_rebench.sh
new file mode 100644
index 0000000000..7ea5d5659b
--- /dev/null
+++ b/evaluation/benchmarks/swe_bench/scripts/setup/instance_swe_entry_rebench.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+
+source ~/.bashrc
+SWEUTIL_DIR=/swe_util
+
+# FIXME: Cannot read SWE_INSTANCE_ID from the environment variable
+# SWE_INSTANCE_ID=django__django-11099
+if [ -z "$SWE_INSTANCE_ID" ]; then
+    echo "Error: SWE_INSTANCE_ID is not set." >&2
+    exit 1
+fi
+
+# Read swe-bench-instance.json and extract the record matching the instance ID
+item=$(jq --arg INSTANCE_ID "$SWE_INSTANCE_ID" '.[] | select(.instance_id == $INSTANCE_ID)' $SWEUTIL_DIR/eval_data/instances/swe-bench-instance.json)
+
+if [[ -z "$item" ]]; then
+    echo "No item found for the provided instance ID."
+    exit 1
+fi
+
+WORKSPACE_NAME=$(echo "$item" | jq -r '(.repo | tostring) + "__" + (.version | tostring) | gsub("/"; "__")')
+
+echo "WORKSPACE_NAME: $WORKSPACE_NAME"
+
+# Clear the workspace
+if [ -d /workspace ]; then
+    rm -rf /workspace/*
+else
+    mkdir /workspace
+fi
+
+# Copy repo to workspace
+if [ -d /workspace/$WORKSPACE_NAME ]; then
+    rm -rf /workspace/$WORKSPACE_NAME
+fi
+mkdir -p /workspace
+cp -r /testbed /workspace/$WORKSPACE_NAME
+
+# Activate instance-specific environment
+if [ -d /opt/miniconda3 ]; then
+    . /opt/miniconda3/etc/profile.d/conda.sh
+    conda activate testbed
+fi
+
+# Make sure the testbed environment's binaries are on PATH (covers images where conda lives under /opt/conda)
+export PATH=/opt/conda/envs/testbed/bin:$PATH
diff --git a/evaluation/utils/shared.py b/evaluation/utils/shared.py
index aeb563240d..12bc3540eb 100644
--- a/evaluation/utils/shared.py
+++ b/evaluation/utils/shared.py
@@ -263,19 +263,20 @@ def prepare_dataset(
             f'Randomly sampling {eval_n_limit} unique instances with random seed 42.'
         )

-    def make_serializable(instance: pd.Series) -> dict:
+    def make_serializable(instance_dict: dict) -> dict:
         import numpy as np

-        instance_dict = instance.to_dict()
         for k, v in instance_dict.items():
             if isinstance(v, np.ndarray):
                 instance_dict[k] = v.tolist()
             elif isinstance(v, pd.Timestamp):
                 instance_dict[k] = str(v)
+            elif isinstance(v, dict):
+                instance_dict[k] = make_serializable(v)
         return instance_dict

     new_dataset = [
-        make_serializable(instance)
+        make_serializable(instance.to_dict())
         for _, instance in dataset.iterrows()
         if str(instance[id_column]) not in finished_ids
     ]
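
For context on the `evaluation/utils/shared.py` change: the new `elif isinstance(v, dict)` branch recursively converts nested dictionaries, which matters when a dataset column itself holds a dict whose values are numpy arrays or pandas timestamps (as SWE-rebench rows appear to). A minimal standalone sketch of the updated helper, exercised on a hypothetical nested row:

```python
import json

import numpy as np
import pandas as pd


def make_serializable(instance_dict: dict) -> dict:
    # Mirrors the updated helper in evaluation/utils/shared.py.
    for k, v in instance_dict.items():
        if isinstance(v, np.ndarray):
            instance_dict[k] = v.tolist()
        elif isinstance(v, pd.Timestamp):
            instance_dict[k] = str(v)
        elif isinstance(v, dict):
            # New branch: recurse into nested dicts so their numpy/timestamp
            # values are converted as well.
            instance_dict[k] = make_serializable(v)
    return instance_dict


# Hypothetical row with a nested dict column.
row = {
    'instance_id': 'repo__pkg-123',
    'created_at': pd.Timestamp('2025-01-01'),
    'meta': {'scores': np.array([0.5, 0.7])},
}
print(json.dumps(make_serializable(row)))  # serializes cleanly with the recursion in place
```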