diff --git a/evaluation/benchmarks/commit0_bench/README.md b/evaluation/benchmarks/commit0_bench/README.md
index 9ac3a0e05d..734875b1f7 100644
--- a/evaluation/benchmarks/commit0_bench/README.md
+++ b/evaluation/benchmarks/commit0_bench/README.md
@@ -73,7 +73,7 @@ ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote SANDBOX_REMOTE_RUNTIME_API_URL="h
 To clean-up all existing runtime you've already started, run:
 
 ```bash
-ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/benchmarks/commit0_bench/scripts/cleanup_remote_runtime.sh
+ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/utils/scripts/cleanup_remote_runtime.sh
 ```
 
 ### Specify a subset of tasks to run infer
diff --git a/evaluation/benchmarks/swe_bench/README.md b/evaluation/benchmarks/swe_bench/README.md
index 7ed1e26881..08ec3427e6 100644
--- a/evaluation/benchmarks/swe_bench/README.md
+++ b/evaluation/benchmarks/swe_bench/README.md
@@ -80,7 +80,7 @@ ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote SANDBOX_REMOTE_RUNTIME_API_URL="h
 To clean-up all existing runtime you've already started, run:
 
 ```bash
-ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/benchmarks/swe_bench/scripts/cleanup_remote_runtime.sh
+ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/utils/scripts/cleanup_remote_runtime.sh
 ```
 
 ### Specify a subset of tasks to run infer
@@ -178,7 +178,7 @@ evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh evaluation/evaluati
 To clean-up all existing runtimes that you've already started, run:
 
 ```bash
-ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/benchmarks/swe_bench/scripts/cleanup_remote_runtime.sh
+ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/utils/scripts/cleanup_remote_runtime.sh
 ```
 
 ## Visualize Results
diff --git a/evaluation/benchmarks/swe_bench/scripts/cleanup_remote_runtime.sh b/evaluation/benchmarks/swe_bench/scripts/cleanup_remote_runtime.sh
deleted file mode 100755
index 34685b11ae..0000000000
--- a/evaluation/benchmarks/swe_bench/scripts/cleanup_remote_runtime.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/bash
-
-
-# API base URL
-BASE_URL="https://runtime.eval.all-hands.dev"
-
-# Get the list of runtimes
-response=$(curl --silent --location --request GET "${BASE_URL}/list" \
-  --header "X-API-Key: ${ALLHANDS_API_KEY}")
-
-n_runtimes=$(echo $response | jq -r '.total')
-echo "Found ${n_runtimes} runtimes. Stopping them..."
-
-runtime_ids=$(echo $response | jq -r '.runtimes | .[].runtime_id')
-
-# Function to stop a single runtime
-stop_runtime() {
-  local runtime_id=$1
-  local counter=$2
-  echo "Stopping runtime ${counter}/${n_runtimes}: ${runtime_id}"
-  curl --silent --location --request POST "${BASE_URL}/stop" \
-    --header "X-API-Key: ${ALLHANDS_API_KEY}" \
-    --header "Content-Type: application/json" \
-    --data-raw "{\"runtime_id\": \"${runtime_id}\"}"
-  echo
-}
-export -f stop_runtime
-export BASE_URL ALLHANDS_API_KEY n_runtimes
-
-# Use GNU Parallel to stop runtimes in parallel
-echo "$runtime_ids" | parallel -j 16 --progress stop_runtime {} {#}
-
-echo "All runtimes have been stopped."
diff --git a/evaluation/benchmarks/commit0_bench/scripts/cleanup_remote_runtime.sh b/evaluation/utils/scripts/cleanup_remote_runtime.sh
similarity index 100%
rename from evaluation/benchmarks/commit0_bench/scripts/cleanup_remote_runtime.sh
rename to evaluation/utils/scripts/cleanup_remote_runtime.sh