diff --git a/evaluation/benchmarks/commit0_bench/README.md b/evaluation/benchmarks/commit0_bench/README.md index 9ac3a0e05d..734875b1f7 100644 --- a/evaluation/benchmarks/commit0_bench/README.md +++ b/evaluation/benchmarks/commit0_bench/README.md @@ -73,7 +73,7 @@ ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote SANDBOX_REMOTE_RUNTIME_API_URL="h To clean-up all existing runtime you've already started, run: ```bash -ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/benchmarks/commit0_bench/scripts/cleanup_remote_runtime.sh +ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/utils/scripts/cleanup_remote_runtime.sh ``` ### Specify a subset of tasks to run infer diff --git a/evaluation/benchmarks/swe_bench/README.md b/evaluation/benchmarks/swe_bench/README.md index 7ed1e26881..08ec3427e6 100644 --- a/evaluation/benchmarks/swe_bench/README.md +++ b/evaluation/benchmarks/swe_bench/README.md @@ -80,7 +80,7 @@ ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote SANDBOX_REMOTE_RUNTIME_API_URL="h To clean-up all existing runtime you've already started, run: ```bash -ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/benchmarks/swe_bench/scripts/cleanup_remote_runtime.sh +ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/utils/scripts/cleanup_remote_runtime.sh ``` ### Specify a subset of tasks to run infer @@ -178,7 +178,7 @@ evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh evaluation/evaluati To clean-up all existing runtimes that you've already started, run: ```bash -ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/benchmarks/swe_bench/scripts/cleanup_remote_runtime.sh +ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/utils/scripts/cleanup_remote_runtime.sh ``` ## Visualize Results diff --git a/evaluation/benchmarks/swe_bench/scripts/cleanup_remote_runtime.sh b/evaluation/benchmarks/swe_bench/scripts/cleanup_remote_runtime.sh deleted file mode 100755 index 34685b11ae..0000000000 --- a/evaluation/benchmarks/swe_bench/scripts/cleanup_remote_runtime.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - - -# API base URL -BASE_URL="https://runtime.eval.all-hands.dev" - -# Get the list of runtimes -response=$(curl --silent --location --request GET "${BASE_URL}/list" \ - --header "X-API-Key: ${ALLHANDS_API_KEY}") - -n_runtimes=$(echo $response | jq -r '.total') -echo "Found ${n_runtimes} runtimes. Stopping them..." - -runtime_ids=$(echo $response | jq -r '.runtimes | .[].runtime_id') - -# Function to stop a single runtime -stop_runtime() { - local runtime_id=$1 - local counter=$2 - echo "Stopping runtime ${counter}/${n_runtimes}: ${runtime_id}" - curl --silent --location --request POST "${BASE_URL}/stop" \ - --header "X-API-Key: ${ALLHANDS_API_KEY}" \ - --header "Content-Type: application/json" \ - --data-raw "{\"runtime_id\": \"${runtime_id}\"}" - echo -} -export -f stop_runtime -export BASE_URL ALLHANDS_API_KEY n_runtimes - -# Use GNU Parallel to stop runtimes in parallel -echo "$runtime_ids" | parallel -j 16 --progress stop_runtime {} {#} - -echo "All runtimes have been stopped." diff --git a/evaluation/benchmarks/commit0_bench/scripts/cleanup_remote_runtime.sh b/evaluation/utils/scripts/cleanup_remote_runtime.sh similarity index 100% rename from evaluation/benchmarks/commit0_bench/scripts/cleanup_remote_runtime.sh rename to evaluation/utils/scripts/cleanup_remote_runtime.sh