Add option to run patch evaluation on Modal (#8607)

Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
This commit is contained in:
Ryan H. Tran 2025-05-23 00:45:45 +07:00 committed by GitHub
parent be78cc07bd
commit 3980ba53c9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 20 additions and 2 deletions

View File

@ -158,6 +158,8 @@ The script now accepts optional arguments:
- `instance_id`: Specify a single instance to evaluate (optional)
- `dataset_name`: The name of the dataset to use (default: `"princeton-nlp/SWE-bench_Lite"`)
- `split`: The split of the dataset to use (default: `"test"`)
- `environment`: The environment to use for patch evaluation (default: `"local"`). Set it to `"modal"` to use the
  [official SWE-Bench support](https://github.com/swe-bench/SWE-bench/blob/main/docs/assets/evaluation.md#%EF%B8%8F-evaluation-with-modal) for running the evaluation on Modal.
For example, to evaluate a specific instance with a custom dataset and split:

View File

@ -16,11 +16,19 @@ fi
# Optional positional arguments. Arguments 3-5 fall back to their defaults
# when omitted or passed as an empty string.
INSTANCE_ID=$2
DATASET_NAME=${3:-"princeton-nlp/SWE-bench_Lite"}
SPLIT=${4:-"test"}
ENVIRONMENT=${5:-"local"}

echo "INSTANCE_ID: $INSTANCE_ID"
echo "DATASET_NAME: $DATASET_NAME"
echo "SPLIT: $SPLIT"

# Only 'local' and 'modal' are accepted; anything else aborts the script.
if [[ "$ENVIRONMENT" != "local" && "$ENVIRONMENT" != "modal" ]]; then
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "Error: ENVIRONMENT must be either 'local' or 'modal'" >&2
  exit 1
fi
echo "ENVIRONMENT: $ENVIRONMENT"

# Quote every expansion so a path containing spaces or glob characters is
# passed to each tool as a single argument instead of being word-split.
PROCESS_FILEPATH=$(realpath "$PROCESS_FILEPATH")
FILE_DIR=$(dirname "$PROCESS_FILEPATH")
FILE_NAME=$(basename "$PROCESS_FILEPATH")
@ -78,6 +86,12 @@ echo "=============================================================="
# Timestamp-based identifier for this evaluation run (YYYYmmdd_HHMMSS).
RUN_ID="$(date +'%Y%m%d_%H%M%S')"
# Value handed to --max_workers by the evaluation commands in this script.
N_PROCESS=4

# Extra arguments appended to the evaluation command. Kept as a plain string
# because it is expanded unquoted there, so "--modal true" word-splits into
# two separate arguments and an empty value contributes nothing.
case "$ENVIRONMENT" in
  modal) MODAL_FLAG="--modal true" ;;
  *) MODAL_FLAG="" ;;
esac
if [ -z "$INSTANCE_ID" ]; then
echo "Running SWE-bench evaluation on the whole input file..."
# Default to SWE-Bench-lite
@ -90,7 +104,8 @@ if [ -z "$INSTANCE_ID" ]; then
--timeout 3600 \
--cache_level instance \
--max_workers $N_PROCESS \
--run_id $RUN_ID
--run_id $RUN_ID \
$MODAL_FLAG
# get the "model_name_or_path" from the first line of the SWEBENCH_FORMAT_JSONL
MODEL_NAME_OR_PATH=$(jq -r '.model_name_or_path' $SWEBENCH_FORMAT_JSONL | head -n 1)
@ -137,5 +152,6 @@ else
--instance_ids $INSTANCE_ID \
--cache_level instance \
--max_workers $N_PROCESS \
--run_id $RUN_ID
--run_id $RUN_ID \
$MODAL_FLAG
fi