mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
fix yet another swe_bench issue (#2069)
This commit is contained in:
parent
f0271f9f91
commit
2c0a2dbc61
2
.gitignore
vendored
2
.gitignore
vendored
@ -204,7 +204,7 @@ cache
|
||||
# configuration
|
||||
config.toml
|
||||
config.toml.bak
|
||||
evaluation/swe_bench/eval_workspace
|
||||
evaluation/swe_bench/eval_workspace*
|
||||
evaluation/outputs
|
||||
evaluation/evaluation_outputs
|
||||
test_results*
|
||||
|
||||
@ -34,6 +34,6 @@ Run the following command to do the above two steps. The results will be saved t
|
||||
|
||||
```bash
|
||||
pushd evaluation/swe_bench
|
||||
docker build -t ghcr.io/opendevin/eval-swe-bench:full-v1.2 -f ./scripts/docker/Dockerfile.full.v1.1 .
|
||||
docker push ghcr.io/opendevin/eval-swe-bench:full-v1.2
|
||||
docker build -t ghcr.io/opendevin/eval-swe-bench:full-v1.2.1 -f ./scripts/docker/Dockerfile.full.v1.2.1 .
|
||||
docker push ghcr.io/opendevin/eval-swe-bench:full-v1.2.1
|
||||
```
|
||||
|
||||
@ -117,7 +117,7 @@ Before evaluating generated patches, you need to set up the Docker environment.
|
||||
```shell
|
||||
docker run -it \
|
||||
-v DIR_TO_YOUR_PATCH_FILES_ON_HOST:/swe_bench_output \
|
||||
ghcr.io/opendevin/eval-swe-bench:full-v1.2 /bin/bash
|
||||
ghcr.io/opendevin/eval-swe-bench:full-v1.2.1 /bin/bash
|
||||
```
|
||||
|
||||
### Evaluate Model Generated Patches
|
||||
|
||||
@ -15,7 +15,7 @@ In [OpenDevin-SWE-Bench fork](https://github.com/OpenDevin/OD-SWE-bench.git) (mo
|
||||
**We pack everything you need for SWE-Bench evaluation into one gigantic Docker image.** To use it:
|
||||
|
||||
```bash
|
||||
docker pull ghcr.io/opendevin/eval-swe-bench:full-v1.2
|
||||
docker pull ghcr.io/opendevin/eval-swe-bench:full-v1.2.1
|
||||
```
|
||||
|
||||
The Docker image contains several important directories:
|
||||
|
||||
10
evaluation/swe_bench/scripts/docker/Dockerfile.full.v1.2.1
Normal file
10
evaluation/swe_bench/scripts/docker/Dockerfile.full.v1.2.1
Normal file
@ -0,0 +1,10 @@
|
||||
FROM ghcr.io/opendevin/eval-swe-bench:full-v1.2
|
||||
|
||||
# ================== Update OD-SWE-Bench ==================
|
||||
# sync only the `OD-SWE-bench` checkout from the build context's `eval_workspace` into the image; other folders such as `eval_data` or `miniforge3` are not copied
|
||||
# typically, this should be the OD codebase
|
||||
RUN --mount=type=bind,source=./eval_workspace,target=/eval_workspace \
|
||||
rsync -ar --progress /eval_workspace/OD-SWE-bench/ /swe_util/OD-SWE-bench
|
||||
|
||||
# pushd evaluation/swe_bench
|
||||
# docker build -t ghcr.io/opendevin/eval-swe-bench:full-v1.2.1 -f ./scripts/docker/Dockerfile.full.v1.2.1 .
|
||||
@ -26,7 +26,7 @@ docker run --rm \
|
||||
-e OD_SWE_BENCH=/swe_util/OD-SWE-bench \
|
||||
-e EVAL_DATA_DIR=/swe_util/eval_data \
|
||||
-w /swe_util \
|
||||
ghcr.io/opendevin/eval-swe-bench:full-v1.2 \
|
||||
ghcr.io/opendevin/eval-swe-bench:full-v1.2.1 \
|
||||
bash -c "./get_agent_report.sh --output-file /swe_bench_output/$FILE_NAME \
|
||||
--agent-name CodeActAgent \
|
||||
--dataset swe-bench-test-lite \
|
||||
|
||||
@ -12,7 +12,7 @@ from opendevin.runtime.plugins import (
|
||||
PluginRequirement,
|
||||
)
|
||||
|
||||
SWE_BENCH_CONTAINER_IMAGE = 'ghcr.io/opendevin/eval-swe-bench:full-v1.2'
|
||||
SWE_BENCH_CONTAINER_IMAGE = 'ghcr.io/opendevin/eval-swe-bench:full-v1.2.1'
|
||||
|
||||
|
||||
class SWEBenchSSHBox(DockerSSHBox):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user