mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
[fix eval] Fix issues with aider_bench remote runtime evaluation (#5000)
This commit is contained in:
parent
07f0d1ccb3
commit
42b49e6c43
@ -56,6 +56,20 @@ You can update the arguments in the script
|
||||
./evaluation/aider_bench/scripts/run_infer.sh eval_gpt35_turbo HEAD CodeActAgent 100 1 "1,3,10"
|
||||
```
|
||||
|
||||
### Run Inference on `RemoteRuntime` (experimental)
|
||||
|
||||
This is in limited beta. Contact Xingyao over slack if you want to try this out!
|
||||
|
||||
```bash
|
||||
./evaluation/aider_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [eval-num-workers] [eval_ids]
|
||||
|
||||
# Example - This runs evaluation on CodeActAgent for 133 instances on aider_bench test set, with 2 workers running in parallel
|
||||
export ALLHANDS_API_KEY="YOUR-API-KEY"
|
||||
export RUNTIME=remote
|
||||
export SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.eval.all-hands.dev"
|
||||
./evaluation/aider_bench/scripts/run_infer.sh llm.eval HEAD CodeActAgent 133 2
|
||||
```
|
||||
|
||||
## Summarize Results
|
||||
|
||||
```bash
|
||||
|
||||
@ -58,6 +58,9 @@ def get_config(
|
||||
use_host_network=False,
|
||||
timeout=100,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=1800,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -67,6 +67,8 @@ def get_config(
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_runtime_alive=False,
|
||||
timeout=120,
|
||||
remote_runtime_init_timeout=1800,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user