Fix: Add missing arguments for SSHBox in evaluation (#3075)

* Fix WebArena evaluation script to connect to SSH session

* Update run_infer.py

* Add missing arguments for DockerSSHBox
This commit is contained in:
மனோஜ்குமார் பழனிச்சாமி 2024-07-29 20:39:39 +05:30 committed by GitHub
parent 1eb3bdea95
commit 563ebd406d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 43 additions and 5 deletions

View File

@@ -99,7 +99,14 @@ def process_instance(
# create sandbox and run the agent
# =============================================
-sandbox = DockerSSHBox()
+sandbox = DockerSSHBox(
+config=config.sandbox,
+persist_sandbox=False,
+workspace_mount_path=config.workspace_mount_path,
+sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
+cache_dir=config.cache_dir,
+run_as_devin=config.run_as_devin,
+)
sandbox.execute(f'cd {inst_id}')
init_cmd = instance.init

View File

@@ -173,7 +173,15 @@ def process_instance(
# use a session id for concurrent evaluation
sid = instance['id'] + '_' + str(os.getpid())
-sandbox = DockerSSHBox(sid=sid)
+sandbox = DockerSSHBox(
+config=config.sandbox,
+persist_sandbox=False,
+workspace_mount_path=config.workspace_mount_path,
+sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
+cache_dir=config.cache_dir,
+run_as_devin=config.run_as_devin,
+sid=sid,
+)
exit_code, command_output = sandbox.execute('pip install scitools-pyke')
# Here's how you can run the agent (similar to the `main` function) and get the final task state

View File

@@ -101,7 +101,15 @@ def process_instance(
# use a session id for concurrent processing
sid = instance.task_id + '_' + str(os.getpid())
-sandbox = DockerSSHBox(sid=sid)
+sandbox = DockerSSHBox(
+config=config.sandbox,
+persist_sandbox=False,
+workspace_mount_path=config.workspace_mount_path,
+sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
+cache_dir=config.cache_dir,
+run_as_devin=config.run_as_devin,
+sid=sid,
+)
requirements_host_src = 'evaluation/mint/requirements.txt'
requirements_sandbox_dest = '/opendevin/plugins/mint/requirements.txt'

View File

@@ -112,7 +112,15 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
# Create a sandbox, using the instance ID and PID as the session ID to avoid conflicts
sid = str(instance['id']) + '_' + str(os.getpid())
-sandbox = DockerSSHBox(sid=sid)
+sandbox = DockerSSHBox(
+config=config.sandbox,
+persist_sandbox=False,
+workspace_mount_path=config.workspace_mount_path,
+sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
+cache_dir=config.cache_dir,
+run_as_devin=config.run_as_devin,
+sid=sid,
+)
# Set up the task environment
sandbox.execute(f'conda activate {ID2CONDA[instance["github_id"]]}')

View File

@@ -34,7 +34,14 @@ docker_ssh_box: DockerSSHBox | None = None
def get_sandbox():
global docker_ssh_box
if docker_ssh_box is None:
-docker_ssh_box = DockerSSHBox()
+docker_ssh_box = DockerSSHBox(
+config=config.sandbox,
+persist_sandbox=False,
+workspace_mount_path=config.workspace_mount_path,
+sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
+cache_dir=config.cache_dir,
+run_as_devin=config.run_as_devin,
+)
return docker_ssh_box