From f9088766e826e208195345a7fcde4920a87df3dd Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Sun, 25 Aug 2024 19:05:41 -0400 Subject: [PATCH] Allow setting of runtime container image (#3573) * Add runtime container image setting * Fix typo in test * Fix sandbox base container image * Update variables * Update to base_container_image * Update tests/unit/test_config.py Co-authored-by: Xingyao Wang * Fixed eval * Fixed container_image * Fix typo --------- Co-authored-by: Xingyao Wang --- .github/workflows/ghcr_runtime.yml | 4 +-- config.template.toml | 2 +- .../current/usage/custom_sandbox_guide.md | 8 +++--- .../current/usage/custom_sandbox_guide.md | 8 +++--- .../usage/how-to/custom-sandbox-guide.md | 6 ++--- .../usage/how-to/evaluation-harness.md | 2 +- docs/static/img/backend_architecture.puml | 2 +- docs/static/img/backend_architecture.svg | 2 +- evaluation/EDA/run_infer.py | 2 +- evaluation/agent_bench/run_infer.py | 2 +- evaluation/aider_bench/run_infer.py | 2 +- evaluation/biocoder/run_infer.py | 2 +- evaluation/bird/run_infer.py | 2 +- evaluation/browsing_delegation/run_infer.py | 2 +- evaluation/gaia/run_infer.py | 2 +- evaluation/gorilla/run_infer.py | 2 +- evaluation/gpqa/run_infer.py | 2 +- evaluation/humanevalfix/run_infer.py | 2 +- evaluation/logic_reasoning/run_infer.py | 2 +- evaluation/miniwob/run_infer.py | 2 +- evaluation/mint/run_infer.py | 2 +- evaluation/ml_bench/run_infer.py | 2 +- evaluation/swe_bench/run_infer.py | 6 ++--- evaluation/toolqa/run_infer.py | 2 +- evaluation/webarena/run_infer.py | 2 +- openhands/core/config.py | 8 ++++-- openhands/core/schema/config.py | 2 +- openhands/runtime/client/runtime.py | 27 ++++++++++--------- tests/integration/regenerate.sh | 10 +++---- tests/runtime/conftest.py | 13 +++++---- tests/runtime/test_browsing.py | 2 +- tests/runtime/test_images.py | 24 ++++++++++------- tests/unit/test_config.py | 16 +++++------ 33 files changed, 92 insertions(+), 82 deletions(-) diff --git 
a/.github/workflows/ghcr_runtime.yml b/.github/workflows/ghcr_runtime.yml index 660e7a4788..b890c7991f 100644 --- a/.github/workflows/ghcr_runtime.yml +++ b/.github/workflows/ghcr_runtime.yml @@ -113,7 +113,7 @@ jobs: TEST_RUNTIME=eventstream \ SANDBOX_USER_ID=$(id -u) \ - SANDBOX_CONTAINER_IMAGE=$image_name \ + SANDBOX_BASE_CONTAINER_IMAGE=$image_name \ TEST_IN_CI=true \ poetry run pytest --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime - name: Upload coverage to Codecov @@ -149,7 +149,7 @@ jobs: TEST_RUNTIME=eventstream \ SANDBOX_USER_ID=$(id -u) \ - SANDBOX_CONTAINER_IMAGE=$image_name \ + SANDBOX_BASE_CONTAINER_IMAGE=$image_name \ TEST_IN_CI=true \ TEST_ONLY=true \ ./tests/integration/regenerate.sh diff --git a/config.template.toml b/config.template.toml index ca9f66c522..54fbbf2090 100644 --- a/config.template.toml +++ b/config.template.toml @@ -174,7 +174,7 @@ llm_config = 'gpt3' #user_id = 1000 # Container image to use for the sandbox -#container_image = "nikolaik/python-nodejs:python3.11-nodejs22" +#base_container_image = "nikolaik/python-nodejs:python3.11-nodejs22" # Use host network #use_host_network = false diff --git a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md index a0575b2cf3..e18463c977 100644 --- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md +++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md @@ -42,10 +42,10 @@ Créez un fichier ```config.toml``` dans le répertoire OpenHands et entrez ces [core] workspace_base="./workspace" run_as_openhands=true -sandbox_container_image="image_personnalisée" +sandbox_base_container_image="image_personnalisée" ``` -> Assurez-vous que ```sandbox_container_image``` est défini sur le nom de votre image personnalisée précédente. 
+> Assurez-vous que ```sandbox_base_container_image``` est défini sur le nom de votre image personnalisée précédente. ## Exécution @@ -61,7 +61,7 @@ Félicitations ! Le code pertinent est défini dans [ssh_box.py](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/ssh_box.py) et [image_agnostic_util.py](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py). -En particulier, ssh_box.py vérifie l'objet config pour ```config.sandbox_container_image``` et ensuite tente de récupérer l'image à l'aide de [get_od_sandbox_image](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py#L72), qui est défini dans image_agnostic_util.py. +En particulier, ssh_box.py vérifie l'objet config pour ```config.sandbox.base_container_image``` et ensuite tente de récupérer l'image à l'aide de [get_od_sandbox_image](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py#L72), qui est défini dans image_agnostic_util.py. Lorsqu'une image personnalisée est utilisée pour la première fois, elle ne sera pas trouvée et donc elle sera construite (à l'exécution ultérieure, l'image construite sera trouvée et renvoyée). 
@@ -92,7 +92,7 @@ Si vous voyez cette erreur dans la sortie de la console, il s'agit du fait que O [core] workspace_base="./workspace" run_as_openhands=true -sandbox_container_image="image_personnalisée" +sandbox_base_container_image="image_personnalisée" sandbox_user_id="1001" ``` diff --git a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md index de1a4f352b..7ef5bde8ed 100644 --- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md +++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md @@ -41,10 +41,10 @@ docker build -t custom_image . [core] workspace_base="./workspace" run_as_openhands=true -sandbox_container_image="custom_image" +sandbox_base_container_image="custom_image" ``` -> 确保 `sandbox_container_image` 设置为您前一步中自定义映像的名称。 +> 确保 `sandbox_base_container_image` 设置为您前一步中自定义映像的名称。 ## 运行 @@ -60,7 +60,7 @@ sandbox_container_image="custom_image" 相关代码定义在 [ssh_box.py](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/ssh_box.py) 和 [image_agnostic_util.py](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py) 中。 -特别是 ssh_box.py 检查配置对象中的 ```config.sandbox_container_image```,然后尝试使用 [get_od_sandbox_image](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py#L72),在 image_agnostic_util.py 定义中进行检索。 +特别是 ssh_box.py 检查配置对象中的 ```config.sandbox.base_container_image```,然后尝试使用 [get_od_sandbox_image](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py#L72),在 image_agnostic_util.py 定义中进行检索。 初次使用自定义映像时,该映像将不会被找到,因此将被构建(在后续运行中已构建的映像将被查找并返回)。 @@ -92,7 +92,7 @@ dockerfile_content = ( [core] workspace_base="./workspace" run_as_openhands=true -sandbox_container_image="custom_image" +sandbox_base_container_image="custom_image" 
sandbox_user_id="1001" ``` diff --git a/docs/modules/usage/how-to/custom-sandbox-guide.md b/docs/modules/usage/how-to/custom-sandbox-guide.md index 45d7423401..9bf0855b36 100644 --- a/docs/modules/usage/how-to/custom-sandbox-guide.md +++ b/docs/modules/usage/how-to/custom-sandbox-guide.md @@ -67,10 +67,10 @@ Create a `config.toml` file in the OpenHands directory and enter these contents: [core] workspace_base="./workspace" run_as_openhands=true -sandbox_container_image="custom_image" +sandbox_base_container_image="custom_image" ``` -For `sandbox_container_image`, you can specify either: +For `sandbox_base_container_image`, you can specify either: 1. The name of your custom image that you built in the previous step (e.g., `”custom_image”`) 2. A pre-existing image from Docker Hub (e.g., `”node:20”` if you want a sandbox with Node.js pre-installed) @@ -98,7 +98,7 @@ If you see this error in the console output it is because OpenHands is trying to [core] workspace_base="./workspace" run_as_openhands=true -sandbox_container_image="custom_image" +sandbox_base_container_image="custom_image" sandbox_user_id="1001" ``` diff --git a/docs/modules/usage/how-to/evaluation-harness.md b/docs/modules/usage/how-to/evaluation-harness.md index 0be253c74b..26ac120d1c 100644 --- a/docs/modules/usage/how-to/evaluation-harness.md +++ b/docs/modules/usage/how-to/evaluation-harness.md @@ -116,7 +116,7 @@ To create an evaluation workflow for your benchmark, follow these steps: runtime='eventstream', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image='your_container_image', + base_container_image='your_container_image', enable_auto_lint=True, timeout=300, ), diff --git a/docs/static/img/backend_architecture.puml b/docs/static/img/backend_architecture.puml index d85242c749..9f5564a52a 100644 --- a/docs/static/img/backend_architecture.puml +++ b/docs/static/img/backend_architecture.puml @@ -135,7 +135,7 @@ class openhands.sandbox.sandbox.DockerInteractive { 
workspace_dir: None workspace_dir: None timeout: int - container_image: None + base_container_image: None container_name: None } class openhands.observation.UserMessageObservation { diff --git a/docs/static/img/backend_architecture.svg b/docs/static/img/backend_architecture.svg index bef2dbd8ca..a5fc173d5a 100644 --- a/docs/static/img/backend_architecture.svg +++ b/docs/static/img/backend_architecture.svg @@ -1 +1 @@ -openhandsactionagentbasebashbrowsefileoptasksobservationagentllm.llmcontrolleragent_controllercommand_managerplanstatesandbox.sandboxserver.sessionAgentEchoActioncontent: strrunnable: boolaction: strAgentFinishActionrunnable: boolaction: strAgentRecallActionquery: straction: strAgentSummarizeActionsummary: straction: strAgentThinkActionthought: strrunnable: boolaction: strExecutableActionNotExecutableActionActionNullActionaction: strCmdKillActionid: intaction: strCmdRunActioncommand: strbackground: boolaction: strBrowseURLActionurl: straction: strFileReadActionpath: straction: strFileWriteActionpath: strcontents: straction: strAddTaskActionparent: strgoal: strsubtasks: listaction: strModifyTaskActionid: strstate: straction: strAgentMessageObservationrole: strobservation: strAgentRecallObservationmemories: List[str]role: strobservation: strObservationcontent: strBrowserOutputObservationurl: strstatus_code: interror: boolobservation: strFileReadObservationpath: strobservation: strFileWriteObservationpath: strobservation: strAgentErrorObservationobservation: strNullObservationobservation: strCmdOutputObservationcommand_id: intcommand: strexit_code: intobservation: strUserMessageObservationrole: strobservation: strAgent_registry: Dict[str, Type[Agent]]llm: LLM_complete: NoneLLMmodel: Noneapi_key: Nonebase_url: None_debug_dir: None_debug_idx: None_debug_id: None_completion: NoneAgentControlleragent: Agentmax_iterations: intworkdir: strcommand_manager: CommandManagerstate: Stateplan: Plancallbacks: List[Callable]CommandManagerdirectory: Noneshell: 
NonePlanmain_goal: strtask: Taskmain_goal: strtask: NoneTaskid: strgoal: strparent: Task | Nonesubtasks: List[Task]id: Noneid: Noneparent: Nonegoal: strsubtasks: NoneStateplan: Planiteration: intbackground_commands_obs: List[CmdOutputObservation]history: List[Tuple[Action, Observation]]updated_info: List[Tuple[Action, Observation]]DockerInteractivebackground_commands: Dict[int, BackgroundCommand]instance_id: Noneinstance_id: Noneworkspace_dir: Noneworkspace_dir: Noneworkspace_dir: Nonetimeout: intcontainer_image: Nonecontainer_name: NoneBackgroundCommandSessionwebsocket: Nonecontroller: Optional[AgentController]agent: Optional[Agent]agent_task: NoneBased on f3fda42; Generated bypy2puml +openhandsactionagentbasebashbrowsefileoptasksobservationagentllm.llmcontrolleragent_controllercommand_managerplanstatesandbox.sandboxserver.sessionAgentEchoActioncontent: strrunnable: boolaction: strAgentFinishActionrunnable: boolaction: strAgentRecallActionquery: straction: strAgentSummarizeActionsummary: straction: strAgentThinkActionthought: strrunnable: boolaction: strExecutableActionNotExecutableActionActionNullActionaction: strCmdKillActionid: intaction: strCmdRunActioncommand: strbackground: boolaction: strBrowseURLActionurl: straction: strFileReadActionpath: straction: strFileWriteActionpath: strcontents: straction: strAddTaskActionparent: strgoal: strsubtasks: listaction: strModifyTaskActionid: strstate: straction: strAgentMessageObservationrole: strobservation: strAgentRecallObservationmemories: List[str]role: strobservation: strObservationcontent: strBrowserOutputObservationurl: strstatus_code: interror: boolobservation: strFileReadObservationpath: strobservation: strFileWriteObservationpath: strobservation: strAgentErrorObservationobservation: strNullObservationobservation: strCmdOutputObservationcommand_id: intcommand: strexit_code: intobservation: strUserMessageObservationrole: strobservation: strAgent_registry: Dict[str, Type[Agent]]llm: LLM_complete: NoneLLMmodel: 
Noneapi_key: Nonebase_url: None_debug_dir: None_debug_idx: None_debug_id: None_completion: NoneAgentControlleragent: Agentmax_iterations: intworkdir: strcommand_manager: CommandManagerstate: Stateplan: Plancallbacks: List[Callable]CommandManagerdirectory: Noneshell: NonePlanmain_goal: strtask: Taskmain_goal: strtask: NoneTaskid: strgoal: strparent: Task | Nonesubtasks: List[Task]id: Noneid: Noneparent: Nonegoal: strsubtasks: NoneStateplan: Planiteration: intbackground_commands_obs: List[CmdOutputObservation]history: List[Tuple[Action, Observation]]updated_info: List[Tuple[Action, Observation]]DockerInteractivebackground_commands: Dict[int, BackgroundCommand]instance_id: Noneinstance_id: Noneworkspace_dir: Noneworkspace_dir: Noneworkspace_dir: Nonetimeout: intbase_container_image: Nonecontainer_name: NoneBackgroundCommandSessionwebsocket: Nonecontroller: Optional[AgentController]agent: Optional[Agent]agent_task: NoneBased on f3fda42; Generated bypy2puml diff --git a/evaluation/EDA/run_infer.py b/evaluation/EDA/run_infer.py index 9abbf9f424..01c7bfa418 100644 --- a/evaluation/EDA/run_infer.py +++ b/evaluation/EDA/run_infer.py @@ -62,7 +62,7 @@ def get_config( runtime='eventstream', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image='python:3.11-bookworm', + base_container_image='python:3.11-bookworm', enable_auto_lint=False, use_host_network=False, ), diff --git a/evaluation/agent_bench/run_infer.py b/evaluation/agent_bench/run_infer.py index 63948a3c83..a83b3b8780 100644 --- a/evaluation/agent_bench/run_infer.py +++ b/evaluation/agent_bench/run_infer.py @@ -44,7 +44,7 @@ def get_config( runtime='eventstream', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image='python:3.11-bookworm', + base_container_image='python:3.11-bookworm', enable_auto_lint=True, use_host_network=False, ), diff --git a/evaluation/aider_bench/run_infer.py b/evaluation/aider_bench/run_infer.py index 6db46b180f..6bfc9a6360 100644 --- 
a/evaluation/aider_bench/run_infer.py +++ b/evaluation/aider_bench/run_infer.py @@ -42,7 +42,7 @@ def get_config( runtime='eventstream', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image='python:3.11-bookworm', + base_container_image='python:3.11-bookworm', enable_auto_lint=True, use_host_network=False, timeout=100, diff --git a/evaluation/biocoder/run_infer.py b/evaluation/biocoder/run_infer.py index 29e8bda37a..5252001ad1 100644 --- a/evaluation/biocoder/run_infer.py +++ b/evaluation/biocoder/run_infer.py @@ -62,7 +62,7 @@ def get_config( runtime='eventstream', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image=BIOCODER_BENCH_CONTAINER_IMAGE, + base_container_image=BIOCODER_BENCH_CONTAINER_IMAGE, enable_auto_lint=True, use_host_network=False, ), diff --git a/evaluation/bird/run_infer.py b/evaluation/bird/run_infer.py index 1c3666638d..a48e272b72 100644 --- a/evaluation/bird/run_infer.py +++ b/evaluation/bird/run_infer.py @@ -75,7 +75,7 @@ def get_config( runtime='eventstream', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image='python:3.11-bookworm', + base_container_image='python:3.11-bookworm', enable_auto_lint=True, use_host_network=False, ), diff --git a/evaluation/browsing_delegation/run_infer.py b/evaluation/browsing_delegation/run_infer.py index 746ccd1a2a..40cfa20cc9 100644 --- a/evaluation/browsing_delegation/run_infer.py +++ b/evaluation/browsing_delegation/run_infer.py @@ -40,7 +40,7 @@ def get_config( runtime='eventstream', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image='python:3.11-bookworm', + base_container_image='python:3.11-bookworm', enable_auto_lint=False, use_host_network=False, ), diff --git a/evaluation/gaia/run_infer.py b/evaluation/gaia/run_infer.py index b3330632c3..ba9773e339 100644 --- a/evaluation/gaia/run_infer.py +++ b/evaluation/gaia/run_infer.py @@ -51,7 +51,7 @@ def get_config( runtime='eventstream', 
max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image='python:3.11-bookworm', + base_container_image='python:3.11-bookworm', enable_auto_lint=True, use_host_network=False, ), diff --git a/evaluation/gorilla/run_infer.py b/evaluation/gorilla/run_infer.py index 94f14f0a58..7f17e4235c 100644 --- a/evaluation/gorilla/run_infer.py +++ b/evaluation/gorilla/run_infer.py @@ -43,7 +43,7 @@ def get_config( runtime='eventstream', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image='python:3.11-bookworm', + base_container_image='python:3.11-bookworm', enable_auto_lint=True, use_host_network=False, ), diff --git a/evaluation/gpqa/run_infer.py b/evaluation/gpqa/run_infer.py index 0d085cb95e..ce4fa7de23 100644 --- a/evaluation/gpqa/run_infer.py +++ b/evaluation/gpqa/run_infer.py @@ -65,7 +65,7 @@ def get_config( runtime='eventstream', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image='python:3.11-bookworm', + base_container_image='python:3.11-bookworm', enable_auto_lint=True, use_host_network=False, ), diff --git a/evaluation/humanevalfix/run_infer.py b/evaluation/humanevalfix/run_infer.py index 6b0adc569e..3e115b67b5 100644 --- a/evaluation/humanevalfix/run_infer.py +++ b/evaluation/humanevalfix/run_infer.py @@ -86,7 +86,7 @@ def get_config( runtime='eventstream', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image='python:3.11-bookworm', + base_container_image='python:3.11-bookworm', enable_auto_lint=True, use_host_network=False, ), diff --git a/evaluation/logic_reasoning/run_infer.py b/evaluation/logic_reasoning/run_infer.py index d74e28aefa..83e8642d4f 100644 --- a/evaluation/logic_reasoning/run_infer.py +++ b/evaluation/logic_reasoning/run_infer.py @@ -49,7 +49,7 @@ def get_config( runtime='eventstream', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image='xingyaoww/od-eval-logic-reasoning:v1.0', + 
base_container_image='xingyaoww/od-eval-logic-reasoning:v1.0', enable_auto_lint=True, use_host_network=False, runtime_extra_deps='$OD_INTERPRETER_PATH -m pip install scitools-pyke', diff --git a/evaluation/miniwob/run_infer.py b/evaluation/miniwob/run_infer.py index 2ffa8e4a4a..5362ac6f5c 100644 --- a/evaluation/miniwob/run_infer.py +++ b/evaluation/miniwob/run_infer.py @@ -49,7 +49,7 @@ def get_config( runtime='eventstream', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image='xingyaoww/od-eval-miniwob:v1.0', + base_container_image='xingyaoww/od-eval-miniwob:v1.0', enable_auto_lint=True, use_host_network=False, browsergym_eval_env=env_id, diff --git a/evaluation/mint/run_infer.py b/evaluation/mint/run_infer.py index 3e3228f9f4..081ab76370 100644 --- a/evaluation/mint/run_infer.py +++ b/evaluation/mint/run_infer.py @@ -101,7 +101,7 @@ def get_config( runtime='eventstream', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image='xingyaoww/od-eval-mint:v1.0', + base_container_image='xingyaoww/od-eval-mint:v1.0', enable_auto_lint=True, use_host_network=False, runtime_extra_deps=f'$OD_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}', diff --git a/evaluation/ml_bench/run_infer.py b/evaluation/ml_bench/run_infer.py index f73142fd2c..a927aba69d 100644 --- a/evaluation/ml_bench/run_infer.py +++ b/evaluation/ml_bench/run_infer.py @@ -80,7 +80,7 @@ def get_config( runtime='eventstream', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image='public.ecr.aws/i5g0m1f6/ml-bench', + base_container_image='public.ecr.aws/i5g0m1f6/ml-bench', enable_auto_lint=True, use_host_network=False, ), diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py index e0b4d97a48..d368533c38 100644 --- a/evaluation/swe_bench/run_infer.py +++ b/evaluation/swe_bench/run_infer.py @@ -93,9 +93,9 @@ def get_config( SWE_BENCH_CONTAINER_IMAGE = 
'ghcr.io/opendevin/eval-swe-bench:full-v1.2.1' if USE_INSTANCE_IMAGE: # We use a different instance image for the each instance of swe-bench eval - container_image = 'sweb.eval.x86_64.' + instance['instance_id'] + base_container_image = 'sweb.eval.x86_64.' + instance['instance_id'] else: - container_image = SWE_BENCH_CONTAINER_IMAGE + base_container_image = SWE_BENCH_CONTAINER_IMAGE config = AppConfig( default_agent=metadata.agent_class, @@ -104,7 +104,7 @@ def get_config( max_budget_per_task=4, max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image=container_image, + base_container_image=base_container_image, enable_auto_lint=True, use_host_network=False, # large enough timeout, since some testcases take very long to run diff --git a/evaluation/toolqa/run_infer.py b/evaluation/toolqa/run_infer.py index 8b5906c0e5..7398354267 100644 --- a/evaluation/toolqa/run_infer.py +++ b/evaluation/toolqa/run_infer.py @@ -45,7 +45,7 @@ def get_config( runtime='eventstream', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image='python:3.11-bookworm', + base_container_image='python:3.11-bookworm', enable_auto_lint=True, use_host_network=False, ), diff --git a/evaluation/webarena/run_infer.py b/evaluation/webarena/run_infer.py index 5782f6bf0f..53dfc7e431 100644 --- a/evaluation/webarena/run_infer.py +++ b/evaluation/webarena/run_infer.py @@ -54,7 +54,7 @@ def get_config( runtime='eventstream', max_iterations=metadata.max_iterations, sandbox=SandboxConfig( - container_image='python:3.11-bookworm', + base_container_image='python:3.11-bookworm', enable_auto_lint=True, use_host_network=False, browsergym_eval_env=env_id, diff --git a/openhands/core/config.py b/openhands/core/config.py index caa7a838f5..97343df1a1 100644 --- a/openhands/core/config.py +++ b/openhands/core/config.py @@ -179,7 +179,8 @@ class SandboxConfig(metaclass=Singleton): Attributes: api_hostname: The hostname for the EventStream Runtime API. 
- container_image: The container image to use for the sandbox. + base_container_image: The base container image from which to build the runtime image. + runtime_container_image: The runtime container image to use. user_id: The user ID for the sandbox. timeout: The timeout for the sandbox. enable_auto_lint: Whether to enable auto-lint. @@ -199,7 +200,10 @@ class SandboxConfig(metaclass=Singleton): """ api_hostname: str = 'localhost' - container_image: str = 'nikolaik/python-nodejs:python3.11-nodejs22' # default to nikolaik/python-nodejs:python3.11-nodejs22 for eventstream runtime + base_container_image: str | None = ( + 'nikolaik/python-nodejs:python3.11-nodejs22' # default to nikolaik/python-nodejs:python3.11-nodejs22 for eventstream runtime + ) + runtime_container_image: str | None = None user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000 timeout: int = 120 enable_auto_lint: bool = ( diff --git a/openhands/core/schema/config.py b/openhands/core/schema/config.py index b1b19f4b23..463466c488 100644 --- a/openhands/core/schema/config.py +++ b/openhands/core/schema/config.py @@ -22,7 +22,7 @@ class ConfigType(str, Enum): CACHE_DIR = 'CACHE_DIR' LLM_MODEL = 'LLM_MODEL' CONFIRMATION_MODE = 'CONFIRMATION_MODE' - SANDBOX_CONTAINER_IMAGE = 'SANDBOX_CONTAINER_IMAGE' + BASE_CONTAINER_IMAGE = 'BASE_CONTAINER_IMAGE' RUN_AS_OPENHANDS = 'RUN_AS_OPENHANDS' LLM_EMBEDDING_MODEL = 'LLM_EMBEDDING_MODEL' LLM_EMBEDDING_BASE_URL = 'LLM_EMBEDDING_BASE_URL' diff --git a/openhands/runtime/client/runtime.py b/openhands/runtime/client/runtime.py index 55e01d3afb..64519c9a51 100644 --- a/openhands/runtime/client/runtime.py +++ b/openhands/runtime/client/runtime.py @@ -104,7 +104,6 @@ class EventStreamRuntime(Runtime): event_stream: EventStream, sid: str = 'default', plugins: list[PluginRequirement] | None = None, - container_image: str | None = None, ): super().__init__( config, event_stream, sid, plugins @@ -118,11 +117,8 @@ class EventStreamRuntime(Runtime): ) # TODO: We can 
switch to aiodocker when `get_od_sandbox_image` is updated to use aiodocker self.docker_client: docker.DockerClient = self._init_docker_client() - self.container_image = ( - self.config.sandbox.container_image - if container_image is None - else container_image - ) + self.base_container_image = self.config.sandbox.base_container_image + self.runtime_container_image = self.config.sandbox.runtime_container_image self.container_name = self.container_name_prefix + self.instance_id self.container = None @@ -140,11 +136,16 @@ class EventStreamRuntime(Runtime): f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}' ) - self.container_image = build_runtime_image( - self.container_image, - self.runtime_builder, - extra_deps=self.config.sandbox.runtime_extra_deps, - ) + if self.runtime_container_image is None: + if self.base_container_image is None: + raise ValueError( + 'Neither runtime container image nor base container image is set' + ) + self.runtime_container_image = build_runtime_image( + self.base_container_image, + self.runtime_builder, + extra_deps=self.config.sandbox.runtime_extra_deps, + ) self.container = await self._init_container( self.sandbox_workspace_dir, mount_dir=self.config.workspace_mount_path, @@ -181,7 +182,7 @@ class EventStreamRuntime(Runtime): ): try: logger.info( - f'Starting container with image: {self.container_image} and name: {self.container_name}' + f'Starting container with image: {self.runtime_container_image} and name: {self.container_name}' ) plugin_arg = '' if plugins is not None and len(plugins) > 0: @@ -215,7 +216,7 @@ class EventStreamRuntime(Runtime): else: browsergym_arg = '' container = self.docker_client.containers.run( - self.container_image, + self.runtime_container_image, command=( f'/openhands/miniforge3/bin/mamba run --no-capture-output -n base ' 'PYTHONUNBUFFERED=1 poetry run ' diff --git a/tests/integration/regenerate.sh b/tests/integration/regenerate.sh index 
2d8510d31e..00285b82c1 100755 --- a/tests/integration/regenerate.sh +++ b/tests/integration/regenerate.sh @@ -57,8 +57,8 @@ mkdir -p $WORKSPACE_BASE # use environmental variable if exists TEST_RUNTIME="${TEST_RUNTIME:-eventstream}" -if [ -z "$SANDBOX_CONTAINER_IMAGE" ]; then - SANDBOX_CONTAINER_IMAGE="nikolaik/python-nodejs:python3.11-nodejs22" +if [ -z "$SANDBOX_BASE_CONTAINER_IMAGE" ]; then + SANDBOX_BASE_CONTAINER_IMAGE="nikolaik/python-nodejs:python3.11-nodejs22" fi MAX_ITERATIONS=15 @@ -114,7 +114,7 @@ run_test() { MAX_ITERATIONS=$MAX_ITERATIONS \ DEFAULT_AGENT=$agent \ TEST_RUNTIME="$TEST_RUNTIME" \ - SANDBOX_CONTAINER_IMAGE="$SANDBOX_CONTAINER_IMAGE" \ + SANDBOX_BASE_CONTAINER_IMAGE="$SANDBOX_BASE_CONTAINER_IMAGE" \ $pytest_cmd 2>&1 | tee $TMP_FILE # Capture the exit code of pytest @@ -185,7 +185,7 @@ regenerate_without_llm() { FORCE_APPLY_PROMPTS=true \ DEFAULT_AGENT=$agent \ TEST_RUNTIME="$TEST_RUNTIME" \ - SANDBOX_CONTAINER_IMAGE="$SANDBOX_CONTAINER_IMAGE" \ + SANDBOX_BASE_CONTAINER_IMAGE="$SANDBOX_BASE_CONTAINER_IMAGE" \ poetry run pytest -s $SCRIPT_DIR/test_agent.py::$test_name set +x } @@ -212,7 +212,7 @@ regenerate_with_llm() { WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \ DEFAULT_AGENT=$agent \ RUNTIME="$TEST_RUNTIME" \ - SANDBOX_CONTAINER_IMAGE="$SANDBOX_CONTAINER_IMAGE" \ + SANDBOX_BASE_CONTAINER_IMAGE="$SANDBOX_BASE_CONTAINER_IMAGE" \ poetry run python "$PROJECT_ROOT/openhands/core/main.py" \ -i $MAX_ITERATIONS \ -t "$task Do not ask me for confirmation at any point." 
\ diff --git a/tests/runtime/conftest.py b/tests/runtime/conftest.py index 1c571dae68..52e810bd81 100644 --- a/tests/runtime/conftest.py +++ b/tests/runtime/conftest.py @@ -61,9 +61,9 @@ def enable_auto_lint(request): @pytest.fixture(scope='module', params=None) -def container_image(request): +def base_container_image(request): time.sleep(1) - env_image = os.environ.get('SANDBOX_CONTAINER_IMAGE') + env_image = os.environ.get('BASE_CONTAINER_IMAGE') if env_image: request.param = env_image else: @@ -95,11 +95,12 @@ async def _load_runtime( box_class, run_as_openhands: bool = True, enable_auto_lint: bool = False, - container_image: str | None = None, + base_container_image: str | None = None, browsergym_eval_env: str | None = None, ) -> Runtime: sid = 'test' cli_session = 'main_test' + # AgentSkills need to be initialized **before** Jupyter # otherwise Jupyter will not access the proper dependencies installed by AgentSkills plugins = [AgentSkillsRequirement(), JupyterRequirement()] @@ -114,19 +115,17 @@ async def _load_runtime( load_from_env(config, os.environ) config.run_as_openhands = run_as_openhands config.sandbox.enable_auto_lint = enable_auto_lint + if base_container_image is not None: + config.sandbox.base_container_image = base_container_image file_store = get_file_store(config.file_store, config.file_store_path) event_stream = EventStream(cli_session, file_store) - if container_image is not None: - config.sandbox.container_image = container_image - runtime = box_class( config=config, event_stream=event_stream, sid=sid, plugins=plugins, - container_image=container_image, ) await runtime.ainit() await asyncio.sleep(1) diff --git a/tests/runtime/test_browsing.py b/tests/runtime/test_browsing.py index 8eb8963f6d..6c2bdd408c 100644 --- a/tests/runtime/test_browsing.py +++ b/tests/runtime/test_browsing.py @@ -80,7 +80,7 @@ async def test_browsergym_eval_env(temp_dir): # only supported in event stream runtime box_class=EventStreamRuntime, run_as_openhands=False, # 
need root permission to access file - container_image='xingyaoww/od-eval-miniwob:v1.0', + base_container_image='xingyaoww/od-eval-miniwob:v1.0', browsergym_eval_env='browsergym/miniwob.choose-list', ) from openhands.runtime.browser.browser_env import ( diff --git a/tests/runtime/test_images.py b/tests/runtime/test_images.py index 7d2347a061..24d0aa664b 100644 --- a/tests/runtime/test_images.py +++ b/tests/runtime/test_images.py @@ -14,15 +14,17 @@ from openhands.events.action import CmdRunAction @pytest.mark.asyncio -async def test_bash_python_version(temp_dir, box_class, container_image): +async def test_bash_python_version(temp_dir, box_class, base_container_image): """Make sure Python is available in bash.""" - if container_image not in [ + if base_container_image not in [ 'python:3.11-bookworm', 'nikolaik/python-nodejs:python3.11-nodejs22', ]: pytest.skip('This test is only for python-related images') - runtime = await _load_runtime(temp_dir, box_class, container_image=container_image) + runtime = await _load_runtime( + temp_dir, box_class, base_container_image=base_container_image + ) action = CmdRunAction(command='which python') logger.info(action, extra={'msg_type': 'ACTION'}) @@ -49,15 +51,17 @@ async def test_bash_python_version(temp_dir, box_class, container_image): @pytest.mark.asyncio -async def test_nodejs_22_version(temp_dir, box_class, container_image): +async def test_nodejs_22_version(temp_dir, box_class, base_container_image): """Make sure Node.js is available in bash.""" - if container_image not in [ + if base_container_image not in [ 'node:22-bookworm', 'nikolaik/python-nodejs:python3.11-nodejs22', ]: pytest.skip('This test is only for nodejs-related images') - runtime = await _load_runtime(temp_dir, box_class, container_image=container_image) + runtime = await _load_runtime( + temp_dir, box_class, base_container_image=base_container_image + ) action = CmdRunAction(command='node --version') logger.info(action, extra={'msg_type': 'ACTION'}) @@ 
-71,14 +75,16 @@ async def test_nodejs_22_version(temp_dir, box_class, container_image): @pytest.mark.asyncio -async def test_go_version(temp_dir, box_class, container_image): +async def test_go_version(temp_dir, box_class, base_container_image): """Make sure Go is available in bash.""" - if container_image not in [ + if base_container_image not in [ 'golang:1.23-bookworm', ]: pytest.skip('This test is only for go-related images') - runtime = await _load_runtime(temp_dir, box_class, container_image=container_image) + runtime = await _load_runtime( + temp_dir, box_class, base_container_image=base_container_image + ) action = CmdRunAction(command='go version') logger.info(action, extra={'msg_type': 'ACTION'}) diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index ba92383f99..711edd5d10 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -75,7 +75,7 @@ def test_load_from_old_style_env(monkeypatch, default_config): monkeypatch.setenv('AGENT_MEMORY_ENABLED', 'True') monkeypatch.setenv('DEFAULT_AGENT', 'PlannerAgent') monkeypatch.setenv('WORKSPACE_BASE', '/opt/files/workspace') - monkeypatch.setenv('SANDBOX_CONTAINER_IMAGE', 'custom_image') + monkeypatch.setenv('SANDBOX_BASE_CONTAINER_IMAGE', 'custom_image') load_from_env(default_config, os.environ) @@ -89,7 +89,7 @@ def test_load_from_old_style_env(monkeypatch, default_config): assert ( default_config.workspace_mount_path_in_sandbox is not UndefinedString.UNDEFINED ) - assert default_config.sandbox.container_image == 'custom_image' + assert default_config.sandbox.base_container_image == 'custom_image' def test_load_from_new_style_toml(default_config, temp_toml_file): @@ -178,7 +178,7 @@ memory_enabled = true [core] workspace_base = "/opt/files2/workspace" sandbox_timeout = 500 -sandbox_container_image = "node:14" +sandbox_base_container_image = "node:14" sandbox_user_id = 1001 default_agent = "TestAgent" """ @@ -192,7 +192,7 @@ default_agent = "TestAgent" assert 
default_config.get_agent_config().memory_enabled is True assert default_config.workspace_base == '/opt/files2/workspace' assert default_config.sandbox.timeout == 500 - assert default_config.sandbox.container_image == 'node:14' + assert default_config.sandbox.base_container_image == 'node:14' assert default_config.sandbox.user_id == 1001 assert default_config.workspace_mount_path_in_sandbox == '/workspace' @@ -200,7 +200,7 @@ default_agent = "TestAgent" # app config doesn't have fields sandbox_* assert not hasattr(default_config, 'sandbox_timeout') - assert not hasattr(default_config, 'sandbox_container_image') + assert not hasattr(default_config, 'sandbox_base_container_image') assert not hasattr(default_config, 'sandbox_user_id') # after finalize_config, workspace_mount_path is set to the absolute path of workspace_base @@ -319,7 +319,7 @@ model = "test-model" [sandbox] timeout = 1 -container_image = "custom_image" +base_container_image = "custom_image" user_id = 1001 """ ) @@ -330,7 +330,7 @@ user_id = 1001 assert default_config.get_llm_config().model == 'test-model' assert default_config.sandbox.timeout == 1 - assert default_config.sandbox.container_image == 'custom_image' + assert default_config.sandbox.base_container_image == 'custom_image' assert default_config.sandbox.user_id == 1001 @@ -357,7 +357,7 @@ def test_defaults_dict_after_updates(default_config): ) assert defaults_after_updates['sandbox']['timeout']['default'] == 120 assert ( - defaults_after_updates['sandbox']['container_image']['default'] + defaults_after_updates['sandbox']['base_container_image']['default'] == 'nikolaik/python-nodejs:python3.11-nodejs22' ) assert defaults_after_updates == initial_defaults