From b30a2dd87ac640661c69305c70263a288bf6aa27 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Thu, 8 Aug 2024 00:57:11 +0800 Subject: [PATCH] completely remove update_source_code (#3280) --- evaluation/EDA/run_infer.py | 1 - evaluation/agent_bench/run_infer.py | 1 - evaluation/biocoder/run_infer.py | 1 - evaluation/bird/run_infer.py | 1 - evaluation/browsing_delegation/run_infer.py | 1 - evaluation/gaia/run_infer.py | 1 - evaluation/gorilla/run_infer.py | 1 - evaluation/gpqa/run_infer.py | 1 - evaluation/humanevalfix/run_infer.py | 1 - evaluation/logic_reasoning/run_infer.py | 1 - evaluation/miniwob/run_infer.py | 1 - evaluation/mint/run_infer.py | 1 - evaluation/ml_bench/run_infer.py | 1 - evaluation/swe_bench/run_infer.py | 2 - evaluation/toolqa/run_infer.py | 1 - evaluation/webarena/run_infer.py | 1 - opendevin/runtime/client/README.md | 110 -------------------- opendevin/runtime/client/client.py | 4 - tests/integration/README.md | 4 +- 19 files changed, 2 insertions(+), 133 deletions(-) delete mode 100644 opendevin/runtime/client/README.md diff --git a/evaluation/EDA/run_infer.py b/evaluation/EDA/run_infer.py index a907619056..6d489c3053 100644 --- a/evaluation/EDA/run_infer.py +++ b/evaluation/EDA/run_infer.py @@ -65,7 +65,6 @@ def get_config( container_image='ubuntu:22.04', enable_auto_lint=False, use_host_network=False, - update_source_code=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/agent_bench/run_infer.py b/evaluation/agent_bench/run_infer.py index 53e9b262c7..23514634b6 100644 --- a/evaluation/agent_bench/run_infer.py +++ b/evaluation/agent_bench/run_infer.py @@ -47,7 +47,6 @@ def get_config( container_image='ubuntu:22.04', enable_auto_lint=True, use_host_network=False, - update_source_code=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/biocoder/run_infer.py b/evaluation/biocoder/run_infer.py index 113bcf6727..3fb2137440 100644 --- a/evaluation/biocoder/run_infer.py +++ b/evaluation/biocoder/run_infer.py @@ -65,7 +65,6 @@ def get_config( container_image=BIOCODER_BENCH_CONTAINER_IMAGE, enable_auto_lint=True, use_host_network=False, - update_source_code=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/bird/run_infer.py b/evaluation/bird/run_infer.py index f7c5fad812..70402c6a29 100644 --- a/evaluation/bird/run_infer.py +++ b/evaluation/bird/run_infer.py @@ -78,7 +78,6 @@ def get_config( container_image='ubuntu:22.04', enable_auto_lint=True, use_host_network=False, - update_source_code=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/browsing_delegation/run_infer.py b/evaluation/browsing_delegation/run_infer.py index d841c6434d..983d753c8e 100644 --- a/evaluation/browsing_delegation/run_infer.py +++ b/evaluation/browsing_delegation/run_infer.py @@ -43,7 +43,6 @@ def get_config( container_image='ubuntu:22.04', enable_auto_lint=False, use_host_network=False, - update_source_code=True, ), workspace_base=None, workspace_mount_path=None, diff --git a/evaluation/gaia/run_infer.py b/evaluation/gaia/run_infer.py index 0cde3c24c0..7a791c4aa7 100644 --- a/evaluation/gaia/run_infer.py +++ b/evaluation/gaia/run_infer.py @@ -54,7 +54,6 @@ def get_config( container_image='ubuntu:22.04', enable_auto_lint=True, use_host_network=False, - update_source_code=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/gorilla/run_infer.py b/evaluation/gorilla/run_infer.py index c2fb9a89ae..8aad9d9da9 100644 --- a/evaluation/gorilla/run_infer.py +++ b/evaluation/gorilla/run_infer.py @@ -45,7 +45,6 @@ def get_config( container_image='ubuntu:22.04', enable_auto_lint=True, use_host_network=False, - update_source_code=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/gpqa/run_infer.py b/evaluation/gpqa/run_infer.py index 773da12b9d..32cee5fb7e 100644 --- a/evaluation/gpqa/run_infer.py +++ b/evaluation/gpqa/run_infer.py @@ -68,7 +68,6 @@ def get_config( container_image='ubuntu:22.04', enable_auto_lint=True, use_host_network=False, - update_source_code=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/humanevalfix/run_infer.py b/evaluation/humanevalfix/run_infer.py index ba3e365499..94d23f9313 100644 --- a/evaluation/humanevalfix/run_infer.py +++ b/evaluation/humanevalfix/run_infer.py @@ -89,7 +89,6 @@ def get_config( container_image='ubuntu:22.04', enable_auto_lint=True, use_host_network=False, - update_source_code=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/logic_reasoning/run_infer.py b/evaluation/logic_reasoning/run_infer.py index 295f88812a..d0739d4f39 100644 --- a/evaluation/logic_reasoning/run_infer.py +++ b/evaluation/logic_reasoning/run_infer.py @@ -52,7 +52,6 @@ def get_config( container_image='xingyaoww/od-eval-logic-reasoning:v1.0', enable_auto_lint=True, use_host_network=False, - update_source_code=True, od_runtime_extra_deps='$OD_INTERPRETER_PATH -m pip install scitools-pyke', ), # do not mount workspace diff --git a/evaluation/miniwob/run_infer.py b/evaluation/miniwob/run_infer.py index 87077d76e5..92c96bf425 100644 --- a/evaluation/miniwob/run_infer.py +++ b/evaluation/miniwob/run_infer.py @@ -52,7 +52,6 @@ def get_config( container_image='xingyaoww/od-eval-miniwob:v1.0', enable_auto_lint=True, use_host_network=False, - update_source_code=True, browsergym_eval_env=env_id, ), # do not mount workspace diff --git a/evaluation/mint/run_infer.py b/evaluation/mint/run_infer.py index b938a18d74..42ea459014 100644 --- a/evaluation/mint/run_infer.py +++ b/evaluation/mint/run_infer.py @@ -104,7 +104,6 @@ def get_config( container_image='xingyaoww/od-eval-mint:v1.0', enable_auto_lint=True, use_host_network=False, - update_source_code=True, od_runtime_extra_deps=f'$OD_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}', ), # do not mount workspace diff --git a/evaluation/ml_bench/run_infer.py b/evaluation/ml_bench/run_infer.py index 1a73ebd9c8..c7baa77e03 100644 --- a/evaluation/ml_bench/run_infer.py +++ b/evaluation/ml_bench/run_infer.py @@ -83,7 +83,6 @@ def get_config( container_image='public.ecr.aws/i5g0m1f6/ml-bench', enable_auto_lint=True, use_host_network=False, - update_source_code=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py index 96fdb4ad5a..089b6f0402 100644 --- a/evaluation/swe_bench/run_infer.py +++ b/evaluation/swe_bench/run_infer.py @@ -107,8 +107,6 @@ def get_config( container_image=container_image, enable_auto_lint=True, use_host_network=False, - # always make sure we are using the latest source code - update_source_code=True, # large enough timeout, since some testcases take very long to run timeout=300, ), diff --git a/evaluation/toolqa/run_infer.py b/evaluation/toolqa/run_infer.py index ac353a3afa..2eef9d2380 100644 --- a/evaluation/toolqa/run_infer.py +++ b/evaluation/toolqa/run_infer.py @@ -48,7 +48,6 @@ def get_config( container_image='ubuntu:22.04', enable_auto_lint=True, use_host_network=False, - update_source_code=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/webarena/run_infer.py b/evaluation/webarena/run_infer.py index c83f2d7c85..c0a3182f77 100644 --- a/evaluation/webarena/run_infer.py +++ b/evaluation/webarena/run_infer.py @@ -57,7 +57,6 @@ def get_config( container_image='ubuntu:22.04', enable_auto_lint=True, use_host_network=False, - update_source_code=True, browsergym_eval_env=env_id, od_runtime_startup_env_vars={ 'BASE_URL': base_url, diff --git a/opendevin/runtime/client/README.md b/opendevin/runtime/client/README.md deleted file mode 100644 index 9690c65a15..0000000000 --- a/opendevin/runtime/client/README.md +++ /dev/null @@ -1,110 +0,0 @@ -# OpenDevin Runtime - -This README provides an overview of the OpenDevin Runtime, a crucial component of the OpenDevin system. It covers two main aspects: - -1. How the Runtime Image is Built: Explains the layered approach to creating Docker images for both production and development environments. -2. How the Runtime Client Works: Details the functionality and architecture of the Runtime Client, which executes actions within the Docker sandbox. - -The following sections dive deeper into these topics, providing a comprehensive understanding of the OpenDevin Runtime system. - -## Architecture Diagram - -``` -+-------------------+ +-------------------+ -| OpenDevin | | Docker Host | -| Backend | | | -| | | +-------------+ | -| +-------------+ | | | Runtime | | -| | EventStream | | | | Container | | -| | Runtime |<-|-----|->| | | -| +-------------+ | | | +-------+ | | -| | | | |Runtime| | | -| | | | |Client | | | -| | | | +-------+ | | -| | | | | | | -| | | | +-------+ | | -| | | | |Plugins| | | -| | | | +-------+ | | -| | | +-------------+ | -+-------------------+ +-------------------+ -``` - -This diagram illustrates the high-level architecture of the OpenDevin Runtime system: - -1. The OpenDevin Backend communicates with the Docker Host through the EventStreamRuntime. -2. The Docker Host runs a Runtime Container, which includes: - - The Runtime Client: Handles incoming actions and generates observations. - - Plugins: Extend the functionality of the Runtime Client. -3. The Runtime Client executes actions within the sandboxed environment of the Docker container. - -This architecture ensures a secure and flexible environment for executing AI-driven development tasks, allowing OpenDevin to execute a wide range of actions safely and efficiently. - -## How the Runtime Image is Built - -The OpenDevin runtime uses a layered approach for building Docker images: - -1. **Original Image**: `ubuntu:22.04` - - This is the base image used for all subsequent layers. - -2. **Runtime Image**: `od_runtime:od_v{OPENDEVIN_VERSION}_image_ubuntu__22.04` - -Example image name: `od_runtime:od_v0.8.1_image_ubuntu__22.04` - - Built from the stable release of OpenDevin. - - This is the primary runtime image that users will interact with. - - Created by copying all OpenDevin code into the original image and installing dependencies using Poetry. - -1. **Dev Runtime Image**: `od_runtime_dev:od_v{OPENDEVIN_VERSION}_image_ubuntu__22.04` - - Built from local source code for development purposes. - -### Build Process - -#### Production Build (if environment variable `SANDBOX_UPDATE_SOURCE_CODE` is not set) -By default, when `SANDBOX_UPDATE_SOURCE_CODE` is unset OR set to false, the build process only needs to run once: -- The Runtime Image (`od_runtime:od_v{OPENDEVIN_VERSION}_image_ubuntu__22.04`) is created by copying OpenDevin code into the original Ubuntu image and installing all dependencies. -- This pre-built image is then used for running the OpenDevin environment. - -#### Development Build (env var `SANDBOX_UPDATE_SOURCE_CODE=True`) -When developing or modifying code that runs inside the container, you can set env var `SANDBOX_UPDATE_SOURCE_CODE=True` to enable a more dynamic build process: -- Every time you run the code, the existing image will be updated with the latest changes. -- The Dev Runtime Image (`od_runtime_dev:od_v{OPENDEVIN_VERSION}_image_ubuntu__22.04`) is rebuilt from the Runtime Image (`od_runtime:od_v{OPENDEVIN_VERSION}_image_ubuntu__22.04`). -- Most dependencies are already installed in the Runtime Image, so this process mainly updates the code and any new dependencies. -- The rebuild process typically takes around 10 seconds, allowing for quick iterations during development. - -This approach allows developers to easily test changes to the OpenDevin codebase, including modifications to files like client.py, without needing to rebuild the entire image from scratch each time. - -## How the Runtime Client Works - -The Runtime Client is a crucial component of the OpenDevin system, responsible for executing actions within the Docker sandbox environment and producing observations. Here's an overview of its functionality: - -1. **Initialization**: - - The `EventStreamRuntime` class in `runtime.py` initializes the Docker container and sets up the runtime environment. - -2. **Communication**: - - The Runtime Client uses FastAPI to create a web server inside the Docker container. - - It listens for incoming action requests from the OpenDevin backend. - -3. **Action Execution**: - - When an action is received, the Runtime Client processes it based on its type: - - `CmdRunAction`: Executes shell commands using a pexpect-spawned bash shell. - - `FileReadAction` and `FileWriteAction`: Perform file operations within the sandbox. - - `IPythonRunCellAction`: Executes Python code in an IPython environment. - - `BrowseURLAction` and `BrowseInteractiveAction`: Handle web browsing tasks using a browser environment. - -4. **Plugin System**: - - The Runtime Client supports a plugin system for extending functionality. - - Plugins like JupyterPlugin can be loaded to provide additional features. - -5. **Observation Generation**: - - After executing an action, the Runtime Client generates an appropriate observation. - - Observations include command outputs, file contents, error messages, etc. - -6. **Asynchronous Operation**: - - The Runtime Client uses asyncio for avoid concurrent requests. - - It ensures that only one action is executed at a time using a semaphore. - -7. **Security**: - - All actions are executed within the confined Docker environment, providing a sandbox for safe execution. - -8. **Flexibility**: - - The system supports both production (`SANDBOX_UPDATE_SOURCE_CODE=False`) and development (`SANDBOX_UPDATE_SOURCE_CODE=True`) modes. - - In development mode, the runtime image can be updated with the latest code changes for testing and debugging. diff --git a/opendevin/runtime/client/client.py b/opendevin/runtime/client/client.py index 1c1b9be947..29044f6d61 100644 --- a/opendevin/runtime/client/client.py +++ b/opendevin/runtime/client/client.py @@ -3,10 +3,6 @@ This is the main file for the runtime client. It is responsible for executing actions received from OpenDevin backend and producing observations. NOTE: this will be executed inside the docker sandbox. - -If you already have pre-build docker image yet you changed the code in this file OR dependencies, you need to rebuild the docker image to update the source code. - -You should add SANDBOX_UPDATE_SOURCE_CODE=True to any `python XXX.py` command you run to update the source code. """ import argparse diff --git a/tests/integration/README.md b/tests/integration/README.md index 2aec83daa3..3a97a7e6cd 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -63,7 +63,7 @@ You can run: # for server runtime TEST_RUNTIME=server TEST_ONLY=true ./tests/integration/regenerate.sh # for event stream -SANDBOX_UPDATE_SOURCE_CODE=True TEST_RUNTIME=eventstream TEST_ONLY=true ./tests/integration/regenerate.sh +TEST_RUNTIME=eventstream TEST_ONLY=true ./tests/integration/regenerate.sh ``` to run all integration tests until the first failure occurs. @@ -84,7 +84,7 @@ by running the following command from OpenDevin's project root directory: ```bash TEST_RUNTIME=server ./tests/integration/regenerate.sh -SANDBOX_UPDATE_SOURCE_CODE=True TEST_RUNTIME=eventstream ./tests/integration/regenerate.sh +TEST_RUNTIME=eventstream ./tests/integration/regenerate.sh ``` Please note that this will: