Merge branch 'main' of github.com:OpenDevin/OpenDevin into enyst/memories-condenser

This commit is contained in:
Engel Nyst
2024-06-28 05:29:49 +02:00
18 changed files with 139 additions and 81 deletions

View File

@@ -19,7 +19,7 @@ where `model_config` is mandatory, while `git-version`, `agent`, `dataset` and `
- `model_config`, e.g. `eval_gpt4_1106_preview`, is the config group name for your
LLM settings, as defined in your `config.toml`.
- `git-version`, e.g. `head`, is the git commit hash of the OpenDevin version you would
- `git-version`, e.g. `HEAD`, is the git commit hash of the OpenDevin version you would
like to evaluate. It could also be a release tag like `0.6.2`.
- `agent`, e.g. `CodeActAgent`, is the name of the agent for benchmarks, defaulting

View File

@@ -32,7 +32,7 @@ where `model_config` is mandatory, while `git-version`, `agent`, `dataset` and `
- `model_config`, e.g. `eval_gpt4_1106_preview`, is the config group name for your
LLM settings, as defined in your `config.toml`.
- `git-version`, e.g. `head`, is the git commit hash of the OpenDevin version you would
- `git-version`, e.g. `HEAD`, is the git commit hash of the OpenDevin version you would
like to evaluate. It could also be a release tag like `0.6.2`.
- `agent`, e.g. `CodeActAgent`, is the name of the agent for benchmarks, defaulting

View File

@@ -42,7 +42,7 @@ temperature = 0.0
- `model_config`, e.g. `eval_gpt4_1106_preview`, is the config group name for your
LLM settings, as defined in your `config.toml`.
- `git-version`, e.g. `head`, is the git commit hash of the OpenDevin version you would
- `git-version`, e.g. `HEAD`, is the git commit hash of the OpenDevin version you would
like to evaluate. It could also be a release tag like `0.6.2`.
## Examples

View File

@@ -22,7 +22,7 @@ where `model_config` is mandatory, while `git-version`, `agent`, `eval_limit` an
- `model_config`, e.g. `eval_gpt4_1106_preview`, is the config group name for your
LLM settings, as defined in your `config.toml`, defaulting to `gpt-3.5-turbo`
- `git-version`, e.g. `head`, is the git commit hash of the OpenDevin version you would
- `git-version`, e.g. `HEAD`, is the git commit hash of the OpenDevin version you would
like to evaluate. It could also be a release tag like `0.6.2`.
- `agent`, e.g. `CodeActAgent`, is the name of the agent for benchmarks, defaulting

View File

@@ -23,7 +23,7 @@ where `model_config` is mandatory, while all other arguments are optional.
`model_config`, e.g. `llm`, is the config group name for your
LLM settings, as defined in your `config.toml`.
`git-version`, e.g. `head`, is the git commit hash of the OpenDevin version you would
`git-version`, e.g. `HEAD`, is the git commit hash of the OpenDevin version you would
like to evaluate. It could also be a release tag like `0.6.2`.
`agent`, e.g. `CodeActAgent`, is the name of the agent for benchmarks, defaulting

View File

@@ -60,7 +60,7 @@ From the root of the OpenDevin repo, run the following command:
You can replace `model_config_name` with any model you set up in `config.toml`.
- `model_config_name`: The model configuration name from `config.toml` that you want to evaluate.
- `git-version`, e.g. `head`, is the git commit hash of the OpenDevin version you would
- `git-version`, e.g. `HEAD`, is the git commit hash of the OpenDevin version you would
like to evaluate. It could also be a release tag like `0.6.2`.
- `num_samples_eval`: Number of samples to evaluate (useful for testing and debugging).
- `data_split`: The data split to evaluate on. Must be one of `gpqa_main`, `gpqa_diamond`, `gpqa_experts`, `gpqa_extended`. Defaults to `gpqa_diamond` as done in the paper.

View File

@@ -20,7 +20,7 @@ where `model_config` is mandatory, while others are optional.
- `model_config`, e.g. `eval_gpt4_1106_preview`, is the config group name for your LLM settings, as defined in your `config.toml`.
- `git-version`, e.g. `head`, is the git commit hash of the OpenDevin version you would
- `git-version`, e.g. `HEAD`, is the git commit hash of the OpenDevin version you would
like to evaluate. It could also be a release tag like `0.6.2`.
- `subset`, e.g. `math`, is the subset of the MINT benchmark to evaluate on, defaulting to `math`. It can be either: `math`, `gsm8k`, `mmlu`, `theoremqa`, `mbpp`,`humaneval`.

View File

@@ -82,7 +82,7 @@ If you see an error, please make sure your `config.toml` contains all
```bash
./evaluation/swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit]
# e.g., ./evaluation/swe_bench/scripts/run_infer.sh eval_gpt4_1106_preview head CodeActAgent 300
# e.g., ./evaluation/swe_bench/scripts/run_infer.sh eval_gpt4_1106_preview HEAD CodeActAgent 300
```
where `model_config` is mandatory, while `agent` and `eval_limit` are optional.
@@ -90,7 +90,7 @@ where `model_config` is mandatory, while `agent` and `eval_limit` are optional.
`model_config`, e.g. `eval_gpt4_1106_preview`, is the config group name for your
LLM settings, as defined in your `config.toml`.
`git-version`, e.g. `head`, is the git commit hash of the OpenDevin version you would
`git-version`, e.g. `HEAD`, is the git commit hash of the OpenDevin version you would
like to evaluate. It could also be a release tag like `0.6.2`.
`agent`, e.g. `CodeActAgent`, is the name of the agent for benchmarks, defaulting
@@ -104,7 +104,7 @@ Let's say you'd like to run 10 instances using `eval_gpt4_1106_preview` and Code
then your command would be:
```bash
./evaluation/swe_bench/scripts/run_infer.sh eval_gpt4_1106_preview head CodeActAgent 10
./evaluation/swe_bench/scripts/run_infer.sh eval_gpt4_1106_preview HEAD CodeActAgent 10
```
If you would like to specify a list of tasks you'd like to benchmark on, you could

View File

@@ -23,7 +23,7 @@ where `model_config` is mandatory, while all other arguments are optional.
`model_config`, e.g. `llm`, is the config group name for your
LLM settings, as defined in your `config.toml`.
`git-version`, e.g. `head`, is the git commit hash of the OpenDevin version you would
`git-version`, e.g. `HEAD`, is the git commit hash of the OpenDevin version you would
like to evaluate. It could also be a release tag like `0.6.2`.
`agent`, e.g. `CodeActAgent`, is the name of the agent for benchmarks, defaulting

View File

@@ -21,7 +21,7 @@
"i18next": "^23.11.5",
"i18next-browser-languagedetector": "^8.0.0",
"i18next-http-backend": "^2.5.2",
"jose": "^5.5.0",
"jose": "^5.6.1",
"monaco-editor": "^0.50.0",
"react": "^18.3.1",
"react-dom": "^18.3.1",
@@ -10552,9 +10552,9 @@
}
},
"node_modules/jose": {
"version": "5.5.0",
"resolved": "https://registry.npmjs.org/jose/-/jose-5.5.0.tgz",
"integrity": "sha512-DUPr/1kYXbuqYpkCj9r66+B4SGCKXCLQ5ZbKCgmn4sJveJqcwNqWtAR56u4KPmpXjrmBO2uNuLdEAEiqIhFNBg==",
"version": "5.6.1",
"resolved": "https://registry.npmjs.org/jose/-/jose-5.6.1.tgz",
"integrity": "sha512-KyxsIFAtR0BH72iRCOwe+PRnsGltiXDRtemlOkp2xz7FrakAKd4lvYYJgFOJm2fryOMMUk0+f0E/uuTDoHDiTA==",
"funding": {
"url": "https://github.com/sponsors/panva"
}

View File

@@ -20,7 +20,7 @@
"i18next": "^23.11.5",
"i18next-browser-languagedetector": "^8.0.0",
"i18next-http-backend": "^2.5.2",
"jose": "^5.5.0",
"jose": "^5.6.1",
"monaco-editor": "^0.50.0",
"react": "^18.3.1",
"react-dom": "^18.3.1",

View File

@@ -126,9 +126,9 @@ class AgentController:
- the string message should be user-friendly, it will be shown in the UI
- an ErrorObservation can be sent to the LLM by the agent, with the exception message, so it can self-correct next time
"""
if exception:
message += f': {exception}'
self.state.error = message
if exception:
self.state.error += f': {str(exception)}'
self.event_stream.add_event(ErrorObservation(message), EventSource.AGENT)
async def _start_step_loop(self):
@@ -140,6 +140,7 @@ class AgentController:
logger.info('AgentController task was cancelled')
break
except Exception as e:
traceback.print_exc()
logger.error(f'Error while running the agent: {e}')
logger.error(traceback.format_exc())
await self.report_error(

View File

@@ -58,47 +58,52 @@ class ServerRuntime(Runtime):
('cat /tmp/opendevin_jupyter_temp.py | execute_cli'), background=False
)
output = obs.content
if 'pip install' in action.code and 'Successfully installed' in output:
if 'pip install' in action.code:
print(output)
restart_kernel = 'import IPython\nIPython.Application.instance().kernel.do_shutdown(True)'
if (
'Note: you may need to restart the kernel to use updated packages.'
in output
):
obs = self._run_command(
(
"cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n"
f'{restart_kernel}\n'
'EOL'
),
background=False,
)
obs = self._run_command(
('cat /tmp/opendevin_jupyter_temp.py | execute_cli'),
background=False,
)
output = '[Package installed successfully]'
if "{'status': 'ok', 'restart': True}" != obs.content.strip():
print(obs.content)
output += '\n[But failed to restart the kernel to load the package]'
else:
output += '\n[Kernel restarted successfully to load the package]'
package_names = action.code.split(' ', 2)[-1]
is_single_package = ' ' not in package_names
# re-init the kernel after restart
if action.kernel_init_code:
obs = self._run_command(
if 'Successfully installed' in output:
restart_kernel = 'import IPython\nIPython.Application.instance().kernel.do_shutdown(True)'
if (
'Note: you may need to restart the kernel to use updated packages.'
in output
):
self._run_command(
(
f"cat > /tmp/opendevin_jupyter_init.py <<'EOL'\n"
f'{action.kernel_init_code}\n'
"cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n"
f'{restart_kernel}\n'
'EOL'
),
background=False,
)
obs = self._run_command(
'cat /tmp/opendevin_jupyter_init.py | execute_cli',
('cat /tmp/opendevin_jupyter_temp.py | execute_cli'),
background=False,
)
output = '[Package installed successfully]'
if "{'status': 'ok', 'restart': True}" != obs.content.strip():
print(obs.content)
output += '\n[But failed to restart the kernel to load the package]'
else:
output += '\n[Kernel restarted successfully to load the package]'
# re-init the kernel after restart
if action.kernel_init_code:
obs = self._run_command(
(
f"cat > /tmp/opendevin_jupyter_init.py <<'EOL'\n"
f'{action.kernel_init_code}\n'
'EOL'
),
background=False,
)
obs = self._run_command(
'cat /tmp/opendevin_jupyter_init.py | execute_cli',
background=False,
)
elif is_single_package and f'Requirement already satisfied: {package_names}' in output:
output = '[Package already installed]'
return IPythonRunCellObservation(content=output, code=action.code)
async def read(self, action: FileReadAction) -> Observation:
@@ -131,9 +136,14 @@ class ServerRuntime(Runtime):
def _run_immediately(self, command: str) -> Observation:
try:
exit_code, output = self.sandbox.execute(command)
if 'pip install' in command and 'Successfully installed' in output:
if 'pip install' in command:
package_names = command.split(' ', 2)[-1]
is_single_package = ' ' not in package_names
print(output)
output = 'Package installed successfully'
if 'Successfully installed' in output:
output = '[Package installed successfully]'
elif is_single_package and f'Requirement already satisfied: {package_names}' in output:
output = '[Package already installed]'
return CmdOutputObservation(
command_id=-1, content=str(output), command=command, exit_code=exit_code
)

38
poetry.lock generated
View File

@@ -416,17 +416,17 @@ files = [
[[package]]
name = "boto3"
version = "1.34.133"
version = "1.34.134"
description = "The AWS SDK for Python"
optional = false
python-versions = ">=3.8"
files = [
{file = "boto3-1.34.133-py3-none-any.whl", hash = "sha256:da7e78c03270be872ad78301892396ffea56647efcb2c3a8621ef46a905541ab"},
{file = "boto3-1.34.133.tar.gz", hash = "sha256:7071f8ce1f09113ca5630860fd590464e6325a4df55faae83c956225941016fc"},
{file = "boto3-1.34.134-py3-none-any.whl", hash = "sha256:342782c02ff077aae118c9c61179eed95c585831fba666baacc5588ff04aa6e1"},
{file = "boto3-1.34.134.tar.gz", hash = "sha256:f6d6e5b0c9ab022a75373fa16c01f0cd54bc1bb64ef3b6ac64ac7cedd56cbe9c"},
]
[package.dependencies]
botocore = ">=1.34.133,<1.35.0"
botocore = ">=1.34.134,<1.35.0"
jmespath = ">=0.7.1,<2.0.0"
s3transfer = ">=0.10.0,<0.11.0"
@@ -435,13 +435,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
[[package]]
name = "botocore"
version = "1.34.133"
version = "1.34.134"
description = "Low-level, data-driven core of boto 3."
optional = false
python-versions = ">=3.8"
files = [
{file = "botocore-1.34.133-py3-none-any.whl", hash = "sha256:f269dad8e17432d2527b97ed9f1fd30ec8dc705f8b818957170d1af484680ef2"},
{file = "botocore-1.34.133.tar.gz", hash = "sha256:5ea609aa4831a6589e32eef052a359ad8d7311733b4d86a9d35dab4bd3ec80ff"},
{file = "botocore-1.34.134-py3-none-any.whl", hash = "sha256:45219e00639755f92569b29f8f279d5dde721494791412c1f7026a3779e8d9f4"},
{file = "botocore-1.34.134.tar.gz", hash = "sha256:e29c299599426ed16dd2d4c1e20eef784f96b15e1850ebbc59a3250959285b95"},
]
[package.dependencies]
@@ -1829,13 +1829,13 @@ test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit",
[[package]]
name = "google-ai-generativelanguage"
version = "0.6.5"
version = "0.6.6"
description = "Google Ai Generativelanguage API client library"
optional = false
python-versions = ">=3.7"
files = [
{file = "google-ai-generativelanguage-0.6.5.tar.gz", hash = "sha256:c4089c277fa4e26722f76ab03ee3039f28be8bf1c9be282948b9583a154c6d79"},
{file = "google_ai_generativelanguage-0.6.5-py3-none-any.whl", hash = "sha256:236875bb4a6d6ebdba2f12bd9d5e776100fd913402157a47b5e9fb80a13f25a7"},
{file = "google-ai-generativelanguage-0.6.6.tar.gz", hash = "sha256:1739f035caeeeca5c28f887405eec8690f3372daf79fecf26454a97a4f1733a8"},
{file = "google_ai_generativelanguage-0.6.6-py3-none-any.whl", hash = "sha256:59297737931f073d55ce1268dcc6d95111ee62850349d2b6cde942b16a4fca5c"},
]
[package.dependencies]
@@ -1927,16 +1927,16 @@ httplib2 = ">=0.19.0"
[[package]]
name = "google-generativeai"
version = "0.7.0"
version = "0.7.1"
description = "Google Generative AI High level API client library and tools."
optional = false
python-versions = ">=3.9"
files = [
{file = "google_generativeai-0.7.0-py3-none-any.whl", hash = "sha256:7be4b634afeb8b6bebde1af7271e94d2af84d2d28b5988c7ed9921733c40fe63"},
{file = "google_generativeai-0.7.1-py3-none-any.whl", hash = "sha256:25017b1278c873b6db65ba41c70566ee9fce8f265498e8b2f6aac036528ba1c7"},
]
[package.dependencies]
google-ai-generativelanguage = "0.6.5"
google-ai-generativelanguage = "0.6.6"
google-api-core = "*"
google-api-python-client = "*"
google-auth = ">=2.15.0"
@@ -2771,13 +2771,13 @@ types-tqdm = "*"
[[package]]
name = "litellm"
version = "1.40.27"
version = "1.40.28"
description = "Library to easily interface with LLM API providers"
optional = false
python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
files = [
{file = "litellm-1.40.27-py3-none-any.whl", hash = "sha256:f6906e5260d784e7e31d579f5b28545e87517268cb96dd0dcaf31e4c5d34073f"},
{file = "litellm-1.40.27.tar.gz", hash = "sha256:a13a04168be5a8e52d43c34c2e657ca2521da61039ac39a17abc233a1875923f"},
{file = "litellm-1.40.28-py3-none-any.whl", hash = "sha256:aa6d59390f24d1b1168a202b966249f9f5f93d08deba38ed9528654544065e96"},
{file = "litellm-1.40.28.tar.gz", hash = "sha256:08fdfcb01715006f9dadb8d05b94143f782e08d1944e5691d9faf20300e62739"},
]
[package.dependencies]
@@ -4190,13 +4190,13 @@ sympy = "*"
[[package]]
name = "openai"
version = "1.35.4"
version = "1.35.6"
description = "The official Python library for the openai API"
optional = false
python-versions = ">=3.7.1"
files = [
{file = "openai-1.35.4-py3-none-any.whl", hash = "sha256:894b79c485fae2df3a6b68ceb570730e5a480c08bccc32a412cf3be2d4eb1384"},
{file = "openai-1.35.4.tar.gz", hash = "sha256:b58a0d6257c5c86e85b9b2f43e6eed04ada616560df9da0dca6697d06845e7c8"},
{file = "openai-1.35.6-py3-none-any.whl", hash = "sha256:c2bfa599445a2d6010adc7954476c2dc64e1aa8dad02ef29e0f31b9a887c1d02"},
{file = "openai-1.35.6.tar.gz", hash = "sha256:c5958617048a2d777d2b96050fd69ae6721bdffbf59967698694223cc092abd9"},
]
[package.dependencies]

View File

@@ -550,7 +550,7 @@ pip install pymsgbox==1.0.9
----------
OBSERVATION:
Package installed successfully
[Package installed successfully]
[Command -1 finished with exit code 0]
ENVIRONMENT REMINDER: You have 8 turns left to complete the task.

View File

@@ -550,7 +550,7 @@ pip install pymsgbox==1.0.9
----------
OBSERVATION:
Package installed successfully
[Package installed successfully]
[Command -1 finished with exit code 0]
----------

View File

@@ -550,7 +550,7 @@ pip install pymsgbox==1.0.9
----------
OBSERVATION:
Package installed successfully
[Package installed successfully]
[Command -1 finished with exit code 0]
----------

View File

@@ -301,26 +301,69 @@ def _test_sandbox_jupyter_agentskills_fileop_pwd_impl(box):
print(output)
assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
exit_code, output = box.execute('echo "create_file(\'a.txt\')" | execute_cli')
exit_code, output = box.execute('echo "create_file(\'hello.py\')" | execute_cli')
print(output)
assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
assert output.strip().split('\r\n') == (
'[File: /workspace/a.txt (1 lines total)]\r\n' '1|\r\n' '[File a.txt created.]'
'[File: /workspace/hello.py (1 lines total)]\r\n'
'1|\r\n'
'[File hello.py created.]'
).strip().split('\r\n')
exit_code, output = box.execute('cd test')
print(output)
assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
exit_code, output = box.execute('echo "create_file(\'a.txt\')" | execute_cli')
exit_code, output = box.execute('echo "create_file(\'hello.py\')" | execute_cli')
print(output)
assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
assert output.strip().split('\r\n') == (
'[File: /workspace/test/a.txt (1 lines total)]\r\n'
'[File: /workspace/test/hello.py (1 lines total)]\r\n'
'1|\r\n'
'[File a.txt created.]'
'[File hello.py created.]'
).strip().split('\r\n')
if config.enable_auto_lint:
# edit file, but make a mistake in indentation
exit_code, output = box.execute(
'echo "edit_file(\'hello.py\', 1, 1, \' print(\\"hello world\\")\')" | execute_cli'
)
print(output)
assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
assert output.strip().split('\r\n') == (
"""
[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]
ERRORS:
hello.py:1:3: E999 IndentationError: unexpected indent
[This is how your edit would have looked if applied]
-------------------------------------------------
1| print("hello world")
-------------------------------------------------
[This is the original code before your edit]
-------------------------------------------------
1|
-------------------------------------------------
Your changes have NOT been applied. Please fix your edit command and try again.
You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.
DO NOT re-run the same failed edit command. Running it again will lead to the same error.
"""
).strip().split('\n')
# edit file with correct indentation
exit_code, output = box.execute(
'echo "edit_file(\'hello.py\', 1, 1, \'print(\\"hello world\\")\')" | execute_cli'
)
print(output)
assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
assert output.strip().split('\r\n') == (
"""
[File: /workspace/test/hello.py (1 lines total after edit)]
1|print("hello world")
[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
"""
).strip().split('\n')
exit_code, output = box.execute('rm -rf /workspace/*')
assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
box.close()
@@ -330,9 +373,10 @@ def test_sandbox_jupyter_agentskills_fileop_pwd(temp_dir):
# get a temporary directory
with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
config, 'workspace_mount_path', new=temp_dir
), patch.object(config, 'run_as_devin', new='true'), patch.object(
), patch.object(config, 'run_as_devin', new=True), patch.object(
config, 'sandbox_type', new='ssh'
):
), patch.object(config, 'enable_auto_lint', new=True):
assert config.enable_auto_lint
for box in [DockerSSHBox()]:
_test_sandbox_jupyter_agentskills_fileop_pwd_impl(box)
@@ -346,8 +390,11 @@ def test_agnostic_sandbox_jupyter_agentskills_fileop_pwd(temp_dir):
# get a temporary directory
with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
config, 'workspace_mount_path', new=temp_dir
), patch.object(config, 'run_as_devin', new='true'), patch.object(
), patch.object(config, 'run_as_devin', new=True), patch.object(
config, 'sandbox_type', new='ssh'
), patch.object(config, 'sandbox_container_image', new=base_sandbox_image):
), patch.object(
config, 'sandbox_container_image', new=base_sandbox_image
), patch.object(config, 'enable_auto_lint', new=False):
assert not config.enable_auto_lint
for box in [DockerSSHBox()]:
_test_sandbox_jupyter_agentskills_fileop_pwd_impl(box)