mirror of https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00

Merge commit 'f3b2085f9b31af0b3f24ee9a3269525c37ff63b3' into xw/fix-remote-runtime

This commit is contained in: commit 74f3ac792f

.github/workflows/ghcr_runtime.yml (vendored, 76 lines changed)
@@ -38,10 +38,6 @@ jobs:
         base_image:
           - image: 'nikolaik/python-nodejs:python3.11-nodejs22'
             tag: nikolaik
-          - image: 'python:3.11-bookworm'
-            tag: python
-          - image: 'node:22-bookworm'
-            tag: node
     steps:
       - name: Checkout
        uses: actions/checkout@v4
@@ -70,31 +66,39 @@ jobs:
       - name: Set up Docker Buildx
         id: buildx
         uses: docker/setup-buildx-action@v3
+      - name: Install poetry via pipx
+        run: pipx install poetry
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
           python-version: '3.11'
-          cache: 'poetry'
+      - name: Cache Poetry dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/pypoetry
+            ~/.virtualenvs
+          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-
-      - name: Install poetry via pipx
-        run: pipx install poetry
       - name: Install Python dependencies using Poetry
         run: make install-python-dependencies
       - name: Create source distribution and Dockerfile
         run: poetry run python3 openhands/runtime/utils/runtime_build.py --base_image ${{ matrix.base_image.image }} --build_folder containers/runtime --force_rebuild
       - name: Build and push runtime image ${{ matrix.base_image.image }}
-        if: "!github.event.pull_request.head.repo.fork"
+        if: github.event.pull_request.head.repo.fork != true
         run: |
           ./containers/build.sh runtime ${{ github.repository_owner }} --push ${{ matrix.base_image.tag }}
       # Forked repos can't push to GHCR, so we need to upload the image as an artifact
       - name: Build runtime image ${{ matrix.base_image.image }} for fork
-        if: "github.event.pull_request.head.repo.fork"
+        if: github.event.pull_request.head.repo.fork
         uses: docker/build-push-action@v6
         with:
           tags: ghcr.io/all-hands-ai/runtime:${{ github.sha }}-${{ matrix.base_image.tag }}
           outputs: type=docker,dest=/tmp/runtime-${{ matrix.base_image.tag }}.tar
           context: containers/runtime
       - name: Upload runtime image for fork
-        if: "github.event.pull_request.head.repo.fork"
+        if: github.event.pull_request.head.repo.fork
         uses: actions/upload-artifact@v4
         with:
           name: runtime-${{ matrix.base_image.tag }}
@@ -103,11 +107,12 @@ jobs:
   # Run unit tests with the EventStream runtime Docker images
   test_runtime:
     name: Test Runtime
-    runs-on: ubuntu-latest
+    needs: [ghcr_build_runtime]
+    runs-on: ubuntu-latest
     strategy:
       fail-fast: false
       matrix:
-        base_image: ['nikolaik', 'python', 'node']
+        base_image: ['nikolaik']
     steps:
       - uses: actions/checkout@v4
       - name: Free Disk Space (Ubuntu)
@@ -121,26 +126,41 @@ jobs:
           swap-storage: true
       # Forked repos can't push to GHCR, so we need to download the image as an artifact
       - name: Download runtime image for fork
-        if: "github.event.pull_request.head.repo.fork"
+        if: github.event.pull_request.head.repo.fork
         uses: actions/download-artifact@v4
         with:
           name: runtime-${{ matrix.base_image }}
           path: /tmp
       - name: Load runtime image for fork
-        if: "github.event.pull_request.head.repo.fork"
+        if: github.event.pull_request.head.repo.fork
         run: |
           docker load --input /tmp/runtime-${{ matrix.base_image }}.tar
+      - name: Install poetry via pipx
+        run: pipx install poetry
+      - name: Cache Poetry dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/pypoetry
+            ~/.virtualenvs
+          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
           python-version: '3.11'
-          cache: 'poetry'
-      - name: Install poetry via pipx
-        run: pipx install poetry
       - name: Install Python dependencies using Poetry
         run: make install-python-dependencies
       - name: Run runtime tests
         run: |
+          # We install pytest-xdist in order to run tests across CPUs. However, tests start to fail when we run
+          # them across more than 2 CPUs for some reason
+          poetry run pip install pytest-xdist
+
+          # Install to be able to retry on failures for flaky tests
+          poetry run pip install pytest-rerunfailures
+
           image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }}
           image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')

@@ -148,7 +168,7 @@ jobs:
           SANDBOX_USER_ID=$(id -u) \
           SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
           TEST_IN_CI=true \
-          poetry run pytest --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
+          poetry run pytest -n 2 --reruns 2 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v4
         env:
@@ -162,27 +182,35 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        base_image: ['nikolaik', 'python', 'node']
+        base_image: ['nikolaik']
     steps:
       - uses: actions/checkout@v4
       # Forked repos can't push to GHCR, so we need to download the image as an artifact
       - name: Download runtime image for fork
-        if: "github.event.pull_request.head.repo.fork"
+        if: github.event.pull_request.head.repo.fork
         uses: actions/download-artifact@v4
         with:
           name: runtime-${{ matrix.base_image }}
           path: /tmp
       - name: Load runtime image for fork
-        if: "github.event.pull_request.head.repo.fork"
+        if: github.event.pull_request.head.repo.fork
         run: |
           docker load --input /tmp/runtime-${{ matrix.base_image }}.tar
+      - name: Install poetry via pipx
+        run: pipx install poetry
+      - name: Cache Poetry dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/pypoetry
+            ~/.virtualenvs
+          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
           python-version: '3.11'
-          cache: 'poetry'
-      - name: Install poetry via pipx
-        run: pipx install poetry
       - name: Install Python dependencies using Poetry
         run: make install-python-dependencies
       - name: Run integration tests
.github/workflows/py-unit-tests.yml (vendored, 14 lines changed)
@@ -22,13 +22,21 @@ jobs:
         python-version: ['3.11']
     steps:
       - uses: actions/checkout@v4
+      - name: Install poetry via pipx
+        run: pipx install poetry
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
-          cache: 'poetry'
+      - name: Cache Poetry dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/pypoetry
+            ~/.virtualenvs
+          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-
-      - name: Install poetry via pipx
-        run: pipx install poetry
       - name: Install Python dependencies using Poetry
         run: poetry install --without evaluation,llama-index
       - name: Install & Start Docker
@@ -3,6 +3,23 @@ name: Regenerate Integration Tests

 on:
   workflow_dispatch:
+    inputs:
+      debug:
+        description: 'Enable debug mode'
+        type: boolean
+        default: true
+      log_to_file:
+        description: 'Enable logging to file'
+        type: boolean
+        default: true
+      force_regenerate_tests:
+        description: 'Force regeneration of tests'
+        type: boolean
+        default: false
+      force_use_llm:
+        description: 'Force use of LLM'
+        type: boolean
+        default: false

 jobs:
   regenerate_integration_tests:
@@ -12,21 +29,32 @@ jobs:
     steps:
       - name: Checkout repository
        uses: actions/checkout@v4
+      - name: Install poetry via pipx
+        run: pipx install poetry
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: ${{ matrix.python-version }}
-          cache: 'poetry'
+          python-version: "3.11"
+      - name: Cache Poetry dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/pypoetry
+            ~/.virtualenvs
+          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-
-      - name: Install poetry via pipx
-        run: pipx install poetry
       - name: Install Python dependencies using Poetry
-        run: poetry install --without evaluation,llama-index
+        run: make install-python-dependencies
       - name: Build Environment
         run: make build

       - name: Regenerate integration tests
-        run: ./tests/integration/regenerate.sh
+        run: |
+          DEBUG=${{ inputs.debug }} \
+          LOG_TO_FILE=${{ inputs.log_to_file }} \
+          FORCE_REGENERATE_TESTS=${{ inputs.force_regenerate_tests }} \
+          FORCE_USE_LLM=${{ inputs.force_use_llm }} \
+          ./tests/integration/regenerate.sh
       - name: Commit changes
         run: |
           if git diff --quiet --exit-code; then

@@ -37,5 +65,6 @@ jobs:
           git config --global user.name 'github-actions[bot]'
           git config --global user.email 'github-actions[bot]@users.noreply.github.com'
           git add .
-          git commit -m "Regenerate integration tests"
+          # run it twice in case pre-commit makes changes
+          git commit -am "Regenerate integration tests" || git commit -am "Regenerate integration tests"
           git push
README.md (104 lines changed)
@@ -1,72 +1,49 @@
 <a name="readme-top"></a>

-<!--
-*** Thanks for checking out the Best-README-Template. If you have a suggestion
-*** that would make this better, please fork the repo and create a pull request
-*** or simply open an issue with the tag "enhancement".
-*** Don't forget to give the project a star!
-*** Thanks again! Now go create something AMAZING! :D
--->
+<div align="center">
+  <img src="./docs/static/img/logo.png" alt="Logo" width="200">
+  <h1 align="center">OpenHands: Code Less, Make More</h1>
+</div>

-<!-- PROJECT SHIELDS -->
-<!--
-*** I'm using markdown "reference style" links for readability.
-*** Reference links are enclosed in brackets [ ] instead of parentheses ( ).
-*** See the bottom of this document for the declaration of the reference variables
-*** for contributors-url, forks-url, etc. This is an optional, concise syntax you may use.
-*** https://www.markdownguide.org/basic-syntax/#reference-style-links
--->

 <p align="center">
   <a href="https://trendshift.io/repositories/8648">
     <img src="https://trendshift.io/api/badge/repositories/8648" alt="GitHub Trend" style="display: block; margin: auto;">
   </a>
 </p>

 <div align="center">
   <a href="https://github.com/All-Hands-AI/OpenHands/graphs/contributors"><img src="https://img.shields.io/github/contributors/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Contributors"></a>
   <a href="https://github.com/All-Hands-AI/OpenHands/network/members"><img src="https://img.shields.io/github/forks/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Forks"></a>
   <a href="https://github.com/All-Hands-AI/OpenHands/stargazers"><img src="https://img.shields.io/github/stars/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Stargazers"></a>
   <a href="https://github.com/All-Hands-AI/OpenHands/issues"><img src="https://img.shields.io/github/issues/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Issues"></a>
+  <a href="https://codecov.io/github/All-Hands-AI/OpenHands?branch=main"><img alt="CodeCov" src="https://img.shields.io/codecov/c/github/All-Hands-AI/OpenHands?style=for-the-badge&color=blue"></a>
   <a href="https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE"><img src="https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="MIT License"></a>
-  <a href="https://github.com/All-Hands-AI/OpenHands/blob/main/CREDITS.md"><img src="https://img.shields.io/badge/Project-Credits-blue?style=for-the-badge&color=blue" alt="Credits"></a>
   <br/>
   <a href="https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community"></a>
   <a href="https://discord.gg/ESHStjSjD4"><img src="https://img.shields.io/badge/Discord-Join%20Us-purple?logo=discord&logoColor=white&style=for-the-badge" alt="Join our Discord community"></a>
-  <a href="https://codecov.io/github/All-Hands-AI/OpenHands?branch=main"><img alt="CodeCov" src="https://img.shields.io/codecov/c/github/All-Hands-AI/OpenHands?style=for-the-badge"></a>
-</div>
-
-<!-- PROJECT LOGO -->
-<div align="center">
-  <img src="./docs/static/img/logo.png" alt="Logo" width="200" height="200">
-  <h1 align="center">OpenHands: Code Less, Make More</h1>
-  <a href="https://docs.all-hands.dev/modules/usage/intro"><img src="https://img.shields.io/badge/Documentation-OpenHands-blue?logo=googledocs&logoColor=white&style=for-the-badge" alt="Check out the documentation"></a>
-  <a href="https://arxiv.org/abs/2407.16741"><img src="https://img.shields.io/badge/Paper-%20on%20Arxiv-red?logo=arxiv&style=for-the-badge" alt="Paper on Arxiv"></a>
+  <a href="https://github.com/All-Hands-AI/OpenHands/blob/main/CREDITS.md"><img src="https://img.shields.io/badge/Project-Credits-blue?style=for-the-badge&color=FFE165&logo=github&logoColor=white" alt="Credits"></a>
   <br/>
-  <a href="https://huggingface.co/spaces/OpenDevin/evaluation"><img src="https://img.shields.io/badge/Evaluation-Benchmark%20on%20HF%20Space-green?logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark"></a>
+  <a href="https://docs.all-hands.dev/modules/usage/getting-started"><img src="https://img.shields.io/badge/Documentation-000?logo=googledocs&logoColor=FFE165&style=for-the-badge" alt="Check out the documentation"></a>
+  <a href="https://arxiv.org/abs/2407.16741"><img src="https://img.shields.io/badge/Paper%20on%20Arxiv-000?logoColor=FFE165&logo=arxiv&style=for-the-badge" alt="Paper on Arxiv"></a>
+  <a href="https://huggingface.co/spaces/OpenDevin/evaluation"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score"></a>
-  <hr>
 </div>
 <hr>

-Welcome to OpenHands (formerly OpenDevin), a platform for software development agents powered by AI.
+Welcome to OpenHands, a platform for autonomous software engineers, powered by AI and LLMs (previously called "OpenDevin").

-OpenHands agents can do anything a human developer can: modify code, run commands, browse the web,
-call APIs, and yes—even copy code snippets from StackOverflow.
+OpenHands agents collaborate with human developers to write code, fix bugs, and ship features.
+Learn more at [docs.all-hands.dev](https://docs.all-hands.dev), or jump to the [Quick Start](#-quick-start).

 

-## ⚡ Getting Started
-OpenHands works best with Docker version 26.0.0+ (Docker Desktop 4.31.0+).
-You must be using Linux, Mac OS, or WSL on Windows.
+## ⚡ Quick Start

-To start OpenHands in a docker container, run the following commands in your terminal:
+The easiest way to run OpenHands is in Docker. You can change `WORKSPACE_BASE` below to
+point OpenHands to existing code that you'd like to modify.

-> [!WARNING]
-> When you run the following command, files in `./workspace` may be modified or deleted.
+See the [Getting Started](https://docs.all-hands.dev/modules/usage/getting-started) guide for
+system requirements and more information.

 ```bash
-WORKSPACE_BASE=$(pwd)/workspace
-docker run -it \
-    --pull=always \
+export WORKSPACE_BASE=$(pwd)/workspace
+
+docker run -it --pull=always \
     -e SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.9-nikolaik \
     -e SANDBOX_USER_ID=$(id -u) \
     -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
     -v $WORKSPACE_BASE:/opt/workspace_base \
@@ -77,29 +54,23 @@ docker run -it \
     ghcr.io/all-hands-ai/openhands:0.9
 ```

-> [!NOTE]
-> This command pulls the `0.9` tag, which represents the most recent stable release of OpenHands. You have other options as well:
-> - For a specific release version, use `ghcr.io/all-hands-ai/openhands:<OpenHands_version>` (replace <OpenHands_version> with the desired version number).
-> - For the most up-to-date development version, use `ghcr.io/all-hands-ai/openhands:main`. This version may be **(unstable!)** and is recommended for testing or development purposes only.
->
-> Choose the tag that best suits your needs based on stability requirements and desired features.
+You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!

-You'll find OpenHands running at [http://localhost:3000](http://localhost:3000) with access to `./workspace`. To have OpenHands operate on your code, place it in `./workspace`.
-OpenHands will only have access to this workspace folder. The rest of your system will not be affected as it runs in a secured docker sandbox.
+You can also run OpenHands in a scriptable [headless mode](https://docs.all-hands.dev/modules/usage/how-to/headless-mode),
+or as an [interactive CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode).

-Upon opening OpenHands, you must select the appropriate `Model` and enter the `API Key` within the settings that should pop up automatically. These can be set at any time by selecting
-the `Settings` button (gear icon) in the UI. If the required `Model` does not exist in the list, you can manually enter it in the text box.
+Visit [Getting Started](https://docs.all-hands.dev/modules/usage/getting-started) for more information and setup instructions.

-For the development workflow, see [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md).
+If you want to modify the OpenHands source code, check out [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md).

-Are you having trouble? Check out our [Troubleshooting Guide](https://docs.all-hands.dev/modules/usage/troubleshooting).
+Having issues? The [Troubleshooting Guide](https://docs.all-hands.dev/modules/usage/troubleshooting) can help.

-## 🚀 Documentation
+## 📖 Documentation

 To learn more about the project, and for tips on using OpenHands,
-**check out our [documentation](https://docs.all-hands.dev/modules/usage/intro)**.
+**check out our [documentation](https://docs.all-hands.dev/modules/usage/getting-started)**.

-There you'll find resources on how to use different LLM providers (like ollama and Anthropic's Claude),
+There you'll find resources on how to use different LLM providers,
 troubleshooting resources, and advanced configuration options.

 ## 🤝 How to Contribute
@@ -134,17 +105,6 @@ Let's make software engineering better together!

 Distributed under the MIT License. See [`LICENSE`](./LICENSE) for more information.

-[contributors-shield]: https://img.shields.io/github/contributors/All-Hands-AI/OpenHands?style=for-the-badge
-[contributors-url]: https://github.com/All-Hands-AI/OpenHands/graphs/contributors
-[forks-shield]: https://img.shields.io/github/forks/All-Hands-AI/OpenHands?style=for-the-badge
-[forks-url]: https://github.com/All-Hands-AI/OpenHands/network/members
-[stars-shield]: https://img.shields.io/github/stars/All-Hands-AI/OpenHands?style=for-the-badge
-[stars-url]: https://github.com/All-Hands-AI/OpenHands/stargazers
-[issues-shield]: https://img.shields.io/github/issues/All-Hands-AI/OpenHands?style=for-the-badge
-[issues-url]: https://github.com/All-Hands-AI/OpenHands/issues
-[license-shield]: https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge
-[license-url]: https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE

 ## 🙏 Acknowledgements

 OpenHands is built by a large number of contributors, and every contribution is greatly appreciated! We also build upon other open source projects, and we are deeply thankful for their work.
@@ -206,9 +206,11 @@ class BrowsingAgent(Agent):

         prompt = get_prompt(error_prefix, cur_axtree_txt, prev_action_str)
         messages.append(Message(role='user', content=[TextContent(text=prompt)]))
         logger.debug(prompt)
+
+        flat_messages = self.llm.format_messages_for_llm(messages)
+
         response = self.llm.completion(
-            messages=[message.model_dump() for message in messages],
+            messages=flat_messages,
             temperature=0.0,
             stop=[')```', ')\n```'],
         )
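The `format_messages_for_llm` helper introduced above is not defined anywhere in this diff. As a rough sketch: the call sites it replaces all passed `[message.model_dump() for message in messages]`, and the MicroAgent call site further below hands it a single `Message`, so a minimal equivalent (hypothetical; the real helper presumably lives on the `LLM` class and may also handle vision and caching fields) could be:

```python
from openhands.core.message import Message


def format_messages_for_llm(messages: Message | list[Message]) -> list[dict]:
    """Flatten Message objects into the role/content dicts that litellm expects."""
    if isinstance(messages, Message):
        messages = [messages]  # some call sites pass a single Message
    return [message.model_dump() for message in messages]
```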
@@ -354,7 +354,7 @@ and executed by a program, make sure to follow the formatting instructions.
         self._prompt += '\n'.join(
             [
                 f"""\
-    - [{msg['role']}] {msg['message']}"""
+    - [{msg['role']}], {msg['message']}"""
                 for msg in chat_messages
             ]
         )
@@ -24,9 +24,9 @@ class BrowsingResponseParser(ResponseParser):
         if action_str is None:
             return ''
         action_str = action_str.strip()
-        if not action_str.endswith('```'):
+        if action_str and not action_str.endswith('```'):
             action_str = action_str + ')```'
-        logger.info(action_str)
+        logger.debug(action_str)
         return action_str

     def parse_action(self, action_str: str) -> Action:
@@ -1,9 +1,11 @@
 import os
+from itertools import islice

 from agenthub.codeact_agent.action_parser import CodeActResponseParser
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
 from openhands.core.config import AgentConfig
+from openhands.core.logger import openhands_logger as logger
 from openhands.core.message import ImageContent, Message, TextContent
 from openhands.events.action import (
     Action,
@@ -117,7 +119,11 @@ class CodeActAgent(Agent):
         ):
             content = [TextContent(text=self.action_to_str(action))]

-            if isinstance(action, MessageAction) and action.images_urls:
+            if (
+                self.llm.vision_is_active()
+                and isinstance(action, MessageAction)
+                and action.images_urls
+            ):
                 content.append(ImageContent(image_urls=action.images_urls))

         return Message(
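`vision_is_active()` is referenced by these new guards but not defined in the diff. Given the `disable_vision` option this commit adds to `config.template.toml` below, a plausible sketch (an assumption, not the actual `LLM`-class implementation) combines the user setting with the model capability reported by litellm:

```python
import litellm


def vision_is_active(config) -> bool:
    """Hypothetical check: vision is off when disabled in config,
    otherwise defer to litellm's capability lookup for the model."""
    if getattr(config, 'disable_vision', False):
        return False
    return litellm.supports_vision(config.model)
```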
@@ -127,14 +133,15 @@ class CodeActAgent(Agent):

     def get_observation_message(self, obs: Observation) -> Message | None:
         max_message_chars = self.llm.config.max_message_chars
+        obs_prefix = 'OBSERVATION:\n'
         if isinstance(obs, CmdOutputObservation):
-            text = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars)
+            text = obs_prefix + truncate_content(obs.content, max_message_chars)
             text += (
                 f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]'
             )
             return Message(role='user', content=[TextContent(text=text)])
         elif isinstance(obs, IPythonRunCellObservation):
-            text = 'OBSERVATION:\n' + obs.content
+            text = obs_prefix + obs.content
             # replace base64 images with a placeholder
             splitted = text.split('\n')
             for i, line in enumerate(splitted):
@@ -146,12 +153,10 @@ class CodeActAgent(Agent):
             text = truncate_content(text, max_message_chars)
             return Message(role='user', content=[TextContent(text=text)])
         elif isinstance(obs, AgentDelegateObservation):
-            text = 'OBSERVATION:\n' + truncate_content(
-                str(obs.outputs), max_message_chars
-            )
+            text = obs_prefix + truncate_content(str(obs.outputs), max_message_chars)
             return Message(role='user', content=[TextContent(text=text)])
         elif isinstance(obs, ErrorObservation):
-            text = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars)
+            text = obs_prefix + truncate_content(obs.content, max_message_chars)
             text += '\n[Error occurred in processing last action]'
             return Message(role='user', content=[TextContent(text=text)])
         elif isinstance(obs, UserRejectObservation):
@@ -188,9 +193,8 @@ class CodeActAgent(Agent):

         # prepare what we want to send to the LLM
         messages = self._get_messages(state)
-
         params = {
-            'messages': [message.model_dump() for message in messages],
+            'messages': self.llm.format_messages_for_llm(messages),
             'stop': [
                 '</execute_ipython>',
                 '</execute_bash>',
@@ -199,12 +203,19 @@ class CodeActAgent(Agent):
             'temperature': 0.0,
         }

-        if self.llm.supports_prompt_caching:
+        if self.llm.is_caching_prompt_active():
             params['extra_headers'] = {
                 'anthropic-beta': 'prompt-caching-2024-07-31',
             }

-        response = self.llm.completion(**params)
+        try:
+            response = self.llm.completion(**params)
+        except Exception as e:
+            logger.error(f'{e}')
+            error_message = '{}: {}'.format(type(e).__name__, str(e).split('\n')[0])
+            return AgentFinishAction(
+                thought=f'Agent encountered an error while processing the last action.\nError: {error_message}\nPlease try again.'
+            )

         return self.action_parser.parse(response)
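`is_caching_prompt_active()` is likewise only renamed here, not shown. Since the commit also adds a `caching_prompt` config flag (see the `config.template.toml` hunk below), the check plausibly requires both the user opt-in and a model that supports Anthropic prompt caching; a sketch under those assumptions:

```python
# Hypothetical sketch: the real list of caching-capable models and the actual
# implementation live in the openhands LLM class and may differ.
CACHING_PROMPT_MODELS = ('claude-3-5-sonnet-20240620',)  # illustrative only


def is_caching_prompt_active(config) -> bool:
    """True only when the user enabled caching_prompt and the model supports it."""
    return bool(getattr(config, 'caching_prompt', False)) and any(
        name in config.model for name in CACHING_PROMPT_MODELS
    )
```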
@@ -215,7 +226,7 @@ class CodeActAgent(Agent):
                 content=[
                     TextContent(
                         text=self.prompt_manager.system_message,
-                        cache_prompt=self.llm.supports_prompt_caching,  # Cache system prompt
+                        cache_prompt=self.llm.is_caching_prompt_active(),  # Cache system prompt
                     )
                 ],
             ),

@@ -224,7 +235,7 @@ class CodeActAgent(Agent):
                 content=[
                     TextContent(
                         text=self.prompt_manager.initial_user_message,
-                        cache_prompt=self.llm.supports_prompt_caching,  # if the user asks the same query,
+                        cache_prompt=self.llm.is_caching_prompt_active(),  # if the user asks the same query,
                     )
                 ],
             ),
@@ -243,14 +254,14 @@ class CodeActAgent(Agent):
             if message:
                 # handle error if the message is the SAME role as the previous message
                 # litellm.exceptions.BadRequestError: litellm.BadRequestError: OpenAIException - Error code: 400 - {'detail': 'Only supports u/a/u/a/u...'}
-                # there should not have two consecutive messages from the same role
+                # there shouldn't be two consecutive messages from the same role
                 if messages and messages[-1].role == message.role:
                     messages[-1].content.extend(message.content)
                 else:
                     messages.append(message)

         # Add caching to the last 2 user messages
-        if self.llm.supports_prompt_caching:
+        if self.llm.is_caching_prompt_active():
             user_turns_processed = 0
             for message in reversed(messages):
                 if message.role == 'user' and user_turns_processed < 2:
@@ -259,14 +270,17 @@ class CodeActAgent(Agent):
                     ].cache_prompt = True  # Last item inside the message content
                     user_turns_processed += 1

-        # the latest user message is important:
+        # The latest user message is important:
         # we want to remind the agent of the environment constraints
         latest_user_message = next(
-            (
-                m
-                for m in reversed(messages)
-                if m.role == 'user'
-                and any(isinstance(c, TextContent) for c in m.content)
+            islice(
+                (
+                    m
+                    for m in reversed(messages)
+                    if m.role == 'user'
+                    and any(isinstance(c, TextContent) for c in m.content)
+                ),
+                1,
             ),
             None,
         )
@@ -5,8 +5,13 @@ The assistant can use a Python environment with <execute_ipython>, e.g.:
 print("Hello World!")
 </execute_ipython>
 The assistant can execute bash commands wrapped with <execute_bash>, e.g. <execute_bash> ls </execute_bash>.
-The assistant is not allowed to run interactive commands. For commands that may run indefinitely,
-the output should be redirected to a file and the command run in the background, e.g. <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
+If a bash command returns exit code `-1`, this means the process is not yet finished.
+The assistant must then send a second <execute_bash>. The second <execute_bash> can be empty
+(which will retrieve any additional logs), or it can contain text to be sent to STDIN of the running process,
+or it can contain the text `ctrl+c` to interrupt the process.
+
+For commands that may run indefinitely, the output should be redirected to a file and the command run
+in the background, e.g. <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
+If a command execution result says "Command timed out. Sending SIGINT to the process",
+the assistant should retry running the command in the background.
 {% endset %}
@@ -94,7 +94,11 @@ class CodeActSWEAgent(Agent):
         ):
             content = [TextContent(text=self.action_to_str(action))]

-            if isinstance(action, MessageAction) and action.images_urls:
+            if (
+                self.llm.vision_is_active()
+                and isinstance(action, MessageAction)
+                and action.images_urls
+            ):
                 content.append(ImageContent(image_urls=action.images_urls))

         return Message(
@@ -156,9 +160,8 @@ class CodeActSWEAgent(Agent):

         # prepare what we want to send to the LLM
         messages: list[Message] = self._get_messages(state)
-
         response = self.llm.completion(
-            messages=[message.model_dump() for message in messages],
+            messages=self.llm.format_messages_for_llm(messages),
             stop=[
                 '</execute_ipython>',
                 '</execute_bash>',
@@ -73,10 +73,13 @@ class MicroAgent(Agent):
             latest_user_message=last_user_message,
         )
         content = [TextContent(text=prompt)]
-        if last_image_urls:
+        if self.llm.vision_is_active() and last_image_urls:
             content.append(ImageContent(image_urls=last_image_urls))
         message = Message(role='user', content=content)
-        resp = self.llm.completion(messages=[message.model_dump()])
+        resp = self.llm.completion(
+            messages=self.llm.format_messages_for_llm(message),
+            temperature=0.0,
+        )
         action_resp = resp['choices'][0]['message']['content']
         action = parse_response(action_resp)
         return action
@@ -46,8 +46,8 @@ class PlannerAgent(Agent):
             state, self.llm.config.max_message_chars
         )
         content = [TextContent(text=prompt)]
-        if image_urls:
+        if self.llm.vision_is_active() and image_urls:
             content.append(ImageContent(image_urls=image_urls))
         message = Message(role='user', content=content)
-        resp = self.llm.completion(messages=[message.model_dump()])
+        resp = self.llm.completion(messages=self.llm.format_messages_for_llm(message))
         return self.response_parser.parse(resp)
@@ -126,21 +126,29 @@ embedding_model = ""
 # Model to use
 model = "gpt-4o"

-# Number of retries to attempt
-#num_retries = 5
+# Number of retries to attempt when an operation fails with the LLM.
+# Increase this value to allow more attempts before giving up
+#num_retries = 8

-# Retry maximum wait time
-#retry_max_wait = 60
+# Maximum wait time (in seconds) between retry attempts
+# This caps the exponential backoff to prevent excessively long waits
+#retry_max_wait = 120

-# Retry minimum wait time
-#retry_min_wait = 3
+# Minimum wait time (in seconds) between retry attempts
+# This sets the initial delay before the first retry
+#retry_min_wait = 15

-# Retry multiplier for exponential backoff
+# Multiplier for exponential backoff calculation
+# The wait time increases by this factor after each failed attempt
+# A value of 2.0 means each retry waits twice as long as the previous one
 #retry_multiplier = 2.0

 # Drop any unmapped (unsupported) params without causing an exception
 #drop_params = false

+# Using the prompt caching feature provided by the LLM
+#caching_prompt = false
+
 # Base URL for the OLLAMA API
 #ollama_base_url = ""
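To make the new retry defaults concrete: assuming a standard exponential-backoff curve of the form `multiplier * 2**(attempt - 1)`, clamped to `[retry_min_wait, retry_max_wait]` (an assumption; the exact formula lives in the retry logic, not in this template), the waits between attempts would grow like this:

```python
def retry_wait(attempt: int, multiplier: float = 2.0,
               min_wait: int = 15, max_wait: int = 120) -> float:
    """Assumed backoff curve: multiplier * 2**(attempt - 1), clamped to [min_wait, max_wait]."""
    return max(min_wait, min(max_wait, multiplier * 2 ** (attempt - 1)))


print([retry_wait(n) for n in range(1, 9)])
# -> [15, 15, 15, 16.0, 32.0, 64.0, 120, 120]
```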
@@ -153,6 +161,9 @@ model = "gpt-4o"
 # Top p for the API
 #top_p = 0.5

+# If model is vision capable, this option allows to disable image processing (useful for cost reduction).
+#disable_vision = true
+
 [llm.gpt3]
 # API key to use
 api_key = "your-api-key"
@@ -4,8 +4,8 @@ import { themes as prismThemes } from "prism-react-renderer";

 const config: Config = {
   title: "OpenHands",
-  tagline: "An Open Platform for AI Software Developers as Generalist Agents",
-  favicon: "img/logo.png",
+  tagline: "Code Less, Make More",
+  favicon: "img/logo-square.png",

   // Set the production url of your site here
   url: "https://docs.all-hands.dev",
@@ -73,23 +73,28 @@ const config: Config = {
           type: "docSidebar",
           sidebarId: "docsSidebar",
           position: "left",
-          label: "Docs",
+          label: "User Guides",
         },
         {
           type: "docSidebar",
           sidebarId: "apiSidebar",
           position: "left",
-          label: "Codebase",
+          label: "Python API",
         },
+        {
+          type: 'localeDropdown',
+          position: 'left',
+        },
+        {
+          href: "https://all-hands.dev",
+          label: "Company",
+          position: "right",
+        },
         {
           href: "https://github.com/All-Hands-AI/OpenHands",
           label: "GitHub",
           position: "right",
         },
-        {
-          type: 'localeDropdown',
-          position: 'left',
-        },
       ],
     },
     prism: {
@@ -41,4 +41,4 @@ ne peut être aussi puissant que les modèles qui le pilotent -- heureusement, l

 Certains LLM ont des limites de taux et peuvent nécessiter des réessais. OpenHands réessaiera automatiquement les demandes s'il reçoit une erreur 429 ou une erreur de connexion API.
 Vous pouvez définir les variables d'environnement `LLM_NUM_RETRIES`, `LLM_RETRY_MIN_WAIT`, `LLM_RETRY_MAX_WAIT` pour contrôler le nombre de réessais et le temps entre les réessais.
-Par défaut, `LLM_NUM_RETRIES` est 5 et `LLM_RETRY_MIN_WAIT`, `LLM_RETRY_MAX_WAIT` sont respectivement de 3 secondes et 60 secondes.
+Par défaut, `LLM_NUM_RETRIES` est 8 et `LLM_RETRY_MIN_WAIT`, `LLM_RETRY_MAX_WAIT` sont respectivement de 15 secondes et 120 secondes.
@@ -43,4 +43,4 @@ OpenHands 将向你配置的 LLM 发出许多提示。大多数这些 LLM 都是

 一些 LLM 有速率限制,可能需要重试操作。OpenHands 会在收到 429 错误或 API 连接错误时自动重试请求。
 你可以设置 `LLM_NUM_RETRIES`,`LLM_RETRY_MIN_WAIT`,`LLM_RETRY_MAX_WAIT` 环境变量来控制重试次数和重试之间的时间。
-默认情况下,`LLM_NUM_RETRIES` 为 5,`LLM_RETRY_MIN_WAIT` 和 `LLM_RETRY_MAX_WAIT` 分别为 3 秒和 60 秒。
+默认情况下,`LLM_NUM_RETRIES` 为 8,`LLM_RETRY_MIN_WAIT` 和 `LLM_RETRY_MAX_WAIT` 分别为 15 秒和 120 秒。
docs/modules/usage/getting-started.md (new file, 60 lines)
@@ -0,0 +1,60 @@
---
sidebar_position: 2
---

# Getting Started

## System Requirements

* Docker version 26.0.0+ or Docker Desktop 4.31.0+
* You must be using Linux or Mac OS
* If you are on Windows, you must use [WSL](https://learn.microsoft.com/en-us/windows/wsl/install)

## Installation

The easiest way to run OpenHands is in Docker. You can change `WORKSPACE_BASE` below to point OpenHands to
existing code that you'd like to modify.

```bash
export WORKSPACE_BASE=$(pwd)/workspace

docker run -it --pull=always \
    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.9-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -v $WORKSPACE_BASE:/opt/workspace_base \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -p 3000:3000 \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
    ghcr.io/all-hands-ai/openhands:0.9
```

You can also run OpenHands in a scriptable [headless mode](https://docs.all-hands.dev/modules/usage/how-to/headless-mode),
or as an [interactive CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode).

## Setup

After running the command above, you'll find OpenHands running at [http://localhost:3000](http://localhost:3000).

The agent will have access to the `./workspace` folder to do its work. You can copy existing code here, or change `WORKSPACE_BASE` in the
command to point to an existing folder.

Upon launching OpenHands, you'll see a settings modal. You must select an LLM backend using `Model`, and enter a corresponding `API Key`.
These can be changed at any time by selecting the `Settings` button (gear icon) in the UI.
If the required `Model` does not exist in the list, you can toggle `Use custom model` and manually enter it in the text box.

<img src="/img/settings-screenshot.png" alt="settings-modal" width="340" />

## Versions

The command above pulls the `0.9` tag, which represents the most recent stable release of OpenHands. You have other options as well:
- For a specific release, use `ghcr.io/all-hands-ai/openhands:$VERSION`, replacing $VERSION with the version number.
- We use semver, and release major, minor, and patch tags. So `0.9` will automatically point to the latest `0.9.x` release, and `0` will point to the latest `0.x.x` release.
- For the most up-to-date development version, you can use `ghcr.io/all-hands-ai/openhands:main`. This version is unstable and is recommended for testing or development purposes only.

You can choose the tag that best suits your needs based on stability requirements and desired features.

For the development workflow, see [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md).

Are you having trouble? Check out our [Troubleshooting Guide](https://docs.all-hands.dev/modules/usage/troubleshooting).
docs/modules/usage/how-to/cli-mode.md (new file, 108 lines)
@@ -0,0 +1,108 @@
# CLI Mode

OpenHands can be run in an interactive CLI mode, which allows users to start an interactive session via the command line.

This mode is different from the [headless mode](headless-mode), which is non-interactive and better for scripting.

## With Python

To start an interactive OpenHands session via the command line, follow these steps:

1. Ensure you have followed the [Development setup instructions](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md)

2. Run the following command:

```bash
poetry run python -m openhands.core.cli
```

This command will start an interactive session where you can input tasks and receive responses from OpenHands.

You'll need to be sure to set your model, API key, and other settings via environment variables
[or the `config.toml` file](https://github.com/All-Hands-AI/OpenHands/blob/main/config.template.toml).

## With Docker

To run OpenHands in CLI mode with Docker, follow these steps:

1. Set `WORKSPACE_BASE` to the directory you want OpenHands to edit:

```bash
WORKSPACE_BASE=$(pwd)/workspace
```

2. Set `LLM_MODEL` to the model you want to use:

```bash
LLM_MODEL="anthropic/claude-3-5-sonnet-20240620"
```

3. Set `LLM_API_KEY` to your API key:

```bash
LLM_API_KEY="abcde"
```

4. Run the following Docker command:

```bash
docker run -it \
    --pull=always \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
    -e LLM_MODEL=$LLM_MODEL \
    -v $WORKSPACE_BASE:/opt/workspace_base \
    -v /var/run/docker.sock:/var/run/docker.sock \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
    ghcr.io/all-hands-ai/openhands:0.9 \
    python -m openhands.core.cli
```

This command will start an interactive session in Docker where you can input tasks and receive responses from OpenHands.

## Examples of CLI Commands and Expected Outputs

Here are some examples of CLI commands and their expected outputs:

### Example 1: Simple Task

```bash
How can I help? >> Write a Python script that prints "Hello, World!"
```

Expected Output:

```bash
🤖 Sure! Here is a Python script that prints "Hello, World!":

❯ print("Hello, World!")
```

### Example 2: Bash Command

```bash
How can I help? >> Create a directory named "test_dir"
```

Expected Output:

```bash
🤖 Creating a directory named "test_dir":

❯ mkdir test_dir
```

### Example 3: Error Handling

```bash
How can I help? >> Delete a non-existent file
```

Expected Output:

```bash
🤖 An error occurred. Please try again.
```
@@ -1,26 +1,19 @@
-# Create and Use a Custom Docker Sandbox
+# Custom Sandbox

-The default OpenHands sandbox comes with a [minimal ubuntu configuration](https://github.com/All-Hands-AI/OpenHands/blob/main/containers/sandbox/Dockerfile).
-Your use case may need additional software installed by default.
+The sandbox is where the agent does its work. Instead of running commands directly on your computer
+(which could be dangerous), the agent runs them inside of a Docker container.
+
+The default OpenHands sandbox (`python-nodejs:python3.11-nodejs22`
+from [nikolaik/python-nodejs](https://hub.docker.com/r/nikolaik/python-nodejs)) comes with some packages installed such
+as python and Node.js but your use case may need additional software installed by default.

 There are two ways you can do so:

-1. Use an existing image from docker hub. For instance, if you want to have `nodejs` installed, you can do so by using the `node:20` image
+1. Use an existing image from docker hub
 2. Creating your own custom docker image and using it

 If you want to take the first approach, you can skip the `Create Your Docker Image` section.

-For a more feature-rich environment, you might consider using pre-built images like **[nikolaik/python-nodejs](https://hub.docker.com/r/nikolaik/python-nodejs)**, which comes with both Python and Node.js pre-installed, along with many other useful tools and libraries, like:
-
-- Node.js: 22.x
-- npm: 10.x
-- yarn: stable
-- Python: latest
-- pip: latest
-- pipenv: latest
-- poetry: latest
-- uv: latest
-
 ## Setup

 Make sure you are able to run OpenHands using the [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md) first.
@@ -79,7 +72,7 @@ Run OpenHands by running ```make run``` in the top level directory.

 Navigate to ```localhost:3001``` and check if your desired dependencies are available.

-In the case of the example above, running ```node -v``` in the terminal produces ```v20.15.0```
+In the case of the example above, running ```node -v``` in the terminal produces ```v20.15.0```.

 Congratulations!
@@ -1,4 +1,4 @@
-# Contribute to OpenHands Evaluation Harness
+# Evaluation

 This guide provides an overview of how to integrate your own evaluation benchmark into the OpenHands framework.
@@ -12,7 +12,7 @@ Here's an example configuration file you can use to define and use multiple LLMs
 ```toml
 [llm]
 # IMPORTANT: add your API key here, and set the model to the one you want to evaluate
-model = "gpt-4o-2024-05-13"
+model = "claude-3-5-sonnet-20240620"
 api_key = "sk-XXX"

 [llm.eval_gpt4_1106_preview_llm]
@@ -1,12 +1,9 @@
-# Running in Headless / CLI Mode
+# Headless Mode

-You can run OpenHands via a CLI, without starting the web application. This makes it easy
-to automate tasks with OpenHands. There are 2 main modes of operation:
-
-* **Headless** : Designed for use with scripts
-* **CLI** : Designed for interactive use via a console
-
-As with other modes, the environment is configurable via environment variables or by saving values into [config.toml](https://github.com/All-Hands-AI/OpenHands/blob/main/config.template.toml)
+You can run OpenHands with a single command, without starting the web application.
+This makes it easy to write scripts and automate tasks with OpenHands.
+
+This is different from [CLI Mode](cli-mode), which is interactive, and better for active development.

 ## With Python

@@ -14,34 +11,36 @@ To run OpenHands in headless mode with Python,
 [follow the Development setup instructions](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md),
 and then run:

-### Headless with Python
-
 ```bash
 poetry run python -m openhands.core.main -t "write a bash script that prints hi"
 ```

-### CLI with Python
+You'll need to be sure to set your model, API key, and other settings via environment variables
+[or the `config.toml` file](https://github.com/All-Hands-AI/OpenHands/blob/main/config.template.toml).
+
+## With Docker
+
+1. Set `WORKSPACE_BASE` to the directory you want OpenHands to edit:

 ```bash
-poetry run python -m openhands.core.cli
-
-How can I help? >> write a bash script that prints hi
+WORKSPACE_BASE=$(pwd)/workspace
 ```

-## Headless With Docker
-
-To run OpenHands in headless mode with Docker, run:
+2. Set `LLM_MODEL` to the model you want to use:

 ```bash
-# Set WORKSPACE_BASE to the directory you want OpenHands to edit
-WORKSPACE_BASE=$(pwd)/workspace
+LLM_MODEL="anthropic/claude-3-5-sonnet-20240620"
+```

-# Set LLM_API_KEY to an API key, e.g. for OpenAI or Anthropic
+3. Set `LLM_API_KEY` to your API key:
+
+```bash
 LLM_API_KEY="abcde"
+```

-# Set LLM_MODEL to the model you want to use
-LLM_MODEL="gpt-4o"
+4. Run the following Docker command:

+```bash
 docker run -it \
     --pull=always \
     -e SANDBOX_USER_ID=$(id -u) \
@@ -53,34 +52,5 @@ docker run -it \
     --add-host host.docker.internal:host-gateway \
     --name openhands-app-$(date +%Y%m%d%H%M%S) \
     ghcr.io/all-hands-ai/openhands:0.9 \
-    poetry run python -m openhands.core.main \
-    -t "Write a bash script that prints Hello World"
-```
-
-## CLI With Docker
-
-To run OpenHands in cli mode with Docker, run:
-
-```bash
-# Set WORKSPACE_BASE to the directory you want OpenHands to edit
-WORKSPACE_BASE=$(pwd)/workspace
-
-# Set LLM_API_KEY to an API key, e.g. for OpenAI or Anthropic
-LLM_API_KEY="abcde"
-
-# Set LLM_MODEL to the model you want to use
-LLM_MODEL="gpt-4o"
-
-docker run -it \
-    --pull=always \
-    -e SANDBOX_USER_ID=$(id -u) \
-    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-    -e LLM_API_KEY=$LLM_API_KEY \
-    -e LLM_MODEL=$LLM_MODEL \
-    -v $WORKSPACE_BASE:/opt/workspace_base \
-    -v /var/run/docker.sock:/var/run/docker.sock \
-    --add-host host.docker.internal:host-gateway \
-    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    ghcr.io/all-hands-ai/openhands:0.9 \
-    poetry run python -m openhands.core.cli
+    python -m openhands.core.main -t "write a bash script that prints hi"
 ```
@@ -1,5 +0,0 @@
---
sidebar_position: 6
---

# 🔎 How To Section
@@ -1,6 +1,6 @@
-# Use OpenHands in OpenShift/K8S
+# Kubernetes

-There are different ways this can be accomplished. This guide goes through one possible way:
+There are different ways you might run OpenHands on Kubernetes or OpenShift. This guide goes through one possible way:
 1. Create a PV "as a cluster admin" to map workspace_base data and docker directory to the pod through the worker node
 2. Create a PVC to be able to mount those PVs to the pod
 3. Create a pod which contains two containers; the OpenHands and Sandbox containers
@@ -1,72 +0,0 @@
---
sidebar_position: 1
---

# 💻 OpenHands

OpenHands is an **autonomous AI software engineer** capable of executing complex engineering tasks and collaborating actively with users on software development projects.
This project is fully open-source, so you can use and modify it however you like.

:::tip
Explore the codebase of OpenHands on [GitHub](https://github.com/All-Hands-AI/OpenHands) or join one of our communities!

<a href="https://github.com/All-Hands-AI/OpenHands/graphs/contributors">
  <img
    src="https://img.shields.io/github/contributors/All-Hands-AI/OpenHands?style=for-the-badge"
    alt="Contributors"
  />
</a>
<a href="https://github.com/All-Hands-AI/OpenHands/network/members">
  <img
    src="https://img.shields.io/github/forks/All-Hands-AI/OpenHands?style=for-the-badge"
    alt="Forks"
  />
</a>
<a href="https://github.com/All-Hands-AI/OpenHands/stargazers">
  <img
    src="https://img.shields.io/github/stars/All-Hands-AI/OpenHands?style=for-the-badge"
    alt="Stargazers"
  />
</a>
<a href="https://github.com/All-Hands-AI/OpenHands/issues">
  <img
    src="https://img.shields.io/github/issues/All-Hands-AI/OpenHands?style=for-the-badge"
    alt="Issues"
  />
</a>
<br></br>
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE">
  <img
    src="https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge"
    alt="MIT License"
  />
</a>
<br></br>
<a href="https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA">
  <img
    src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge"
    alt="Join our Slack community"
  />
</a>
<a href="https://discord.gg/ESHStjSjD4">
  <img
    src="https://img.shields.io/badge/Discord-Join%20Us-purple?logo=discord&logoColor=white&style=for-the-badge"
    alt="Join our Discord community"
  />
</a>
:::

## 🛠️ Getting Started

[Check out the getting started guide on Github](https://github.com/All-Hands-AI/OpenHands?tab=readme-ov-file#-getting-started)

[contributors-shield]: https://img.shields.io/github/contributors/All-Hands-AI/OpenHands?style=for-the-badge
[contributors-url]: https://github.com/All-Hands-AI/OpenHands/graphs/contributors
[forks-shield]: https://img.shields.io/github/forks/All-Hands-AI/OpenHands?style=for-the-badge
[forks-url]: https://github.com/All-Hands-AI/OpenHands/network/members
[stars-shield]: https://img.shields.io/github/stars/All-Hands-AI/OpenHands?style=for-the-badge
[stars-url]: https://github.com/All-Hands-AI/OpenHands/stargazers
[issues-shield]: https://img.shields.io/github/issues/All-Hands-AI/OpenHands?style=for-the-badge
[issues-url]: https://github.com/All-Hands-AI/OpenHands/issues
[license-shield]: https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge
[license-url]: https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE
@@ -2,7 +2,7 @@

 ## Completion

-OpenHands uses LiteLLM for completion calls. You can find their documentation on Azure [here](https://docs.litellm.ai/docs/providers/azure)
+OpenHands uses LiteLLM for completion calls. You can find their documentation on Azure [here](https://docs.litellm.ai/docs/providers/azure).

 ### Azure openai configs
@@ -12,7 +12,7 @@ When running the OpenHands Docker image, you'll need to set the following enviro
 LLM_BASE_URL="<azure-api-base-url>"  # e.g. "https://openai-gpt-4-test-v-1.openai.azure.com/"
 LLM_API_KEY="<azure-api-key>"
 LLM_MODEL="azure/<your-gpt-deployment-name>"
-LLM_API_VERSION="<api-version>" # e.g. "2024-02-15-preview"
+LLM_API_VERSION="<api-version>"  # e.g. "2024-02-15-preview"
 ```

 Example:
@@ -31,15 +31,18 @@ docker run -it \
     ghcr.io/all-hands-ai/openhands:main
 ```

-You can set the LLM_MODEL and LLM_API_KEY in the OpenHands UI itself.
+You can also set the model and API key in the OpenHands UI through the Settings.

 :::note
-You can find your ChatGPT deployment name on the deployments page in Azure. It could be the same with the chat model name (e.g. 'GPT4-1106-preview'), by default or initially set, but it doesn't have to be the same. Run openhands, and when you load it in the browser, go to Settings and set model as above: "azure/<your-actual-gpt-deployment-name>". If it's not in the list, enter your own text and save it.
+You can find your ChatGPT deployment name on the deployments page in Azure. It could be the same with the chat model
+name (e.g. 'GPT4-1106-preview'), by default or initially set, but it doesn't have to be the same. Run OpenHands,
+and when you load it in the browser, go to Settings and set model as above: "azure/<your-actual-gpt-deployment-name>".
+If it's not in the list, you can open the Settings modal, switch to "Custom Model", and enter your model name.
 :::

 ## Embeddings

-OpenHands uses llama-index for embeddings. You can find their documentation on Azure [here](https://docs.llamaindex.ai/en/stable/api_reference/embeddings/azure_openai/)
+OpenHands uses llama-index for embeddings. You can find their documentation on Azure [here](https://docs.llamaindex.ai/en/stable/api_reference/embeddings/azure_openai/).

 ### Azure openai configs
@ -50,6 +53,6 @@ When running OpenHands in Docker, set the following environment variables using
|
||||
|
||||
```
|
||||
LLM_EMBEDDING_MODEL="azureopenai"
|
||||
LLM_EMBEDDING_DEPLOYMENT_NAME="<your-embedding-deployment-name>" # e.g. "TextEmbedding...<etc>"
|
||||
LLM_API_VERSION="<api-version>" # e.g. "2024-02-15-preview"
|
||||
LLM_EMBEDDING_DEPLOYMENT_NAME="<your-embedding-deployment-name>" # e.g. "TextEmbedding...<etc>"
|
||||
LLM_API_VERSION="<api-version>" # e.g. "2024-02-15-preview"
|
||||
```
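
For illustration only (a sketch, not part of this commit): the embedding variables above would be passed to the container with `-e`, alongside the other flags shown in the completion example.

```sh
# Hypothetical invocation; the placeholder values must be replaced with your own.
docker run -it \
    -e LLM_EMBEDDING_MODEL="azureopenai" \
    -e LLM_EMBEDDING_DEPLOYMENT_NAME="<your-embedding-deployment-name>" \
    -e LLM_API_VERSION="<api-version>" \
    ghcr.io/all-hands-ai/openhands:main
```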

@ -2,7 +2,7 @@

## Completion

OpenHands uses LiteLLM for completion calls. The following resources are relevant for using OpenHands with Google's LLMs
OpenHands uses LiteLLM for completion calls. The following resources are relevant for using OpenHands with Google's LLMs:

- [Gemini - Google AI Studio](https://docs.litellm.ai/docs/providers/gemini)
- [VertexAI - Google Cloud Platform](https://docs.litellm.ai/docs/providers/vertex)

@ -1,17 +1,23 @@
---
sidebar_position: 2
sidebar_position: 3
---

# 🤖 LLM Backends

OpenHands can connect to any LLM supported by LiteLLM. However, it requires a powerful model to work.
The following are verified by the community to work with OpenHands:

* claude-3-5-sonnet
* gemini-1.5-pro / gemini-1.5-flash
* gpt-4 / gpt-4o
* llama-3.1-405b / hermes-3-llama-3.1-405b
* wizardlm-2-8x22b

:::warning
OpenHands will issue many prompts to the LLM you configure. Most of these LLMs cost money, so be sure to set spending
limits and monitor usage.
:::

If you have successfully run OpenHands with specific LLMs not in the list, please add them to the verified list. We
also encourage you to open a PR to share your setup process to help others using the same provider and LLM!

@ -19,39 +25,39 @@ For a full list of the providers and models available, please consult the
[litellm documentation](https://docs.litellm.ai/docs/providers).

## Local and Open Source Models

Most current local and open source models are not as powerful. When using such models, you may see long
wait times between messages, poor responses, or errors about malformed JSON. OpenHands can only be as powerful as the
models driving it. However, if you do find ones that work, please add them to the verified list above.

## LLM Configuration
:::warning
OpenHands will issue many prompts to the LLM you configure. Most of these LLMs cost money, so be sure to set spending
limits and monitor usage.
:::

The `LLM_MODEL` environment variable controls which model is used in programmatic interactions.
But when using the OpenHands UI, you'll need to choose your model in the settings window.

The following environment variables might be necessary for some LLMs/providers:

- `LLM_API_KEY`
- `LLM_BASE_URL`
- `LLM_EMBEDDING_MODEL`
- `LLM_EMBEDDING_DEPLOYMENT_NAME`
- `LLM_API_VERSION`
- `LLM_DROP_PARAMS`
* `LLM_API_KEY`
* `LLM_API_VERSION`
* `LLM_BASE_URL`
* `LLM_EMBEDDING_MODEL`
* `LLM_EMBEDDING_DEPLOYMENT_NAME`
* `LLM_DROP_PARAMS`
* `LLM_DISABLE_VISION`
* `LLM_CACHING_PROMPT`
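
For illustration, a minimal sketch of passing a few of these variables to the Docker container with `-e` (the names come from the list above; the values are placeholders, and the boolean-style string for `LLM_DROP_PARAMS` is an assumption):

```sh
# Hypothetical flags; replace the values for your provider.
docker run -it \
    -e LLM_API_KEY="<your-api-key>" \
    -e LLM_BASE_URL="<api-base-url>" \
    -e LLM_DROP_PARAMS="true" \
    ghcr.io/all-hands-ai/openhands:main
```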

We have a few guides for running OpenHands with specific model providers:

- [OpenAI](llms/openai-llms)
- [ollama](llms/local-llms)
- [Azure](llms/azure-llms)
- [Google](llms/google-llms)
* [Azure](llms/azure-llms)
* [Google](llms/google-llms)
* [ollama](llms/local-llms)
* [OpenAI](llms/openai-llms)

### API retries and rate limits

Some LLMs have rate limits and may require retries. OpenHands will automatically retry requests if it receives a 429 error or API connection error.
You can set the following environment variables to control the number of retries and the time between retries:
* `LLM_NUM_RETRIES` (Default of 5)
* `LLM_RETRY_MIN_WAIT` (Default of 3 seconds)
* `LLM_RETRY_MAX_WAIT` (Default of 60 seconds)

* `LLM_NUM_RETRIES` (Default of 8)
* `LLM_RETRY_MIN_WAIT` (Default of 15 seconds)
* `LLM_RETRY_MAX_WAIT` (Default of 120 seconds)
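
For example, overriding the retry behavior when launching via Docker might look like this (a sketch; the values are illustrative, and the waits are assumed to be in seconds, per the defaults listed above):

```sh
# Hypothetical retry overrides passed as environment variables.
docker run -it \
    -e LLM_NUM_RETRIES="8" \
    -e LLM_RETRY_MIN_WAIT="15" \
    -e LLM_RETRY_MAX_WAIT="120" \
    ghcr.io/all-hands-ai/openhands:main
```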

@ -5,7 +5,7 @@ When using a Local LLM, OpenHands may have limited functionality.
:::

Ensure that you have the Ollama server up and running.
For detailed startup instructions, refer to [here](https://github.com/ollama/ollama)
For detailed startup instructions, refer to [here](https://github.com/ollama/ollama).

This guide assumes you've started ollama with `ollama serve`. If you're running ollama differently (e.g. inside docker), the instructions might need to be modified. Please note that if you're running WSL the default ollama configuration blocks requests from docker containers. See [here](#configuring-ollama-service-wsl-en).
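
As a quick sketch of that assumption (the model name here is only an example):

```sh
# Start the Ollama server in the background, then fetch a model.
ollama serve &
ollama pull codellama:7b
ollama list   # the pulled model should now appear in this listing
```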

@ -32,7 +32,7 @@ starcoder2:latest f67ae0f64584 1.7 GB 19 hours ago

### Docker

Use the instructions [here](../intro) to start OpenHands using Docker.
Use the instructions [here](../getting-started) to start OpenHands using Docker.
But when running `docker run`, you'll need to add a few more arguments:

```bash

@ -4,72 +4,20 @@ OpenHands uses [LiteLLM](https://www.litellm.ai/) to make calls to OpenAI's chat

## Configuration

### Manual Configuration

When running the OpenHands Docker image, you'll need to set the following environment variables:

```sh
LLM_MODEL="openai/<gpt-model-name>" # e.g. "openai/gpt-4o"
LLM_API_KEY="<your-openai-project-api-key>"
```
When running the OpenHands Docker image, you'll need to choose a model and set your API key in the OpenHands UI through the Settings.

To see a full list of OpenAI models that LiteLLM supports, please visit https://docs.litellm.ai/docs/providers/openai#openai-chat-completion-models.

To find or create your OpenAI Project API Key, please visit https://platform.openai.com/api-keys.

**Example**:

```sh
export WORKSPACE_BASE=$(pwd)/workspace

docker run -it \
--pull=always \
-e SANDBOX_USER_ID=$(id -u) \
-e LLM_MODEL="openai/<gpt-model-name>" \
-e LLM_API_KEY="<your-openai-project-api-key>" \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-v $WORKSPACE_BASE:/opt/workspace_base \
-v /var/run/docker.sock:/var/run/docker.sock \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
ghcr.io/opendevin/opendevin:0.8
```

### UI Configuration

You can also directly set the `LLM_MODEL` and `LLM_API_KEY` in the OpenHands client itself. Follow this guide to get up and running with the OpenHands client.

From there, you can set your model and API key in the settings window.

## Using OpenAI-Compatible Endpoints

Just as for OpenAI Chat completions, we use LiteLLM for OpenAI-compatible endpoints. You can find their full documentation on this topic [here](https://docs.litellm.ai/docs/providers/openai_compatible).

When running the OpenHands Docker image, you'll need to set the following environment variables:
When running the OpenHands Docker image, you'll need to set the following environment variables using `-e`:

```sh
LLM_BASE_URL="<api-base-url>" # e.g. "http://0.0.0.0:3000"
LLM_MODEL="openai/<model-name>" # e.g. "openai/mistral"
LLM_API_KEY="<your-api-key>"
LLM_BASE_URL="<api-base-url>" # e.g. "http://0.0.0.0:3000"
```

**Example**:

```sh
export WORKSPACE_BASE=$(pwd)/workspace

docker run -it \
--pull=always \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_BASE_URL="<api-base-url>" \
-e LLM_MODEL="openai/<model-name>" \
-e LLM_API_KEY="<your-api-key>" \
-v $WORKSPACE_BASE:/opt/workspace_base \
-v /var/run/docker.sock:/var/run/docker.sock \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
ghcr.io/opendevin/opendevin:0.8
```
Then set your model and API key in the OpenHands UI through the Settings.
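
Before pointing OpenHands at an OpenAI-compatible endpoint, it can help to sanity-check the endpoint directly. A sketch, assuming the endpoint implements the standard `/chat/completions` route and a Bearer-token header (adjust the base URL, key, and model name to your setup):

```sh
curl "$LLM_BASE_URL/chat/completions" \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $LLM_API_KEY" \
    -d '{"model": "mistral", "messages": [{"role": "user", "content": "Hello"}]}'
```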

8
docs/package-lock.json
generated
8
docs/package-lock.json
generated
@ -24,7 +24,7 @@
"@docusaurus/module-type-aliases": "^3.5.1",
"@docusaurus/tsconfig": "^3.5.2",
"@docusaurus/types": "^3.5.1",
"typescript": "~5.5.4"
"typescript": "~5.6.2"
},
"engines": {
"node": ">=18.0"
@ -14853,9 +14853,9 @@
}
},
"node_modules/typescript": {
"version": "5.5.4",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.4.tgz",
"integrity": "sha512-Mtq29sKDAEYP7aljRgtPOpTvOfbwRWlS6dPRzwjdE+C0R4brX/GUyhHSecbHMFLNBLcJIPt9nl9yG5TZ1weH+Q==",
"version": "5.6.2",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.2.tgz",
"integrity": "sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==",
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"

@ -31,7 +31,7 @@
"@docusaurus/module-type-aliases": "^3.5.1",
"@docusaurus/tsconfig": "^3.5.2",
"@docusaurus/types": "^3.5.1",
"typescript": "~5.5.4"
"typescript": "~5.6.2"
},
"browserslist": {
"production": [

@ -1,8 +1,79 @@
import type { SidebarsConfig } from "@docusaurus/plugin-content-docs";

const sidebars: SidebarsConfig = {
docsSidebar: [{ type: "autogenerated", dirName: "usage" }],
apiSidebar: [require("./modules/python/sidebar.json")],
docsSidebar: [{
type: 'doc',
label: 'Getting Started',
id: 'usage/getting-started',
}, {
type: 'doc',
label: 'Troubleshooting',
id: 'usage/troubleshooting/troubleshooting',
}, {
type: 'doc',
label: 'Feedback',
id: 'usage/feedback',
}, {
type: 'category',
label: 'How-to Guides',
items: [{
type: 'doc',
id: 'usage/how-to/cli-mode',
}, {
type: 'doc',
id: 'usage/how-to/headless-mode',
}, {
type: 'doc',
id: 'usage/how-to/custom-sandbox-guide',
}, {
type: 'doc',
id: 'usage/how-to/evaluation-harness',
}, {
type: 'doc',
id: 'usage/how-to/openshift-example',
}]
}, {
type: 'category',
label: 'LLMs',
items: [{
type: 'doc',
label: 'Overview',
id: 'usage/llms/llms',
}, {
type: 'doc',
label: 'OpenAI',
id: 'usage/llms/openai-llms',
}, {
type: 'doc',
label: 'Azure',
id: 'usage/llms/azure-llms',
}, {
type: 'doc',
label: 'Google',
id: 'usage/llms/google-llms',
}, {
type: 'doc',
label: 'Local/ollama',
id: 'usage/llms/local-llms',
}],
}, {
type: 'category',
label: 'Architecture',
items: [{
type: 'doc',
label: 'Backend',
id: 'usage/architecture/backend',
}, {
type: 'doc',
label: 'Runtime',
id: 'usage/architecture/runtime',
}],
}, {
type: 'doc',
label: 'About',
id: 'usage/about',
}],
};

export default sidebars;

@ -7,17 +7,6 @@ function CustomFooter() {
return (
<footer className="custom-footer">
<div className="footer-content">
<div className="footer-top">
<div className="footer-title">
<Translate id="footer.title">OpenHands</Translate>
</div>
<div className="footer-link">
<a href="/modules/usage/intro">
<Translate id="footer.docs">Docs</Translate>
</a>
</div>
</div>

<div className="footer-icons">
<a href="https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA" target="_blank" rel="noopener noreferrer">
<FaSlack />

@ -32,7 +21,7 @@ function CustomFooter() {
<div className="footer-bottom">
<p>
<Translate id="footer.copyright" values={{ year: new Date().getFullYear() }}>
{'Copyright © {year} OpenHands'}
{'Copyright © {year} All Hands AI, Inc'}
</Translate>
</p>
</div>
@ -17,23 +17,19 @@ export function HomepageHeader() {

<p className="header-subtitle">{siteConfig.tagline}</p>

<div className="header-links">
<a href="https://github.com/All-Hands-AI/OpenHands">
<img src="https://img.shields.io/badge/Code-Github-purple?logo=github&logoColor=white&style=for-the-badge" alt="Code" />
</a>
<a href="https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA">
<img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community" />
</a>
<a href="https://discord.gg/ESHStjSjD4">
<img src="https://img.shields.io/badge/Discord-Join%20Us-purple?logo=discord&logoColor=white&style=for-the-badge" alt="Join our Discord community" />
</a>

<a href="https://arxiv.org/abs/2407.16741">
<img src="https://img.shields.io/badge/Paper-%20on%20Arxiv-red?logo=arxiv&style=for-the-badge" alt="Paper on Arxiv" />
</a>
<a href="https://huggingface.co/spaces/OpenDevin/evaluation">
<img src="https://img.shields.io/badge/Evaluation-Benchmark%20on%20HF%20Space-green?logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark" />
</a>
<div align="center" className="header-links">
<a href="https://github.com/All-Hands-AI/OpenHands/graphs/contributors"><img src="https://img.shields.io/github/contributors/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Contributors" /></a>
<a href="https://github.com/All-Hands-AI/OpenHands/stargazers"><img src="https://img.shields.io/github/stars/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Stargazers" /></a>
<a href="https://codecov.io/github/All-Hands-AI/OpenHands?branch=main"><img alt="CodeCov" src="https://img.shields.io/codecov/c/github/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" /></a>
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE"><img src="https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="MIT License" /></a>
<br/>
<a href="https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community" /></a>
<a href="https://discord.gg/ESHStjSjD4"><img src="https://img.shields.io/badge/Discord-Join%20Us-purple?logo=discord&logoColor=white&style=for-the-badge" alt="Join our Discord community" /></a>
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/CREDITS.md"><img src="https://img.shields.io/badge/Project-Credits-blue?style=for-the-badge&color=FFE165&logo=github&logoColor=white" alt="Credits" /></a>
<br/>
<a href="https://docs.all-hands.dev/modules/usage/getting-started"><img src="https://img.shields.io/badge/Documentation-000?logo=googledocs&logoColor=FFE165&style=for-the-badge" alt="Check out the documentation" /></a>
<a href="https://arxiv.org/abs/2407.16741"><img src="https://img.shields.io/badge/Paper%20on%20Arxiv-000?logoColor=FFE165&logo=arxiv&style=for-the-badge" alt="Paper on Arxiv" /></a>
<a href="https://huggingface.co/spaces/OpenDevin/evaluation"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score" /></a>
</div>

<Demo />

@ -7,8 +7,10 @@
/* You can override the default Infima variables here. */

:root {
--ifm-color-primary: #4465db;
--ifm-code-font-size: 95%;
--ifm-color-primary: #000;
--ifm-background-color: #F1EAE0;
--ifm-navbar-background-color: #F1EAE0;
--docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.1);
--secondary: #171717;
--secondary-dark: #0a0a0a;
@ -17,21 +19,15 @@

/* For readability concerns, you should choose a lighter palette in dark mode. */
[data-theme="dark"] {
--ifm-color-primary: #4465db;
--ifm-color-primary: #FFF;
--ifm-background-color: #000;
--ifm-navbar-background-color: #000;
--docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.3);
--secondary: #737373;
--secondary-dark: #171717;
--secondary-light: #d4d4d4;
--secondary-light: #ccc;
}

.footer--dark {
background-image: linear-gradient(
140deg,
var(--secondary) 20%,
var(--secondary-light) 100%
);
}

.a {
p a, .a {
text-decoration: underline;
}
}

@ -2,13 +2,19 @@

.custom-footer {
background-color: dark;
color: white;
height: 200px;
color: #000;
height: 100px;
/* background: linear-gradient(to bottom, #1a1a1a, #1a1a1a); */
background: linear-gradient(to bottom, #1f2937, #000000);
background-color: #F1EAE0;

}

[data-theme="dark"] .custom-footer {
background-color: #000;
color: #fff;
}


.footer-content {
display: flex;
flex-direction: column;
@ -47,7 +53,6 @@
}

.footer-community {
text-transform: uppercase;
font-weight: 300;
}

@ -65,7 +70,3 @@
.footer-icons a:hover {
color: white;
}

.footer-bottom {
text-transform: uppercase;
}

@ -2,8 +2,6 @@

.homepage-header {
height: 800px;
color: white;
background: linear-gradient(to top, #64748b, #000000);
}

.header-content {

@ -20,8 +20,7 @@ export default function Home(): JSX.Element {
title={`${siteConfig.title}`}
description={translate({
id: 'homepage.description',
message: 'An Open Platform for AI Software Developers as Generalist Agents',
description: 'The homepage description',
message: 'Code Less, Make More',
})}
>
<HomepageHeader />

BIN
docs/static/img/logo-square.png
vendored
Normal file
BIN
docs/static/img/logo-square.png
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.1 MiB |
BIN
docs/static/img/logo.png
vendored
BIN
docs/static/img/logo.png
vendored
Binary file not shown.
Before Width: | Height: | Size: 386 KiB After Width: | Height: | Size: 21 KiB |
BIN
docs/static/img/settings-screenshot.png
vendored
Normal file
BIN
docs/static/img/settings-screenshot.png
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 95 KiB |
@ -456,6 +456,12 @@ if __name__ == '__main__':
output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
instances = prepare_dataset(swe_bench_tests, output_file, args.eval_n_limit)

if not isinstance(
instances['PASS_TO_PASS'][instances['PASS_TO_PASS'].index[0]], str
):
for col in ['PASS_TO_PASS', 'FAIL_TO_PASS']:
instances[col] = instances[col].apply(lambda x: str(list(x)))

run_evaluation(
instances, metadata, output_file, args.eval_num_workers, process_instance
)

46
frontend/package-lock.json
generated
46
frontend/package-lock.json
generated
@ -1,12 +1,12 @@
{
"name": "openhands-frontend",
"version": "0.9.1",
"version": "0.9.2",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "openhands-frontend",
"version": "0.9.1",
"version": "0.9.2",
"dependencies": {
"@monaco-editor/react": "^4.6.0",
"@nextui-org/react": "^2.4.6",
@ -17,7 +17,7 @@
"@xterm/xterm": "^5.4.0",
"clsx": "^2.1.1",
"eslint-config-airbnb-typescript": "^18.0.0",
"i18next": "^23.14.0",
"i18next": "^23.15.1",
"i18next-browser-languagedetector": "^8.0.0",
"i18next-http-backend": "^2.6.1",
"jose": "^5.8.0",
@ -33,7 +33,7 @@
"react-syntax-highlighter": "^15.5.0",
"remark-gfm": "^4.0.0",
"tailwind-merge": "^2.5.2",
"vite": "^5.4.2",
"vite": "^5.4.3",
"web-vitals": "^3.5.2"
},
"devDependencies": {
@ -41,7 +41,7 @@
"@testing-library/jest-dom": "^6.5.0",
"@testing-library/react": "^16.0.1",
"@testing-library/user-event": "^14.5.2",
"@types/node": "^22.5.2",
"@types/node": "^22.5.4",
"@types/react": "^18.3.5",
"@types/react-dom": "^18.3.0",
"@types/react-highlight": "^0.12.8",
@ -62,10 +62,10 @@
"husky": "^9.1.5",
"jsdom": "^25.0.0",
"lint-staged": "^15.2.10",
"postcss": "^8.4.44",
"postcss": "^8.4.45",
"prettier": "^3.3.3",
"tailwindcss": "^3.4.10",
"typescript": "^5.5.4",
"typescript": "^5.6.2",
"vite-tsconfig-paths": "^5.0.1",
"vitest": "^1.6.0"
},
@ -4857,9 +4857,9 @@
"integrity": "sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g=="
},
"node_modules/@types/node": {
"version": "22.5.2",
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.5.2.tgz",
"integrity": "sha512-acJsPTEqYqulZS/Yp/S3GgeE6GZ0qYODUR8aVr/DkhHQ8l9nd4j5x1/ZJy9/gHrRlFMqkO6i0I3E27Alu4jjPg==",
"version": "22.5.4",
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.5.4.tgz",
"integrity": "sha512-FDuKUJQm/ju9fT/SeX/6+gBzoPzlVCzfzmGkwKvRHQVxi4BntVbyIwf6a4Xn62mrvndLiml6z/UBXIdEVjQLXg==",
"devOptional": true,
"dependencies": {
"undici-types": "~6.19.2"
@ -8133,9 +8133,9 @@
}
},
"node_modules/i18next": {
"version": "23.14.0",
"resolved": "https://registry.npmjs.org/i18next/-/i18next-23.14.0.tgz",
"integrity": "sha512-Y5GL4OdA8IU2geRrt2+Uc1iIhsjICdHZzT9tNwQ3TVqdNzgxHToGCKf/TPRP80vTCAP6svg2WbbJL+Gx5MFQVA==",
"version": "23.15.1",
"resolved": "https://registry.npmjs.org/i18next/-/i18next-23.15.1.tgz",
"integrity": "sha512-wB4abZ3uK7EWodYisHl/asf8UYEhrI/vj/8aoSsrj/ZDxj4/UXPOa1KvFt1Fq5hkUHquNqwFlDprmjZ8iySgYA==",
"funding": [
{
"type": "individual",
@ -10812,9 +10812,9 @@
}
},
"node_modules/postcss": {
"version": "8.4.44",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.44.tgz",
"integrity": "sha512-Aweb9unOEpQ3ezu4Q00DPvvM2ZTUitJdNKeP/+uQgr1IBIqu574IaZoURId7BKtWMREwzKa9OgzPzezWGPWFQw==",
"version": "8.4.45",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.45.tgz",
"integrity": "sha512-7KTLTdzdZZYscUc65XmjFiB73vBhBfbPztCYdUNvlaso9PrzjzcmjqBPR0lNGkcVlcO4BjiO5rK/qNz+XAen1Q==",
"funding": [
{
"type": "opencollective",
@ -12800,9 +12800,9 @@
}
},
"node_modules/typescript": {
"version": "5.5.4",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.4.tgz",
"integrity": "sha512-Mtq29sKDAEYP7aljRgtPOpTvOfbwRWlS6dPRzwjdE+C0R4brX/GUyhHSecbHMFLNBLcJIPt9nl9yG5TZ1weH+Q==",
"version": "5.6.2",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.2.tgz",
"integrity": "sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==",
"dev": true,
"bin": {
"tsc": "bin/tsc",
@ -13109,12 +13109,12 @@
}
},
"node_modules/vite": {
"version": "5.4.2",
"resolved": "https://registry.npmjs.org/vite/-/vite-5.4.2.tgz",
"integrity": "sha512-dDrQTRHp5C1fTFzcSaMxjk6vdpKvT+2/mIdE07Gw2ykehT49O0z/VHS3zZ8iV/Gh8BJJKHWOe5RjaNrW5xf/GA==",
"version": "5.4.3",
"resolved": "https://registry.npmjs.org/vite/-/vite-5.4.3.tgz",
"integrity": "sha512-IH+nl64eq9lJjFqU+/yrRnrHPVTlgy42/+IzbOdaFDVlyLgI/wDlf+FCobXLX1cT0X5+7LMyH1mIy2xJdLfo8Q==",
"dependencies": {
"esbuild": "^0.21.3",
"postcss": "^8.4.41",
"postcss": "^8.4.43",
"rollup": "^4.20.0"
},
"bin": {

@ -1,6 +1,6 @@
{
"name": "openhands-frontend",
"version": "0.9.1",
"version": "0.9.2",
"private": true,
"type": "module",
"engines": {
@ -16,7 +16,7 @@
"@xterm/xterm": "^5.4.0",
"clsx": "^2.1.1",
"eslint-config-airbnb-typescript": "^18.0.0",
"i18next": "^23.14.0",
"i18next": "^23.15.1",
"i18next-browser-languagedetector": "^8.0.0",
"i18next-http-backend": "^2.6.1",
"jose": "^5.8.0",
@ -32,7 +32,7 @@
"react-syntax-highlighter": "^15.5.0",
"remark-gfm": "^4.0.0",
"tailwind-merge": "^2.5.2",
"vite": "^5.4.2",
"vite": "^5.4.3",
"web-vitals": "^3.5.2"
},
"scripts": {
@ -64,7 +64,7 @@
"@testing-library/jest-dom": "^6.5.0",
"@testing-library/react": "^16.0.1",
"@testing-library/user-event": "^14.5.2",
"@types/node": "^22.5.2",
"@types/node": "^22.5.4",
"@types/react": "^18.3.5",
"@types/react-dom": "^18.3.0",
"@types/react-highlight": "^0.12.8",
@ -85,10 +85,10 @@
"husky": "^9.1.5",
"jsdom": "^25.0.0",
"lint-staged": "^15.2.10",
"postcss": "^8.4.44",
"postcss": "^8.4.45",
"prettier": "^3.3.3",
"tailwindcss": "^3.4.10",
"typescript": "^5.5.4",
"typescript": "^5.6.2",
"vite-tsconfig-paths": "^5.0.1",
"vitest": "^1.6.0"
},

@ -39,7 +39,6 @@ function BaseModal({
data-testid={testID}
isOpen={isOpen}
onOpenChange={onOpenChange}
title={title}
isDismissable={isDismissable}
backdrop="blur"
hideCloseButton
@ -51,14 +50,14 @@
<>
{title && (
<ModalHeader className="flex flex-col p-0">
<HeaderContent title={title} subtitle={subtitle} />
<HeaderContent maintitle={title} subtitle={subtitle} />
</ModalHeader>
)}

<ModalBody className={bodyClassName}>{children}</ModalBody>

{actions && actions.length > 0 && (
<ModalFooter className="flex-col flex justify-start p-0">
<ModalFooter className="flex-row flex justify-start p-0">
<FooterContent actions={actions} closeModal={closeModal} />
</ModalFooter>
)}

@ -1,17 +1,17 @@
import React from "react";

interface HeaderContentProps {
title: string;
maintitle: string;
subtitle?: string;
}

export function HeaderContent({
title,
maintitle,
subtitle = undefined,
}: HeaderContentProps) {
return (
<>
<h3>{title}</h3>
<h3>{maintitle}</h3>
{subtitle && (
<span className="text-neutral-400 text-sm font-light">{subtitle}</span>
)}

@ -8,7 +8,7 @@ import { Feedback, sendFeedback } from "#/services/feedbackService";
import toast from "#/utils/toast";
import { getToken } from "#/services/auth";
import Session from "#/services/session";
import { removeApiKey } from "#/utils/utils";
import { removeApiKey, removeUnwantedKeys } from "#/utils/utils";

const isEmailValid = (email: string) => {
// Regular expression to validate email format
@ -95,7 +95,7 @@ function FeedbackModal({
email,
permissions,
token: getToken(),
trajectory: removeApiKey(Session._history),
trajectory: removeApiKey(removeUnwantedKeys(Session._history)),
};

try {

@ -29,7 +29,7 @@ describe("ModelSelector", () => {
const onModelChange = vi.fn();
render(<ModelSelector models={models} onModelChange={onModelChange} />);

const selector = screen.getByLabelText("Provider");
const selector = screen.getByLabelText("LLM Provider");
expect(selector).toBeInTheDocument();

await user.click(selector);
@ -45,10 +45,10 @@
const onModelChange = vi.fn();
render(<ModelSelector models={models} onModelChange={onModelChange} />);

const modelSelector = screen.getByLabelText("Model");
const modelSelector = screen.getByLabelText("LLM Model");
expect(modelSelector).toBeDisabled();

const providerSelector = screen.getByLabelText("Provider");
const providerSelector = screen.getByLabelText("LLM Provider");
await user.click(providerSelector);

const vertexAI = screen.getByText("VertexAI");
@ -62,13 +62,13 @@
const onModelChange = vi.fn();
render(<ModelSelector models={models} onModelChange={onModelChange} />);

const providerSelector = screen.getByLabelText("Provider");
const providerSelector = screen.getByLabelText("LLM Provider");
await user.click(providerSelector);

const azureProvider = screen.getByText("Azure");
await user.click(azureProvider);

const modelSelector = screen.getByLabelText("Model");
const modelSelector = screen.getByLabelText("LLM Model");
await user.click(modelSelector);

expect(screen.getByText("ada")).toBeInTheDocument();
@ -84,42 +84,13 @@
expect(screen.getByText("chat-bison-32k")).toBeInTheDocument();
});

it("should display the actual litellm model ID as the user is making the selections", async () => {
const user = userEvent.setup();
const onModelChange = vi.fn();
render(<ModelSelector models={models} onModelChange={onModelChange} />);

const id = screen.getByTestId("model-id");
const providerSelector = screen.getByLabelText("Provider");
const modelSelector = screen.getByLabelText("Model");

expect(id).toHaveTextContent("No model selected");

await user.click(providerSelector);
await user.click(screen.getByText("Azure"));

expect(id).toHaveTextContent("azure/");

await user.click(modelSelector);
await user.click(screen.getByText("ada"));
expect(id).toHaveTextContent("azure/ada");

await user.click(providerSelector);
await user.click(screen.getByText("cohere"));
expect(id).toHaveTextContent("cohere.");

await user.click(modelSelector);
await user.click(screen.getByText("command-r-v1:0"));
expect(id).toHaveTextContent("cohere.command-r-v1:0");
});

it("should call onModelChange when the model is changed", async () => {
const user = userEvent.setup();
const onModelChange = vi.fn();
render(<ModelSelector models={models} onModelChange={onModelChange} />);

const providerSelector = screen.getByLabelText("Provider");
const modelSelector = screen.getByLabelText("Model");
const providerSelector = screen.getByLabelText("LLM Provider");
const modelSelector = screen.getByLabelText("LLM Model");

await user.click(providerSelector);
await user.click(screen.getByText("Azure"));
@ -146,29 +117,6 @@
expect(onModelChange).toHaveBeenCalledWith("cohere.command-r-v1:0");
});

it("should clear the model ID when the provider is cleared", async () => {
const user = userEvent.setup();
const onModelChange = vi.fn();
render(<ModelSelector models={models} onModelChange={onModelChange} />);

const providerSelector = screen.getByLabelText("Provider");
const modelSelector = screen.getByLabelText("Model");

await user.click(providerSelector);
await user.click(screen.getByText("Azure"));

await user.click(modelSelector);
await user.click(screen.getByText("ada"));

expect(screen.getByTestId("model-id")).toHaveTextContent("azure/ada");

await user.clear(providerSelector);

expect(screen.getByTestId("model-id")).toHaveTextContent(
"No model selected",
);
});

it("should have a default value if passed", async () => {
const onModelChange = vi.fn();
render(
@ -179,9 +127,8 @@
/>,
);

expect(screen.getByTestId("model-id")).toHaveTextContent("azure/ada");
expect(screen.getByLabelText("Provider")).toHaveValue("Azure");
expect(screen.getByLabelText("Model")).toHaveValue("ada");
expect(screen.getByLabelText("LLM Provider")).toHaveValue("Azure");
expect(screen.getByLabelText("LLM Model")).toHaveValue("ada");
});

it.todo("should disable provider if isDisabled is true");

@ -21,7 +21,7 @@ export function ModelSelector({
onModelChange,
defaultModel,
}: ModelSelectorProps) {
const [litellmId, setLitellmId] = React.useState<string | null>(null);
const [, setLitellmId] = React.useState<string | null>(null);
const [selectedProvider, setSelectedProvider] = React.useState<string | null>(
null,
);
@ -61,14 +61,10 @@

return (
<div data-testid="model-selector" className="flex flex-col gap-2">
<span className="text-center italic text-gray-500" data-testid="model-id">
{litellmId?.replace("other", "") || "No model selected"}
</span>

<div className="flex flex-col gap-3">
<div className="flex flex-row gap-3">
<Autocomplete
isDisabled={isDisabled}
label="Provider"
label="LLM Provider"
placeholder="Select a provider"
isClearable={false}
onSelectionChange={(e) => {
@ -99,7 +95,7 @@
</Autocomplete>

<Autocomplete
label="Model"
label="LLM Model"
placeholder="Select a model"
onSelectionChange={(e) => {
if (e?.toString()) handleChangeModel(e.toString());

@ -6,8 +6,6 @@ import { Settings } from "#/services/settings";
import SettingsForm from "./SettingsForm";

const onModelChangeMock = vi.fn();
const onCustomModelChangeMock = vi.fn();
const onModelTypeChangeMock = vi.fn();
const onAgentChangeMock = vi.fn();
const onLanguageChangeMock = vi.fn();
const onAPIKeyChangeMock = vi.fn();
@ -21,21 +19,17 @@ const renderSettingsForm = (settings?: Settings) => {
settings={
settings || {
LLM_MODEL: "gpt-4o",
CUSTOM_LLM_MODEL: "",
USING_CUSTOM_MODEL: false,
AGENT: "agent1",
LANGUAGE: "en",
LLM_API_KEY: "sk-...",
CONFIRMATION_MODE: true,
SECURITY_ANALYZER: "analyzer1",
CONFIRMATION_MODE: false,
SECURITY_ANALYZER: "",
}
}
models={["gpt-4o", "gpt-3.5-turbo", "azure/ada"]}
agents={["agent1", "agent2", "agent3"]}
securityAnalyzers={["analyzer1", "analyzer2", "analyzer3"]}
onModelChange={onModelChangeMock}
onCustomModelChange={onCustomModelChangeMock}
onModelTypeChange={onModelTypeChangeMock}
onAgentChange={onAgentChangeMock}
onLanguageChange={onLanguageChangeMock}
onAPIKeyChange={onAPIKeyChangeMock}
@ -49,50 +43,38 @@ describe("SettingsForm", () => {
it("should display the first values in the array by default", () => {
renderSettingsForm();

const providerInput = screen.getByRole("combobox", { name: "Provider" });
const modelInput = screen.getByRole("combobox", { name: "Model" });
const agentInput = screen.getByRole("combobox", { name: "agent" });
const providerInput = screen.getByRole("combobox", {
name: "LLM Provider",
});
const modelInput = screen.getByRole("combobox", { name: "LLM Model" });
const languageInput = screen.getByRole("combobox", { name: "language" });
const apiKeyInput = screen.getByTestId("apikey");
const confirmationModeInput = screen.getByTestId("confirmationmode");
const securityAnalyzerInput = screen.getByRole("combobox", {
name: "securityanalyzer",
});

expect(providerInput).toHaveValue("OpenAI");
expect(modelInput).toHaveValue("gpt-4o");
expect(agentInput).toHaveValue("agent1");
expect(languageInput).toHaveValue("English");
expect(apiKeyInput).toHaveValue("sk-...");
expect(confirmationModeInput).toHaveAttribute("data-selected", "true");
expect(securityAnalyzerInput).toHaveValue("analyzer1");
});

it("should display the existing values if they are present", () => {
renderSettingsForm({
LLM_MODEL: "gpt-3.5-turbo",
CUSTOM_LLM_MODEL: "",
USING_CUSTOM_MODEL: false,
AGENT: "agent2",
LANGUAGE: "es",
LLM_API_KEY: "sk-...",
CONFIRMATION_MODE: true,
SECURITY_ANALYZER: "analyzer2",
CONFIRMATION_MODE: false,
SECURITY_ANALYZER: "",
});

const providerInput = screen.getByRole("combobox", { name: "Provider" });
const modelInput = screen.getByRole("combobox", { name: "Model" });
const agentInput = screen.getByRole("combobox", { name: "agent" });
const languageInput = screen.getByRole("combobox", { name: "language" });
const securityAnalyzerInput = screen.getByRole("combobox", {
name: "securityanalyzer",
const providerInput = screen.getByRole("combobox", {
name: "LLM Provider",
});
const modelInput = screen.getByRole("combobox", { name: "LLM Model" });
const languageInput = screen.getByRole("combobox", { name: "language" });

expect(providerInput).toHaveValue("OpenAI");
expect(modelInput).toHaveValue("gpt-3.5-turbo");
expect(agentInput).toHaveValue("agent2");
expect(languageInput).toHaveValue("Español");
expect(securityAnalyzerInput).toHaveValue("analyzer2");
});

it("should disable settings when disabled is true", () => {
@ -100,21 +82,17 @@ describe("SettingsForm", () => {
<SettingsForm
settings={{
LLM_MODEL: "gpt-4o",
CUSTOM_LLM_MODEL: "",
USING_CUSTOM_MODEL: false,
AGENT: "agent1",
LANGUAGE: "en",
LLM_API_KEY: "sk-...",
CONFIRMATION_MODE: true,
SECURITY_ANALYZER: "analyzer1",
CONFIRMATION_MODE: false,
SECURITY_ANALYZER: "",
}}
models={["gpt-4o", "gpt-3.5-turbo", "azure/ada"]}
agents={["agent1", "agent2", "agent3"]}
securityAnalyzers={["analyzer1", "analyzer2", "analyzer3"]}
disabled
onModelChange={onModelChangeMock}
onCustomModelChange={onCustomModelChangeMock}
onModelTypeChange={onModelTypeChangeMock}
onAgentChange={onAgentChangeMock}
onLanguageChange={onLanguageChangeMock}
onAPIKeyChange={onAPIKeyChangeMock}
@ -123,21 +101,15 @@
/>,
);

const providerInput = screen.getByRole("combobox", { name: "Provider" });
const modelInput = screen.getByRole("combobox", { name: "Model" });
const agentInput = screen.getByRole("combobox", { name: "agent" });
const languageInput = screen.getByRole("combobox", { name: "language" });
const confirmationModeInput = screen.getByTestId("confirmationmode");
const securityAnalyzerInput = screen.getByRole("combobox", {
name: "securityanalyzer",
const providerInput = screen.getByRole("combobox", {
name: "LLM Provider",
});
const modelInput = screen.getByRole("combobox", { name: "LLM Model" });
const languageInput = screen.getByRole("combobox", { name: "language" });

expect(providerInput).toBeDisabled();
expect(modelInput).toBeDisabled();
expect(agentInput).toBeDisabled();
expect(languageInput).toBeDisabled();
expect(confirmationModeInput).toHaveAttribute("data-disabled", "true");
expect(securityAnalyzerInput).toBeDisabled();
});

describe("onChange handlers", () => {
@ -146,7 +118,7 @@
renderSettingsForm();

// We need to enable the agent select
const agentSwitch = screen.getByTestId("enableagentselect");
const agentSwitch = screen.getByTestId("advanced-options-toggle");
await user.click(agentSwitch);

const agentInput = screen.getByRole("combobox", { name: "agent" });
@ -201,8 +173,8 @@
const user = userEvent.setup();
renderSettingsForm();

const customModelToggle = screen.getByTestId("custom-model-toggle");
await user.click(customModelToggle);
const advancedToggle = screen.getByTestId("advanced-options-toggle");
await user.click(advancedToggle);

const modelSelector = screen.queryByTestId("model-selector");
expect(modelSelector).not.toBeInTheDocument();
@ -215,23 +187,21 @@
const user = userEvent.setup();
renderSettingsForm();

const customModelToggle = screen.getByTestId("custom-model-toggle");
await user.click(customModelToggle);
const advancedToggle = screen.getByTestId("advanced-options-toggle");
await user.click(advancedToggle);

const customModelInput = screen.getByTestId("custom-model-input");
await userEvent.clear(customModelInput);
await userEvent.type(customModelInput, "my/custom-model");

expect(onCustomModelChangeMock).toHaveBeenCalledWith("my/custom-model");
expect(onModelTypeChangeMock).toHaveBeenCalledWith("custom");
expect(onModelChangeMock).toHaveBeenCalledWith("my/custom-model");
});

it("should have custom model switched if using custom model", () => {
it("should have advanced options switched if using advanced options", () => {
renderWithProviders(
<SettingsForm
settings={{
LLM_MODEL: "gpt-4o",
CUSTOM_LLM_MODEL: "CUSTOM_MODEL",
USING_CUSTOM_MODEL: true,
AGENT: "agent1",
LANGUAGE: "en",
LLM_API_KEY: "sk-...",
@ -243,8 +213,6 @@
securityAnalyzers={["analyzer1", "analyzer2", "analyzer3"]}
disabled
onModelChange={onModelChangeMock}
onCustomModelChange={onCustomModelChangeMock}
onModelTypeChange={onModelTypeChangeMock}
onAgentChange={onAgentChangeMock}
onLanguageChange={onLanguageChangeMock}
onAPIKeyChange={onAPIKeyChangeMock}
@ -253,8 +221,8 @@
/>,
);

const customModelToggle = screen.getByTestId("custom-model-toggle");
expect(customModelToggle).toHaveAttribute("aria-checked", "true");
const advancedToggle = screen.getByTestId("advanced-options-toggle");
expect(advancedToggle).toHaveAttribute("aria-checked", "true");
});
});
});

@ -17,8 +17,6 @@ interface SettingsFormProps {
disabled: boolean;

onModelChange: (model: string) => void;
onCustomModelChange: (model: string) => void;
onModelTypeChange: (type: "custom" | "default") => void;
onAPIKeyChange: (apiKey: string) => void;
onAgentChange: (agent: string) => void;
onLanguageChange: (language: string) => void;
@ -33,8 +31,6 @@ function SettingsForm({
securityAnalyzers,
disabled,
onModelChange,
onCustomModelChange,
onModelTypeChange,
onAPIKeyChange,
onAgentChange,
onLanguageChange,
@ -43,40 +39,31 @@
}: SettingsFormProps) {
const { t } = useTranslation();
const { isOpen: isVisible, onOpenChange: onVisibleChange } = useDisclosure();
const [isAgentSelectEnabled, setIsAgentSelectEnabled] = React.useState(false);
const [usingCustomModel, setUsingCustomModel] = React.useState(
settings.USING_CUSTOM_MODEL,
);

const changeModelType = (type: "custom" | "default") => {
if (type === "custom") {
setUsingCustomModel(true);
onModelTypeChange("custom");
} else {
setUsingCustomModel(false);
onModelTypeChange("default");
}
};
const advancedAlreadyInUse =
!!settings.SECURITY_ANALYZER || !!settings.CONFIRMATION_MODE;
// TODO: || model is not in the list
const [enableAdvanced, setEnableAdvanced] =
React.useState(advancedAlreadyInUse);

return (
<>
<Switch
data-testid="custom-model-toggle"
aria-checked={usingCustomModel}
isSelected={usingCustomModel}
onValueChange={(value) => changeModelType(value ? "custom" : "default")}
data-testid="advanced-options-toggle"
aria-checked={enableAdvanced}
isSelected={enableAdvanced}
onValueChange={(value) => setEnableAdvanced(value)}
>
Use custom model
Advanced Options
</Switch>
{usingCustomModel && (
{enableAdvanced && (
<Input
data-testid="custom-model-input"
label="Custom Model"
onValueChange={onCustomModelChange}
defaultValue={settings.CUSTOM_LLM_MODEL}
onValueChange={onModelChange}
defaultValue={settings.LLM_MODEL}
/>
)}
{!usingCustomModel && (
{!enableAdvanced && (
<ModelSelector
isDisabled={disabled}
models={organizeModelsAndProviders(models)}
@ -117,52 +104,48 @@
tooltip={t(I18nKey.SETTINGS$LANGUAGE_TOOLTIP)}
disabled={disabled}
/>
<AutocompleteCombobox
ariaLabel="agent"
items={agents.map((agent) => ({ value: agent, label: agent }))}
defaultKey={settings.AGENT}
onChange={onAgentChange}
tooltip={t(I18nKey.SETTINGS$AGENT_TOOLTIP)}
disabled={disabled || !isAgentSelectEnabled}
/>
<Switch
defaultSelected={false}
isSelected={isAgentSelectEnabled}
onValueChange={setIsAgentSelectEnabled}
aria-label="enableagentselect"
data-testid="enableagentselect"
>
{t(I18nKey.SETTINGS$AGENT_SELECT_ENABLED)}
</Switch>
<AutocompleteCombobox
ariaLabel="securityanalyzer"
items={securityAnalyzers.map((securityAnalyzer) => ({
value: securityAnalyzer,
label: securityAnalyzer,
}))}
defaultKey={settings.SECURITY_ANALYZER}
onChange={onSecurityAnalyzerChange}
tooltip={t(I18nKey.SETTINGS$SECURITY_ANALYZER)}
disabled={disabled}
/>
<Switch
aria-label="confirmationmode"
data-testid="confirmationmode"
defaultSelected={
settings.CONFIRMATION_MODE || !!settings.SECURITY_ANALYZER
}
onValueChange={onConfirmationModeChange}
isDisabled={disabled || !!settings.SECURITY_ANALYZER}
isSelected={settings.CONFIRMATION_MODE}
>
<Tooltip
content={t(I18nKey.SETTINGS$CONFIRMATION_MODE_TOOLTIP)}
closeDelay={100}
delay={500}
{enableAdvanced && (
<AutocompleteCombobox
ariaLabel="agent"
items={agents.map((agent) => ({ value: agent, label: agent }))}
defaultKey={settings.AGENT}
onChange={onAgentChange}
tooltip={t(I18nKey.SETTINGS$AGENT_TOOLTIP)}
/>
)}
{enableAdvanced && (
<AutocompleteCombobox
ariaLabel="securityanalyzer"
items={securityAnalyzers.map((securityAnalyzer) => ({
value: securityAnalyzer,
label: securityAnalyzer,
}))}
defaultKey={settings.SECURITY_ANALYZER}
onChange={onSecurityAnalyzerChange}
tooltip={t(I18nKey.SETTINGS$SECURITY_ANALYZER)}
disabled={disabled}
/>
)}
{enableAdvanced && (
<Switch
aria-label="confirmationmode"
data-testid="confirmationmode"
defaultSelected={
settings.CONFIRMATION_MODE || !!settings.SECURITY_ANALYZER
}
onValueChange={onConfirmationModeChange}
isDisabled={disabled || !!settings.SECURITY_ANALYZER}
isSelected={settings.CONFIRMATION_MODE}
>
{t(I18nKey.SETTINGS$CONFIRMATION_MODE)}
</Tooltip>
</Switch>
<Tooltip
content={t(I18nKey.SETTINGS$CONFIRMATION_MODE_TOOLTIP)}
closeDelay={100}
delay={500}
>
{t(I18nKey.SETTINGS$CONFIRMATION_MODE)}
</Tooltip>
</Switch>
)}
</>
);
}

@ -4,7 +4,6 @@ import i18next from "i18next";
import React from "react";
import { renderWithProviders } from "test-utils";
import { Mock } from "vitest";
import toast from "#/utils/toast";
import {
Settings,
getSettings,
@ -15,7 +14,6 @@ import Session from "#/services/session";
import { fetchAgents, fetchModels } from "#/services/options";
import SettingsModal from "./SettingsModal";

const toastSpy = vi.spyOn(toast, "settingsChanged");
const i18nSpy = vi.spyOn(i18next, "changeLanguage");
const startNewSessionSpy = vi.spyOn(Session, "startNewSession");
vi.spyOn(Session, "isConnected").mockImplementation(() => true);
@ -24,18 +22,14 @@ vi.mock("#/services/settings", async (importOriginal) => ({
...(await importOriginal<typeof import("#/services/settings")>()),
getSettings: vi.fn().mockReturnValue({
LLM_MODEL: "gpt-4o",
CUSTOM_LLM_MODEL: "",
USING_CUSTOM_MODEL: false,
AGENT: "CodeActAgent",
LANGUAGE: "en",
LLM_API_KEY: "sk-...",
CONFIRMATION_MODE: true,
SECURITY_ANALYZER: "invariant",
CONFIRMATION_MODE: false,
SECURITY_ANALYZER: "",
}),
getDefaultSettings: vi.fn().mockReturnValue({
LLM_MODEL: "gpt-4o",
CUSTOM_LLM_MODEL: "",
USING_CUSTOM_MODEL: false,
AGENT: "CodeActAgent",
LANGUAGE: "en",
LLM_API_KEY: "",
@ -98,8 +92,7 @@ describe("SettingsModal", () => {
it("should disabled the save button if the settings contain a missing value", async () => {
const onOpenChangeMock = vi.fn();
(getSettings as Mock).mockReturnValueOnce({
LLM_MODEL: "gpt-4o",
AGENT: "",
LLM_MODEL: "",
});
await act(async () =>
renderWithProviders(
@ -115,13 +108,11 @@
describe("onHandleSave", () => {
const initialSettings: Settings = {
LLM_MODEL: "gpt-4o",
CUSTOM_LLM_MODEL: "",
USING_CUSTOM_MODEL: false,
AGENT: "CodeActAgent",
LANGUAGE: "en",
LLM_API_KEY: "sk-...",
CONFIRMATION_MODE: true,
SECURITY_ANALYZER: "invariant",
SECURITY_ANALYZER: "",
CONFIRMATION_MODE: false,
};

it("should save the settings", async () => {
@ -135,8 +126,10 @@
await assertModelsAndAgentsFetched();

const saveButton = screen.getByRole("button", { name: /save/i });
const providerInput = screen.getByRole("combobox", { name: "Provider" });
const modelInput = screen.getByRole("combobox", { name: "Model" });
const providerInput = screen.getByRole("combobox", {
name: "LLM Provider",
});
const modelInput = screen.getByRole("combobox", { name: "LLM Model" });

await user.click(providerInput);
const azure = screen.getByText("Azure");
@ -164,8 +157,10 @@
);

const saveButton = screen.getByRole("button", { name: /save/i });
const providerInput = screen.getByRole("combobox", { name: "Provider" });
const modelInput = screen.getByRole("combobox", { name: "Model" });
const providerInput = screen.getByRole("combobox", {
name: "LLM Provider",
});
const modelInput = screen.getByRole("combobox", { name: "LLM Model" });

await user.click(providerInput);
const openai = screen.getByText("OpenAI");
@ -180,32 +175,6 @@
expect(startNewSessionSpy).toHaveBeenCalled();
});

it("should display a toast for every change", async () => {
const user = userEvent.setup();
const onOpenChangeMock = vi.fn();
await act(async () =>
renderWithProviders(
<SettingsModal isOpen onOpenChange={onOpenChangeMock} />,
),
);

const saveButton = screen.getByRole("button", { name: /save/i });
const providerInput = screen.getByRole("combobox", { name: "Provider" });
const modelInput = screen.getByRole("combobox", { name: "Model" });

await user.click(providerInput);
const cohere = screen.getByText("cohere");
await user.click(cohere);

await user.click(modelInput);
const model3 = screen.getByText("command-r-v1:0");
await user.click(model3);

await user.click(saveButton);

expect(toastSpy).toHaveBeenCalledTimes(4);
});

it("should change the language", async () => {
const user = userEvent.setup();
const onOpenChangeMock = vi.fn();
@ -230,6 +199,10 @@
it("should close the modal", async () => {
const user = userEvent.setup();
const onOpenChangeMock = vi.fn();
(getSettings as Mock).mockReturnValueOnce({
LLM_MODEL: "gpt-4o",
LLM_API_KEY: "sk-...",
});
await act(async () =>
renderWithProviders(
<SettingsModal isOpen onOpenChange={onOpenChangeMock} />,
@ -241,8 +214,10 @@
});

const saveButton = screen.getByRole("button", { name: /save/i });
const providerInput = screen.getByRole("combobox", { name: "Provider" });
const modelInput = screen.getByRole("combobox", { name: "Model" });
const providerInput = screen.getByRole("combobox", {
name: "LLM Provider",
});
const modelInput = screen.getByRole("combobox", { name: "LLM Model" });

await user.click(providerInput);
const cohere = screen.getByText("cohere");
@ -252,6 +227,7 @@
const model3 = screen.getByText("command-r-v1:0");
await user.click(model3);

expect(saveButton).not.toBeDisabled();
await user.click(saveButton);

expect(onOpenChangeMock).toHaveBeenCalledWith(false);
@ -261,16 +237,16 @@
it("should reset settings to defaults when the 'reset to defaults' button is clicked", async () => {
const user = userEvent.setup();
const onOpenChangeMock = vi.fn();
(getSettings as Mock).mockReturnValueOnce({
LLM_MODEL: "gpt-4o",
SECURITY_ANALYZER: "fakeanalyzer",
});
await act(async () =>
renderWithProviders(
<SettingsModal isOpen onOpenChange={onOpenChangeMock} />,
),
);

// We need to enable the agent select first
const agentSwitch = screen.getByTestId("enableagentselect");
await user.click(agentSwitch);

const resetButton = screen.getByRole("button", {
name: /MODAL_RESET_BUTTON_LABEL/i,
});
|
||||
|
||||
@ -17,7 +17,6 @@ import {
|
||||
Settings,
|
||||
getSettings,
|
||||
getDefaultSettings,
|
||||
getSettingsDifference,
|
||||
settingsAreUpToDate,
|
||||
maybeMigrateSettings,
|
||||
saveSettings,
|
||||
@ -31,7 +30,7 @@ interface SettingsProps {
|
||||
onOpenChange: (isOpen: boolean) => void;
|
||||
}
|
||||
|
||||
const REQUIRED_SETTINGS = ["LLM_MODEL", "AGENT"];
|
||||
const REQUIRED_SETTINGS = ["LLM_MODEL"];
|
||||
|
||||
function SettingsModal({ isOpen, onOpenChange }: SettingsProps) {
|
||||
const { t } = useTranslation();
|
||||
@ -83,20 +82,6 @@ function SettingsModal({ isOpen, onOpenChange }: SettingsProps) {
|
||||
}));
|
||||
};
|
||||
|
||||
const handleCustomModelChange = (model: string) => {
|
||||
setSettings((prev) => ({
|
||||
...prev,
|
||||
CUSTOM_LLM_MODEL: model,
|
||||
}));
|
||||
};
|
||||
|
||||
const handleModelTypeChange = (type: "custom" | "default") => {
|
||||
setSettings((prev) => ({
|
||||
...prev,
|
||||
USING_CUSTOM_MODEL: type === "custom",
|
||||
}));
|
||||
};
|
||||
|
||||
const handleAgentChange = (agent: string) => {
|
||||
setSettings((prev) => ({ ...prev, AGENT: agent }));
|
||||
};
|
||||
@ -131,28 +116,17 @@ function SettingsModal({ isOpen, onOpenChange }: SettingsProps) {
|
||||
};
|
||||
|
||||
const handleSaveSettings = () => {
|
||||
const updatedSettings = getSettingsDifference(settings);
|
||||
saveSettings(settings);
|
||||
i18next.changeLanguage(settings.LANGUAGE);
|
||||
Session.startNewSession();
|
||||
|
||||
const sensitiveKeys = ["LLM_API_KEY"];
|
||||
|
||||
Object.entries(updatedSettings).forEach(([key, value]) => {
|
||||
if (!sensitiveKeys.includes(key)) {
|
||||
toast.settingsChanged(`${key} set to "${value}"`);
|
||||
} else {
|
||||
toast.settingsChanged(`${key} has been updated securely.`);
|
||||
}
|
||||
});
|
||||
|
||||
localStorage.setItem(
|
||||
`API_KEY_${settings.LLM_MODEL || models[0]}`,
|
||||
settings.LLM_API_KEY,
|
||||
);
|
||||
};
|
||||
|
||||
let subtitle = t(I18nKey.CONFIGURATION$MODAL_SUB_TITLE);
|
||||
let subtitle = "";
|
||||
if (loading) {
|
||||
subtitle = t(I18nKey.CONFIGURATION$AGENT_LOADING);
|
||||
} else if (agentIsRunning) {
|
||||
@ -205,8 +179,6 @@ function SettingsModal({ isOpen, onOpenChange }: SettingsProps) {
|
||||
agents={agents}
|
||||
securityAnalyzers={securityAnalyzers}
|
||||
onModelChange={handleModelChange}
|
||||
onCustomModelChange={handleCustomModelChange}
|
||||
onModelTypeChange={handleModelTypeChange}
|
||||
onAgentChange={handleAgentChange}
|
||||
onLanguageChange={handleLanguageChange}
|
||||
onAPIKeyChange={handleAPIKeyChange}
|
||||
|
||||
@ -250,18 +250,6 @@
|
||||
"fr": "Configuration",
|
||||
"tr": "Konfigürasyon"
|
||||
},
|
||||
"CONFIGURATION$MODAL_SUB_TITLE": {
|
||||
"en": "Adjust settings to your liking",
|
||||
"zh-CN": "根据您的喜好调整设置",
|
||||
"de": "Passen Sie die Einstellungen nach Ihren Wünschen an ",
|
||||
"ko-KR": "원하는 대로 설정 조정",
|
||||
"no": "Juster innstillinger etter dine ønsker ",
|
||||
"zh-TW": "調整設定以符合您的喜好",
|
||||
"it": "Regola le impostazioni in base alle tue preferenze",
|
||||
"pt": "Ajuste as configurações de acordo com sua preferência",
|
||||
"es": "Ajusta la configuración a tu gusto",
|
||||
"tr": "Ayarları isteğinize göre ayarlayın"
|
||||
},
|
||||
"CONFIGURATION$MODEL_SELECT_LABEL": {
|
||||
"en": "Model",
|
||||
"zh-CN": "模型",
|
||||
|
||||
@@ -19,8 +19,6 @@ describe("startNewSession", () => {
it("Should start a new session with the current settings", () => {
const settings: Settings = {
LLM_MODEL: "llm_value",
CUSTOM_LLM_MODEL: "",
USING_CUSTOM_MODEL: false,
AGENT: "agent_value",
LANGUAGE: "language_value",
LLM_API_KEY: "sk-...",
@@ -39,33 +37,4 @@ describe("startNewSession", () => {
expect(setupSpy).toHaveBeenCalledTimes(1);
expect(sendSpy).toHaveBeenCalledWith(JSON.stringify(event));
});

it("should start with the custom llm if set", () => {
const settings: Settings = {
LLM_MODEL: "llm_value",
CUSTOM_LLM_MODEL: "custom_llm_value",
USING_CUSTOM_MODEL: true,
AGENT: "agent_value",
LANGUAGE: "language_value",
LLM_API_KEY: "sk-...",
CONFIRMATION_MODE: true,
SECURITY_ANALYZER: "analyzer",
};

const event = {
action: ActionType.INIT,
args: settings,
};

saveSettings(settings);
Session.startNewSession();

expect(setupSpy).toHaveBeenCalledTimes(1);
expect(sendSpy).toHaveBeenCalledWith(
JSON.stringify({
...event,
args: { ...settings, LLM_MODEL: "custom_llm_value" },
}),
);
});
});

@@ -50,9 +50,6 @@ class Session {
action: ActionType.INIT,
args: {
...settings,
LLM_MODEL: settings.USING_CUSTOM_MODEL
? settings.CUSTOM_LLM_MODEL
: settings.LLM_MODEL,
},
};
const eventString = JSON.stringify(event);

@@ -3,7 +3,6 @@ import {
DEFAULT_SETTINGS,
Settings,
getSettings,
getSettingsDifference,
saveSettings,
} from "./settings";

@@ -18,8 +17,6 @@ describe("getSettings", () => {
it("should get the stored settings", () => {
(localStorage.getItem as Mock)
.mockReturnValueOnce("llm_value")
.mockReturnValueOnce("custom_llm_value")
.mockReturnValueOnce("true")
.mockReturnValueOnce("agent_value")
.mockReturnValueOnce("language_value")
.mockReturnValueOnce("api_key")
@@ -30,8 +27,6 @@ describe("getSettings", () => {

expect(settings).toEqual({
LLM_MODEL: "llm_value",
CUSTOM_LLM_MODEL: "custom_llm_value",
USING_CUSTOM_MODEL: true,
AGENT: "agent_value",
LANGUAGE: "language_value",
LLM_API_KEY: "api_key",
@@ -55,8 +50,6 @@ describe("getSettings", () => {

expect(settings).toEqual({
LLM_MODEL: DEFAULT_SETTINGS.LLM_MODEL,
CUSTOM_LLM_MODEL: "",
USING_CUSTOM_MODEL: DEFAULT_SETTINGS.USING_CUSTOM_MODEL,
AGENT: DEFAULT_SETTINGS.AGENT,
LANGUAGE: DEFAULT_SETTINGS.LANGUAGE,
LLM_API_KEY: "",
@@ -70,8 +63,6 @@ describe("saveSettings", () => {
it("should save the settings", () => {
const settings: Settings = {
LLM_MODEL: "llm_value",
CUSTOM_LLM_MODEL: "custom_llm_value",
USING_CUSTOM_MODEL: true,
AGENT: "agent_value",
LANGUAGE: "language_value",
LLM_API_KEY: "some_key",
@@ -82,14 +73,6 @@ describe("saveSettings", () => {
saveSettings(settings);

expect(localStorage.setItem).toHaveBeenCalledWith("LLM_MODEL", "llm_value");
expect(localStorage.setItem).toHaveBeenCalledWith(
"CUSTOM_LLM_MODEL",
"custom_llm_value",
);
expect(localStorage.setItem).toHaveBeenCalledWith(
"USING_CUSTOM_MODEL",
"true",
);
expect(localStorage.setItem).toHaveBeenCalledWith("AGENT", "agent_value");
expect(localStorage.setItem).toHaveBeenCalledWith(
"LANGUAGE",
@@ -110,7 +93,7 @@ describe("saveSettings", () => {

expect(localStorage.setItem).toHaveBeenCalledTimes(2);
expect(localStorage.setItem).toHaveBeenCalledWith("LLM_MODEL", "llm_value");
expect(localStorage.setItem).toHaveBeenCalledWith("SETTINGS_VERSION", "1");
expect(localStorage.setItem).toHaveBeenCalledWith("SETTINGS_VERSION", "2");
});

it("should not save invalid settings", () => {
@@ -135,47 +118,3 @@ describe("saveSettings", () => {
);
});
});

describe("getSettingsDifference", () => {
beforeEach(() => {
(localStorage.getItem as Mock)
.mockReturnValueOnce("llm_value")
.mockReturnValueOnce("custom_llm_value")
.mockReturnValueOnce("false")
.mockReturnValueOnce("agent_value")
.mockReturnValueOnce("language_value");
});

it("should return updated settings", () => {
const settings = {
LLM_MODEL: "new_llm_value",
CUSTOM_LLM_MODEL: "custom_llm_value",
USING_CUSTOM_MODEL: true,
AGENT: "new_agent_value",
LANGUAGE: "language_value",
};

const updatedSettings = getSettingsDifference(settings);

expect(updatedSettings).toEqual({
USING_CUSTOM_MODEL: true,
LLM_MODEL: "new_llm_value",
AGENT: "new_agent_value",
});
});

it("should not handle invalid settings", () => {
const settings = {
LLM_MODEL: "new_llm_value",
AGENT: "new_agent_value",
INVALID: "invalid_value",
};

const updatedSettings = getSettingsDifference(settings);

expect(updatedSettings).toEqual({
LLM_MODEL: "new_llm_value",
AGENT: "new_agent_value",
});
});
});

@@ -1,9 +1,7 @@
const LATEST_SETTINGS_VERSION = 1;
const LATEST_SETTINGS_VERSION = 2;

export type Settings = {
LLM_MODEL: string;
CUSTOM_LLM_MODEL: string;
USING_CUSTOM_MODEL: boolean;
AGENT: string;
LANGUAGE: string;
LLM_API_KEY: string;
@@ -11,12 +9,8 @@ export type Settings = {
SECURITY_ANALYZER: string;
};

type SettingsInput = Settings[keyof Settings];

export const DEFAULT_SETTINGS: Settings = {
LLM_MODEL: "openai/gpt-4o",
CUSTOM_LLM_MODEL: "",
USING_CUSTOM_MODEL: false,
AGENT: "CodeActAgent",
LANGUAGE: "en",
LLM_API_KEY: "",
@@ -46,6 +40,14 @@ export const maybeMigrateSettings = () => {
if (currentVersion < 1) {
localStorage.setItem("AGENT", DEFAULT_SETTINGS.AGENT);
}
if (currentVersion < 2) {
const customModel = localStorage.getItem("CUSTOM_LLM_MODEL");
if (customModel) {
localStorage.setItem("LLM_MODEL", customModel);
}
localStorage.removeItem("CUSTOM_LLM_MODEL");
localStorage.removeItem("USING_CUSTOM_MODEL");
}
};

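For reference, the migration above is cumulative: each `if currentVersion < N` block upgrades a store of any older version. A minimal Python sketch of the same pattern, using a plain dict in place of localStorage (names here are illustrative, not part of this commit):

LATEST_SETTINGS_VERSION = 2

def maybe_migrate(store: dict) -> None:
    """Upgrade `store` in place, applying every migration it is missing."""
    version = int(store.get('SETTINGS_VERSION', 0))
    if version < 1:
        store['AGENT'] = 'CodeActAgent'  # reset to the new default agent
    if version < 2:
        # fold the custom-model field into LLM_MODEL, then drop both flags
        custom = store.pop('CUSTOM_LLM_MODEL', None)
        store.pop('USING_CUSTOM_MODEL', None)
        if custom:
            store['LLM_MODEL'] = custom
    store['SETTINGS_VERSION'] = str(LATEST_SETTINGS_VERSION)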
/**
@@ -58,9 +60,6 @@ export const getDefaultSettings = (): Settings => DEFAULT_SETTINGS;
*/
export const getSettings = (): Settings => {
const model = localStorage.getItem("LLM_MODEL");
const customModel = localStorage.getItem("CUSTOM_LLM_MODEL");
const usingCustomModel =
localStorage.getItem("USING_CUSTOM_MODEL") === "true";
const agent = localStorage.getItem("AGENT");
const language = localStorage.getItem("LANGUAGE");
const apiKey = localStorage.getItem("LLM_API_KEY");
@@ -69,8 +68,6 @@ export const getSettings = (): Settings => {

return {
LLM_MODEL: model || DEFAULT_SETTINGS.LLM_MODEL,
CUSTOM_LLM_MODEL: customModel || DEFAULT_SETTINGS.CUSTOM_LLM_MODEL,
USING_CUSTOM_MODEL: usingCustomModel || DEFAULT_SETTINGS.USING_CUSTOM_MODEL,
AGENT: agent || DEFAULT_SETTINGS.AGENT,
LANGUAGE: language || DEFAULT_SETTINGS.LANGUAGE,
LLM_API_KEY: apiKey || DEFAULT_SETTINGS.LLM_API_KEY,
@@ -93,34 +90,3 @@ export const saveSettings = (settings: Partial<Settings>) => {
});
localStorage.setItem("SETTINGS_VERSION", LATEST_SETTINGS_VERSION.toString());
};

/**
* Get the difference between the current settings and the provided settings.
* Useful for notifying the user of exact changes.
*
* @example
* // Assuming the current settings are: { LLM_MODEL: "gpt-4o", AGENT: "CodeActAgent", LANGUAGE: "en" }
* const updatedSettings = getSettingsDifference({ LLM_MODEL: "gpt-4o", AGENT: "OTHER_AGENT", LANGUAGE: "en" });
* // updatedSettings = { AGENT: "OTHER_AGENT" }
*
* @param settings - the settings to compare
* @returns the updated settings
*/
export const getSettingsDifference = (settings: Partial<Settings>) => {
const currentSettings = getSettings();
const updatedSettings: Partial<Settings> = {};

Object.keys(settings).forEach((key) => {
const typedKey = key as keyof Settings;
if (
validKeys.includes(typedKey) &&
settings[typedKey] !== currentSettings[typedKey]
) {
(updatedSettings[typedKey] as SettingsInput) = settings[
typedKey
] as SettingsInput;
}
});

return updatedSettings;
};

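getSettingsDifference only reports keys whose values actually changed, which is what drives the per-change toasts. A hedged Python equivalent of the same idea (dict-based, illustrative only):

def settings_difference(current: dict, proposed: dict, valid_keys: set) -> dict:
    """Return only the entries of `proposed` that are valid and differ from `current`."""
    return {
        k: v for k, v in proposed.items()
        if k in valid_keys and current.get(k) != v
    }

# settings_difference({'AGENT': 'CodeActAgent'}, {'AGENT': 'OTHER_AGENT'}, {'AGENT'})
# -> {'AGENT': 'OTHER_AGENT'}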
@@ -58,5 +58,23 @@ describe("extractModelAndProvider", () => {
model: "gpt-4o",
separator: "/",
});

expect(extractModelAndProvider("claude-3-5-sonnet-20240620")).toEqual({
provider: "anthropic",
model: "claude-3-5-sonnet-20240620",
separator: "/",
});

expect(extractModelAndProvider("claude-3-haiku-20240307")).toEqual({
provider: "anthropic",
model: "claude-3-haiku-20240307",
separator: "/",
});

expect(extractModelAndProvider("claude-2.1")).toEqual({
provider: "anthropic",
model: "claude-2.1",
separator: "/",
});
});
});

@@ -1,5 +1,8 @@
import { isNumber } from "./isNumber";
import { VERIFIED_OPENAI_MODELS } from "./verified-models";
import {
VERIFIED_ANTHROPIC_MODELS,
VERIFIED_OPENAI_MODELS,
} from "./verified-models";

/**
* Checks if the split array is actually a version number.
@@ -41,6 +44,9 @@ export const extractModelAndProvider = (model: string) => {
if (VERIFIED_OPENAI_MODELS.includes(split[0])) {
return { provider: "openai", model: split[0], separator: "/" };
}
if (VERIFIED_ANTHROPIC_MODELS.includes(split[0])) {
return { provider: "anthropic", model: split[0], separator: "/" };
}
// return as model only
return { provider: "", model, separator: "" };
}

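The change above teaches extractModelAndProvider to infer `anthropic` for bare, verified Claude ids, exactly as it already did for OpenAI ids. A minimal Python sketch of the same lookup (lists truncated; not the real module):

VERIFIED_OPENAI = {'gpt-4o', 'gpt-3.5-turbo'}
VERIFIED_ANTHROPIC = {'claude-2', 'claude-2.1', 'claude-3-5-sonnet-20240620'}

def extract_model_and_provider(model: str) -> dict:
    # Ids with an explicit provider prefix keep it; bare verified ids get one.
    if '/' in model:
        provider, name = model.split('/', 1)
        return {'provider': provider, 'model': name, 'separator': '/'}
    if model in VERIFIED_OPENAI:
        return {'provider': 'openai', 'model': model, 'separator': '/'}
    if model in VERIFIED_ANTHROPIC:
        return {'provider': 'anthropic', 'model': model, 'separator': '/'}
    return {'provider': '', 'model': model, 'separator': ''}

The real TypeScript helper also handles "."-separated ids and version-number false positives, which this sketch skips.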
@@ -15,6 +15,11 @@ test("organizeModelsAndProviders", () => {
"gpt-4o",
"together-ai-21.1b-41b",
"gpt-3.5-turbo",
"claude-3-5-sonnet-20240620",
"claude-3-haiku-20240307",
"claude-2",
"claude-2.1",
"anthropic.unsafe-claude-2.1",
];

const object = organizeModelsAndProviders(models);
@@ -43,6 +48,15 @@ test("organizeModelsAndProviders", () => {
separator: "/",
models: ["gpt-4o", "gpt-3.5-turbo"],
},
anthropic: {
separator: "/",
models: [
"claude-3-5-sonnet-20240620",
"claude-3-haiku-20240307",
"claude-2",
"claude-2.1",
],
},
other: {
separator: "",
models: ["together-ai-21.1b-41b"],

@@ -32,6 +32,13 @@ export const organizeModelsAndProviders = (models: string[]) => {
provider,
model: modelId,
} = extractModelAndProvider(model);

// Ignore "anthropic" providers with a separator of "."
// These are outdated and incompatible providers.
if (provider === "anthropic" && separator === ".") {
return;
}

const key = provider || "other";
if (!object[key]) {
object[key] = { separator, models: [] };

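organizeModelsAndProviders then buckets those results by provider, skipping the legacy dot-separated `anthropic.` ids. A sketch of the grouping step, reusing the Python helper above (illustrative):

from collections import defaultdict

def organize(models: list[str]) -> dict:
    grouped: dict = defaultdict(lambda: {'separator': '', 'models': []})
    for m in models:
        info = extract_model_and_provider(m)
        if info['provider'] == 'anthropic' and info['separator'] == '.':
            continue  # outdated, incompatible provider spelling
        key = info['provider'] or 'other'
        grouped[key]['separator'] = info['separator']
        grouped[key]['models'].append(info['model'])
    return dict(grouped)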
@@ -10,9 +10,50 @@ interface EventActionHistory {
LLM_API_KEY?: string;
[key: string]: unknown;
};
extras?: {
open_page_urls: string[];
active_page_index: number;
dom_object: Record<string, unknown>;
axtree_object: Record<string, unknown>;
extra_element_properties: Record<string, unknown>;
last_browser_action: string;
last_browser_action_error: unknown;
focused_element_bid: string;
};
[key: string]: unknown;
}

export const removeUnwantedKeys = (
data: EventActionHistory[],
): EventActionHistory[] => {
const UNDESIRED_KEYS = [
"open_page_urls",
"active_page_index",
"dom_object",
"axtree_object",
"extra_element_properties",
"last_browser_action",
"last_browser_action_error",
"focused_element_bid",
];

return data.map((item) => {
// Create a shallow copy of item
const newItem = { ...item };

// Check if extras exists and delete it from a new extras object
if (newItem.extras) {
const newExtras = { ...newItem.extras };
UNDESIRED_KEYS.forEach((key) => {
delete newExtras[key as keyof typeof newExtras];
});
newItem.extras = newExtras;
}

return newItem;
});
};

export const removeApiKey = (
data: EventActionHistory[],
): EventActionHistory[] =>

@@ -1,6 +1,6 @@
// Here is the list of verified models and providers that we know work well with OpenHands.
export const VERIFIED_PROVIDERS = ["openai", "azure", "anthropic"];
export const VERIFIED_MODELS = ["gpt-4o", "claude-3-5-sonnet-20240620-v1:0"];
export const VERIFIED_MODELS = ["gpt-4o", "claude-3-5-sonnet-20240620"];

// LiteLLM does not return OpenAI models with the provider, so we list them here to set them ourselves for consistency
// (e.g., they return `gpt-4o` instead of `openai/gpt-4o`)
@@ -12,3 +12,16 @@ export const VERIFIED_OPENAI_MODELS = [
"gpt-4-32k",
"gpt-3.5-turbo",
];

// LiteLLM does not return the compatible Anthropic models with the provider, so we list them here to set them ourselves
// (e.g., they return `claude-3-5-sonnet-20240620` instead of `anthropic/claude-3-5-sonnet-20240620`)
export const VERIFIED_ANTHROPIC_MODELS = [
"claude-2",
"claude-2.1",
"claude-3-5-sonnet-20240620",
"claude-3-haiku-20240307",
"claude-3-opus-20240229",
"claude-3-sonnet-20240229",
"claude-instant-1",
"claude-instant-1.2",
];

@@ -51,6 +51,8 @@ class LLMConfig:
output_cost_per_token: The cost per output token. This will be available in logs for the user to check.
ollama_base_url: The base URL for the OLLAMA API.
drop_params: Drop any unmapped (unsupported) params without causing an exception.
disable_vision: If the model is vision capable, this option allows disabling image processing (useful for cost reduction).
caching_prompt: Whether to use the prompt caching feature provided by the LLM.
"""

model: str = 'gpt-4o'
@@ -63,10 +65,10 @@ class LLMConfig:
aws_access_key_id: str | None = None
aws_secret_access_key: str | None = None
aws_region_name: str | None = None
num_retries: int = 10
num_retries: int = 8
retry_multiplier: float = 2
retry_min_wait: int = 3
retry_max_wait: int = 300
retry_min_wait: int = 15
retry_max_wait: int = 120
timeout: int | None = None
max_message_chars: int = 10_000  # maximum number of characters in an observation's content when sent to the llm
temperature: float = 0
@@ -78,6 +80,8 @@ class LLMConfig:
output_cost_per_token: float | None = None
ollama_base_url: str | None = None
drop_params: bool | None = None
disable_vision: bool | None = None
caching_prompt: bool = False

def defaults_to_dict(self) -> dict:
"""Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
@@ -619,7 +623,7 @@ def get_llm_config_arg(
model = 'gpt-3.5-turbo'
api_key = '...'
temperature = 0.5
num_retries = 10
num_retries = 8
...
```


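The retry defaults above tighten the schedule: fewer attempts (8 instead of 10) but a higher floor (15 s) and a lower ceiling (120 s). With tenacity-style exponential waiting, which is roughly multiplier * 2**attempt clamped to [min, max], the schedule works out as follows (illustrative arithmetic, not project code):

multiplier, min_wait, max_wait = 2, 15, 120
for attempt in range(1, 9):  # num_retries = 8
    wait = min(max(multiplier * 2 ** attempt, min_wait), max_wait)
    print(f'attempt {attempt}: wait ~{wait}s')
# attempts 1-2 sit on the 15 s floor; the 120 s ceiling is reached from attempt 6 on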
@@ -239,7 +239,7 @@ class LlmFileHandler(logging.FileHandler):
self.message_counter += 1


def _get_llm_file_handler(name, log_level=logging.INFO):
def _get_llm_file_handler(name: str, log_level: int):
# The 'delay' parameter, when set to True, postpones the opening of the log file
# until the first log message is emitted.
llm_file_handler = LlmFileHandler(name, delay=True)
@@ -248,7 +248,7 @@ def _get_llm_file_handler(name, log_level=logging.INFO):
return llm_file_handler


def _setup_llm_logger(name, log_level=logging.INFO):
def _setup_llm_logger(name: str, log_level: int):
logger = logging.getLogger(name)
logger.propagate = False
logger.setLevel(log_level)

@@ -1,8 +1,11 @@
from enum import Enum
from typing import Union

from pydantic import BaseModel, Field, model_serializer
from typing_extensions import Literal

from openhands.core.logger import openhands_logger as logger


class ContentType(Enum):
TEXT = 'text'
@@ -10,7 +13,7 @@ class ContentType(Enum):


class Content(BaseModel):
type: ContentType
type: str
cache_prompt: bool = False

@model_serializer
@@ -19,13 +22,13 @@ class Content(BaseModel):


class TextContent(Content):
type: ContentType = ContentType.TEXT
type: str = ContentType.TEXT.value
text: str

@model_serializer
def serialize_model(self):
data: dict[str, str | dict[str, str]] = {
'type': self.type.value,
'type': self.type,
'text': self.text,
}
if self.cache_prompt:
@@ -34,14 +37,14 @@ class TextContent(Content):


class ImageContent(Content):
type: ContentType = ContentType.IMAGE_URL
type: str = ContentType.IMAGE_URL.value
image_urls: list[str]

@model_serializer
def serialize_model(self):
images: list[dict[str, str | dict[str, str]]] = []
for url in self.image_urls:
images.append({'type': self.type.value, 'image_url': {'url': url}})
images.append({'type': self.type, 'image_url': {'url': url}})
if self.cache_prompt and images:
images[-1]['cache_control'] = {'type': 'ephemeral'}
return images
@@ -65,4 +68,52 @@ class Message(BaseModel):
elif isinstance(item, ImageContent):
content.extend(item.model_dump())

return {'role': self.role, 'content': content}
return {'content': content, 'role': self.role}


def format_messages(
messages: Union[Message, list[Message]],
with_images: bool,
with_prompt_caching: bool,
) -> list[dict]:
if not isinstance(messages, list):
messages = [messages]

if with_images or with_prompt_caching:
return [message.model_dump() for message in messages]

converted_messages = []
for message in messages:
content_parts = []
role = 'user'

if isinstance(message, str) and message:
content_parts.append(message)
elif isinstance(message, dict):
role = message.get('role', 'user')
if 'content' in message and message['content']:
content_parts.append(message['content'])
elif isinstance(message, Message):
role = message.role
for content in message.content:
if isinstance(content, list):
for item in content:
if isinstance(item, TextContent) and item.text:
content_parts.append(item.text)
elif isinstance(content, TextContent) and content.text:
content_parts.append(content.text)
else:
logger.error(
f'>>> `message` is not a string, dict, or Message: {type(message)}'
)

if content_parts:
content_str = '\n'.join(content_parts)
converted_messages.append(
{
'role': role,
'content': content_str,
}
)

return converted_messages

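With `type` stored as a plain string, the serializers above produce provider-ready dicts directly. The shapes, for reference (illustrative values):

text_block = {'type': 'text', 'text': 'hello'}
cached_text_block = {  # when cache_prompt=True
    'type': 'text',
    'text': 'hello',
    'cache_control': {'type': 'ephemeral'},
}
image_blocks = [
    {'type': 'image_url', 'image_url': {'url': 'https://example.com/a.png'}}
]
message = {'content': [text_block, *image_blocks], 'role': 'user'}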
@@ -3,45 +3,47 @@ from enum import Enum

class ConfigType(str, Enum):
# For frontend
LLM_CUSTOM_LLM_PROVIDER = 'LLM_CUSTOM_LLM_PROVIDER'
LLM_DROP_PARAMS = 'LLM_DROP_PARAMS'
LLM_MAX_INPUT_TOKENS = 'LLM_MAX_INPUT_TOKENS'
LLM_MAX_OUTPUT_TOKENS = 'LLM_MAX_OUTPUT_TOKENS'
LLM_TOP_P = 'LLM_TOP_P'
LLM_TEMPERATURE = 'LLM_TEMPERATURE'
LLM_TIMEOUT = 'LLM_TIMEOUT'
LLM_API_KEY = 'LLM_API_KEY'
LLM_BASE_URL = 'LLM_BASE_URL'
AWS_ACCESS_KEY_ID = 'AWS_ACCESS_KEY_ID'
AWS_SECRET_ACCESS_KEY = 'AWS_SECRET_ACCESS_KEY'
AWS_REGION_NAME = 'AWS_REGION_NAME'
WORKSPACE_BASE = 'WORKSPACE_BASE'
WORKSPACE_MOUNT_PATH = 'WORKSPACE_MOUNT_PATH'
WORKSPACE_MOUNT_REWRITE = 'WORKSPACE_MOUNT_REWRITE'
WORKSPACE_MOUNT_PATH_IN_SANDBOX = 'WORKSPACE_MOUNT_PATH_IN_SANDBOX'
CACHE_DIR = 'CACHE_DIR'
LLM_MODEL = 'LLM_MODEL'
CONFIRMATION_MODE = 'CONFIRMATION_MODE'
BASE_CONTAINER_IMAGE = 'BASE_CONTAINER_IMAGE'
RUN_AS_OPENHANDS = 'RUN_AS_OPENHANDS'
LLM_EMBEDDING_MODEL = 'LLM_EMBEDDING_MODEL'
LLM_EMBEDDING_BASE_URL = 'LLM_EMBEDDING_BASE_URL'
LLM_EMBEDDING_DEPLOYMENT_NAME = 'LLM_EMBEDDING_DEPLOYMENT_NAME'
LLM_API_VERSION = 'LLM_API_VERSION'
LLM_NUM_RETRIES = 'LLM_NUM_RETRIES'
LLM_RETRY_MIN_WAIT = 'LLM_RETRY_MIN_WAIT'
LLM_RETRY_MAX_WAIT = 'LLM_RETRY_MAX_WAIT'
AGENT_MEMORY_MAX_THREADS = 'AGENT_MEMORY_MAX_THREADS'
AGENT_MEMORY_ENABLED = 'AGENT_MEMORY_ENABLED'
MAX_ITERATIONS = 'MAX_ITERATIONS'
AGENT = 'AGENT'
E2B_API_KEY = 'E2B_API_KEY'
SECURITY_ANALYZER = 'SECURITY_ANALYZER'
SANDBOX_USER_ID = 'SANDBOX_USER_ID'
SANDBOX_TIMEOUT = 'SANDBOX_TIMEOUT'
USE_HOST_NETWORK = 'USE_HOST_NETWORK'
DISABLE_COLOR = 'DISABLE_COLOR'
AGENT_MEMORY_ENABLED = 'AGENT_MEMORY_ENABLED'
AGENT_MEMORY_MAX_THREADS = 'AGENT_MEMORY_MAX_THREADS'
AWS_ACCESS_KEY_ID = 'AWS_ACCESS_KEY_ID'
AWS_REGION_NAME = 'AWS_REGION_NAME'
AWS_SECRET_ACCESS_KEY = 'AWS_SECRET_ACCESS_KEY'
BASE_CONTAINER_IMAGE = 'BASE_CONTAINER_IMAGE'
CACHE_DIR = 'CACHE_DIR'
CONFIRMATION_MODE = 'CONFIRMATION_MODE'
DEBUG = 'DEBUG'
DISABLE_COLOR = 'DISABLE_COLOR'
E2B_API_KEY = 'E2B_API_KEY'
FILE_UPLOADS_ALLOWED_EXTENSIONS = 'FILE_UPLOADS_ALLOWED_EXTENSIONS'
FILE_UPLOADS_MAX_FILE_SIZE_MB = 'FILE_UPLOADS_MAX_FILE_SIZE_MB'
FILE_UPLOADS_RESTRICT_FILE_TYPES = 'FILE_UPLOADS_RESTRICT_FILE_TYPES'
FILE_UPLOADS_ALLOWED_EXTENSIONS = 'FILE_UPLOADS_ALLOWED_EXTENSIONS'
LLM_API_KEY = 'LLM_API_KEY'
LLM_API_VERSION = 'LLM_API_VERSION'
LLM_BASE_URL = 'LLM_BASE_URL'
LLM_CACHING_PROMPT = 'LLM_CACHING_PROMPT'
LLM_CUSTOM_LLM_PROVIDER = 'LLM_CUSTOM_LLM_PROVIDER'
LLM_DROP_PARAMS = 'LLM_DROP_PARAMS'
LLM_EMBEDDING_BASE_URL = 'LLM_EMBEDDING_BASE_URL'
LLM_EMBEDDING_DEPLOYMENT_NAME = 'LLM_EMBEDDING_DEPLOYMENT_NAME'
LLM_EMBEDDING_MODEL = 'LLM_EMBEDDING_MODEL'
LLM_MAX_INPUT_TOKENS = 'LLM_MAX_INPUT_TOKENS'
LLM_MAX_OUTPUT_TOKENS = 'LLM_MAX_OUTPUT_TOKENS'
LLM_MODEL = 'LLM_MODEL'
LLM_NUM_RETRIES = 'LLM_NUM_RETRIES'
LLM_RETRY_MAX_WAIT = 'LLM_RETRY_MAX_WAIT'
LLM_RETRY_MIN_WAIT = 'LLM_RETRY_MIN_WAIT'
LLM_TEMPERATURE = 'LLM_TEMPERATURE'
LLM_TIMEOUT = 'LLM_TIMEOUT'
LLM_TOP_P = 'LLM_TOP_P'
LLM_DISABLE_VISION = 'LLM_DISABLE_VISION'
MAX_ITERATIONS = 'MAX_ITERATIONS'
RUN_AS_OPENHANDS = 'RUN_AS_OPENHANDS'
SANDBOX_TIMEOUT = 'SANDBOX_TIMEOUT'
SANDBOX_USER_ID = 'SANDBOX_USER_ID'
SECURITY_ANALYZER = 'SECURITY_ANALYZER'
USE_HOST_NETWORK = 'USE_HOST_NETWORK'
WORKSPACE_BASE = 'WORKSPACE_BASE'
WORKSPACE_MOUNT_PATH = 'WORKSPACE_MOUNT_PATH'
WORKSPACE_MOUNT_PATH_IN_SANDBOX = 'WORKSPACE_MOUNT_PATH_IN_SANDBOX'
WORKSPACE_MOUNT_REWRITE = 'WORKSPACE_MOUNT_REWRITE'

@@ -2,6 +2,7 @@ import asyncio
import copy
import warnings
from functools import partial
from typing import Union

from openhands.core.config import LLMConfig

@@ -24,12 +25,13 @@ from tenacity import (
retry,
retry_if_exception_type,
stop_after_attempt,
wait_random_exponential,
wait_exponential,
)

from openhands.core.exceptions import UserCancelledError
from openhands.core.exceptions import LLMResponseError, UserCancelledError
from openhands.core.logger import llm_prompt_logger, llm_response_logger
from openhands.core.logger import openhands_logger as logger
from openhands.core.message import Message, format_messages
from openhands.core.metrics import Metrics

__all__ = ['LLM']
@@ -61,12 +63,9 @@ class LLM:
Args:
config: The LLM configuration
"""
self.config = copy.deepcopy(config)
self.metrics = metrics if metrics is not None else Metrics()
self.cost_metric_supported = True
self.supports_prompt_caching = (
self.config.model in cache_prompting_supported_models
)
self.config = copy.deepcopy(config)

# Set up config attributes with default values to prevent AttributeError
LLMConfig.set_missing_attributes(self.config)
@@ -84,6 +83,15 @@ class LLM:
except Exception as e:
logger.warning(f'Could not get model info for {config.model}:\n{e}')

# Tuple of exceptions to retry on
self.retry_exceptions = (
APIConnectionError,
ContentPolicyViolationError,
InternalServerError,
OpenAIError,
RateLimitError,
)

# Set the max tokens in an LM-specific way if not set
if self.config.max_input_tokens is None:
if (
@@ -123,33 +131,58 @@ class LLM:
top_p=self.config.top_p,
)

if self.vision_is_active():
logger.debug('LLM: model has vision enabled')

completion_unwrapped = self._completion

def attempt_on_error(retry_state):
"""Custom attempt function for litellm completion."""
logger.error(
f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize these settings in the configuration.',
f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize retry values in the configuration.',
exc_info=False,
)
return None

@retry(
reraise=True,
stop=stop_after_attempt(self.config.num_retries),
wait=wait_random_exponential(
def custom_completion_wait(retry_state):
"""Custom wait function for litellm completion."""
if not retry_state:
return 0
exception = retry_state.outcome.exception() if retry_state.outcome else None
if exception is None:
return 0

min_wait_time = self.config.retry_min_wait
max_wait_time = self.config.retry_max_wait

# for rate limit errors, wait 1 minute by default, max 4 minutes between retries
exception_type = type(exception).__name__
logger.error(f'\nexception_type: {exception_type}\n')

if exception_type == 'RateLimitError':
min_wait_time = 60
max_wait_time = 240
elif exception_type == 'BadRequestError' and exception.response:
# this should give us the buried, actual error message from
# the LLM
logger.error(f'\n\nBadRequestError: {exception.response}\n\n')

# Return the wait time using exponential backoff
exponential_wait = wait_exponential(
multiplier=self.config.retry_multiplier,
min=self.config.retry_min_wait,
max=self.config.retry_max_wait,
),
retry=retry_if_exception_type(
(
APIConnectionError,
ContentPolicyViolationError,
InternalServerError,
OpenAIError,
RateLimitError,
)
),
min=min_wait_time,
max=max_wait_time,
)

# Call the exponential wait function with retry_state to get the actual wait time
return exponential_wait(retry_state)

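tenacity accepts any callable that takes the retry state and returns a wait in seconds, which is what `custom_completion_wait` exploits. A standalone sketch of the pattern (values mirror the new defaults; `flaky_call` is a stand-in):

from tenacity import retry, stop_after_attempt, wait_exponential

def rate_limit_aware_wait(retry_state):
    # Stretch the backoff window when the provider is rate limiting us.
    exc = retry_state.outcome.exception() if retry_state.outcome else None
    if type(exc).__name__ == 'RateLimitError':
        waiter = wait_exponential(multiplier=2, min=60, max=240)
    else:
        waiter = wait_exponential(multiplier=2, min=15, max=120)
    return waiter(retry_state)

@retry(stop=stop_after_attempt(8), wait=rate_limit_aware_wait, reraise=True)
def flaky_call():
    ...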
@retry(
after=attempt_on_error,
stop=stop_after_attempt(self.config.num_retries),
reraise=True,
retry=retry_if_exception_type(self.retry_exceptions),
wait=custom_completion_wait,
)
def wrapper(*args, **kwargs):
"""Wrapper for the litellm completion function. Logs the input and output of the completion function."""
@@ -157,47 +190,33 @@ class LLM:
if 'messages' in kwargs:
messages = kwargs['messages']
else:
messages = args[1]
messages = args[1] if len(args) > 1 else []

# log the prompt
debug_message = ''
for message in messages:
content = message['content']
# this serves to prevent empty messages and logging the messages
debug_message = self._get_debug_message(messages)

if isinstance(content, list):
for element in content:
if isinstance(element, dict):
if 'text' in element:
content_str = element['text'].strip()
elif (
'image_url' in element and 'url' in element['image_url']
):
content_str = element['image_url']['url']
else:
content_str = str(element)
else:
content_str = str(element)

debug_message += message_separator + content_str
else:
content_str = str(content)
debug_message += message_separator + content_str

llm_prompt_logger.debug(debug_message)
if self.is_caching_prompt_active():
# Anthropic-specific prompt caching
if 'claude-3' in self.config.model:
kwargs['extra_headers'] = {
'anthropic-beta': 'prompt-caching-2024-07-31',
}

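The header above opts a claude-3 request into Anthropic's prompt-caching beta; litellm forwards `extra_headers` to the provider. A hedged, illustrative call (check the litellm docs for the current header name before relying on it):

import litellm

resp = litellm.completion(
    model='anthropic/claude-3-5-sonnet-20240620',
    messages=[{'role': 'user', 'content': 'hello'}],
    extra_headers={'anthropic-beta': 'prompt-caching-2024-07-31'},
)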
# skip if messages is empty (thus debug_message is empty)
if debug_message:
llm_prompt_logger.debug(debug_message)
resp = completion_unwrapped(*args, **kwargs)
else:
logger.debug('No completion messages!')
resp = {'choices': [{'message': {'content': ''}}]}

# log the response
message_back = resp['choices'][0]['message']['content']
if message_back:
llm_response_logger.debug(message_back)

llm_response_logger.debug(message_back)

# post-process to log costs
self._post_completion(resp)
# post-process to log costs
self._post_completion(resp)

return resp

@@ -221,23 +240,11 @@ class LLM:
async_completion_unwrapped = self._async_completion

@retry(
reraise=True,
stop=stop_after_attempt(self.config.num_retries),
wait=wait_random_exponential(
multiplier=self.config.retry_multiplier,
min=self.config.retry_min_wait,
max=self.config.retry_max_wait,
),
retry=retry_if_exception_type(
(
APIConnectionError,
ContentPolicyViolationError,
InternalServerError,
OpenAIError,
RateLimitError,
)
),
after=attempt_on_error,
stop=stop_after_attempt(self.config.num_retries),
reraise=True,
retry=retry_if_exception_type(self.retry_exceptions),
wait=custom_completion_wait,
)
async def async_completion_wrapper(*args, **kwargs):
"""Async wrapper for the litellm acompletion function."""
@@ -245,34 +252,10 @@ class LLM:
if 'messages' in kwargs:
messages = kwargs['messages']
else:
messages = args[1]
messages = args[1] if len(args) > 1 else []

# log the prompt
debug_message = ''
for message in messages:
content = message['content']

if isinstance(content, list):
for element in content:
if isinstance(element, dict):
if 'text' in element:
content_str = element['text']
elif (
'image_url' in element and 'url' in element['image_url']
):
content_str = element['image_url']['url']
else:
content_str = str(element)
else:
content_str = str(element)

debug_message += message_separator + content_str
else:
content_str = str(content)

debug_message += message_separator + content_str

llm_prompt_logger.debug(debug_message)
# this serves to prevent empty messages and logging the messages
debug_message = self._get_debug_message(messages)

async def check_stopped():
while True:
@@ -288,7 +271,12 @@ class LLM:

try:
# Directly call and await litellm_acompletion
resp = await async_completion_unwrapped(*args, **kwargs)
if debug_message:
llm_prompt_logger.debug(debug_message)
resp = await async_completion_unwrapped(*args, **kwargs)
else:
logger.debug('No completion messages!')
resp = {'choices': [{'message': {'content': ''}}]}

# skip if messages is empty (thus debug_message is empty)
if debug_message:
@@ -325,23 +313,11 @@ class LLM:
pass

@retry(
reraise=True,
stop=stop_after_attempt(self.config.num_retries),
wait=wait_random_exponential(
multiplier=self.config.retry_multiplier,
min=self.config.retry_min_wait,
max=self.config.retry_max_wait,
),
retry=retry_if_exception_type(
(
APIConnectionError,
ContentPolicyViolationError,
InternalServerError,
OpenAIError,
RateLimitError,
)
),
after=attempt_on_error,
stop=stop_after_attempt(self.config.num_retries),
reraise=True,
retry=retry_if_exception_type(self.retry_exceptions),
wait=custom_completion_wait,
)
async def async_acompletion_stream_wrapper(*args, **kwargs):
"""Async wrapper for the litellm acompletion with streaming function."""
@@ -349,7 +325,7 @@ class LLM:
if 'messages' in kwargs:
messages = kwargs['messages']
else:
messages = args[1]
messages = args[1] if len(args) > 1 else []

# log the prompt
debug_message = ''
@@ -401,6 +377,38 @@ class LLM:
self._async_completion = async_completion_wrapper  # type: ignore
self._async_streaming_completion = async_acompletion_stream_wrapper  # type: ignore

def _get_debug_message(self, messages):
if not messages:
return ''

messages = messages if isinstance(messages, list) else [messages]
return message_separator.join(
self._format_message_content(msg) for msg in messages if msg['content']
)

def _format_message_content(self, message):
content = message['content']
if isinstance(content, list):
return self._format_list_content(content)
return str(content)

def _format_list_content(self, content_list):
return '\n'.join(
self._format_content_element(element) for element in content_list
)

def _format_content_element(self, element):
if isinstance(element, dict):
if 'text' in element:
return element['text']
if (
self.vision_is_active()
and 'image_url' in element
and 'url' in element['image_url']
):
return element['image_url']['url']
return str(element)

async def _call_acompletion(self, *args, **kwargs):
return await litellm.acompletion(*args, **kwargs)

@@ -410,7 +418,10 @@ class LLM:

Check the complete documentation at https://litellm.vercel.app/docs/completion
"""
return self._completion
try:
return self._completion
except Exception as e:
raise LLMResponseError(e)

@property
def async_completion(self):
@@ -418,7 +429,10 @@ class LLM:

Check the complete documentation at https://litellm.vercel.app/docs/providers/ollama#example-usage---streaming--acompletion
"""
return self._async_completion
try:
return self._async_completion
except Exception as e:
raise LLMResponseError(e)

@property
def async_streaming_completion(self):
@@ -426,10 +440,34 @@ class LLM:

Check the complete documentation at https://litellm.vercel.app/docs/providers/ollama#example-usage---streaming--acompletion
"""
return self._async_streaming_completion
try:
return self._async_streaming_completion
except Exception as e:
raise LLMResponseError(e)

def supports_vision(self):
return litellm.supports_vision(self.config.model)
def vision_is_active(self):
return not self.config.disable_vision and self._supports_vision()

def _supports_vision(self):
"""Ask litellm whether the model is vision capable.

Returns:
bool: True if model is vision capable. If model is not supported by litellm, it will return False.
"""
try:
return litellm.supports_vision(self.config.model)
except Exception:
return False

def is_caching_prompt_active(self) -> bool:
"""Check if prompt caching is enabled and supported for current model.

Returns:
boolean: True if prompt caching is active for the given model.
"""
return self.config.caching_prompt is True and any(
model in self.config.model for model in cache_prompting_supported_models
)

def _post_completion(self, response) -> None:
"""Post-process the completion response."""
@@ -485,7 +523,11 @@ class LLM:
Returns:
int: The number of tokens.
"""
return litellm.token_counter(model=self.config.model, messages=messages)
try:
return litellm.token_counter(model=self.config.model, messages=messages)
except Exception:
# TODO: this is to limit logspam in case token count is not supported
return 0

def is_local(self):
"""Determines if the system is using a locally running LLM.
@@ -551,3 +593,10 @@ class LLM:

def reset(self):
self.metrics = Metrics()

def format_messages_for_llm(
self, messages: Union[Message, list[Message]]
) -> list[dict]:
return format_messages(
messages, self.vision_is_active(), self.is_caching_prompt_active()
)

|
||||
|
||||
def attempt_on_error(retry_state):
|
||||
logger.error(
|
||||
f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize these settings in the configuration.',
|
||||
f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize retry values in the configuration.',
|
||||
exc_info=False,
|
||||
)
|
||||
return None
|
||||
|
||||
@ -60,6 +60,7 @@ ROOT_GID = 0
|
||||
INIT_COMMANDS = [
|
||||
'git config --global user.name "openhands" && git config --global user.email "openhands@all-hands.dev" && alias git="git --no-pager"',
|
||||
]
|
||||
SOFT_TIMEOUT_SECONDS = 5
|
||||
|
||||
|
||||
class RuntimeClient:
|
||||
@ -114,6 +115,7 @@ class RuntimeClient:
|
||||
logger.info(f'AgentSkills initialized: {obs}')
|
||||
|
||||
await self._init_bash_commands()
|
||||
logger.info('Runtime client initialized.')
|
||||
|
||||
def _init_user(self, username: str, user_id: int) -> None:
|
||||
"""Create user if not exists."""
|
||||
@ -212,6 +214,9 @@ class RuntimeClient:
|
||||
if ps1 == pexpect.EOF:
|
||||
logger.error(f'Bash shell EOF! {self.shell.after=}, {self.shell.before=}')
|
||||
raise RuntimeError('Bash shell EOF')
|
||||
if ps1 == pexpect.TIMEOUT:
|
||||
logger.warning('Bash shell timeout')
|
||||
return ''
|
||||
|
||||
# begin at the last occurrence of '[PEXPECT_BEGIN]'.
|
||||
# In multi-line bash commands, the prompt will be repeated
|
||||
@ -243,39 +248,56 @@ class RuntimeClient:
|
||||
command: str,
|
||||
timeout: int | None,
|
||||
keep_prompt: bool = True,
|
||||
kill_on_timeout: bool = True,
|
||||
) -> tuple[str, int]:
|
||||
logger.debug(f'Executing command: {command}')
|
||||
self.shell.sendline(command)
|
||||
return self._continue_bash(
|
||||
timeout=timeout, keep_prompt=keep_prompt, kill_on_timeout=kill_on_timeout
|
||||
)
|
||||
|
||||
def _interrupt_bash(self, timeout: int | None = None) -> tuple[str, int]:
|
||||
self.shell.sendintr() # send SIGINT to the shell
|
||||
self.shell.expect(self.__bash_expect_regex, timeout=timeout)
|
||||
output = self.shell.before
|
||||
exit_code = 130 # SIGINT
|
||||
return output, exit_code
|
||||
|
||||
def _continue_bash(
|
||||
self,
|
||||
timeout: int | None,
|
||||
keep_prompt: bool = True,
|
||||
kill_on_timeout: bool = True,
|
||||
) -> tuple[str, int]:
|
||||
try:
|
||||
self.shell.sendline(command)
|
||||
self.shell.expect(self.__bash_expect_regex, timeout=timeout)
|
||||
|
||||
output = self.shell.before
|
||||
|
||||
# Get exit code
|
||||
self.shell.sendline('echo $?')
|
||||
logger.debug(f'Executing command for exit code: {command}')
|
||||
logger.debug('Requesting exit code...')
|
||||
self.shell.expect(self.__bash_expect_regex, timeout=timeout)
|
||||
_exit_code_output = self.shell.before
|
||||
logger.debug(f'Exit code Output: {_exit_code_output}')
|
||||
exit_code = int(_exit_code_output.strip().split()[0])
|
||||
|
||||
except pexpect.TIMEOUT as e:
|
||||
self.shell.sendintr() # send SIGINT to the shell
|
||||
self.shell.expect(self.__bash_expect_regex, timeout=timeout)
|
||||
output = self.shell.before
|
||||
output += (
|
||||
'\r\n\r\n'
|
||||
+ f'[Command timed out after {timeout} seconds. SIGINT was sent to interrupt it.]'
|
||||
)
|
||||
exit_code = 130 # SIGINT
|
||||
logger.error(f'Failed to execute command: {command}. Error: {e}')
|
||||
if kill_on_timeout:
|
||||
output, exit_code = self._interrupt_bash()
|
||||
output += (
|
||||
'\r\n\r\n'
|
||||
+ f'[Command timed out after {timeout} seconds. SIGINT was sent to interrupt it.]'
|
||||
)
|
||||
logger.error(f'Failed to execute command. Error: {e}')
|
||||
else:
|
||||
output = self.shell.before or ''
|
||||
exit_code = -1
|
||||
|
||||
finally:
|
||||
bash_prompt = self._get_bash_prompt_and_update_pwd()
|
||||
if keep_prompt:
|
||||
output += '\r\n' + bash_prompt
|
||||
logger.debug(f'Command output: {output}')
|
||||
|
||||
return output, exit_code
|
||||
|
||||
async def run_action(self, action) -> Observation:
|
||||
@ -293,11 +315,23 @@ class RuntimeClient:
|
||||
commands = split_bash_commands(action.command)
|
||||
all_output = ''
|
||||
for command in commands:
|
||||
output, exit_code = self._execute_bash(
|
||||
command,
|
||||
timeout=action.timeout,
|
||||
keep_prompt=action.keep_prompt,
|
||||
)
|
||||
if command == '':
|
||||
output, exit_code = self._continue_bash(
|
||||
timeout=SOFT_TIMEOUT_SECONDS,
|
||||
keep_prompt=action.keep_prompt,
|
||||
kill_on_timeout=False,
|
||||
)
|
||||
elif command.lower() == 'ctrl+c':
|
||||
output, exit_code = self._interrupt_bash(
|
||||
timeout=SOFT_TIMEOUT_SECONDS
|
||||
)
|
||||
else:
|
||||
output, exit_code = self._execute_bash(
|
||||
command,
|
||||
timeout=SOFT_TIMEOUT_SECONDS,
|
||||
keep_prompt=action.keep_prompt,
|
||||
kill_on_timeout=False,
|
||||
)
|
||||
if all_output:
|
||||
# previous output already exists with prompt "user@hostname:working_dir #""
|
||||
# we need to add the command to the previous output,
|
||||
@ -482,7 +516,6 @@ if __name__ == '__main__':
|
||||
browsergym_eval_env=args.browsergym_eval_env,
|
||||
)
|
||||
await client.ainit()
|
||||
logger.info('Runtime client initialized.')
|
||||
yield
|
||||
# Clean up & release the resources
|
||||
client.close()
|
||||
@ -689,6 +722,7 @@ if __name__ == '__main__':
|
||||
logger.error(f'Error listing files: {e}', exc_info=True)
|
||||
return []
|
||||
|
||||
logger.info('Runtime client initialized.')
|
||||
|
||||
logger.info(f'Starting action execution API on port {args.port}')
|
||||
print(f'Starting action execution API on port {args.port}')
|
||||
run(app, host='0.0.0.0', port=args.port)
|
||||
|
||||
@ -47,6 +47,9 @@ class LogBuffer:
|
||||
"""
|
||||
|
||||
def __init__(self, container: docker.models.containers.Container):
|
||||
self.client_ready = False
|
||||
self.init_msg = 'Runtime client initialized.'
|
||||
|
||||
self.buffer: list[str] = []
|
||||
self.lock = threading.Lock()
|
||||
self.log_generator = container.logs(stream=True, follow=True)
|
||||
@ -77,9 +80,12 @@ class LogBuffer:
|
||||
if self._stop_event.is_set():
|
||||
break
|
||||
if log_line:
|
||||
self.append(log_line.decode('utf-8').rstrip())
|
||||
decoded_line = log_line.decode('utf-8').rstrip()
|
||||
self.append(decoded_line)
|
||||
if self.init_msg in decoded_line:
|
||||
self.client_ready = True
|
||||
except Exception as e:
|
||||
logger.error(f'Error in stream_logs: {e}')
|
||||
logger.error(f'Error streaming docker logs: {e}')
|
||||
|
||||
def __del__(self):
|
||||
if self.log_stream_thread.is_alive():
|
||||
@ -125,11 +131,10 @@ class EventStreamRuntime(Runtime):
|
||||
self.action_semaphore = threading.Semaphore(1) # Ensure one action at a time
|
||||
|
||||
self.runtime_builder = DockerRuntimeBuilder(self.docker_client)
|
||||
logger.debug(f'EventStreamRuntime `{sid}` config:\n{self.config}')
|
||||
logger.debug(f'EventStreamRuntime `{sid}`')
|
||||
|
||||
# Buffer for container logs
|
||||
self.log_buffer: LogBuffer | None = None
|
||||
self.startup_done = False
|
||||
|
||||
if self.config.sandbox.runtime_extra_deps:
|
||||
logger.info(
|
||||
@ -165,7 +170,7 @@ class EventStreamRuntime(Runtime):
|
||||
return docker.from_env()
|
||||
except Exception as ex:
|
||||
logger.error(
|
||||
'Launch docker client failed. Please make sure you have installed docker and started the docker daemon.'
|
||||
'Launch docker client failed. Please make sure you have installed docker and started docker desktop/daemon.'
|
||||
)
|
||||
raise ex
|
||||
|
||||
@ -246,9 +251,9 @@ class EventStreamRuntime(Runtime):
|
||||
@tenacity.retry(
|
||||
stop=tenacity.stop_after_attempt(10),
|
||||
wait=tenacity.wait_exponential(multiplier=2, min=10, max=60),
|
||||
reraise=(ConnectionRefusedError,),
|
||||
)
|
||||
def _wait_until_alive(self):
|
||||
init_msg = 'Runtime client initialized.'
|
||||
logger.debug('Getting container logs...')
|
||||
|
||||
# Print and clear the log buffer
|
||||
@ -256,26 +261,23 @@ class EventStreamRuntime(Runtime):
|
||||
self.log_buffer is not None
|
||||
), 'Log buffer is expected to be initialized when container is started'
|
||||
|
||||
# Always process logs, regardless of startup_done status
|
||||
# Always process logs, regardless of client_ready status
|
||||
logs = self.log_buffer.get_and_clear()
|
||||
if logs:
|
||||
formatted_logs = '\n'.join([f' |{log}' for log in logs])
|
||||
logger.info(
|
||||
'\n'
|
||||
+ '-' * 30
|
||||
+ '-' * 35
|
||||
+ 'Container logs:'
|
||||
+ '-' * 30
|
||||
+ '-' * 35
|
||||
+ f'\n{formatted_logs}'
|
||||
+ '\n'
|
||||
+ '-' * 90
|
||||
+ '-' * 80
|
||||
)
|
||||
# Check for initialization message even if startup_done is True
|
||||
if any(init_msg in log for log in logs):
|
||||
self.startup_done = True
|
||||
|
||||
if not self.startup_done:
|
||||
if not self.log_buffer.client_ready:
|
||||
attempts = 0
|
||||
while not self.startup_done and attempts < 10:
|
||||
while not self.log_buffer.client_ready and attempts < 5:
|
||||
attempts += 1
|
||||
time.sleep(1)
|
||||
logs = self.log_buffer.get_and_clear()
|
||||
@ -283,16 +285,13 @@ class EventStreamRuntime(Runtime):
|
||||
formatted_logs = '\n'.join([f' |{log}' for log in logs])
|
||||
logger.info(
|
||||
'\n'
|
||||
+ '-' * 30
|
||||
+ '-' * 35
|
||||
+ 'Container logs:'
|
||||
+ '-' * 30
|
||||
+ '-' * 35
|
||||
+ f'\n{formatted_logs}'
|
||||
+ '\n'
|
||||
+ '-' * 90
|
||||
+ '-' * 80
|
||||
)
|
||||
if any(init_msg in log for log in logs):
|
||||
self.startup_done = True
|
||||
break
|
||||
|
||||
response = self.session.get(f'{self.api_url}/alive')
|
||||
if response.status_code == 200:
|
||||
@ -306,7 +305,15 @@ class EventStreamRuntime(Runtime):
|
||||
def sandbox_workspace_dir(self):
|
||||
return self.config.workspace_mount_path_in_sandbox
|
||||
|
||||
def close(self, close_client: bool = True):
|
||||
def close(self, close_client: bool = True, rm_all_containers: bool = True):
|
||||
"""
|
||||
Closes the EventStreamRuntime and associated objects
|
||||
|
||||
Parameters:
|
||||
- close_client (bool): Whether to close the DockerClient
|
||||
- rm_all_containers (bool): Whether to remove all containers with the 'openhands-sandbox-' prefix
|
||||
"""
|
||||
|
||||
if self.log_buffer:
|
||||
self.log_buffer.close()
|
||||
|
||||
@ -316,7 +323,13 @@ class EventStreamRuntime(Runtime):
|
||||
containers = self.docker_client.containers.list(all=True)
|
||||
for container in containers:
|
||||
try:
|
||||
if container.name.startswith(self.container_name_prefix):
|
||||
# If the app doesn't shut down properly, it can leave runtime containers on the system. This ensures
|
||||
# that all 'openhands-sandbox-' containers are removed as well.
|
||||
if rm_all_containers and container.name.startswith(
|
||||
self.container_name_prefix
|
||||
):
|
||||
container.remove(force=True)
|
||||
elif container.name == self.container_name:
|
||||
logs = container.logs(tail=1000).decode('utf-8')
|
||||
logger.debug(
|
||||
f'==== Container logs ====\n{logs}\n==== End of container logs ===='
|
||||
@ -324,6 +337,7 @@ class EventStreamRuntime(Runtime):
|
||||
container.remove(force=True)
|
||||
except docker.errors.NotFound:
|
||||
pass
|
||||
|
||||
if close_client:
|
||||
self.docker_client.close()
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ import os
import re
import shutil
import tempfile
import uuid

if __package__ is None or __package__ == '':
from aider import Linter
@@ -456,7 +457,6 @@ def _edit_file_impl(
# Use a temporary file to write changes
content = str(content or '')
temp_file_path = ''
src_abs_path = os.path.abspath(file_name)
first_error_line = None

try:
@@ -471,10 +471,13 @@ def _edit_file_impl(
shutil.copy2(file_name, orig_file_clone.name)
original_lint_error, _ = _lint_file(orig_file_clone.name)

# Create a temporary file
with tempfile.NamedTemporaryFile('w', delete=False) as temp_file:
temp_file_path = temp_file.name
# Create a temporary file in the same directory as the original file
original_dir = os.path.dirname(file_name)
original_ext = os.path.splitext(file_name)[1]
temp_file_name = f'.temp_{uuid.uuid4().hex}{original_ext}'
temp_file_path = os.path.join(original_dir, temp_file_name)

with open(temp_file_path, 'w') as temp_file:
# Read the original file and check if empty and for a trailing newline
with open(file_name) as original_file:
lines = original_file.readlines()
@@ -500,8 +503,8 @@ def _edit_file_impl(
# Write the new content to the temporary file
temp_file.write(content)

# Replace the original file with the temporary file atomically
shutil.move(temp_file_path, src_abs_path)
# Replace the original file with the temporary file
os.replace(temp_file_path, file_name)

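Writing the temp file into the destination's own directory is what makes the `os.replace` above safe: both paths live on one filesystem, so the rename is atomic on POSIX. A self-contained sketch of the idiom (helper name illustrative):

import os
import tempfile

def atomic_write(path: str, data: str) -> None:
    directory = os.path.dirname(path) or '.'
    fd, tmp = tempfile.mkstemp(dir=directory, suffix=os.path.splitext(path)[1])
    try:
        with os.fdopen(fd, 'w') as f:
            f.write(data)
        os.replace(tmp, path)  # atomic rename: tmp and path share a filesystem
    except BaseException:
        os.unlink(tmp)
        raise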
# Handle linting
|
||||
# NOTE: we need to get env var inside this function
|
||||
@ -599,6 +602,8 @@ def _edit_file_impl(
|
||||
|
||||
except FileNotFoundError as e:
|
||||
ret_str += f'File not found: {e}\n'
|
||||
except PermissionError as e:
|
||||
ret_str += f'Permission error during file operation: {str(e)}\n'
|
||||
except IOError as e:
|
||||
ret_str += f'An error occurred while handling the file: {e}\n'
|
||||
except ValueError as e:
|
||||
|
||||
@ -67,7 +67,7 @@ class Runtime:
|
||||
self.config = copy.deepcopy(config)
|
||||
self.DEFAULT_ENV_VARS = _default_env_vars(config.sandbox)
|
||||
atexit.register(self.close)
|
||||
logger.debug(f'Runtime `{sid}` config:\n{self.config}')
|
||||
logger.debug(f'Runtime `{sid}`')
|
||||
|
||||
if self.DEFAULT_ENV_VARS:
|
||||
logger.debug(f'Adding default env vars: {self.DEFAULT_ENV_VARS}')
|
||||
|
||||
@ -4,6 +4,8 @@ from openhands.core.logger import openhands_logger as logger


def split_bash_commands(commands):
if not commands.strip():
return ['']
try:
parsed = bashlex.parse(commands)
except bashlex.errors.ParsingError as e:
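split_bash_commands leans on bashlex for correctness: naive splitting on ';' or newlines would cut through quoted strings and heredocs. A minimal sketch of the parse-or-fall-back shape the hunk shows (the span-slicing line is an assumption about how the parse result might be used, not the commit's exact logic):

import bashlex

def split_commands(commands: str) -> list[str]:
    if not commands.strip():
        return ['']
    try:
        parsed = bashlex.parse(commands)
    except bashlex.errors.ParsingError:
        # Invalid or unsupported syntax: keep the input as one command
        # rather than splitting it incorrectly.
        return [commands]
    # Each top-level AST node records its (start, end) offsets in `pos`;
    # compound statements stay together as a single element.
    return [commands[node.pos[0]:node.pos[1]] for node in parsed]
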
@ -152,11 +152,17 @@ class Session:
# This checks if the model supports images
if isinstance(event, MessageAction) and event.images_urls:
controller = self.agent_session.controller
if controller and not controller.agent.llm.supports_vision():
await self.send_error(
'Model does not support image upload, change to a different model or try without an image.'
)
return
if controller:
if controller.agent.llm.config.disable_vision:
await self.send_error(
'Support for images is disabled for this model, try without an image.'
)
return
if not controller.agent.llm.vision_is_active():
await self.send_error(
'Model does not support image upload, change to a different model or try without an image.'
)
return
self.agent_session.event_stream.add_event(event, EventSource.USER)

async def send(self, data: dict[str, object]) -> bool:
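The new checks split one question into two: has the user disabled vision by configuration, and is vision actually available on the model? A plausible shape for vision_is_active, sketched under the assumption that it simply combines those two signals (the real method may differ):

class LLMConfig:
    def __init__(self, model: str = 'gpt-4o', disable_vision: bool = False):
        self.model = model
        self.disable_vision = disable_vision

class LLM:
    def __init__(self, config: LLMConfig):
        self.config = config

    def supports_vision(self) -> bool:
        # Stand-in for a real capability lookup (e.g. a model-info table).
        return 'gpt-4o' in self.config.model

    def vision_is_active(self) -> bool:
        # Vision is active only if the model supports it AND the user has
        # not explicitly disabled it in config.
        return not self.config.disable_vision and self.supports_vision()
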
706
poetry.lock
generated
File diff suppressed because it is too large
@ -1,6 +1,6 @@
[tool.poetry]
name = "openhands-ai"
version = "0.9.1"
version = "0.9.2"
description = "OpenHands: Code Less, Make More"
authors = ["OpenHands"]
license = "MIT"
@ -27,7 +27,7 @@ uvicorn = "*"
types-toml = "*"
numpy = "*"
json-repair = "*"
browsergym = "0.3.4" # integrate browsergym as the browsing interface
browsergym = "0.4.3" # integrate browsergym as the browsing interface
html2text = "*"
e2b = "^0.17.1"
pexpect = "*"
@ -65,7 +65,7 @@ llama-index-embeddings-azure-openai = "*"
llama-index-embeddings-ollama = "*"

[tool.poetry.group.dev.dependencies]
ruff = "0.6.3"
ruff = "0.6.4"
mypy = "1.11.2"
pre-commit = "3.8.0"
build = "*"
@ -85,7 +85,6 @@ reportlab = "*"
[tool.coverage.run]
concurrency = ["gevent"]


[tool.poetry.group.runtime.dependencies]
jupyterlab = "*"
notebook = "*"
@ -116,7 +115,6 @@ ignore = ["D1"]
[tool.ruff.lint.pydocstyle]
convention = "google"


[tool.poetry.group.evaluation.dependencies]
streamlit = "*"
whatthepatch = "*"

@ -2,16 +2,16 @@ import io
import os
import re
import shutil
import socket
import subprocess
import tempfile
import time
from functools import partial
from http.server import HTTPServer, SimpleHTTPRequestHandler
from threading import Thread

import pytest
from litellm import completion

from openhands.core.message import format_messages
from openhands.llm.llm import message_separator

script_dir = os.environ.get('SCRIPT_DIR')
@ -78,7 +78,9 @@ def get_log_id(prompt_log_name):
return match.group(1)


def apply_prompt_and_get_mock_response(test_name: str, messages: str, id: int) -> str:
def apply_prompt_and_get_mock_response(
test_name: str, messages: str, id: int
) -> str | None:
"""Apply the mock prompt, and find mock response based on id.
If there is no matching response file, return None.

@ -183,11 +185,11 @@ def mock_user_response(*args, test_name, **kwargs):
def mock_completion(*args, test_name, **kwargs):
global cur_id
messages = kwargs['messages']
message_str = ''
for message in messages:
for m in message['content']:
if m['type'] == 'text':
message_str += message_separator + m['text']
plain_messages = format_messages(
messages, with_images=False, with_prompt_caching=False
)
message_str = message_separator.join(msg['content'] for msg in plain_messages)

# this assumes all response_(*).log filenames are in numerical order, starting from one
cur_id += 1
if os.environ.get('FORCE_APPLY_PROMPTS') == 'true':
@ -197,7 +199,7 @@ def mock_completion(*args, test_name, **kwargs):
else:
mock_response = get_mock_response(test_name, message_str, cur_id)
if mock_response is None:
raise SecretExit('Mock response for prompt is not found')
raise SecretExit('\n\n***** Mock response for prompt is not found *****\n')
response = completion(**kwargs, mock_response=mock_response)
return response
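The mock_response keyword is what keeps these regression tests offline: litellm's completion() short-circuits and wraps the given string in a normal response object instead of calling any provider. A small usage sketch:

from litellm import completion

resp = completion(
    model='gpt-4o',
    messages=[{'role': 'user', 'content': 'hi'}],
    mock_response='a canned reply',  # no network call is made
)
print(resp.choices[0].message.content)  # -> 'a canned reply'
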
@ -222,6 +224,12 @@ def patch_completion(monkeypatch, request):
lambda completion_response, **extra_kwargs: 1,
)

# Mock LLMConfig to disable vision support
monkeypatch.setattr(
'openhands.llm.llm.LLM.vision_is_active',
lambda self: False,
)

# Mock user input (only for tests that have user_responses.log)
user_responses_str = mock_user_response(test_name=test_name)
if user_responses_str:
@ -229,25 +237,17 @@ def patch_completion(monkeypatch, request):
monkeypatch.setattr('sys.stdin', user_responses)


@pytest.fixture
def http_server():
web_dir = os.path.join(os.path.dirname(__file__), 'static')
os.chdir(web_dir)
handler = SimpleHTTPRequestHandler
class MultiAddressServer(HTTPServer):
def server_bind(self):
self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
self.socket.bind(self.server_address)

# Start the server
server = HTTPServer(('localhost', 8000), handler)
thread = Thread(target=server.serve_forever)
thread.setDaemon(True)
thread.start()
time.sleep(1)

print('HTTP server started...')
yield server

# Stop the server
server.shutdown()
thread.join()
class LoggingHTTPRequestHandler(SimpleHTTPRequestHandler):
def log_message(self, format, *args):
print(
f'Request received: {self.address_string()} - {self.log_date_time_string()} - {format % args}'
)


def set_up():
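The rewritten fixture drops the ad-hoc MultiAddressServer and instead overrides only log_message, so request lines show up in captured test output. One way the pieces above might fit together (a sketch; the rest of the rewritten fixture falls outside this hunk):

import os
from http.server import HTTPServer, SimpleHTTPRequestHandler
from threading import Thread

import pytest

class LoggingHTTPRequestHandler(SimpleHTTPRequestHandler):
    def log_message(self, format, *args):
        # Route request logging through print so pytest captures it.
        print(f'Request received: {self.address_string()} - {format % args}')

@pytest.fixture
def http_server():
    web_dir = os.path.join(os.path.dirname(__file__), 'static')
    os.chdir(web_dir)
    server = HTTPServer(('localhost', 8000), LoggingHTTPRequestHandler)
    thread = Thread(target=server.serve_forever, daemon=True)
    thread.start()
    yield server
    server.shutdown()
    thread.join()
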
@ -1,7 +1,3 @@


----------

# Instructions
Review the current state of the page and all other information to find the best
possible next action to accomplish your goal. Your answer will be interpreted
@ -40,13 +36,13 @@ fill(bid: str, value: str)

select_option(bid: str, options: str | list[str])
Examples:
select_option('48', 'blue')
select_option('a48', 'blue')

select_option('48', ['red', 'green', 'blue'])
select_option('c48', ['red', 'green', 'blue'])

click(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'Meta', 'Shift']] = [])
Examples:
click('51')
click('a51')

click('b22', button='right')

@ -102,13 +98,11 @@ goto(url: str)
Examples:
goto('http://www.example.com')

Multiple actions can be provided at once. Example:
Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page.
More than 2-3 actions usually leads to failure or unexpected behavior. Example:
fill('a12', 'example with "quotes"')
click('51')
click('a51')
click('48', button='middle', modifiers=['Shift'])
Multiple actions are meant to be executed sequentially without any feedback from the page.
Don't execute multiple actions at once if you need feedback from the page.


----------

@ -1,7 +1,3 @@


----------

# Instructions
Review the current state of the page and all other information to find the best
possible next action to accomplish your goal. Your answer will be interpreted
@ -40,13 +36,13 @@ fill(bid: str, value: str)

select_option(bid: str, options: str | list[str])
Examples:
select_option('48', 'blue')
select_option('a48', 'blue')

select_option('48', ['red', 'green', 'blue'])
select_option('c48', ['red', 'green', 'blue'])

click(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'Meta', 'Shift']] = [])
Examples:
click('51')
click('a51')

click('b22', button='right')

@ -102,13 +98,11 @@ goto(url: str)
Examples:
goto('http://www.example.com')

Multiple actions can be provided at once. Example:
Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page.
More than 2-3 actions usually leads to failure or unexpected behavior. Example:
fill('a12', 'example with "quotes"')
click('51')
click('a51')
click('48', button='middle', modifiers=['Shift'])
Multiple actions are meant to be executed sequentially without any feedback from the page.
Don't execute multiple actions at once if you need feedback from the page.


----------

@ -121,7 +115,7 @@ RootWebArea 'The Ultimate Answer', focused
[10] button 'Click me', clickable

# Previous Actions
goto('http://localhost:8000')
goto("http://localhost:8000")

Here is an example with chain of thought of a valid action when clicking on a button:
"

@ -1,7 +1,3 @@


----------

# Instructions
Review the current state of the page and all other information to find the best
possible next action to accomplish your goal. Your answer will be interpreted
@ -40,13 +36,13 @@ fill(bid: str, value: str)

select_option(bid: str, options: str | list[str])
Examples:
select_option('48', 'blue')
select_option('a48', 'blue')

select_option('48', ['red', 'green', 'blue'])
select_option('c48', ['red', 'green', 'blue'])

click(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'Meta', 'Shift']] = [])
Examples:
click('51')
click('a51')

click('b22', button='right')

@ -102,13 +98,11 @@ goto(url: str)
Examples:
goto('http://www.example.com')

Multiple actions can be provided at once. Example:
Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page.
More than 2-3 actions usually leads to failure or unexpected behavior. Example:
fill('a12', 'example with "quotes"')
click('51')
click('a51')
click('48', button='middle', modifiers=['Shift'])
Multiple actions are meant to be executed sequentially without any feedback from the page.
Don't execute multiple actions at once if you need feedback from the page.


----------

@ -122,7 +116,7 @@ RootWebArea 'The Ultimate Answer', focused
StaticText 'The answer is OpenHands is all you need!'

# Previous Actions
goto('http://localhost:8000')
goto("http://localhost:8000")
click("10")

Here is an example with chain of thought of a valid action when clicking on a button:

@ -1,2 +1,3 @@
In order to accomplish my goal, I need to navigate to the localhost page.
```goto('http://localhost:8000'
To accomplish the goal of browsing localhost:8000 and finding the ultimate answer to life, I need to first navigate to the specified URL.

```goto("http://localhost:8000"

@ -1,2 +1,4 @@
In order to accomplish my goal, I need to click on the button with bid 10 to reveal the answer to life, the universe, and everything.
```click("10"
To accomplish the goal of finding the ultimate answer to life, I need to click on the "Click me" button to reveal the answer. The button has a bid of 10.

```
click("10"

@ -1,3 +1,6 @@
In order to accomplish my goal, I need to read the static text that reveals the answer to life, the universe, and everything.
Based on the current Accessibility Tree and the previous actions taken, I can see that we have already navigated to the localhost:8000 page and clicked the "Click me" button. The answer to life, the universe, and everything has been revealed on the page.

```send_msg_to_user('The answer is OpenHands is all you need!'
To accomplish the goal of telling you the ultimate answer to life, I will send a message to the user with the revealed answer.

```
send_msg_to_user("The ultimate answer to life, according to the webpage, is: OpenHands is all you need!"

@ -1,33 +1,30 @@


----------

A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
The assistant can use an interactive Python (Jupyter Notebook) environment, executing code with <execute_ipython>.
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed answers to the user's questions.
The assistant can use a Python environment with <execute_ipython>, e.g.:
<execute_ipython>
print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
The assistant can execute bash commands wrapped with <execute_bash>, e.g. <execute_bash> ls </execute_bash>.
If a bash command returns exit code `-1`, this means the process is not yet finished.
The assistant must then send a second <execute_bash>. The second <execute_bash> can be empty
(which will retrieve any additional logs), or it can contain text to be sent to STDIN of the running process,
or it can contain the text `ctrl+c` to interrupt the process.

For commands that may run indefinitely, the output should be redirected to a file and the command run
in the background, e.g. <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
If a command execution result says "Command timed out. Sending SIGINT to the process",
the assistant should retry running the command in the background.

For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
Important, however: do not run interactive commands. You do not have access to stdin.
Also, you need to handle commands that may run indefinitely and not return a result. For such cases, you should redirect the output to a file and run the command in the background to avoid blocking the execution.
For example, to run a Python script that might run indefinitely without returning immediately, you can use the following format: <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
Also, if a command execution result saying like: Command: "npm start" timed out. Sending SIGINT to the process, you should also retry with running the command in the background.
The assistant can browse the Internet with <execute_browse> and </execute_browse>.
For example, <execute_browse> Tell me the usa's president using google search </execute_browse>.
Or <execute_browse> Tell me what is in http://example.com </execute_browse>.
The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
To interact with GitHub, use the $GITHUB_TOKEN environment variable.
For example, to push a branch `my_branch` to the GitHub repo `owner/repo`:
<execute_bash> git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch </execute_bash>
If $GITHUB_TOKEN is not set, ask the user to set it.

The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.

Apart from the standard Python library, the assistant can also use the following functions (already imported) in <execute_ipython> environment:
open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None:
Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line.
It only shows the first 100 lines by default! `context_lines` is the max number of lines to be displayed, up to 2000. Use `scroll_up` and `scroll_down` to view more content up or down.
Opens the file at the given path in the editor. IF the file is to be edited, first use `scroll_down` repeatedly to read the full file!
If line_number is provided, the window will be moved to include that line.
It only shows the first 100 lines by default! `context_lines` is the max number of lines to be displayed, up to 100. Use `scroll_up` and `scroll_down` to view more content up or down.
Args:
path: str: The path to the file to open, preferred absolute path.
line_number: int | None = 1: The line number to move to. Defaults to 1.
@ -54,7 +51,9 @@ create_file(filename: str) -> None:
filename: str: The name of the file to create.

edit_file_by_replace(file_name: str, to_replace: str, new_content: str) -> None:
Edit an existing file. This will search for non-empty `to_replace` in the given file and replace it with `new_content`. Use `append_file` method for writing after `create_file`!
Edit an existing file. This will search for non-empty `to_replace` in the given file and replace it with non-empty `new_content`.
`to_replace` and `new_content` must be different! Split large edits into multiple smaller edits if necessary!
Use `append_file` method for writing after `create_file`!
Every *to_replace* must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc.
Include enough lines to make code in `to_replace` unique. `to_replace` should NOT be empty.
For example, given a file "/workspace/example.txt" with the following content:
@ -115,7 +114,7 @@ insert_content_at_line(file_name: str, line_number: int, content: str) -> None:

append_file(file_name: str, content: str) -> None:
Append content to the given file.
It appends text `content` to the end of the specified file.
It appends text `content` to the end of the specified file, ideal after a `create_file`!
Args:
file_name: str: The name of the file to edit.
line_number: int: The line number (starting from 1) to insert the content after.
@ -125,19 +124,19 @@ search_dir(search_term: str, dir_path: str = './') -> None:
Searches for search_term in all files in dir. If dir is not provided, searches in the current directory.
Args:
search_term: str: The term to search for.
dir_path: Optional[str]: The path to the directory to search.
dir_path: str: The path to the directory to search.

search_file(search_term: str, file_path: Optional[str] = None) -> None:
search_file(search_term: str, file_path: str | None = None) -> None:
Searches for search_term in file. If file is not provided, searches in the current open file.
Args:
search_term: str: The term to search for.
file_path: Optional[str]: The path to the file to search.
file_path: str | None: The path to the file to search.

find_file(file_name: str, dir_path: str = './') -> None:
Finds all files with the given name in the specified directory.
Args:
file_name: str: The name of the file to find.
dir_path: Optional[str]: The path to the directory to search.
dir_path: str: The path to the directory to search.

parse_pdf(file_path: str) -> None:
Parses the content of a PDF file and prints it.
@ -159,15 +158,23 @@ parse_pptx(file_path: str) -> None:
Args:
file_path: str: The path to the file to open.

Please note that THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Use `append_file` after `create_file` to write content!

IMPORTANT:
- `open_file` only returns the first 100 lines of the file by default! The assistant MUST use `scroll_down` repeatedly to read the full file BEFORE making edits!
- The assistant shall adhere to THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRING PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write the line out, with all leading spaces before the code!
- Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
- Any code issued should be less than 50 lines to avoid context being cut off!
- After EVERY `create_file` the method `append_file` shall be used to write the FIRST content!
- For `edit_file_by_replace` NEVER provide empty parameters!
- For `edit_file_by_replace` the file must be read fully before any replacements!

Responses should be concise.
The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block.
Include ONLY ONE <execute_ipython>, <execute_bash>, or <execute_browse> per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed.
If the assistant is finished with the task you MUST include <finish></finish> in your response.
IMPORTANT: Execute code using <execute_ipython>, <execute_bash>, or <execute_browse> whenever possible.
When handling files, try to use full paths and pwd to avoid errors.

The assistant should utilize full file paths and the `pwd` command to prevent path-related errors.
The assistant must avoid apologies and thanks in its responses.

----------

@ -393,8 +400,9 @@ The server is running on port 5000 with PID 126. You can access the list of numb
--- END OF EXAMPLE ---


NOW, LET'S START!

NOW, LET'S START!
Browse localhost:8000, and tell me the ultimate answer to life. Do not ask me for confirmation at any point.

ENVIRONMENT REMINDER: You have 14 turns left to complete the task. When finished reply with <finish></finish>.

ENVIRONMENT REMINDER: You have 19 turns left to complete the task. When finished reply with <finish></finish>.

@ -1,14 +1,10 @@


----------

# Instructions
Review the current state of the page and all other information to find the best
possible next action to accomplish your goal. Your answer will be interpreted
and executed by a program, make sure to follow the formatting instructions.

# Goal:
Certainly! I'll browse localhost:8000 and retrieve the ultimate answer to life for you.. I should start with: Get the content on "http://localhost:8000"
. I should start with: Get the content on "http://localhost:8000"

# Action Space

@ -40,13 +36,13 @@ fill(bid: str, value: str)

select_option(bid: str, options: str | list[str])
Examples:
select_option('48', 'blue')
select_option('a48', 'blue')

select_option('48', ['red', 'green', 'blue'])
select_option('c48', ['red', 'green', 'blue'])

click(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'Meta', 'Shift']] = [])
Examples:
click('51')
click('a51')

click('b22', button='right')

@ -102,13 +98,11 @@ goto(url: str)
Examples:
goto('http://www.example.com')

Multiple actions can be provided at once. Example:
Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page.
More than 2-3 actions usually leads to failure or unexpected behavior. Example:
fill('a12', 'example with "quotes"')
click('51')
click('a51')
click('48', button='middle', modifiers=['Shift'])
Multiple actions are meant to be executed sequentially without any feedback from the page.
Don't execute multiple actions at once if you need feedback from the page.


----------

@ -1,14 +1,10 @@


----------

# Instructions
Review the current state of the page and all other information to find the best
possible next action to accomplish your goal. Your answer will be interpreted
and executed by a program, make sure to follow the formatting instructions.

# Goal:
Certainly! I'll browse localhost:8000 and retrieve the ultimate answer to life for you.. I should start with: Get the content on "http://localhost:8000"
. I should start with: Get the content on "http://localhost:8000"

# Action Space

@ -40,13 +36,13 @@ fill(bid: str, value: str)

select_option(bid: str, options: str | list[str])
Examples:
select_option('48', 'blue')
select_option('a48', 'blue')

select_option('48', ['red', 'green', 'blue'])
select_option('c48', ['red', 'green', 'blue'])

click(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'Meta', 'Shift']] = [])
Examples:
click('51')
click('a51')

click('b22', button='right')

@ -102,13 +98,11 @@ goto(url: str)
Examples:
goto('http://www.example.com')

Multiple actions can be provided at once. Example:
Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page.
More than 2-3 actions usually leads to failure or unexpected behavior. Example:
fill('a12', 'example with "quotes"')
click('51')
click('a51')
click('48', button='middle', modifiers=['Shift'])
Multiple actions are meant to be executed sequentially without any feedback from the page.
Don't execute multiple actions at once if you need feedback from the page.


----------

@ -1,14 +1,10 @@


----------

# Instructions
Review the current state of the page and all other information to find the best
possible next action to accomplish your goal. Your answer will be interpreted
and executed by a program, make sure to follow the formatting instructions.

# Goal:
Certainly! I'll browse localhost:8000 and retrieve the ultimate answer to life for you.. I should start with: Get the content on "http://localhost:8000"
. I should start with: Get the content on "http://localhost:8000"

# Action Space

@ -40,13 +36,13 @@ fill(bid: str, value: str)

select_option(bid: str, options: str | list[str])
Examples:
select_option('48', 'blue')
select_option('a48', 'blue')

select_option('48', ['red', 'green', 'blue'])
select_option('c48', ['red', 'green', 'blue'])

click(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'Meta', 'Shift']] = [])
Examples:
click('51')
click('a51')

click('b22', button='right')

@ -102,13 +98,11 @@ goto(url: str)
Examples:
goto('http://www.example.com')

Multiple actions can be provided at once. Example:
Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page.
More than 2-3 actions usually leads to failure or unexpected behavior. Example:
fill('a12', 'example with "quotes"')
click('51')
click('a51')
click('48', button='middle', modifiers=['Shift'])
Multiple actions are meant to be executed sequentially without any feedback from the page.
Don't execute multiple actions at once if you need feedback from the page.


----------

@ -1,33 +1,30 @@


----------

A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
The assistant can use an interactive Python (Jupyter Notebook) environment, executing code with <execute_ipython>.
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed answers to the user's questions.
The assistant can use a Python environment with <execute_ipython>, e.g.:
<execute_ipython>
print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
The assistant can execute bash commands wrapped with <execute_bash>, e.g. <execute_bash> ls </execute_bash>.
If a bash command returns exit code `-1`, this means the process is not yet finished.
The assistant must then send a second <execute_bash>. The second <execute_bash> can be empty
(which will retrieve any additional logs), or it can contain text to be sent to STDIN of the running process,
or it can contain the text `ctrl+c` to interrupt the process.

For commands that may run indefinitely, the output should be redirected to a file and the command run
in the background, e.g. <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
If a command execution result says "Command timed out. Sending SIGINT to the process",
the assistant should retry running the command in the background.

For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
Important, however: do not run interactive commands. You do not have access to stdin.
Also, you need to handle commands that may run indefinitely and not return a result. For such cases, you should redirect the output to a file and run the command in the background to avoid blocking the execution.
For example, to run a Python script that might run indefinitely without returning immediately, you can use the following format: <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
Also, if a command execution result saying like: Command: "npm start" timed out. Sending SIGINT to the process, you should also retry with running the command in the background.
The assistant can browse the Internet with <execute_browse> and </execute_browse>.
For example, <execute_browse> Tell me the usa's president using google search </execute_browse>.
Or <execute_browse> Tell me what is in http://example.com </execute_browse>.
The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
To interact with GitHub, use the $GITHUB_TOKEN environment variable.
For example, to push a branch `my_branch` to the GitHub repo `owner/repo`:
<execute_bash> git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch </execute_bash>
If $GITHUB_TOKEN is not set, ask the user to set it.

The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.

Apart from the standard Python library, the assistant can also use the following functions (already imported) in <execute_ipython> environment:
open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None:
Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line.
It only shows the first 100 lines by default! `context_lines` is the max number of lines to be displayed, up to 2000. Use `scroll_up` and `scroll_down` to view more content up or down.
Opens the file at the given path in the editor. IF the file is to be edited, first use `scroll_down` repeatedly to read the full file!
If line_number is provided, the window will be moved to include that line.
It only shows the first 100 lines by default! `context_lines` is the max number of lines to be displayed, up to 100. Use `scroll_up` and `scroll_down` to view more content up or down.
Args:
path: str: The path to the file to open, preferred absolute path.
line_number: int | None = 1: The line number to move to. Defaults to 1.
@ -54,7 +51,9 @@ create_file(filename: str) -> None:
filename: str: The name of the file to create.

edit_file_by_replace(file_name: str, to_replace: str, new_content: str) -> None:
Edit an existing file. This will search for non-empty `to_replace` in the given file and replace it with `new_content`. Use `append_file` method for writing after `create_file`!
Edit an existing file. This will search for non-empty `to_replace` in the given file and replace it with non-empty `new_content`.
`to_replace` and `new_content` must be different! Split large edits into multiple smaller edits if necessary!
Use `append_file` method for writing after `create_file`!
Every *to_replace* must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc.
Include enough lines to make code in `to_replace` unique. `to_replace` should NOT be empty.
For example, given a file "/workspace/example.txt" with the following content:
@ -115,7 +114,7 @@ insert_content_at_line(file_name: str, line_number: int, content: str) -> None:

append_file(file_name: str, content: str) -> None:
Append content to the given file.
It appends text `content` to the end of the specified file.
It appends text `content` to the end of the specified file, ideal after a `create_file`!
Args:
file_name: str: The name of the file to edit.
line_number: int: The line number (starting from 1) to insert the content after.
@ -125,19 +124,19 @@ search_dir(search_term: str, dir_path: str = './') -> None:
Searches for search_term in all files in dir. If dir is not provided, searches in the current directory.
Args:
search_term: str: The term to search for.
dir_path: Optional[str]: The path to the directory to search.
dir_path: str: The path to the directory to search.

search_file(search_term: str, file_path: Optional[str] = None) -> None:
search_file(search_term: str, file_path: str | None = None) -> None:
Searches for search_term in file. If file is not provided, searches in the current open file.
Args:
search_term: str: The term to search for.
file_path: Optional[str]: The path to the file to search.
file_path: str | None: The path to the file to search.

find_file(file_name: str, dir_path: str = './') -> None:
Finds all files with the given name in the specified directory.
Args:
file_name: str: The name of the file to find.
dir_path: Optional[str]: The path to the directory to search.
dir_path: str: The path to the directory to search.

parse_pdf(file_path: str) -> None:
Parses the content of a PDF file and prints it.
@ -159,15 +158,23 @@ parse_pptx(file_path: str) -> None:
Args:
file_path: str: The path to the file to open.

Please note that THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Use `append_file` after `create_file` to write content!

IMPORTANT:
- `open_file` only returns the first 100 lines of the file by default! The assistant MUST use `scroll_down` repeatedly to read the full file BEFORE making edits!
- The assistant shall adhere to THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRING PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write the line out, with all leading spaces before the code!
- Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
- Any code issued should be less than 50 lines to avoid context being cut off!
- After EVERY `create_file` the method `append_file` shall be used to write the FIRST content!
- For `edit_file_by_replace` NEVER provide empty parameters!
- For `edit_file_by_replace` the file must be read fully before any replacements!

Responses should be concise.
The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block.
Include ONLY ONE <execute_ipython>, <execute_bash>, or <execute_browse> per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed.
If the assistant is finished with the task you MUST include <finish></finish> in your response.
IMPORTANT: Execute code using <execute_ipython>, <execute_bash>, or <execute_browse> whenever possible.
When handling files, try to use full paths and pwd to avoid errors.

The assistant should utilize full file paths and the `pwd` command to prevent path-related errors.
The assistant must avoid apologies and thanks in its responses.

----------

@ -393,22 +400,21 @@ The server is running on port 5000 with PID 126. You can access the list of numb
--- END OF EXAMPLE ---



NOW, LET'S START!

----------

Browse localhost:8000, and tell me the ultimate answer to life. Do not ask me for confirmation at any point.

----------


<execute_browse>
Certainly! I'll browse localhost:8000 and retrieve the ultimate answer to life for you.. I should start with: Get the content on "http://localhost:8000"
. I should start with: Get the content on "http://localhost:8000"
</execute_browse>

----------

OBSERVATION:
{'content': 'The ultimate answer to life, the universe, and everything is: OpenHands is all you need!'}
{'content': 'The answer to life, the universe, and everything has been revealed: OpenHands is all you need!'}

ENVIRONMENT REMINDER: You have 8 turns left to complete the task. When finished reply with <finish></finish>

ENVIRONMENT REMINDER: You have 13 turns left to complete the task. When finished reply with <finish></finish>.

@ -1,4 +1,2 @@
Certainly! I'll browse localhost:8000 and retrieve the ultimate answer to life for you.

<execute_browse>
Get the content on "http://localhost:8000"

@ -1,5 +1,5 @@
To accomplish our goal of retrieving the ultimate answer to life from localhost:8000, I need to start by accessing the content on that page. Here's my thought process and action:
To accomplish my goal of getting the content on "http://localhost:8000", I need to use the goto action to navigate to that URL. Here's the action with my chain of thought:

In order to accomplish my goal, I need to navigate to the specified URL using the goto action.
In order to accomplish my goal of accessing the content on "http://localhost:8000", I need to navigate to that URL using the goto action.

```goto("http://localhost:8000"

@ -1,6 +1,6 @@
To accomplish our goal of retrieving the ultimate answer to life, we need to click on the "Click me" button. The button has a bid of 10, so we should click on it.
To accomplish the goal of revealing the answer, I need to click on the "Click me" button. The button has a bid of 10.

Here's the action I'll take:

In order to accomplish my goal, I need to click on the button with bid 10
In order to accomplish our goal, I need to click on the button with bid 10
```click("10"

@ -1,6 +1,10 @@
Based on the current Accessibility Tree and the previous actions taken, it appears that we have already successfully navigated to the webpage and clicked the button to reveal the answer. The answer is now visible on the page.
Based on the current Accessibility Tree and the previous actions taken, it appears that we have already navigated to the correct page and clicked the button to reveal the answer. The answer is now visible on the page.

To accomplish our goal of retrieving the ultimate answer to life, I will now send this information to the user:
To accomplish our goal of retrieving the ultimate answer to life, I will now send a message to the user with the revealed answer:

```
send_msg_to_user("The ultimate answer to life, the universe, and everything is: OpenHands is all you need!"
Since the goal was to get the content on the page and reveal the answer, we have successfully completed the task. There's no need for any further actions.

However, to formally conclude and report our findings to the user, we should send a message summarizing what we've discovered. Here's the action I propose:

In order to report our findings to the user, I will send a message with the revealed answer.
```send_msg_to_user("The answer to life, the universe, and everything has been revealed: OpenHands is all you need!"

@ -1,11 +1,7 @@
Thank you for providing the content from localhost:8000. Based on the information retrieved, I can now answer your question about the ultimate answer to life.
Based on the content retrieved from localhost:8000, I can provide you with the ultimate answer to life:

The ultimate answer to life, according to the content on localhost:8000, is:
The ultimate answer to life, the universe, and everything is: OpenHands is all you need!

"OpenHands is all you need!"

This appears to be a playful reference to the popular science fiction series "The Hitchhiker's Guide to the Galaxy" by Douglas Adams, where the answer to the ultimate question of life, the universe, and everything is famously "42". In this case, it's been adapted to promote OpenHands as the ultimate solution.

Since I've successfully retrieved and reported the answer from the specified localhost, I believe I've completed the task you've given me.
This appears to be the answer provided by the server at localhost:8000. It's a playful reference to Douglas Adams' "The Hitchhiker's Guide to the Galaxy," where the answer to life, the universe, and everything is famously 42. In this case, it's been customized to highlight OpenHands.

<finish></finish>

@ -2,32 +2,33 @@

----------

A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
The assistant can use an interactive Python (Jupyter Notebook) environment, executing code with <execute_ipython>.
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed answers to the user's questions.
The assistant can use a Python environment with <execute_ipython>, e.g.:
<execute_ipython>
print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
The assistant can execute bash commands wrapped with <execute_bash>, e.g. <execute_bash> ls </execute_bash>.
If a bash command returns exit code `-1`, this means the process is not yet finished.
The assistant must then send a second <execute_bash>. The second <execute_bash> can be empty
(which will retrieve any additional logs), or it can contain text to be sent to STDIN of the running process,
or it can contain the text `ctrl+c` to interrupt the process.

For commands that may run indefinitely, the output should be redirected to a file and the command run
in the background, e.g. <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
If a command execution result says "Command timed out. Sending SIGINT to the process",
the assistant should retry running the command in the background.

For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
Important, however: do not run interactive commands. You do not have access to stdin.
Also, you need to handle commands that may run indefinitely and not return a result. For such cases, you should redirect the output to a file and run the command in the background to avoid blocking the execution.
For example, to run a Python script that might run indefinitely without returning immediately, you can use the following format: <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
Also, if a command execution result saying like: Command: "npm start" timed out. Sending SIGINT to the process, you should also retry with running the command in the background.
The assistant can browse the Internet with <execute_browse> and </execute_browse>.
For example, <execute_browse> Tell me the usa's president using google search </execute_browse>.
Or <execute_browse> Tell me what is in http://example.com </execute_browse>.
The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
To interact with GitHub, use the $GITHUB_TOKEN environment variable.
For example, to push a branch `my_branch` to the GitHub repo `owner/repo`:
<execute_bash> git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch </execute_bash>
If $GITHUB_TOKEN is not set, ask the user to set it.

The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.

Apart from the standard Python library, the assistant can also use the following functions (already imported) in <execute_ipython> environment:
open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None:
Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line.
It only shows the first 100 lines by default! `context_lines` is the max number of lines to be displayed, up to 2000. Use `scroll_up` and `scroll_down` to view more content up or down.
Opens the file at the given path in the editor. IF the file is to be edited, first use `scroll_down` repeatedly to read the full file!
If line_number is provided, the window will be moved to include that line.
It only shows the first 100 lines by default! `context_lines` is the max number of lines to be displayed, up to 100. Use `scroll_up` and `scroll_down` to view more content up or down.
Args:
path: str: The path to the file to open, preferred absolute path.
line_number: int | None = 1: The line number to move to. Defaults to 1.
@ -54,7 +55,9 @@ create_file(filename: str) -> None:
filename: str: The name of the file to create.

edit_file_by_replace(file_name: str, to_replace: str, new_content: str) -> None:
Edit an existing file. This will search for non-empty `to_replace` in the given file and replace it with `new_content`. Use `append_file` method for writing after `create_file`!
Edit an existing file. This will search for non-empty `to_replace` in the given file and replace it with non-empty `new_content`.
`to_replace` and `new_content` must be different! Split large edits into multiple smaller edits if necessary!
Use `append_file` method for writing after `create_file`!
Every *to_replace* must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc.
Include enough lines to make code in `to_replace` unique. `to_replace` should NOT be empty.
For example, given a file "/workspace/example.txt" with the following content:
@ -115,7 +118,7 @@ insert_content_at_line(file_name: str, line_number: int, content: str) -> None:

append_file(file_name: str, content: str) -> None:
Append content to the given file.
It appends text `content` to the end of the specified file.
It appends text `content` to the end of the specified file, ideal after a `create_file`!
Args:
file_name: str: The name of the file to edit.
line_number: int: The line number (starting from 1) to insert the content after.
@ -125,19 +128,19 @@ search_dir(search_term: str, dir_path: str = './') -> None:
Searches for search_term in all files in dir. If dir is not provided, searches in the current directory.
Args:
search_term: str: The term to search for.
dir_path: Optional[str]: The path to the directory to search.
dir_path: str: The path to the directory to search.

search_file(search_term: str, file_path: Optional[str] = None) -> None:
search_file(search_term: str, file_path: str | None = None) -> None:
Searches for search_term in file. If file is not provided, searches in the current open file.
Args:
search_term: str: The term to search for.
file_path: Optional[str]: The path to the file to search.
file_path: str | None: The path to the file to search.

find_file(file_name: str, dir_path: str = './') -> None:
Finds all files with the given name in the specified directory.
Args:
file_name: str: The name of the file to find.
dir_path: Optional[str]: The path to the directory to search.
dir_path: str: The path to the directory to search.

parse_pdf(file_path: str) -> None:
Parses the content of a PDF file and prints it.
@ -159,15 +162,23 @@ parse_pptx(file_path: str) -> None:
Args:
file_path: str: The path to the file to open.

Please note that THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Use `append_file` after `create_file` to write content!

IMPORTANT:
- `open_file` only returns the first 100 lines of the file by default! The assistant MUST use `scroll_down` repeatedly to read the full file BEFORE making edits!
- The assistant shall adhere to THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRING PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write the line out, with all leading spaces before the code!
- Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
- Any code issued should be less than 50 lines to avoid context being cut off!
- After EVERY `create_file` the method `append_file` shall be used to write the FIRST content!
- For `edit_file_by_replace` NEVER provide empty parameters!
- For `edit_file_by_replace` the file must be read fully before any replacements!

Responses should be concise.
The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block.
Include ONLY ONE <execute_ipython>, <execute_bash>, or <execute_browse> per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed.
If the assistant is finished with the task you MUST include <finish></finish> in your response.
IMPORTANT: Execute code using <execute_ipython>, <execute_bash>, or <execute_browse> whenever possible.
When handling files, try to use full paths and pwd to avoid errors.

The assistant should utilize full file paths and the `pwd` command to prevent path-related errors.
The assistant must avoid apologies and thanks in its responses.

----------

@ -393,8 +404,9 @@ The server is running on port 5000 with PID 126. You can access the list of numb
--- END OF EXAMPLE ---


NOW, LET'S START!

NOW, LET'S START!
Fix typos in bad.txt. Do not ask me for confirmation at any point.

ENVIRONMENT REMINDER: You have 14 turns left to complete the task. When finished reply with <finish></finish>.

ENVIRONMENT REMINDER: You have 19 turns left to complete the task. When finished reply with <finish></finish>.

@ -2,32 +2,33 @@

----------

A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
The assistant can use an interactive Python (Jupyter Notebook) environment, executing code with <execute_ipython>.
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed answers to the user's questions.
The assistant can use a Python environment with <execute_ipython>, e.g.:
<execute_ipython>
print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
The assistant can execute bash commands wrapped with <execute_bash>, e.g. <execute_bash> ls </execute_bash>.
If a bash command returns exit code `-1`, this means the process is not yet finished.
The assistant must then send a second <execute_bash>. The second <execute_bash> can be empty
(which will retrieve any additional logs), or it can contain text to be sent to STDIN of the running process,
or it can contain the text `ctrl+c` to interrupt the process.

For commands that may run indefinitely, the output should be redirected to a file and the command run
in the background, e.g. <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
If a command execution result says "Command timed out. Sending SIGINT to the process",
the assistant should retry running the command in the background.

For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
Important, however: do not run interactive commands. You do not have access to stdin.
Also, you need to handle commands that may run indefinitely and not return a result. For such cases, you should redirect the output to a file and run the command in the background to avoid blocking the execution.
For example, to run a Python script that might run indefinitely without returning immediately, you can use the following format: <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
Also, if a command execution result saying like: Command: "npm start" timed out. Sending SIGINT to the process, you should also retry with running the command in the background.
The assistant can browse the Internet with <execute_browse> and </execute_browse>.
For example, <execute_browse> Tell me the usa's president using google search </execute_browse>.
Or <execute_browse> Tell me what is in http://example.com </execute_browse>.
The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
To interact with GitHub, use the $GITHUB_TOKEN environment variable.
For example, to push a branch `my_branch` to the GitHub repo `owner/repo`:
<execute_bash> git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch </execute_bash>
If $GITHUB_TOKEN is not set, ask the user to set it.
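As a minimal sketch of such a pre-push check (the exact workflow is illustrative, not prescribed by the prompt):
<execute_ipython>
import os

# If $GITHUB_TOKEN is missing, the assistant should ask the user to set it.
if not os.environ.get('GITHUB_TOKEN'):
    print('GITHUB_TOKEN is not set. Please set it before pushing.')
</execute_ipython>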

The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
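For instance, a minimal sketch of installing and then importing a package (`requests` here is just an assumed example):
<execute_ipython>
%pip install requests
import requests  # import after installing, before first use

print(requests.__version__)
</execute_ipython>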

Apart from the standard Python library, the assistant can also use the following functions (already imported) in <execute_ipython> environment:
open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None:
Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line.
It only shows the first 100 lines by default! `context_lines` is the max number of lines to be displayed, up to 2000. Use `scroll_up` and `scroll_down` to view more content up or down.
Opens the file at the given path in the editor. IF the file is to be edited, first use `scroll_down` repeatedly to read the full file!
If line_number is provided, the window will be moved to include that line.
It only shows the first 100 lines by default! `context_lines` is the max number of lines to be displayed, up to 100. Use `scroll_up` and `scroll_down` to view more content up or down.
Args:
path: str: The path to the file to open, preferred absolute path.
line_number: int | None = 1: The line number to move to. Defaults to 1.
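For example, a minimal sketch of reading a (hypothetical) long file in full before editing it:
<execute_ipython>
open_file('/workspace/notes.txt')  # shows at most the first 100 lines
scroll_down()                      # repeat until the end of the file is shown
</execute_ipython>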
@ -54,7 +55,9 @@ create_file(filename: str) -> None:
filename: str: The name of the file to create.

edit_file_by_replace(file_name: str, to_replace: str, new_content: str) -> None:
Edit an existing file. This will search for non-empty `to_replace` in the given file and replace it with `new_content`. Use `append_file` method for writing after `create_file`!
Edit an existing file. This will search for non-empty `to_replace` in the given file and replace it with non-empty `new_content`.
`to_replace` and `new_content` must be different! Split large edits into multiple smaller edits if necessary!
Use `append_file` method for writing after `create_file`!
Every *to_replace* must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc.
Include enough lines to make code in `to_replace` unique. `to_replace` should NOT be empty.
For example, given a file "/workspace/example.txt" with the following content:
@ -115,7 +118,7 @@ insert_content_at_line(file_name: str, line_number: int, content: str) -> None:

append_file(file_name: str, content: str) -> None:
Append content to the given file.
It appends text `content` to the end of the specified file.
It appends text `content` to the end of the specified file, ideal after a `create_file`!
Args:
file_name: str: The name of the file to edit.
line_number: int: The line number (starting from 1) to insert the content after.
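For example, a minimal sketch of creating a (hypothetical) file and writing its first content with `append_file`:
<execute_ipython>
create_file('/workspace/hello.py')
append_file('/workspace/hello.py', 'print("Hello World!")\n')
</execute_ipython>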
@ -125,19 +128,19 @@ search_dir(search_term: str, dir_path: str = './') -> None:
Searches for search_term in all files in dir. If dir is not provided, searches in the current directory.
Args:
search_term: str: The term to search for.
dir_path: Optional[str]: The path to the directory to search.
dir_path: str: The path to the directory to search.

search_file(search_term: str, file_path: Optional[str] = None) -> None:
search_file(search_term: str, file_path: str | None = None) -> None:
Searches for search_term in file. If file is not provided, searches in the current open file.
Args:
search_term: str: The term to search for.
file_path: Optional[str]: The path to the file to search.
file_path: str | None: The path to the file to search.
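For example, a minimal sketch of searching a directory and then a single file (the term and paths are illustrative):
<execute_ipython>
search_dir('TODO', '/workspace')
search_file('TODO', '/workspace/app.py')
</execute_ipython>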

find_file(file_name: str, dir_path: str = './') -> None:
Finds all files with the given name in the specified directory.
Args:
file_name: str: The name of the file to find.
dir_path: Optional[str]: The path to the directory to search.
dir_path: str: The path to the directory to search.
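For example, a minimal sketch of locating every file with a given (hypothetical) name:
<execute_ipython>
find_file('config.yaml', '/workspace')
</execute_ipython>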

parse_pdf(file_path: str) -> None:
Parses the content of a PDF file and prints it.
@ -159,15 +162,23 @@ parse_pptx(file_path: str) -> None:
Args:
file_path: str: The path to the file to open.
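For example, a minimal sketch of printing the contents of (hypothetical) document files:
<execute_ipython>
parse_pdf('/workspace/report.pdf')
parse_pptx('/workspace/slides.pptx')
</execute_ipython>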

Please note that THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Use `append_file` after `create_file` to write content!

IMPORTANT:
- `open_file` only returns the first 100 lines of the file by default! The assistant MUST use `scroll_down` repeatedly to read the full file BEFORE making edits!
- The assistant shall adhere to THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRING PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write the line out, with all leading spaces before the code!
- Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
- Any code issued should be less than 50 lines to avoid context being cut off!
- After EVERY `create_file` the method `append_file` shall be used to write the FIRST content!
- For `edit_file_by_replace` NEVER provide empty parameters!
- For `edit_file_by_replace` the file must be read fully before any replacements!

Responses should be concise.
The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block.
Include ONLY ONE <execute_ipython>, <execute_bash>, or <execute_browse> per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed.
If the assistant is finished with the task you MUST include <finish></finish> in your response.
IMPORTANT: Execute code using <execute_ipython>, <execute_bash>, or <execute_browse> whenever possible.
When handling files, try to use full paths and pwd to avoid errors.

The assistant should utilize full file paths and the `pwd` command to prevent path-related errors.
The assistant must avoid apologies and thanks in its responses.

----------

@ -393,15 +404,13 @@ The server is running on port 5000 with PID 126. You can access the list of numb
--- END OF EXAMPLE ---



NOW, LET'S START!

----------

Fix typos in bad.txt. Do not ask me for confirmation at any point.

----------

Sure! Let's start by opening the `bad.txt` file to identify the typos.
Certainly! I'll fix the typos in the bad.txt file. Let's start by opening the file and examining its contents.
<execute_ipython>
open_file('bad.txt')
</execute_ipython>
@ -416,6 +425,8 @@ OBSERVATION:
3|No mor typos!
4|Enjoy!
(this is the end of the file)
[Jupyter current working directory: /workspace]
[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]


ENVIRONMENT REMINDER: You have 13 turns left to complete the task. When finished reply with <finish></finish>
ENVIRONMENT REMINDER: You have 18 turns left to complete the task. When finished reply with <finish></finish>.

@ -2,32 +2,33 @@

----------

A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
The assistant can use an interactive Python (Jupyter Notebook) environment, executing code with <execute_ipython>.
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed answers to the user's questions.
The assistant can use a Python environment with <execute_ipython>, e.g.:
<execute_ipython>
print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
The assistant can execute bash commands wrapped with <execute_bash>, e.g. <execute_bash> ls </execute_bash>.
If a bash command returns exit code `-1`, this means the process is not yet finished.
The assistant must then send a second <execute_bash>. The second <execute_bash> can be empty
(which will retrieve any additional logs), or it can contain text to be sent to STDIN of the running process,
or it can contain the text `ctrl+c` to interrupt the process.

For commands that may run indefinitely, the output should be redirected to a file and the command run
in the background, e.g. <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
If a command execution result says "Command timed out. Sending SIGINT to the process",
the assistant should retry running the command in the background.

For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
Important, however: do not run interactive commands. You do not have access to stdin.
Also, you need to handle commands that may run indefinitely and not return a result. For such cases, you should redirect the output to a file and run the command in the background to avoid blocking the execution.
For example, to run a Python script that might run indefinitely without returning immediately, you can use the following format: <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
Also, if a command execution result saying like: Command: "npm start" timed out. Sending SIGINT to the process, you should also retry with running the command in the background.
The assistant can browse the Internet with <execute_browse> and </execute_browse>.
For example, <execute_browse> Tell me the usa's president using google search </execute_browse>.
Or <execute_browse> Tell me what is in http://example.com </execute_browse>.
The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
To interact with GitHub, use the $GITHUB_TOKEN environment variable.
For example, to push a branch `my_branch` to the GitHub repo `owner/repo`:
<execute_bash> git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch </execute_bash>
If $GITHUB_TOKEN is not set, ask the user to set it.

The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.

Apart from the standard Python library, the assistant can also use the following functions (already imported) in <execute_ipython> environment:
open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None:
Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line.
It only shows the first 100 lines by default! `context_lines` is the max number of lines to be displayed, up to 2000. Use `scroll_up` and `scroll_down` to view more content up or down.
Opens the file at the given path in the editor. IF the file is to be edited, first use `scroll_down` repeatedly to read the full file!
If line_number is provided, the window will be moved to include that line.
It only shows the first 100 lines by default! `context_lines` is the max number of lines to be displayed, up to 100. Use `scroll_up` and `scroll_down` to view more content up or down.
Args:
path: str: The path to the file to open, preferred absolute path.
line_number: int | None = 1: The line number to move to. Defaults to 1.
@ -54,7 +55,9 @@ create_file(filename: str) -> None:
filename: str: The name of the file to create.

edit_file_by_replace(file_name: str, to_replace: str, new_content: str) -> None:
Edit an existing file. This will search for non-empty `to_replace` in the given file and replace it with `new_content`. Use `append_file` method for writing after `create_file`!
Edit an existing file. This will search for non-empty `to_replace` in the given file and replace it with non-empty `new_content`.
`to_replace` and `new_content` must be different! Split large edits into multiple smaller edits if necessary!
Use `append_file` method for writing after `create_file`!
Every *to_replace* must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc.
Include enough lines to make code in `to_replace` unique. `to_replace` should NOT be empty.
For example, given a file "/workspace/example.txt" with the following content:
@ -115,7 +118,7 @@ insert_content_at_line(file_name: str, line_number: int, content: str) -> None:

append_file(file_name: str, content: str) -> None:
Append content to the given file.
It appends text `content` to the end of the specified file.
It appends text `content` to the end of the specified file, ideal after a `create_file`!
Args:
file_name: str: The name of the file to edit.
line_number: int: The line number (starting from 1) to insert the content after.
@ -125,19 +128,19 @@ search_dir(search_term: str, dir_path: str = './') -> None:
Searches for search_term in all files in dir. If dir is not provided, searches in the current directory.
Args:
search_term: str: The term to search for.
dir_path: Optional[str]: The path to the directory to search.
dir_path: str: The path to the directory to search.

search_file(search_term: str, file_path: Optional[str] = None) -> None:
search_file(search_term: str, file_path: str | None = None) -> None:
Searches for search_term in file. If file is not provided, searches in the current open file.
Args:
search_term: str: The term to search for.
file_path: Optional[str]: The path to the file to search.
file_path: str | None: The path to the file to search.

find_file(file_name: str, dir_path: str = './') -> None:
Finds all files with the given name in the specified directory.
Args:
file_name: str: The name of the file to find.
dir_path: Optional[str]: The path to the directory to search.
dir_path: str: The path to the directory to search.

parse_pdf(file_path: str) -> None:
Parses the content of a PDF file and prints it.
@ -159,15 +162,23 @@ parse_pptx(file_path: str) -> None:
Args:
file_path: str: The path to the file to open.

Please note that THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Use `append_file` after `create_file` to write content!

IMPORTANT:
- `open_file` only returns the first 100 lines of the file by default! The assistant MUST use `scroll_down` repeatedly to read the full file BEFORE making edits!
- The assistant shall adhere to THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRING PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write the line out, with all leading spaces before the code!
- Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
- Any code issued should be less than 50 lines to avoid context being cut off!
- After EVERY `create_file` the method `append_file` shall be used to write the FIRST content!
- For `edit_file_by_replace` NEVER provide empty parameters!
- For `edit_file_by_replace` the file must be read fully before any replacements!

Responses should be concise.
The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block.
Include ONLY ONE <execute_ipython>, <execute_bash>, or <execute_browse> per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed.
If the assistant is finished with the task you MUST include <finish></finish> in your response.
IMPORTANT: Execute code using <execute_ipython>, <execute_bash>, or <execute_browse> whenever possible.
When handling files, try to use full paths and pwd to avoid errors.

The assistant should utilize full file paths and the `pwd` command to prevent path-related errors.
The assistant must avoid apologies and thanks in its responses.

----------

@ -393,15 +404,13 @@ The server is running on port 5000 with PID 126. You can access the list of numb
--- END OF EXAMPLE ---



NOW, LET'S START!

----------

Fix typos in bad.txt. Do not ask me for confirmation at any point.

----------

Sure! Let's start by opening the `bad.txt` file to identify the typos.
Certainly! I'll fix the typos in the bad.txt file. Let's start by opening the file and examining its contents.
<execute_ipython>
open_file('bad.txt')
</execute_ipython>
@ -416,21 +425,23 @@ OBSERVATION:
3|No mor typos!
4|Enjoy!
(this is the end of the file)

[Jupyter current working directory: /workspace]
[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]

----------

I will correct the typos in the `bad.txt` file.
I'll now fix the typos in the bad.txt file. There are three typos to correct:

1. "stupid typoo" -> "stupid typo"
2. "No mor typos!" -> "No more typos!"
1. "typoo" should be "typo"
2. "stupid" is not appropriate, we'll change it to "silly"
3. "mor" should be "more"

Let's make these changes.
Let's make these changes:
<execute_ipython>
edit_file_by_replace(
'bad.txt',
to_replace="This is a stupid typoo.",
new_content="This is a stupid typo."
new_content="This is a silly typo."
)
</execute_ipython>

@ -439,12 +450,14 @@ edit_file_by_replace(
OBSERVATION:
[File: /workspace/bad.txt (4 lines total after edit)]
(this is the beginning of the file)
1|This is a stupid typo.
1|This is a silly typo.
2|Really?
3|No mor typos!
4|Enjoy!
(this is the end of the file)
[File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
[Jupyter current working directory: /workspace]
[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]


ENVIRONMENT REMINDER: You have 12 turns left to complete the task. When finished reply with <finish></finish>
ENVIRONMENT REMINDER: You have 17 turns left to complete the task. When finished reply with <finish></finish>.

@ -2,32 +2,33 @@

----------

A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
The assistant can use an interactive Python (Jupyter Notebook) environment, executing code with <execute_ipython>.
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed answers to the user's questions.
The assistant can use a Python environment with <execute_ipython>, e.g.:
<execute_ipython>
print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
The assistant can execute bash commands wrapped with <execute_bash>, e.g. <execute_bash> ls </execute_bash>.
If a bash command returns exit code `-1`, this means the process is not yet finished.
The assistant must then send a second <execute_bash>. The second <execute_bash> can be empty
(which will retrieve any additional logs), or it can contain text to be sent to STDIN of the running process,
or it can contain the text `ctrl+c` to interrupt the process.

For commands that may run indefinitely, the output should be redirected to a file and the command run
in the background, e.g. <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
If a command execution result says "Command timed out. Sending SIGINT to the process",
the assistant should retry running the command in the background.

For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
Important, however: do not run interactive commands. You do not have access to stdin.
Also, you need to handle commands that may run indefinitely and not return a result. For such cases, you should redirect the output to a file and run the command in the background to avoid blocking the execution.
For example, to run a Python script that might run indefinitely without returning immediately, you can use the following format: <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
Also, if a command execution result saying like: Command: "npm start" timed out. Sending SIGINT to the process, you should also retry with running the command in the background.
The assistant can browse the Internet with <execute_browse> and </execute_browse>.
For example, <execute_browse> Tell me the usa's president using google search </execute_browse>.
Or <execute_browse> Tell me what is in http://example.com </execute_browse>.
The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
To interact with GitHub, use the $GITHUB_TOKEN environment variable.
For example, to push a branch `my_branch` to the GitHub repo `owner/repo`:
<execute_bash> git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch </execute_bash>
If $GITHUB_TOKEN is not set, ask the user to set it.

The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.

Apart from the standard Python library, the assistant can also use the following functions (already imported) in <execute_ipython> environment:
open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None:
Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line.
It only shows the first 100 lines by default! `context_lines` is the max number of lines to be displayed, up to 2000. Use `scroll_up` and `scroll_down` to view more content up or down.
Opens the file at the given path in the editor. IF the file is to be edited, first use `scroll_down` repeatedly to read the full file!
If line_number is provided, the window will be moved to include that line.
It only shows the first 100 lines by default! `context_lines` is the max number of lines to be displayed, up to 100. Use `scroll_up` and `scroll_down` to view more content up or down.
Args:
path: str: The path to the file to open, preferred absolute path.
line_number: int | None = 1: The line number to move to. Defaults to 1.
@ -54,7 +55,9 @@ create_file(filename: str) -> None:
filename: str: The name of the file to create.

edit_file_by_replace(file_name: str, to_replace: str, new_content: str) -> None:
Edit an existing file. This will search for non-empty `to_replace` in the given file and replace it with `new_content`. Use `append_file` method for writing after `create_file`!
Edit an existing file. This will search for non-empty `to_replace` in the given file and replace it with non-empty `new_content`.
`to_replace` and `new_content` must be different! Split large edits into multiple smaller edits if necessary!
Use `append_file` method for writing after `create_file`!
Every *to_replace* must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc.
Include enough lines to make code in `to_replace` unique. `to_replace` should NOT be empty.
For example, given a file "/workspace/example.txt" with the following content:
@ -115,7 +118,7 @@ insert_content_at_line(file_name: str, line_number: int, content: str) -> None:

append_file(file_name: str, content: str) -> None:
Append content to the given file.
It appends text `content` to the end of the specified file.
It appends text `content` to the end of the specified file, ideal after a `create_file`!
Args:
file_name: str: The name of the file to edit.
line_number: int: The line number (starting from 1) to insert the content after.
@ -125,19 +128,19 @@ search_dir(search_term: str, dir_path: str = './') -> None:
Searches for search_term in all files in dir. If dir is not provided, searches in the current directory.
Args:
search_term: str: The term to search for.
dir_path: Optional[str]: The path to the directory to search.
dir_path: str: The path to the directory to search.

search_file(search_term: str, file_path: Optional[str] = None) -> None:
search_file(search_term: str, file_path: str | None = None) -> None:
Searches for search_term in file. If file is not provided, searches in the current open file.
Args:
search_term: str: The term to search for.
file_path: Optional[str]: The path to the file to search.
file_path: str | None: The path to the file to search.

find_file(file_name: str, dir_path: str = './') -> None:
Finds all files with the given name in the specified directory.
Args:
file_name: str: The name of the file to find.
dir_path: Optional[str]: The path to the directory to search.
dir_path: str: The path to the directory to search.

parse_pdf(file_path: str) -> None:
Parses the content of a PDF file and prints it.
@ -159,15 +162,23 @@ parse_pptx(file_path: str) -> None:
Args:
file_path: str: The path to the file to open.

Please note that THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Use `append_file` after `create_file` to write content!

IMPORTANT:
- `open_file` only returns the first 100 lines of the file by default! The assistant MUST use `scroll_down` repeatedly to read the full file BEFORE making edits!
- The assistant shall adhere to THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRING PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write the line out, with all leading spaces before the code!
- Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
- Any code issued should be less than 50 lines to avoid context being cut off!
- After EVERY `create_file` the method `append_file` shall be used to write the FIRST content!
- For `edit_file_by_replace` NEVER provide empty parameters!
- For `edit_file_by_replace` the file must be read fully before any replacements!

Responses should be concise.
The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block.
Include ONLY ONE <execute_ipython>, <execute_bash>, or <execute_browse> per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed.
If the assistant is finished with the task you MUST include <finish></finish> in your response.
IMPORTANT: Execute code using <execute_ipython>, <execute_bash>, or <execute_browse> whenever possible.
When handling files, try to use full paths and pwd to avoid errors.

The assistant should utilize full file paths and the `pwd` command to prevent path-related errors.
The assistant must avoid apologies and thanks in its responses.

----------

@ -393,15 +404,13 @@ The server is running on port 5000 with PID 126. You can access the list of numb
--- END OF EXAMPLE ---



NOW, LET'S START!

----------

Fix typos in bad.txt. Do not ask me for confirmation at any point.

----------

Sure! Let's start by opening the `bad.txt` file to identify the typos.
Certainly! I'll fix the typos in the bad.txt file. Let's start by opening the file and examining its contents.
<execute_ipython>
open_file('bad.txt')
</execute_ipython>
@ -416,21 +425,23 @@ OBSERVATION:
3|No mor typos!
4|Enjoy!
(this is the end of the file)

[Jupyter current working directory: /workspace]
[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]

----------

I will correct the typos in the `bad.txt` file.
I'll now fix the typos in the bad.txt file. There are three typos to correct:

1. "stupid typoo" -> "stupid typo"
2. "No mor typos!" -> "No more typos!"
1. "typoo" should be "typo"
2. "stupid" is not appropriate, we'll change it to "silly"
3. "mor" should be "more"

Let's make these changes.
Let's make these changes:
<execute_ipython>
edit_file_by_replace(
'bad.txt',
to_replace="This is a stupid typoo.",
new_content="This is a stupid typo."
new_content="This is a silly typo."
)
</execute_ipython>

@ -439,21 +450,18 @@ edit_file_by_replace(
OBSERVATION:
[File: /workspace/bad.txt (4 lines total after edit)]
(this is the beginning of the file)
1|This is a stupid typo.
1|This is a silly typo.
2|Really?
3|No mor typos!
4|Enjoy!
(this is the end of the file)
[File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]

[Jupyter current working directory: /workspace]
[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]

----------

Let's correct the remaining typo in the file.

1. "No mor typos!" -> "No more typos!"

I will make this change now.
Now, let's fix the remaining typo:
<execute_ipython>
edit_file_by_replace(
'bad.txt',
@ -467,12 +475,14 @@ edit_file_by_replace(
OBSERVATION:
[File: /workspace/bad.txt (4 lines total after edit)]
(this is the beginning of the file)
1|This is a stupid typo.
1|This is a silly typo.
2|Really?
3|No more typos!
4|Enjoy!
(this is the end of the file)
[File updated (edited at line 3). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
[Jupyter current working directory: /workspace]
[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]


ENVIRONMENT REMINDER: You have 11 turns left to complete the task. When finished reply with <finish></finish>
ENVIRONMENT REMINDER: You have 16 turns left to complete the task. When finished reply with <finish></finish>.

Some files were not shown because too many files have changed in this diff.