diff --git a/.github/workflows/ghcr_runtime.yml b/.github/workflows/ghcr_runtime.yml index 0e146bde39..cfd046b99d 100644 --- a/.github/workflows/ghcr_runtime.yml +++ b/.github/workflows/ghcr_runtime.yml @@ -38,10 +38,6 @@ jobs: base_image: - image: 'nikolaik/python-nodejs:python3.11-nodejs22' tag: nikolaik - - image: 'python:3.11-bookworm' - tag: python - - image: 'node:22-bookworm' - tag: node steps: - name: Checkout uses: actions/checkout@v4 @@ -70,31 +66,39 @@ jobs: - name: Set up Docker Buildx id: buildx uses: docker/setup-buildx-action@v3 - - name: Install poetry via pipx - run: pipx install poetry - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.11' - cache: 'poetry' + - name: Cache Poetry dependencies + uses: actions/cache@v4 + with: + path: | + ~/.cache/pypoetry + ~/.virtualenvs + key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }} + restore-keys: | + ${{ runner.os }}-poetry- + - name: Install poetry via pipx + run: pipx install poetry - name: Install Python dependencies using Poetry run: make install-python-dependencies - name: Create source distribution and Dockerfile run: poetry run python3 openhands/runtime/utils/runtime_build.py --base_image ${{ matrix.base_image.image }} --build_folder containers/runtime --force_rebuild - name: Build and push runtime image ${{ matrix.base_image.image }} - if: "!github.event.pull_request.head.repo.fork" + if: github.event.pull_request.head.repo.fork != true run: | ./containers/build.sh runtime ${{ github.repository_owner }} --push ${{ matrix.base_image.tag }} # Forked repos can't push to GHCR, so we need to upload the image as an artifact - name: Build runtime image ${{ matrix.base_image.image }} for fork - if: "github.event.pull_request.head.repo.fork" + if: github.event.pull_request.head.repo.fork uses: docker/build-push-action@v6 with: tags: ghcr.io/all-hands-ai/runtime:${{ github.sha }}-${{ matrix.base_image.tag }} outputs: type=docker,dest=/tmp/runtime-${{ matrix.base_image.tag }}.tar context: containers/runtime - name: Upload runtime image for fork - if: "github.event.pull_request.head.repo.fork" + if: github.event.pull_request.head.repo.fork uses: actions/upload-artifact@v4 with: name: runtime-${{ matrix.base_image.tag }} @@ -103,11 +107,12 @@ jobs: # Run unit tests with the EventStream runtime Docker images test_runtime: name: Test Runtime - runs-on: ubuntu-latest needs: [ghcr_build_runtime] + runs-on: ubuntu-latest strategy: + fail-fast: false matrix: - base_image: ['nikolaik', 'python', 'node'] + base_image: ['nikolaik'] steps: - uses: actions/checkout@v4 - name: Free Disk Space (Ubuntu) @@ -121,26 +126,41 @@ jobs: swap-storage: true # Forked repos can't push to GHCR, so we need to download the image as an artifact - name: Download runtime image for fork - if: "github.event.pull_request.head.repo.fork" + if: github.event.pull_request.head.repo.fork uses: actions/download-artifact@v4 with: name: runtime-${{ matrix.base_image }} path: /tmp - name: Load runtime image for fork - if: "github.event.pull_request.head.repo.fork" + if: github.event.pull_request.head.repo.fork run: | docker load --input /tmp/runtime-${{ matrix.base_image }}.tar - - name: Install poetry via pipx - run: pipx install poetry + - name: Cache Poetry dependencies + uses: actions/cache@v4 + with: + path: | + ~/.cache/pypoetry + ~/.virtualenvs + key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }} + restore-keys: | + ${{ runner.os }}-poetry- - name: Set up Python uses: actions/setup-python@v5 with: python-version: 
'3.11' - cache: 'poetry' + - name: Install poetry via pipx + run: pipx install poetry - name: Install Python dependencies using Poetry run: make install-python-dependencies - name: Run runtime tests run: | + # We install pytest-xdist in order to run tests across CPUs. However, tests start to fail when we run + # them across more than 2 CPUs for some reason + poetry run pip install pytest-xdist + + # Install pytest-rerunfailures to be able to retry flaky tests + poetry run pip install pytest-rerunfailures + image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }} image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]') @@ -148,7 +168,7 @@ jobs: SANDBOX_USER_ID=$(id -u) \ SANDBOX_BASE_CONTAINER_IMAGE=$image_name \ TEST_IN_CI=true \ - poetry run pytest --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime + poetry run pytest -n 2 --reruns 2 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 env: @@ -162,27 +182,35 @@ jobs: strategy: fail-fast: false matrix: - base_image: ['nikolaik', 'python', 'node'] + base_image: ['nikolaik'] steps: - uses: actions/checkout@v4 # Forked repos can't push to GHCR, so we need to download the image as an artifact - name: Download runtime image for fork - if: "github.event.pull_request.head.repo.fork" + if: github.event.pull_request.head.repo.fork uses: actions/download-artifact@v4 with: name: runtime-${{ matrix.base_image }} path: /tmp - name: Load runtime image for fork - if: "github.event.pull_request.head.repo.fork" + if: github.event.pull_request.head.repo.fork run: | docker load --input /tmp/runtime-${{ matrix.base_image }}.tar - - name: Install poetry via pipx - run: pipx install poetry + - name: Cache Poetry dependencies + uses: actions/cache@v4 + with: + path: | + ~/.cache/pypoetry + ~/.virtualenvs + key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }} + restore-keys: | + ${{ runner.os }}-poetry- - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.11' - cache: 'poetry' + - name: Install poetry via pipx + run: pipx install poetry - name: Install Python dependencies using Poetry run: make install-python-dependencies - name: Run integration tests diff --git a/.github/workflows/py-unit-tests.yml b/.github/workflows/py-unit-tests.yml index ec66b9a36b..dc66917499 100644 --- a/.github/workflows/py-unit-tests.yml +++ b/.github/workflows/py-unit-tests.yml @@ -22,13 +22,21 @@ jobs: python-version: ['3.11'] steps: - uses: actions/checkout@v4 - - name: Install poetry via pipx - run: pipx install poetry - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: 'poetry' + - name: Cache Poetry dependencies + uses: actions/cache@v4 + with: + path: | + ~/.cache/pypoetry + ~/.virtualenvs + key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }} + restore-keys: | + ${{ runner.os }}-poetry- + - name: Install poetry via pipx + run: pipx install poetry - name: Install Python dependencies using Poetry run: poetry install --without evaluation,llama-index - name: Install & Start Docker diff --git a/.github/workflows/regenerate_integration_tests.yml b/.github/workflows/regenerate_integration_tests.yml index 0940b657ee..6cd30df9a2 100644 --- a/.github/workflows/regenerate_integration_tests.yml +++ b/.github/workflows/regenerate_integration_tests.yml @@ -3,6 +3,23 @@ name: Regenerate Integration Tests on: 
workflow_dispatch: + inputs: + debug: + description: 'Enable debug mode' + type: boolean + default: true + log_to_file: + description: 'Enable logging to file' + type: boolean + default: true + force_regenerate_tests: + description: 'Force regeneration of tests' + type: boolean + default: false + force_use_llm: + description: 'Force use of LLM' + type: boolean + default: false jobs: regenerate_integration_tests: @@ -12,21 +29,32 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 - - name: Install poetry via pipx - run: pipx install poetry - name: Set up Python uses: actions/setup-python@v5 with: - python-version: ${{ matrix.python-version }} - cache: 'poetry' + python-version: "3.11" + - name: Cache Poetry dependencies + uses: actions/cache@v4 + with: + path: | + ~/.cache/pypoetry + ~/.virtualenvs + key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }} + restore-keys: | + ${{ runner.os }}-poetry- + - name: Install poetry via pipx + run: pipx install poetry - name: Install Python dependencies using Poetry - run: poetry install --without evaluation,llama-index + run: make install-python-dependencies - name: Build Environment run: make build - - name: Regenerate integration tests - run: ./tests/integration/regenerate.sh - + run: | + DEBUG=${{ inputs.debug }} \ + LOG_TO_FILE=${{ inputs.log_to_file }} \ + FORCE_REGENERATE_TESTS=${{ inputs.force_regenerate_tests }} \ + FORCE_USE_LLM=${{ inputs.force_use_llm }} \ + ./tests/integration/regenerate.sh - name: Commit changes run: | if git diff --quiet --exit-code; then @@ -37,5 +65,6 @@ jobs: git config --global user.name 'github-actions[bot]' git config --global user.email 'github-actions[bot]@users.noreply.github.com' git add . - git commit -m "Regenerate integration tests" + # run it twice in case pre-commit makes changes + git commit -am "Regenerate integration tests" || git commit -am "Regenerate integration tests" git push diff --git a/README.md b/README.md index a19aebd6cd..8bddb5e7e8 100644 --- a/README.md +++ b/README.md @@ -1,72 +1,49 @@ - +
+ Logo +

OpenHands: Code Less, Make More

+
- - - -

- - GitHub Trend - -

Contributors - Forks Stargazers - Issues + CodeCov MIT License - Credits
Join our Slack community Join our Discord community - CodeCov -
- - -
- Logo -

OpenHands: Code Less, Make More

- Check out the documentation - Paper on Arxiv + Credits
- Evaluation Benchmark + Check out the documentation + Paper on Arxiv + Evaluation Benchmark Score +
-
+Welcome to OpenHands (formerly OpenDevin), a platform for software development agents powered by AI. -Welcome to OpenHands, a platform for autonomous software engineers, powered by AI and LLMs (previously called "OpenDevin"). +OpenHands agents can do anything a human developer can: modify code, run commands, browse the web, +call APIs, and yes—even copy code snippets from StackOverflow. -OpenHands agents collaborate with human developers to write code, fix bugs, and ship features. +Learn more at [docs.all-hands.dev](https://docs.all-hands.dev), or jump to the [Quick Start](#-quick-start). ![App screenshot](./docs/static/img/screenshot.png) -## ⚡ Getting Started -OpenHands works best with Docker version 26.0.0+ (Docker Desktop 4.31.0+). -You must be using Linux, Mac OS, or WSL on Windows. +## ⚡ Quick Start -To start OpenHands in a docker container, run the following commands in your terminal: +The easiest way to run OpenHands is in Docker. You can change `WORKSPACE_BASE` below to +point OpenHands to existing code that you'd like to modify. -> [!WARNING] -> When you run the following command, files in `./workspace` may be modified or deleted. +See the [Getting Started](https://docs.all-hands.dev/modules/usage/getting-started) guide for +system requirements and more information. ```bash -WORKSPACE_BASE=$(pwd)/workspace -docker run -it \ - --pull=always \ +export WORKSPACE_BASE=$(pwd)/workspace + +docker run -it --pull=always \ + -e SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.9-nikolaik \ -e SANDBOX_USER_ID=$(id -u) \ -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \ -v $WORKSPACE_BASE:/opt/workspace_base \ @@ -77,29 +54,23 @@ docker run -it \ ghcr.io/all-hands-ai/openhands:0.9 ``` -> [!NOTE] -> This command pulls the `0.9` tag, which represents the most recent stable release of OpenHands. You have other options as well: -> - For a specific release version, use `ghcr.io/all-hands-ai/openhands:` (replace with the desired version number). -> - For the most up-to-date development version, use `ghcr.io/all-hands-ai/openhands:main`. This version may be **(unstable!)** and is recommended for testing or development purposes only. -> -> Choose the tag that best suits your needs based on stability requirements and desired features. +You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)! -You'll find OpenHands running at [http://localhost:3000](http://localhost:3000) with access to `./workspace`. To have OpenHands operate on your code, place it in `./workspace`. -OpenHands will only have access to this workspace folder. The rest of your system will not be affected as it runs in a secured docker sandbox. +You can also run OpenHands in a scriptable [headless mode](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), +or as an [interactive CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode). -Upon opening OpenHands, you must select the appropriate `Model` and enter the `API Key` within the settings that should pop up automatically. These can be set at any time by selecting -the `Settings` button (gear icon) in the UI. If the required `Model` does not exist in the list, you can manually enter it in the text box. +Visit [Getting Started](https://docs.all-hands.dev/modules/usage/getting-started) for more information and setup instructions. -For the development workflow, see [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md). 
+If you want to modify the OpenHands source code, check out [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md). -Are you having trouble? Check out our [Troubleshooting Guide](https://docs.all-hands.dev/modules/usage/troubleshooting). +Having issues? The [Troubleshooting Guide](https://docs.all-hands.dev/modules/usage/troubleshooting) can help. -## 🚀 Documentation +## 📖 Documentation To learn more about the project, and for tips on using OpenHands, -**check out our [documentation](https://docs.all-hands.dev/modules/usage/intro)**. +**check out our [documentation](https://docs.all-hands.dev/modules/usage/getting-started)**. -There you'll find resources on how to use different LLM providers (like ollama and Anthropic's Claude), +There you'll find resources on how to use different LLM providers, troubleshooting resources, and advanced configuration options. ## 🤝 How to Contribute @@ -134,17 +105,6 @@ Let's make software engineering better together! Distributed under the MIT License. See [`LICENSE`](./LICENSE) for more information. -[contributors-shield]: https://img.shields.io/github/contributors/All-Hands-AI/OpenHands?style=for-the-badge -[contributors-url]: https://github.com/All-Hands-AI/OpenHands/graphs/contributors -[forks-shield]: https://img.shields.io/github/forks/All-Hands-AI/OpenHands?style=for-the-badge -[forks-url]: https://github.com/All-Hands-AI/OpenHands/network/members -[stars-shield]: https://img.shields.io/github/stars/All-Hands-AI/OpenHands?style=for-the-badge -[stars-url]: https://github.com/All-Hands-AI/OpenHands/stargazers -[issues-shield]: https://img.shields.io/github/issues/All-Hands-AI/OpenHands?style=for-the-badge -[issues-url]: https://github.com/All-Hands-AI/OpenHands/issues -[license-shield]: https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge -[license-url]: https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE - ## 🙏 Acknowledgements OpenHands is built by a large number of contributors, and every contribution is greatly appreciated! We also build upon other open source projects, and we are deeply thankful for their work. diff --git a/agenthub/browsing_agent/browsing_agent.py b/agenthub/browsing_agent/browsing_agent.py index b86576c6ed..e70f0ab178 100644 --- a/agenthub/browsing_agent/browsing_agent.py +++ b/agenthub/browsing_agent/browsing_agent.py @@ -206,9 +206,11 @@ class BrowsingAgent(Agent): prompt = get_prompt(error_prefix, cur_axtree_txt, prev_action_str) messages.append(Message(role='user', content=[TextContent(text=prompt)])) - logger.debug(prompt) + + flat_messages = self.llm.format_messages_for_llm(messages) + response = self.llm.completion( - messages=[message.model_dump() for message in messages], + messages=flat_messages, temperature=0.0, stop=[')```', ')\n```'], ) diff --git a/agenthub/browsing_agent/prompt.py b/agenthub/browsing_agent/prompt.py index 468c1d52d3..3eda72a0a0 100644 --- a/agenthub/browsing_agent/prompt.py +++ b/agenthub/browsing_agent/prompt.py @@ -354,7 +354,7 @@ and executed by a program, make sure to follow the formatting instructions. 
self._prompt += '\n'.join( [ f"""\ - - [{msg['role']}] {msg['message']}""" + - [{msg['role']}], {msg['message']}""" for msg in chat_messages ] ) diff --git a/agenthub/browsing_agent/response_parser.py b/agenthub/browsing_agent/response_parser.py index 8c9e838ad7..a5cc3fd054 100644 --- a/agenthub/browsing_agent/response_parser.py +++ b/agenthub/browsing_agent/response_parser.py @@ -24,9 +24,9 @@ class BrowsingResponseParser(ResponseParser): if action_str is None: return '' action_str = action_str.strip() - if not action_str.endswith('```'): + if action_str and not action_str.endswith('```'): action_str = action_str + ')```' - logger.info(action_str) + logger.debug(action_str) return action_str def parse_action(self, action_str: str) -> Action: diff --git a/agenthub/codeact_agent/codeact_agent.py b/agenthub/codeact_agent/codeact_agent.py index 497c923da8..ea45068e3f 100644 --- a/agenthub/codeact_agent/codeact_agent.py +++ b/agenthub/codeact_agent/codeact_agent.py @@ -1,9 +1,11 @@ import os +from itertools import islice from agenthub.codeact_agent.action_parser import CodeActResponseParser from openhands.controller.agent import Agent from openhands.controller.state.state import State from openhands.core.config import AgentConfig +from openhands.core.logger import openhands_logger as logger from openhands.core.message import ImageContent, Message, TextContent from openhands.events.action import ( Action, @@ -117,7 +119,11 @@ class CodeActAgent(Agent): ): content = [TextContent(text=self.action_to_str(action))] - if isinstance(action, MessageAction) and action.images_urls: + if ( + self.llm.vision_is_active() + and isinstance(action, MessageAction) + and action.images_urls + ): content.append(ImageContent(image_urls=action.images_urls)) return Message( @@ -127,14 +133,15 @@ class CodeActAgent(Agent): def get_observation_message(self, obs: Observation) -> Message | None: max_message_chars = self.llm.config.max_message_chars + obs_prefix = 'OBSERVATION:\n' if isinstance(obs, CmdOutputObservation): - text = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars) + text = obs_prefix + truncate_content(obs.content, max_message_chars) text += ( f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]' ) return Message(role='user', content=[TextContent(text=text)]) elif isinstance(obs, IPythonRunCellObservation): - text = 'OBSERVATION:\n' + obs.content + text = obs_prefix + obs.content # replace base64 images with a placeholder splitted = text.split('\n') for i, line in enumerate(splitted): @@ -146,12 +153,10 @@ class CodeActAgent(Agent): text = truncate_content(text, max_message_chars) return Message(role='user', content=[TextContent(text=text)]) elif isinstance(obs, AgentDelegateObservation): - text = 'OBSERVATION:\n' + truncate_content( - str(obs.outputs), max_message_chars - ) + text = obs_prefix + truncate_content(str(obs.outputs), max_message_chars) return Message(role='user', content=[TextContent(text=text)]) elif isinstance(obs, ErrorObservation): - text = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars) + text = obs_prefix + truncate_content(obs.content, max_message_chars) text += '\n[Error occurred in processing last action]' return Message(role='user', content=[TextContent(text=text)]) elif isinstance(obs, UserRejectObservation): @@ -188,9 +193,8 @@ class CodeActAgent(Agent): # prepare what we want to send to the LLM messages = self._get_messages(state) - params = { - 'messages': [message.model_dump() for message in messages], + 'messages': 
self.llm.format_messages_for_llm(messages), 'stop': [ '</execute_ipython>', '</execute_bash>', ], 'temperature': 0.0, } - if self.llm.supports_prompt_caching: + if self.llm.is_caching_prompt_active(): params['extra_headers'] = { 'anthropic-beta': 'prompt-caching-2024-07-31', } - response = self.llm.completion(**params) + try: + response = self.llm.completion(**params) + except Exception as e: + logger.error(f'{e}') + error_message = '{}: {}'.format(type(e).__name__, str(e).split('\n')[0]) + return AgentFinishAction( + thought=f'Agent encountered an error while processing the last action.\nError: {error_message}\nPlease try again.' + ) return self.action_parser.parse(response) @@ -215,7 +226,7 @@ content=[ TextContent( text=self.prompt_manager.system_message, - cache_prompt=self.llm.supports_prompt_caching, # Cache system prompt + cache_prompt=self.llm.is_caching_prompt_active(), # Cache system prompt ) ], ), @@ -224,7 +235,7 @@ content=[ TextContent( text=self.prompt_manager.initial_user_message, - cache_prompt=self.llm.supports_prompt_caching, # if the user asks the same query, + cache_prompt=self.llm.is_caching_prompt_active(), # if the user asks the same query, ) ], ), @@ -243,14 +254,14 @@ if message: # handle error if the message is the SAME role as the previous message # litellm.exceptions.BadRequestError: litellm.BadRequestError: OpenAIException - Error code: 400 - {'detail': 'Only supports u/a/u/a/u...'} - # there should not have two consecutive messages from the same role + # there shouldn't be two consecutive messages from the same role if messages and messages[-1].role == message.role: messages[-1].content.extend(message.content) else: messages.append(message) # Add caching to the last 2 user messages - if self.llm.supports_prompt_caching: + if self.llm.is_caching_prompt_active(): user_turns_processed = 0 for message in reversed(messages): if message.role == 'user' and user_turns_processed < 2: @@ -259,14 +270,17 @@ ].cache_prompt = True # Last item inside the message content user_turns_processed += 1 - # the latest user message is important: + # The latest user message is important: # we want to remind the agent of the environment constraints latest_user_message = next( - ( - m - for m in reversed(messages) - if m.role == 'user' - and any(isinstance(c, TextContent) for c in m.content) + islice( + ( + m + for m in reversed(messages) + if m.role == 'user' + and any(isinstance(c, TextContent) for c in m.content) + ), + 1, ), None, ) diff --git a/agenthub/codeact_agent/system_prompt.j2 b/agenthub/codeact_agent/system_prompt.j2 index 4454df1ca3..bec50ee178 100644 --- a/agenthub/codeact_agent/system_prompt.j2 +++ b/agenthub/codeact_agent/system_prompt.j2 @@ -5,8 +5,13 @@ The assistant can use a Python environment with <execute_ipython>, e.g.: <execute_ipython> print("Hello World!") </execute_ipython> The assistant can execute bash commands wrapped with <execute_bash>, e.g. <execute_bash> ls </execute_bash>. -The assistant is not allowed to run interactive commands. For commands that may run indefinitely, -the output should be redirected to a file and the command run in the background, e.g. python3 app.py > server.log 2>&1 & +If a bash command returns exit code `-1`, this means the process is not yet finished. +The assistant must then send a second <execute_bash>. The second <execute_bash> can be empty +(which will retrieve any additional logs), or it can contain text to be sent to STDIN of the running process, +or it can contain the text `ctrl+c` to interrupt the process. 
+ +For commands that may run indefinitely, the output should be redirected to a file and the command run +in the background, e.g. python3 app.py > server.log 2>&1 & If a command execution result says "Command timed out. Sending SIGINT to the process", the assistant should retry running the command in the background. {% endset %} diff --git a/agenthub/codeact_swe_agent/codeact_swe_agent.py b/agenthub/codeact_swe_agent/codeact_swe_agent.py index dd18d94a84..db3003ff6a 100644 --- a/agenthub/codeact_swe_agent/codeact_swe_agent.py +++ b/agenthub/codeact_swe_agent/codeact_swe_agent.py @@ -94,7 +94,11 @@ class CodeActSWEAgent(Agent): ): content = [TextContent(text=self.action_to_str(action))] - if isinstance(action, MessageAction) and action.images_urls: + if ( + self.llm.vision_is_active() + and isinstance(action, MessageAction) + and action.images_urls + ): content.append(ImageContent(image_urls=action.images_urls)) return Message( @@ -156,9 +160,8 @@ class CodeActSWEAgent(Agent): # prepare what we want to send to the LLM messages: list[Message] = self._get_messages(state) - response = self.llm.completion( - messages=[message.model_dump() for message in messages], + messages=self.llm.format_messages_for_llm(messages), stop=[ '</execute_ipython>', '</execute_bash>', diff --git a/agenthub/micro/agent.py b/agenthub/micro/agent.py index 4645d38cd9..3e3a9803f1 100644 --- a/agenthub/micro/agent.py +++ b/agenthub/micro/agent.py @@ -73,10 +73,13 @@ class MicroAgent(Agent): latest_user_message=last_user_message, ) content = [TextContent(text=prompt)] - if last_image_urls: + if self.llm.vision_is_active() and last_image_urls: content.append(ImageContent(image_urls=last_image_urls)) message = Message(role='user', content=content) - resp = self.llm.completion(messages=[message.model_dump()]) + resp = self.llm.completion( + messages=self.llm.format_messages_for_llm(message), + temperature=0.0, + ) action_resp = resp['choices'][0]['message']['content'] action = parse_response(action_resp) return action diff --git a/agenthub/planner_agent/agent.py b/agenthub/planner_agent/agent.py index 6e53b8e43f..a640ea354b 100644 --- a/agenthub/planner_agent/agent.py +++ b/agenthub/planner_agent/agent.py @@ -46,8 +46,8 @@ class PlannerAgent(Agent): state, self.llm.config.max_message_chars ) content = [TextContent(text=prompt)] - if image_urls: + if self.llm.vision_is_active() and image_urls: content.append(ImageContent(image_urls=image_urls)) message = Message(role='user', content=content) - resp = self.llm.completion(messages=[message.model_dump()]) + resp = self.llm.completion(messages=self.llm.format_messages_for_llm(message)) return self.response_parser.parse(resp) diff --git a/config.template.toml b/config.template.toml index 46f90bb0bf..a27d9e51d0 100644 --- a/config.template.toml +++ b/config.template.toml @@ -126,21 +126,29 @@ embedding_model = "" # Model to use model = "gpt-4o" -# Number of retries to attempt -#num_retries = 5 +# Number of retries to attempt when an LLM operation fails. 
+# Increase this value to allow more attempts before giving up #num_retries = 8 -# Retry maximum wait time -#retry_max_wait = 60 +# Maximum wait time (in seconds) between retry attempts +# This caps the exponential backoff to prevent excessively long waits +#retry_max_wait = 120 -# Retry minimum wait time -#retry_min_wait = 3 +# Minimum wait time (in seconds) between retry attempts +# This sets the initial delay before the first retry +#retry_min_wait = 15 -# Retry multiplier for exponential backoff +# Multiplier for exponential backoff calculation +# The wait time increases by this factor after each failed attempt +# A value of 2.0 means each retry waits twice as long as the previous one #retry_multiplier = 2.0 # Drop any unmapped (unsupported) params without causing an exception #drop_params = false +# Whether to use the prompt caching feature provided by the LLM +#caching_prompt = false + # Base URL for the OLLAMA API #ollama_base_url = "" @@ -153,6 +161,9 @@ model = "gpt-4o" # Top p for the API #top_p = 0.5 +# If the model is vision capable, this option allows image processing to be disabled (useful for cost reduction). +#disable_vision = true [llm.gpt3] # API key to use api_key = "your-api-key" diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index 2f81477729..17ca650247 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -4,8 +4,8 @@ import { themes as prismThemes } from "prism-react-renderer"; const config: Config = { title: "OpenHands", - tagline: "An Open Platform for AI Software Developers as Generalist Agents", - favicon: "img/logo.png", + tagline: "Code Less, Make More", + favicon: "img/logo-square.png", // Set the production url of your site here url: "https://docs.all-hands.dev", @@ -73,23 +73,28 @@ const config: Config = { type: "docSidebar", sidebarId: "docsSidebar", position: "left", - label: "Docs", + label: "User Guides", }, { type: "docSidebar", sidebarId: "apiSidebar", position: "left", - label: "Codebase", + label: "Python API", + }, + { + type: 'localeDropdown', + position: 'left', + }, + { + href: "https://all-hands.dev", + label: "Company", + position: "right", }, { href: "https://github.com/All-Hands-AI/OpenHands", label: "GitHub", position: "right", }, - { - type: 'localeDropdown', - position: 'left', - }, ], }, prism: { diff --git a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/llms/llms.md b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/llms/llms.md index fb8ed07d66..1241bc2250 100644 --- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/llms/llms.md +++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/llms/llms.md @@ -41,4 +41,4 @@ ne peut être aussi puissant que les modèles qui le pilotent -- heureusement, l Certains LLM ont des limites de taux et peuvent nécessiter des réessais. OpenHands réessaiera automatiquement les demandes s'il reçoit une erreur 429 ou une erreur de connexion API. Vous pouvez définir les variables d'environnement `LLM_NUM_RETRIES`, `LLM_RETRY_MIN_WAIT`, `LLM_RETRY_MAX_WAIT` pour contrôler le nombre de réessais et le temps entre les réessais. -Par défaut, `LLM_NUM_RETRIES` est 5 et `LLM_RETRY_MIN_WAIT`, `LLM_RETRY_MAX_WAIT` sont respectivement de 3 secondes et 60 secondes. +Par défaut, `LLM_NUM_RETRIES` est 8 et `LLM_RETRY_MIN_WAIT`, `LLM_RETRY_MAX_WAIT` sont respectivement de 15 secondes et 120 secondes. 
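A quick worked example may help make the retry settings introduced in `config.template.toml` above concrete. This is an illustrative sketch only: it assumes a tenacity-style exponential backoff (wait = min_wait * multiplier^attempt, clamped between the min and max), which may differ in detail from the exact policy OpenHands uses.

```python
# Illustrative sketch of the exponential backoff described by the config
# comments above. Assumption: a tenacity-style wait_exponential policy;
# the exact formula in OpenHands may differ in detail.
def retry_waits(num_retries=8, retry_min_wait=15, retry_max_wait=120, retry_multiplier=2.0):
    waits = []
    for attempt in range(num_retries):
        # Each failed attempt multiplies the wait, clamped to [min, max]
        wait = retry_min_wait * (retry_multiplier**attempt)
        waits.append(min(retry_max_wait, max(retry_min_wait, wait)))
    return waits

print(retry_waits())  # roughly 15, 30, 60, 120, 120, 120, 120, 120 (seconds)
```

With these defaults, the delay doubles from 15 seconds until it reaches the 120-second cap, which is why the localized docs in this patch update the stated defaults from 5 retries / 3 and 60 seconds to 8 retries / 15 and 120 seconds.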
diff --git a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/llms/llms.md b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/llms/llms.md index 73c81c9e48..945d30d9e3 100644 --- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/llms/llms.md +++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/llms/llms.md @@ -43,4 +43,4 @@ OpenHands 将向你配置的 LLM 发出许多提示。大多数这些 LLM 都是 一些 LLM 有速率限制,可能需要重试操作。OpenHands 会在收到 429 错误或 API 连接错误时自动重试请求。 你可以设置 `LLM_NUM_RETRIES`,`LLM_RETRY_MIN_WAIT`,`LLM_RETRY_MAX_WAIT` 环境变量来控制重试次数和重试之间的时间。 -默认情况下,`LLM_NUM_RETRIES` 为 5,`LLM_RETRY_MIN_WAIT` 和 `LLM_RETRY_MAX_WAIT` 分别为 3 秒和 60 秒。 +默认情况下,`LLM_NUM_RETRIES` 为 8,`LLM_RETRY_MIN_WAIT` 和 `LLM_RETRY_MAX_WAIT` 分别为 15 秒和 120 秒。 diff --git a/docs/modules/usage/architecture/architecture.mdx b/docs/modules/usage/architecture/backend.mdx similarity index 100% rename from docs/modules/usage/architecture/architecture.mdx rename to docs/modules/usage/architecture/backend.mdx diff --git a/docs/modules/usage/getting-started.md b/docs/modules/usage/getting-started.md new file mode 100644 index 0000000000..a8badef812 --- /dev/null +++ b/docs/modules/usage/getting-started.md @@ -0,0 +1,60 @@ +--- +sidebar_position: 2 +--- + +# Getting Started + +## System Requirements + +* Docker version 26.0.0+ or Docker Desktop 4.31.0+ +* You must be using Linux or Mac OS + * If you are on Windows, you must use [WSL](https://learn.microsoft.com/en-us/windows/wsl/install) + +## Installation + +The easiest way to run OpenHands is in Docker. You can change `WORKSPACE_BASE` below to point OpenHands to +existing code that you'd like to modify. + +```bash +export WORKSPACE_BASE=$(pwd)/workspace + +docker run -it --pull=always \ + -e SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.9-nikolaik \ + -e SANDBOX_USER_ID=$(id -u) \ + -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \ + -v $WORKSPACE_BASE:/opt/workspace_base \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -p 3000:3000 \ + --add-host host.docker.internal:host-gateway \ + --name openhands-app-$(date +%Y%m%d%H%M%S) \ + ghcr.io/all-hands-ai/openhands:0.9 +``` + +You can also run OpenHands in a scriptable [headless mode](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), +or as an [interactive CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode). + +## Setup + +After running the command above, you'll find OpenHands running at [http://localhost:3000](http://localhost:3000). + +The agent will have access to the `./workspace` folder to do its work. You can copy existing code here, or change `WORKSPACE_BASE` in the +command to point to an existing folder. + +Upon launching OpenHands, you'll see a settings modal. You must select an LLM backend using `Model`, and enter a corresponding `API Key`. +These can be changed at any time by selecting the `Settings` button (gear icon) in the UI. +If the required `Model` does not exist in the list, you can toggle `Use custom model` and manually enter it in the text box. + +settings-modal + +## Versions + +The command above pulls the `0.9` tag, which represents the most recent stable release of OpenHands. You have other options as well: +- For a specific release, use `ghcr.io/all-hands-ai/openhands:$VERSION`, replacing $VERSION with the version number. +- We use semver, and release major, minor, and patch tags. So `0.9` will automatically point to the latest `0.9.x` release, and `0` will point to the latest `0.x.x` release. 
+- For the most up-to-date development version, you can use `ghcr.io/all-hands-ai/openhands:main`. This version is unstable and is recommended for testing or development purposes only. + +You can choose the tag that best suits your needs based on stability requirements and desired features. + +For the development workflow, see [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md). + +Are you having trouble? Check out our [Troubleshooting Guide](https://docs.all-hands.dev/modules/usage/troubleshooting). diff --git a/docs/modules/usage/how-to/cli-mode.md b/docs/modules/usage/how-to/cli-mode.md new file mode 100644 index 0000000000..c619560af5 --- /dev/null +++ b/docs/modules/usage/how-to/cli-mode.md @@ -0,0 +1,108 @@ +# CLI Mode + +OpenHands can be run in an interactive CLI mode, which allows users to start an interactive session via the command line. + +This mode is different from the [headless mode](headless-mode), which is non-interactive and better for scripting. + +## With Python + +To start an interactive OpenHands session via the command line, follow these steps: + +1. Ensure you have followed the [Development setup instructions](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md) + +2. Run the following command: + +```bash +poetry run python -m openhands.core.cli +``` + +This command will start an interactive session where you can input tasks and receive responses from OpenHands. + +You'll need to be sure to set your model, API key, and other settings via environment variables +[or the `config.toml` file](https://github.com/All-Hands-AI/OpenHands/blob/main/config.template.toml). + + +## With Docker + +To run OpenHands in CLI mode with Docker, follow these steps: + +1. Set `WORKSPACE_BASE` to the directory you want OpenHands to edit: + +```bash +WORKSPACE_BASE=$(pwd)/workspace +``` + +2. Set `LLM_MODEL` to the model you want to use: + +```bash +LLM_MODEL="anthropic/claude-3-5-sonnet-20240620" +``` + +3. Set `LLM_API_KEY` to your API key: + +```bash +LLM_API_KEY="abcde" +``` + +4. Run the following Docker command: + +```bash +docker run -it \ + --pull=always \ + -e SANDBOX_USER_ID=$(id -u) \ + -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \ + -e LLM_API_KEY=$LLM_API_KEY \ + -e LLM_MODEL=$LLM_MODEL \ + -v $WORKSPACE_BASE:/opt/workspace_base \ + -v /var/run/docker.sock:/var/run/docker.sock \ + --add-host host.docker.internal:host-gateway \ + --name openhands-app-$(date +%Y%m%d%H%M%S) \ + ghcr.io/all-hands-ai/openhands:0.9 \ + python -m openhands.core.cli +``` + +This command will start an interactive session in Docker where you can input tasks and receive responses from OpenHands. + +## Examples of CLI Commands and Expected Outputs + +Here are some examples of CLI commands and their expected outputs: + +### Example 1: Simple Task + +```bash +How can I help? >> Write a Python script that prints "Hello, World!" +``` + +Expected Output: + +```bash +🤖 Sure! Here is a Python script that prints "Hello, World!": + +❯ print("Hello, World!") +``` + +### Example 2: Bash Command + +```bash +How can I help? >> Create a directory named "test_dir" +``` + +Expected Output: + +```bash +🤖 Creating a directory named "test_dir": + +❯ mkdir test_dir +``` + +### Example 3: Error Handling + +```bash +How can I help? >> Delete a non-existent file +``` + +Expected Output: + +```bash +🤖 An error occurred. Please try again. 
+``` diff --git a/docs/modules/usage/how-to/custom-sandbox-guide.md b/docs/modules/usage/how-to/custom-sandbox-guide.md index 7367499745..fa16fadcbc 100644 --- a/docs/modules/usage/how-to/custom-sandbox-guide.md +++ b/docs/modules/usage/how-to/custom-sandbox-guide.md @@ -1,26 +1,19 @@ -# Create and Use a Custom Docker Sandbox +# Custom Sandbox -The default OpenHands sandbox comes with a [minimal ubuntu configuration](https://github.com/All-Hands-AI/OpenHands/blob/main/containers/sandbox/Dockerfile). -Your use case may need additional software installed by default. +The sandbox is where the agent does its work. Instead of running commands directly on your computer +(which could be dangerous), the agent runs them inside of a Docker container. + +The default OpenHands sandbox (`python-nodejs:python3.11-nodejs22` +from [nikolaik/python-nodejs](https://hub.docker.com/r/nikolaik/python-nodejs)) comes with some packages installed such +as Python and Node.js, but your use case may need additional software installed by default. There are two ways you can do so: -1. Use an existing image from docker hub. For instance, if you want to have `nodejs` installed, you can do so by using the `node:20` image +1. Use an existing image from Docker Hub 2. Creating your own custom docker image and using it If you want to take the first approach, you can skip the `Create Your Docker Image` section. -For a more feature-rich environment, you might consider using pre-built images like **[nikolaik/python-nodejs](https://hub.docker.com/r/nikolaik/python-nodejs)**, which comes with both Python and Node.js pre-installed, along with many other useful tools and libraries, like: - -- Node.js: 22.x -- npm: 10.x -- yarn: stable -- Python: latest -- pip: latest -- pipenv: latest -- poetry: latest -- uv: latest - ## Setup Make sure you are able to run OpenHands using the [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md) first. @@ -79,7 +72,7 @@ Run OpenHands by running ```make run``` in the top level directory. Navigate to ```localhost:3001``` and check if your desired dependencies are available. -In the case of the example above, running ```node -v``` in the terminal produces ```v20.15.0``` +In the case of the example above, running ```node -v``` in the terminal produces ```v20.15.0```. Congratulations! diff --git a/docs/modules/usage/how-to/evaluation-harness.md b/docs/modules/usage/how-to/evaluation-harness.md index 903d079728..cb9b64754e 100644 --- a/docs/modules/usage/how-to/evaluation-harness.md +++ b/docs/modules/usage/how-to/evaluation-harness.md @@ -1,4 +1,4 @@ -# Contribute to OpenHands Evaluation Harness +# Evaluation This guide provides an overview of how to integrate your own evaluation benchmark into the OpenHands framework. @@ -12,7 +12,7 @@ Here's an example configuration file you can use to define and use multiple LLMs ```toml [llm] # IMPORTANT: add your API key here, and set the model to the one you want to evaluate -model = "gpt-4o-2024-05-13" +model = "claude-3-5-sonnet-20240620" api_key = "sk-XXX" [llm.eval_gpt4_1106_preview_llm] diff --git a/docs/modules/usage/how-to/headless-mode.md b/docs/modules/usage/how-to/headless-mode.md index d354826ab6..ea620c65ee 100644 --- a/docs/modules/usage/how-to/headless-mode.md +++ b/docs/modules/usage/how-to/headless-mode.md @@ -1,12 +1,9 @@ -# Running in Headless / CLI Mode +# Headless Mode -You can run OpenHands via a CLI, without starting the web application. This makes it easy -to automate tasks with OpenHands. 
There are 2 main modes of operation: +You can run OpenHands with a single command, without starting the web application. +This makes it easy to write scripts and automate tasks with OpenHands. -* **Headless** : Designed for use with scripts -* **CLI** : Designed for interactive use via a console - -As with other modes, the environment is configurable via environment variables or by saving values into [config.toml](https://github.com/All-Hands-AI/OpenHands/blob/main/config.template.toml) +This is different from [CLI Mode](cli-mode), which is interactive, and better for active development. ## With Python @@ -14,34 +11,36 @@ To run OpenHands in headless mode with Python, [follow the Development setup instructions](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md), and then run: -### Headless with Python - ```bash poetry run python -m openhands.core.main -t "write a bash script that prints hi" ``` -### CLI with Python +You'll need to be sure to set your model, API key, and other settings via environment variables +[or the `config.toml` file](https://github.com/All-Hands-AI/OpenHands/blob/main/config.template.toml). + +## With Docker + +1. Set `WORKSPACE_BASE` to the directory you want OpenHands to edit: ```bash -poetry run python -m openhands.core.cli - -How can I help? >> write a bash script that prints hi +WORKSPACE_BASE=$(pwd)/workspace ``` -## Headless With Docker - -To run OpenHands in headless mode with Docker, run: +2. Set `LLM_MODEL` to the model you want to use: ```bash -# Set WORKSPACE_BASE to the directory you want OpenHands to edit -WORKSPACE_BASE=$(pwd)/workspace +LLM_MODEL="anthropic/claude-3-5-sonnet-20240620" +``` -# Set LLM_API_KEY to an API key, e.g. for OpenAI or Anthropic +3. Set `LLM_API_KEY` to your API key: + +```bash LLM_API_KEY="abcde" +``` -# Set LLM_MODEL to the model you want to use -LLM_MODEL="gpt-4o" +4. Run the following Docker command: +```bash docker run -it \ --pull=always \ -e SANDBOX_USER_ID=$(id -u) \ @@ -53,34 +52,5 @@ docker run -it \ --add-host host.docker.internal:host-gateway \ --name openhands-app-$(date +%Y%m%d%H%M%S) \ ghcr.io/all-hands-ai/openhands:0.9 \ - poetry run python -m openhands.core.main \ - -t "Write a bash script that prints Hello World" -``` - -## CLI With Docker - -To run OpenHands in cli mode with Docker, run: - -```bash -# Set WORKSPACE_BASE to the directory you want OpenHands to edit -WORKSPACE_BASE=$(pwd)/workspace - -# Set LLM_API_KEY to an API key, e.g. 
for OpenAI or Anthropic -LLM_API_KEY="abcde" - -# Set LLM_MODEL to the model you want to use -LLM_MODEL="gpt-4o" - -docker run -it \ - --pull=always \ - -e SANDBOX_USER_ID=$(id -u) \ - -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \ - -e LLM_API_KEY=$LLM_API_KEY \ - -e LLM_MODEL=$LLM_MODEL \ - -v $WORKSPACE_BASE:/opt/workspace_base \ - -v /var/run/docker.sock:/var/run/docker.sock \ - --add-host host.docker.internal:host-gateway \ - --name openhands-app-$(date +%Y%m%d%H%M%S) \ - ghcr.io/all-hands-ai/openhands:0.9 \ - poetry run python -m openhands.core.cli + python -m openhands.core.main -t "write a bash script that prints hi" ``` diff --git a/docs/modules/usage/how-to/how-to.md b/docs/modules/usage/how-to/how-to.md deleted file mode 100644 index bff43fc3e2..0000000000 --- a/docs/modules/usage/how-to/how-to.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -sidebar_position: 6 ---- - -# 🔎 How To Section diff --git a/docs/modules/usage/how-to/openshift-example.md b/docs/modules/usage/how-to/openshift-example.md index e33e59eaa5..1f259aaaa0 100644 --- a/docs/modules/usage/how-to/openshift-example.md +++ b/docs/modules/usage/how-to/openshift-example.md @@ -1,6 +1,6 @@ -# Use OpenHands in OpenShift/K8S +# Kubernetes -There are different ways this can be accomplished. This guide goes through one possible way: +There are different ways you might run OpenHands on Kubernetes or OpenShift. This guide goes through one possible way: 1. Create a PV "as a cluster admin" to map workspace_base data and docker directory to the pod through the worker node 2. Create a PVC to be able to mount those PVs to the pod 3. Create a pod which contains two containers; the OpenHands and Sandbox containers diff --git a/docs/modules/usage/intro.mdx b/docs/modules/usage/intro.mdx deleted file mode 100644 index 307473e187..0000000000 --- a/docs/modules/usage/intro.mdx +++ /dev/null @@ -1,72 +0,0 @@ ---- -sidebar_position: 1 ---- - -# 💻 OpenHands - -OpenHands is an **autonomous AI software engineer** capable of executing complex engineering tasks and collaborating actively with users on software development projects. -This project is fully open-source, so you can use and modify it however you like. - -:::tip -Explore the codebase of OpenHands on [GitHub](https://github.com/All-Hands-AI/OpenHands) or join one of our communities! - - - Contributors - - - Forks - - - Stargazers - - - Issues - -

- - MIT License - -

- - Join our Slack community - - Join our Discord community -::: - -## 🛠️ Getting Started - -[Check out the getting started guide on Github](https://github.com/All-Hands-AI/OpenHands?tab=readme-ov-file#-getting-started) - -[contributors-shield]: https://img.shields.io/github/contributors/All-Hands-AI/OpenHands?style=for-the-badge -[contributors-url]: https://github.com/All-Hands-AI/OpenHands/graphs/contributors -[forks-shield]: https://img.shields.io/github/forks/All-Hands-AI/OpenHands?style=for-the-badge -[forks-url]: https://github.com/All-Hands-AI/OpenHands/network/members -[stars-shield]: https://img.shields.io/github/stars/All-Hands-AI/OpenHands?style=for-the-badge -[stars-url]: https://github.com/All-Hands-AI/OpenHands/stargazers -[issues-shield]: https://img.shields.io/github/issues/All-Hands-AI/OpenHands?style=for-the-badge -[issues-url]: https://github.com/All-Hands-AI/OpenHands/issues -[license-shield]: https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge -[license-url]: https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE diff --git a/docs/modules/usage/llms/azure-llms.md b/docs/modules/usage/llms/azure-llms.md index 3dcc03f456..a3f269f804 100644 --- a/docs/modules/usage/llms/azure-llms.md +++ b/docs/modules/usage/llms/azure-llms.md @@ -2,7 +2,7 @@ ## Completion -OpenHands uses LiteLLM for completion calls. You can find their documentation on Azure [here](https://docs.litellm.ai/docs/providers/azure) +OpenHands uses LiteLLM for completion calls. You can find their documentation on Azure [here](https://docs.litellm.ai/docs/providers/azure). ### Azure openai configs @@ -12,7 +12,7 @@ When running the OpenHands Docker image, you'll need to set the following enviro LLM_BASE_URL="" # e.g. "https://openai-gpt-4-test-v-1.openai.azure.com/" LLM_API_KEY="" LLM_MODEL="azure/" -LLM_API_VERSION="" # e.g. "2024-02-15-preview" +LLM_API_VERSION="" # e.g. "2024-02-15-preview" ``` Example: @@ -31,15 +31,18 @@ docker run -it \ ghcr.io/all-hands-ai/openhands:main ``` -You can set the LLM_MODEL and LLM_API_KEY in the OpenHands UI itself. +You can also set the model and API key in the OpenHands UI through the Settings. :::note -You can find your ChatGPT deployment name on the deployments page in Azure. It could be the same with the chat model name (e.g. 'GPT4-1106-preview'), by default or initially set, but it doesn't have to be the same. Run openhands, and when you load it in the browser, go to Settings and set model as above: "azure/<your-actual-gpt-deployment-name>". If it's not in the list, enter your own text and save it. +You can find your ChatGPT deployment name on the deployments page in Azure. It may be the same as the chat model +name (e.g. 'GPT4-1106-preview') by default, but it doesn't have to be. Run OpenHands, +and when you load it in the browser, go to Settings and set the model as above: "azure/<your-actual-gpt-deployment-name>". +If it's not in the list, you can open the Settings modal, switch to "Custom Model", and enter your model name. ::: ## Embeddings -OpenHands uses llama-index for embeddings. You can find their documentation on Azure [here](https://docs.llamaindex.ai/en/stable/api_reference/embeddings/azure_openai/) +OpenHands uses llama-index for embeddings. You can find their documentation on Azure [here](https://docs.llamaindex.ai/en/stable/api_reference/embeddings/azure_openai/). 
### Azure openai configs @@ -50,6 +53,6 @@ When running OpenHands in Docker, set the following environment variables using ``` LLM_EMBEDDING_MODEL="azureopenai" -LLM_EMBEDDING_DEPLOYMENT_NAME="" # e.g. "TextEmbedding..." -LLM_API_VERSION="" # e.g. "2024-02-15-preview" +LLM_EMBEDDING_DEPLOYMENT_NAME="" # e.g. "TextEmbedding..." +LLM_API_VERSION="" # e.g. "2024-02-15-preview" ``` diff --git a/docs/modules/usage/llms/google-llms.md b/docs/modules/usage/llms/google-llms.md index 31789a5fb3..5ead0ebc45 100644 --- a/docs/modules/usage/llms/google-llms.md +++ b/docs/modules/usage/llms/google-llms.md @@ -2,7 +2,7 @@ ## Completion -OpenHands uses LiteLLM for completion calls. The following resources are relevant for using OpenHands with Google's LLMs +OpenHands uses LiteLLM for completion calls. The following resources are relevant for using OpenHands with Google's LLMs: - [Gemini - Google AI Studio](https://docs.litellm.ai/docs/providers/gemini) - [VertexAI - Google Cloud Platform](https://docs.litellm.ai/docs/providers/vertex) diff --git a/docs/modules/usage/llms/llms.md b/docs/modules/usage/llms/llms.md index 9c2ba0105b..9dcfccc2a1 100644 --- a/docs/modules/usage/llms/llms.md +++ b/docs/modules/usage/llms/llms.md @@ -1,17 +1,23 @@ --- -sidebar_position: 2 +sidebar_position: 3 --- # 🤖 LLM Backends OpenHands can connect to any LLM supported by LiteLLM. However, it requires a powerful model to work. The following are verified by the community to work with OpenHands: + * claude-3-5-sonnet * gemini-1.5-pro / gemini-1.5-flash * gpt-4 / gpt-4o * llama-3.1-405b / hermes-3-llama-3.1-405b * wizardlm-2-8x22b +:::warning +OpenHands will issue many prompts to the LLM you configure. Most of these LLMs cost money, so be sure to set spending +limits and monitor usage. +::: + If you have successfully run OpenHands with specific LLMs not in the list, please add them to the verified list. We also encourage you to open a PR to share your setup process to help others using the same provider and LLM! @@ -19,39 +25,39 @@ For a full list of the providers and models available, please consult the [litellm documentation](https://docs.litellm.ai/docs/providers). ## Local and Open Source Models + Most current local and open source models are not as powerful. When using such models, you may see long wait times between messages, poor responses, or errors about malformed JSON. OpenHands can only be as powerful as the models driving it. However, if you do find ones that work, please add them to the verified list above. ## LLM Configuration -:::warning -OpenHands will issue many prompts to the LLM you configure. Most of these LLMs cost money, so be sure to set spending -limits and monitor usage. -::: The `LLM_MODEL` environment variable controls which model is used in programmatic interactions. But when using the OpenHands UI, you'll need to choose your model in the settings window. 
The following environment variables might be necessary for some LLMs/providers: -- `LLM_API_KEY` -- `LLM_BASE_URL` -- `LLM_EMBEDDING_MODEL` -- `LLM_EMBEDDING_DEPLOYMENT_NAME` -- `LLM_API_VERSION` -- `LLM_DROP_PARAMS` +* `LLM_API_KEY` +* `LLM_API_VERSION` +* `LLM_BASE_URL` +* `LLM_EMBEDDING_MODEL` +* `LLM_EMBEDDING_DEPLOYMENT_NAME` +* `LLM_DROP_PARAMS` +* `LLM_DISABLE_VISION` +* `LLM_CACHING_PROMPT` We have a few guides for running OpenHands with specific model providers: -- [OpenAI](llms/openai-llms) -- [ollama](llms/local-llms) -- [Azure](llms/azure-llms) -- [Google](llms/google-llms) +* [Azure](llms/azure-llms) +* [Google](llms/google-llms) +* [ollama](llms/local-llms) +* [OpenAI](llms/openai-llms) ### API retries and rate limits Some LLMs have rate limits and may require retries. OpenHands will automatically retry requests if it receives a 429 error or API connection error. You can set the following environment variables to control the number of retries and the time between retries: -* `LLM_NUM_RETRIES` (Default of 5) -* `LLM_RETRY_MIN_WAIT` (Default of 3 seconds) -* `LLM_RETRY_MAX_WAIT` (Default of 60 seconds) + +* `LLM_NUM_RETRIES` (Default of 8) +* `LLM_RETRY_MIN_WAIT` (Default of 15 seconds) +* `LLM_RETRY_MAX_WAIT` (Default of 120 seconds) diff --git a/docs/modules/usage/llms/local-llms.md b/docs/modules/usage/llms/local-llms.md index a43008c530..da134101e2 100644 --- a/docs/modules/usage/llms/local-llms.md +++ b/docs/modules/usage/llms/local-llms.md @@ -5,7 +5,7 @@ When using a Local LLM, OpenHands may have limited functionality. ::: Ensure that you have the Ollama server up and running. -For detailed startup instructions, refer to [here](https://github.com/ollama/ollama) +For detailed startup instructions, refer to [here](https://github.com/ollama/ollama). This guide assumes you've started ollama with `ollama serve`. If you're running ollama differently (e.g. inside docker), the instructions might need to be modified. Please note that if you're running WSL the default ollama configuration blocks requests from docker containers. See [here](#configuring-ollama-service-wsl-en). @@ -32,7 +32,7 @@ starcoder2:latest f67ae0f64584 1.7 GB 19 hours ago ### Docker -Use the instructions [here](../intro) to start OpenHands using Docker. +Use the instructions [here](../getting-started) to start OpenHands using Docker. But when running `docker run`, you'll need to add a few more arguments: ```bash diff --git a/docs/modules/usage/llms/openai-llms.md b/docs/modules/usage/llms/openai-llms.md index e8bf74320a..07c8e547a6 100644 --- a/docs/modules/usage/llms/openai-llms.md +++ b/docs/modules/usage/llms/openai-llms.md @@ -4,72 +4,20 @@ OpenHands uses [LiteLLM](https://www.litellm.ai/) to make calls to OpenAI's chat ## Configuration -### Manual Configuration - -When running the OpenHands Docker image, you'll need to set the following environment variables: - -```sh -LLM_MODEL="openai/" # e.g. "openai/gpt-4o" -LLM_API_KEY="" -``` +When running the OpenHands Docker image, you'll need to choose a model and set your API key in the OpenHands UI through the Settings. To see a full list of OpenAI models that LiteLLM supports, please visit https://docs.litellm.ai/docs/providers/openai#openai-chat-completion-models. To find or create your OpenAI Project API Key, please visit https://platform.openai.com/api-keys. 
-**Example**: - -```sh -export WORKSPACE_BASE=$(pwd)/workspace - -docker run -it \ - --pull=always \ - -e SANDBOX_USER_ID=$(id -u) \ - -e LLM_MODEL="openai/" \ - -e LLM_API_KEY="" \ - -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \ - -v $WORKSPACE_BASE:/opt/workspace_base \ - -v /var/run/docker.sock:/var/run/docker.sock \ - -p 3000:3000 \ - --add-host host.docker.internal:host-gateway \ - --name openhands-app-$(date +%Y%m%d%H%M%S) \ - ghcr.io/opendevin/opendevin:0.8 -``` - -### UI Configuration - -You can also directly set the `LLM_MODEL` and `LLM_API_KEY` in the OpenHands client itself. Follow this guide to get up and running with the OpenHands client. - -From there, you can set your model and API key in the settings window. - ## Using OpenAI-Compatible Endpoints Just as for OpenAI Chat completions, we use LiteLLM for OpenAI-compatible endpoints. You can find their full documentation on this topic [here](https://docs.litellm.ai/docs/providers/openai_compatible). -When running the OpenHands Docker image, you'll need to set the following environment variables: +When running the OpenHands Docker image, you'll need to set the following environment variables using `-e`: ```sh -LLM_BASE_URL="" # e.g. "http://0.0.0.0:3000" -LLM_MODEL="openai/" # e.g. "openai/mistral" -LLM_API_KEY="" +LLM_BASE_URL="" # e.g. "http://0.0.0.0:3000" ``` -**Example**: - -```sh -export WORKSPACE_BASE=$(pwd)/workspace - -docker run -it \ - --pull=always \ - -e SANDBOX_USER_ID=$(id -u) \ - -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \ - -e LLM_BASE_URL="" \ - -e LLM_MODEL="openai/" \ - -e LLM_API_KEY="" \ - -v $WORKSPACE_BASE:/opt/workspace_base \ - -v /var/run/docker.sock:/var/run/docker.sock \ - -p 3000:3000 \ - --add-host host.docker.internal:host-gateway \ - --name openhands-app-$(date +%Y%m%d%H%M%S) \ - ghcr.io/opendevin/opendevin:0.8 -``` +Then set your model and API key in the OpenHands UI through the Settings. 
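As a concrete sketch of the OpenAI-compatible setup above, `LLM_BASE_URL` can be passed with `-e` alongside the same `docker run` command used in the Quick Start. The endpoint below (a server on host port 8000, reached through `host.docker.internal`) is a hypothetical placeholder, not a documented default:

```bash
# Hypothetical endpoint: replace LLM_BASE_URL with the address of your own
# OpenAI-compatible server.
export WORKSPACE_BASE=$(pwd)/workspace

docker run -it --pull=always \
    -e LLM_BASE_URL="http://host.docker.internal:8000" \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -v $WORKSPACE_BASE:/opt/workspace_base \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -p 3000:3000 \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
    ghcr.io/all-hands-ai/openhands:0.9
```

From there, a model such as `openai/mistral` (the `openai/` prefix tells LiteLLM to use its OpenAI-compatible code path) and an API key can be entered through the Settings UI.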
diff --git a/docs/package-lock.json b/docs/package-lock.json index 9255d756d2..cdc028b640 100644 --- a/docs/package-lock.json +++ b/docs/package-lock.json @@ -24,7 +24,7 @@ "@docusaurus/module-type-aliases": "^3.5.1", "@docusaurus/tsconfig": "^3.5.2", "@docusaurus/types": "^3.5.1", - "typescript": "~5.5.4" + "typescript": "~5.6.2" }, "engines": { "node": ">=18.0" @@ -14853,9 +14853,9 @@ } }, "node_modules/typescript": { - "version": "5.5.4", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.4.tgz", - "integrity": "sha512-Mtq29sKDAEYP7aljRgtPOpTvOfbwRWlS6dPRzwjdE+C0R4brX/GUyhHSecbHMFLNBLcJIPt9nl9yG5TZ1weH+Q==", + "version": "5.6.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.2.tgz", + "integrity": "sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==", "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" diff --git a/docs/package.json b/docs/package.json index 165c8f8b7a..fa95edd996 100644 --- a/docs/package.json +++ b/docs/package.json @@ -31,7 +31,7 @@ "@docusaurus/module-type-aliases": "^3.5.1", "@docusaurus/tsconfig": "^3.5.2", "@docusaurus/types": "^3.5.1", - "typescript": "~5.5.4" + "typescript": "~5.6.2" }, "browserslist": { "production": [ diff --git a/docs/sidebars.ts b/docs/sidebars.ts index ed1b823008..2afd99c6e5 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -1,8 +1,79 @@ import type { SidebarsConfig } from "@docusaurus/plugin-content-docs"; const sidebars: SidebarsConfig = { - docsSidebar: [{ type: "autogenerated", dirName: "usage" }], apiSidebar: [require("./modules/python/sidebar.json")], + docsSidebar: [{ + type: 'doc', + label: 'Getting Started', + id: 'usage/getting-started', + }, { + type: 'doc', + label: 'Troubleshooting', + id: 'usage/troubleshooting/troubleshooting', + }, { + type: 'doc', + label: 'Feedback', + id: 'usage/feedback', + }, { + type: 'category', + label: 'How-to Guides', + items: [{ + type: 'doc', + id: 'usage/how-to/cli-mode', + }, { + type: 'doc', + id: 'usage/how-to/headless-mode', + }, { + type: 'doc', + id: 'usage/how-to/custom-sandbox-guide', + }, { + type: 'doc', + id: 'usage/how-to/evaluation-harness', + }, { + type: 'doc', + id: 'usage/how-to/openshift-example', + }] + }, { + type: 'category', + label: 'LLMs', + items: [{ + type: 'doc', + label: 'Overview', + id: 'usage/llms/llms', + }, { + type: 'doc', + label: 'OpenAI', + id: 'usage/llms/openai-llms', + }, { + type: 'doc', + label: 'Azure', + id: 'usage/llms/azure-llms', + }, { + type: 'doc', + label: 'Google', + id: 'usage/llms/google-llms', + }, { + type: 'doc', + label: 'Local/ollama', + id: 'usage/llms/local-llms', + }], + }, { + type: 'category', + label: 'Architecture', + items: [{ + type: 'doc', + label: 'Backend', + id: 'usage/architecture/backend', + }, { + type: 'doc', + label: 'Runtime', + id: 'usage/architecture/runtime', + }], + }, { + type: 'doc', + label: 'About', + id: 'usage/about', + }], }; export default sidebars; diff --git a/docs/src/components/CustomFooter.tsx b/docs/src/components/CustomFooter.tsx index 0a5eddad99..b461d4dd17 100644 --- a/docs/src/components/CustomFooter.tsx +++ b/docs/src/components/CustomFooter.tsx @@ -7,17 +7,6 @@ function CustomFooter() { return (