Mirror of https://github.com/OpenHands/OpenHands.git, synced 2025-12-26 05:48:36 +08:00
Customize LLM config per agent (#2756)
Currently, OpenDevin uses a global singleton LLM config and a global singleton agent config. This PR allows users to configure an LLM config for each agent. A hypothetically useful scenario is to use a cheaper LLM for repo exploration / code search, and a more powerful LLM to actually do the problem solving (CodeActAgent). Partially solves #2075 (web GUI improvement is not the goal of this PR).
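For illustration, here is a minimal sketch of how the per-agent lookup added in this PR is meant to resolve; the `gpt3` and `RepoExplorerAgent` names are the hypothetical ones used in `config.template.toml`, and the exact call sites in the codebase may differ:

```python
# Minimal sketch: register a cheaper LLM config group and point one agent at it.
# Names are illustrative; in practice these come from config.toml.
from opendevin.core.config import AgentConfig, LLMConfig, config

config.set_llm_config(LLMConfig(model='gpt-3.5-turbo'), 'gpt3')
config.set_agent_config(AgentConfig(llm_config='gpt3'), 'RepoExplorerAgent')

# An agent with an explicit llm_config gets its own LLM settings...
print(config.get_llm_config_from_agent('RepoExplorerAgent').model)  # gpt-3.5-turbo
# ...while agents without one fall back to the default [llm] group.
print(config.get_llm_config_from_agent('CodeActAgent').model)
```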
parent 23e2d01cf5
commit c68478f470
2 .github/workflows/dummy-agent-test.yml (vendored)
@@ -31,7 +31,7 @@ jobs:
- name: Run tests
run: |
set -e
poetry run python opendevin/core/main.py -t "do a flip" -m ollama/not-a-model -d ./workspace/ -c DummyAgent
poetry run python opendevin/core/main.py -t "do a flip" -d ./workspace/ -c DummyAgent
- name: Check exit code
run: |
if [ $? -ne 0 ]; then

@ -8,6 +8,7 @@ from agenthub.codeact_agent.prompt import (
|
||||
)
|
||||
from opendevin.controller.agent import Agent
|
||||
from opendevin.controller.state.state import State
|
||||
from opendevin.core.config import config
|
||||
from opendevin.events.action import (
|
||||
Action,
|
||||
AgentDelegateAction,
|
||||
@ -60,8 +61,11 @@ def get_action_message(action: Action) -> dict[str, str] | None:
|
||||
|
||||
|
||||
def get_observation_message(obs) -> dict[str, str] | None:
|
||||
max_message_chars = config.get_llm_config_from_agent(
|
||||
'CodeActAgent'
|
||||
).max_message_chars
|
||||
if isinstance(obs, CmdOutputObservation):
|
||||
content = 'OBSERVATION:\n' + truncate_content(obs.content)
|
||||
content = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars)
|
||||
content += (
|
||||
f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]'
|
||||
)
|
||||
@ -76,10 +80,12 @@ def get_observation_message(obs) -> dict[str, str] | None:
|
||||
' already displayed to user'
|
||||
)
|
||||
content = '\n'.join(splitted)
|
||||
content = truncate_content(content)
|
||||
content = truncate_content(content, max_message_chars)
|
||||
return {'role': 'user', 'content': content}
|
||||
elif isinstance(obs, AgentDelegateObservation):
|
||||
content = 'OBSERVATION:\n' + truncate_content(str(obs.outputs))
|
||||
content = 'OBSERVATION:\n' + truncate_content(
|
||||
str(obs.outputs), max_message_chars
|
||||
)
|
||||
return {'role': 'user', 'content': content}
|
||||
return None
|
||||
|
||||
|
||||
@ -7,6 +7,7 @@ from agenthub.codeact_swe_agent.prompt import (
|
||||
from agenthub.codeact_swe_agent.response_parser import CodeActSWEResponseParser
|
||||
from opendevin.controller.agent import Agent
|
||||
from opendevin.controller.state.state import State
|
||||
from opendevin.core.config import config
|
||||
from opendevin.events.action import (
|
||||
Action,
|
||||
AgentFinishAction,
|
||||
@ -52,8 +53,11 @@ def get_action_message(action: Action) -> dict[str, str] | None:
|
||||
|
||||
|
||||
def get_observation_message(obs) -> dict[str, str] | None:
|
||||
max_message_chars = config.get_llm_config_from_agent(
|
||||
'CodeActSWEAgent'
|
||||
).max_message_chars
|
||||
if isinstance(obs, CmdOutputObservation):
|
||||
content = 'OBSERVATION:\n' + truncate_content(obs.content)
|
||||
content = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars)
|
||||
content += (
|
||||
f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]'
|
||||
)
|
||||
@ -68,7 +72,7 @@ def get_observation_message(obs) -> dict[str, str] | None:
|
||||
' already displayed to user'
|
||||
)
|
||||
content = '\n'.join(splitted)
|
||||
content = truncate_content(content)
|
||||
content = truncate_content(content, max_message_chars)
|
||||
return {'role': 'user', 'content': content}
|
||||
return None
|
||||
|
||||
|
||||
@ -2,6 +2,7 @@ from jinja2 import BaseLoader, Environment
|
||||
|
||||
from opendevin.controller.agent import Agent
|
||||
from opendevin.controller.state.state import State
|
||||
from opendevin.core.config import config
|
||||
from opendevin.core.utils import json
|
||||
from opendevin.events.action import Action
|
||||
from opendevin.events.serialization.action import action_from_dict
|
||||
@ -32,6 +33,9 @@ def history_to_json(history: ShortTermHistory, max_events=20, **kwargs):
|
||||
"""
|
||||
Serialize and simplify history to str format
|
||||
"""
|
||||
# TODO: get agent specific llm config
|
||||
llm_config = config.get_llm_config()
|
||||
max_message_chars = llm_config.max_message_chars
|
||||
|
||||
processed_history = []
|
||||
event_count = 0
|
||||
@ -39,7 +43,7 @@ def history_to_json(history: ShortTermHistory, max_events=20, **kwargs):
|
||||
for event in history.get_events(reverse=True):
|
||||
if event_count >= max_events:
|
||||
break
|
||||
processed_history.append(event_to_memory(event))
|
||||
processed_history.append(event_to_memory(event, max_message_chars))
|
||||
event_count += 1
|
||||
|
||||
# history is in reverse order, let's fix it
|
||||
|
||||
@ -29,7 +29,7 @@ from opendevin.llm.llm import LLM
|
||||
from opendevin.memory.condenser import MemoryCondenser
|
||||
from opendevin.runtime.tools import RuntimeTool
|
||||
|
||||
if config.agent.memory_enabled:
|
||||
if config.get_agent_config('MonologueAgent').memory_enabled:
|
||||
from opendevin.memory.memory import LongTermMemory
|
||||
|
||||
|
||||
@ -78,7 +78,7 @@ class MonologueAgent(Agent):
|
||||
raise AgentNoInstructionError()
|
||||
|
||||
self.initial_thoughts = []
|
||||
if config.agent.memory_enabled:
|
||||
if config.get_agent_config('MonologueAgent').memory_enabled:
|
||||
self.memory = LongTermMemory()
|
||||
else:
|
||||
self.memory = None
|
||||
@ -89,6 +89,9 @@ class MonologueAgent(Agent):
|
||||
self._initialized = True
|
||||
|
||||
def _add_initial_thoughts(self, task):
|
||||
max_message_chars = config.get_llm_config_from_agent(
|
||||
'MonologueAgent'
|
||||
).max_message_chars
|
||||
previous_action = ''
|
||||
for thought in INITIAL_THOUGHTS:
|
||||
thought = thought.replace('$TASK', task)
|
||||
@ -106,7 +109,9 @@ class MonologueAgent(Agent):
|
||||
observation = BrowserOutputObservation(
|
||||
content=thought, url='', screenshot=''
|
||||
)
|
||||
self.initial_thoughts.append(event_to_memory(observation))
|
||||
self.initial_thoughts.append(
|
||||
event_to_memory(observation, max_message_chars)
|
||||
)
|
||||
previous_action = ''
|
||||
else:
|
||||
action: Action = NullAction()
|
||||
@ -133,7 +138,7 @@ class MonologueAgent(Agent):
|
||||
previous_action = ActionType.BROWSE
|
||||
else:
|
||||
action = MessageAction(thought)
|
||||
self.initial_thoughts.append(event_to_memory(action))
|
||||
self.initial_thoughts.append(event_to_memory(action, max_message_chars))
|
||||
|
||||
def step(self, state: State) -> Action:
|
||||
"""
|
||||
@ -145,7 +150,9 @@ class MonologueAgent(Agent):
|
||||
Returns:
|
||||
- Action: The next action to take based on LLM response
|
||||
"""
|
||||
|
||||
max_message_chars = config.get_llm_config_from_agent(
|
||||
'MonologueAgent'
|
||||
).max_message_chars
|
||||
goal = state.get_current_user_intent()
|
||||
self._initialize(goal)
|
||||
|
||||
@ -153,7 +160,7 @@ class MonologueAgent(Agent):
|
||||
|
||||
# add the events from state.history
|
||||
for event in state.history.get_events():
|
||||
recent_events.append(event_to_memory(event))
|
||||
recent_events.append(event_to_memory(event, max_message_chars))
|
||||
|
||||
# add the last messages to long term memory
|
||||
if self.memory is not None:
|
||||
@ -163,9 +170,11 @@ class MonologueAgent(Agent):
|
||||
# this should still work
|
||||
# we will need to do this differently: find out if there really is an action or an observation in this step
|
||||
if last_action:
|
||||
self.memory.add_event(event_to_memory(last_action))
|
||||
self.memory.add_event(event_to_memory(last_action, max_message_chars))
|
||||
if last_observation:
|
||||
self.memory.add_event(event_to_memory(last_observation))
|
||||
self.memory.add_event(
|
||||
event_to_memory(last_observation, max_message_chars)
|
||||
)
|
||||
|
||||
# the action prompt with initial thoughts and recent events
|
||||
prompt = prompts.get_request_action_prompt(
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
from opendevin.controller.state.state import State
|
||||
from opendevin.core.config import config
|
||||
from opendevin.core.logger import opendevin_logger as logger
|
||||
from opendevin.core.schema import ActionType
|
||||
from opendevin.core.utils import json
|
||||
@ -128,6 +129,9 @@ def get_prompt(state: State) -> str:
|
||||
Returns:
|
||||
- str: The formatted string prompt with historical values
|
||||
"""
|
||||
max_message_chars = config.get_llm_config_from_agent(
|
||||
'PlannerAgent'
|
||||
).max_message_chars
|
||||
|
||||
# the plan
|
||||
plan_str = json.dumps(state.root_task.to_dict(), indent=2)
|
||||
@ -142,7 +146,7 @@ def get_prompt(state: State) -> str:
|
||||
break
|
||||
if latest_action == NullAction() and isinstance(event, Action):
|
||||
latest_action = event
|
||||
history_dicts.append(event_to_memory(event))
|
||||
history_dicts.append(event_to_memory(event, max_message_chars))
|
||||
|
||||
# history_dicts is in reverse order, lets fix it
|
||||
history_dicts.reverse()
|
||||
@ -160,7 +164,7 @@ def get_prompt(state: State) -> str:
|
||||
plan_status = "You're not currently working on any tasks. Your next action MUST be to mark a task as in_progress."
|
||||
|
||||
# the hint, based on the last action
|
||||
hint = get_hint(event_to_memory(latest_action).get('action', ''))
|
||||
hint = get_hint(event_to_memory(latest_action, max_message_chars).get('action', ''))
|
||||
logger.info('HINT:\n' + hint, extra={'msg_type': 'DETAIL'})
|
||||
|
||||
# the last relevant user message (the task)
|
||||
|
||||
@@ -79,8 +79,12 @@ persist_sandbox = false
# Use host network
#use_host_network = false

# Name of the default agent
#default_agent = "CodeActAgent"

#################################### LLM #####################################
# Configuration for the LLM model
# Configuration for LLM models (group name starts with 'llm')
# use 'llm' for the default LLM config
##############################################################################
[llm]
# AWS access key ID
@@ -149,8 +153,18 @@ model = "gpt-4o"
# Top p for the API
#top_p = 0.5

[llm.gpt3]
# API key to use
api_key = "your-api-key"

# Model to use
model = "gpt-3.5"

#################################### Agent ###################################
# Configuration for the agent
# Configuration for agents (group name starts with 'agent')
# Use 'agent' for the default agent config
# otherwise, group name must be `agent.<agent_name>` (case-sensitive), e.g.
# agent.CodeActAgent
##############################################################################
[agent]
# Memory enabled
@@ -159,8 +173,13 @@ model = "gpt-4o"
# Memory maximum threads
#memory_max_threads = 2

# Name of the agent
#name = "CodeActAgent"
# LLM config group to use
#llm_config = 'llm'

[agent.RepoExplorerAgent]
# Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
# useful when an agent doesn't demand high quality but uses a lot of tokens
llm_config = 'gpt3'

#################################### Sandbox ###################################
# Configuration for the sandbox

75 docs/modules/usage/changelog.md (Normal file)
@@ -0,0 +1,75 @@
---
sidebar_position: 8
---

# Changelog

## 0.8 (release date: ??)

### Config breaking changes

In this release we introduced a few breaking changes to backend configurations.
If you have only been using OpenDevin via the frontend (web GUI), no action
is needed.

Here is a list of breaking config changes. They apply only to users who
run the OpenDevin CLI via `main.py`. For more detail, see [#2756](https://github.com/OpenDevin/OpenDevin/pull/2756).

#### Removal of the `--model-name` option from `main.py`

Please note that the `--model-name` (`-m`) option no longer exists. You should set up
LLM configs in `config.toml` or via environment variables.

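As a rough, illustrative sketch of what replaces `-m` internally (the agent name below is an assumption; in practice it comes from `-c` or `default_agent`), `main.py` now builds the LLM from the config group associated with the chosen agent:

```python
# Rough sketch of the selection main.py now performs instead of reading --model-name.
# 'CodeActAgent' is illustrative; the resolved model comes from config.toml / env vars.
from opendevin.core.config import config
from opendevin.llm.llm import LLM

llm = LLM(llm_config=config.get_llm_config_from_agent('CodeActAgent'))
```
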
#### LLM config groups must be subgroups of 'llm'

Prior to release 0.8, you could use an arbitrary name for an LLM config group in `config.toml`, e.g.

```toml
[gpt-4o]
model="gpt-4o"
api_key="<your_api_key>"
```

and then use the `--llm-config` CLI argument to select the desired LLM config group
by name. This no longer works. Instead, the config group must be a subgroup of `llm`,
e.g.:

```toml
[llm.gpt-4o]
model="gpt-4o"
api_key="<your_api_key>"
```

If you already have a config group named `llm`, there is no need to change it; it will be used
as the default LLM config group.

#### 'agent' group no longer contains 'name' field

Prior to release 0.8, you may or may not have a config group named `agent` that
looks like this:

```toml
[agent]
name="CodeActAgent"
memory_max_threads=2
```

Note that the `name` field has been removed. Instead, you should put a `default_agent` field
under the `core` group, e.g.

```toml
[core]
# other configs
default_agent='CodeActAgent'

[agent]
llm_config='llm'
memory_max_threads=2

[agent.CodeActAgent]
llm_config='gpt-4o'
```

Note that, similar to `llm` subgroups, you can also define `agent` subgroups.
Moreover, an agent can be associated with a specific LLM config group. For more
detail, see the examples in `config.template.toml`.
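As an illustrative sanity check (assuming the migrated `config.toml` above has been loaded), the new accessors would resolve the default agent and its LLM group roughly like this:

```python
# Illustrative only: verify the migrated config resolves as expected.
from opendevin.core.config import config

print(config.default_agent)                                     # 'CodeActAgent' from [core]
print(config.get_agent_config('CodeActAgent').llm_config)       # 'gpt-4o' from [agent.CodeActAgent]
print(config.get_llm_config_from_agent('CodeActAgent').model)   # model from [llm.gpt-4o]
```
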
@@ -53,14 +53,14 @@ api_key = "sk-XXX"

In this section, for the purpose of building an evaluation task, we don't use the standard OpenDevin web-based GUI, but rather run the OpenDevin backend from the CLI.

For example, you can run the following, which performs the specified task `-t`, with a particular model `-m` and agent `-c`, for a maximum number of iterations `-i`:
For example, you can run the following, which performs the specified task `-t`, with a particular model config `-l` and agent `-c`, for a maximum number of iterations `-i`:

```bash
poetry run python ./opendevin/core/main.py \
-i 10 \
-t "Write me a bash script that print hello world." \
-c CodeActAgent \
-m gpt-4o-2024-05-13
-l llm
```

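For reference, a minimal sketch of what `-l` maps to internally: the name is resolved by `get_llm_config_arg`, which loads the matching `[llm.<name>]` group from `config.toml`. The `eval_gpt35_turbo` group name below is illustrative:

```python
# Minimal sketch: -l <name> is resolved by get_llm_config_arg, which reads the
# matching [llm.<name>] group from config.toml and returns an LLMConfig (or None).
from opendevin.core.config import get_llm_config_arg
from opendevin.llm.llm import LLM

llm_config = get_llm_config_arg('eval_gpt35_turbo')  # illustrative group name
if llm_config is None:
    raise ValueError('Invalid toml file, cannot read eval_gpt35_turbo')
llm = LLM(llm_config=llm_config)
```
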
After running the script, you will observe the following:

@ -29,12 +29,12 @@ enable_auto_lint = true
|
||||
box_type = "ssh"
|
||||
timeout = 120
|
||||
|
||||
[eval_gpt35_turbo]
|
||||
[llm.eval_gpt35_turbo]
|
||||
model = "gpt-3.5-turbo"
|
||||
api_key = "sk-123"
|
||||
temperature = 0.0
|
||||
|
||||
[eval_gpt4o]
|
||||
[llm.eval_gpt4o]
|
||||
model = "gpt-4o"
|
||||
api_key = "sk-123"
|
||||
temperature = 0.0
|
||||
|
||||
@ -21,12 +21,12 @@ ssh_hostname = "localhost"
|
||||
enable_auto_lint = true
|
||||
|
||||
# TODO: Change these to the model you want to evaluate
|
||||
[eval_gpt4_1106_preview]
|
||||
[llm.eval_gpt4_1106_preview]
|
||||
model = "gpt-4-1106-preview"
|
||||
api_key = "XXX"
|
||||
temperature = 0.0
|
||||
|
||||
[eval_some_openai_compatible_model]
|
||||
[llm.eval_some_openai_compatible_model]
|
||||
model = "openai/MODEL_NAME"
|
||||
base_url = "https://OPENAI_COMPATIBLE_URL/v1"
|
||||
api_key = "XXX"
|
||||
|
||||
@ -39,12 +39,12 @@ ssh_hostname = "localhost"
|
||||
enable_auto_lint = true
|
||||
|
||||
# TODO: Change these to the model you want to evaluate
|
||||
[eval_gpt4_1106_preview]
|
||||
[llm.eval_gpt4_1106_preview]
|
||||
model = "gpt-4-1106-preview"
|
||||
api_key = "XXX"
|
||||
temperature = 0.0
|
||||
|
||||
[eval_azure_openai_compatible_model]
|
||||
[llm.eval_azure_openai_compatible_model]
|
||||
model = "AZURE_OPENAI_EXACT_DEPLOYMENT_MODEL_NAME"
|
||||
base_url = "AZURE_OPENAI_ENDPOINT"
|
||||
api_key = "AZURE_ENDPOINT_API_KEY"
|
||||
|
||||
@ -21,12 +21,12 @@ ssh_hostname = "localhost"
|
||||
enable_auto_lint = true
|
||||
|
||||
# TODO: Change these to the model you want to evaluate
|
||||
[eval_gpt4_1106_preview]
|
||||
[llm.eval_gpt4_1106_preview]
|
||||
model = "gpt-4-1106-preview"
|
||||
api_key = "XXX"
|
||||
temperature = 0.0
|
||||
|
||||
[eval_some_openai_compatible_model]
|
||||
[llm.eval_some_openai_compatible_model]
|
||||
model = "openai/MODEL_NAME"
|
||||
base_url = "https://OPENAI_COMPATIBLE_URL/v1"
|
||||
api_key = "XXX"
|
||||
|
||||
@ -16,12 +16,12 @@ ssh_hostname = "localhost"
|
||||
enable_auto_lint = true
|
||||
|
||||
# TODO: Change these to the model you want to evaluate
|
||||
[eval_gpt4_1106_preview]
|
||||
[llm.eval_gpt4_1106_preview_llm]
|
||||
model = "gpt-4-1106-preview"
|
||||
api_key = "XXX"
|
||||
temperature = 0.0
|
||||
|
||||
[eval_some_openai_compatible_model]
|
||||
[llm.eval_some_openai_compatible_model_llm]
|
||||
model = "openai/MODEL_NAME"
|
||||
base_url = "https://OPENAI_COMPATIBLE_URL/v1"
|
||||
api_key = "XXX"
|
||||
@ -29,9 +29,9 @@ temperature = 0.0
|
||||
```
|
||||
|
||||
## Run Inference on logic_reasoning
The following code will run inference on the first example of the ProntoQA dataset with model gpt-4o,
The following code will run inference on the first example of the ProntoQA dataset,
using OpenDevin 0.6.2 version.

```bash
./evaluation/logic_reasoning/scripts/run_infer.sh ProntoQA gpt-4o 0.6.2 1
./evaluation/logic_reasoning/scripts/run_infer.sh ProntoQA eval_gpt4_1106_preview_llm 0.6.2 1
```

@ -23,12 +23,12 @@ box_type = "ssh"
|
||||
timeout = 120
|
||||
|
||||
# TODO: Change these to the model you want to evaluate
|
||||
[eval_gpt4_1106_preview]
|
||||
[llm.eval_gpt4_1106_preview]
|
||||
model = "gpt-4-1106-preview"
|
||||
api_key = "XXX"
|
||||
temperature = 0.0
|
||||
|
||||
[eval_some_openai_compatible_model]
|
||||
[llm.eval_some_openai_compatible_model]
|
||||
model = "openai/MODEL_NAME"
|
||||
base_url = "https://OPENAI_COMPATIBLE_URL/v1"
|
||||
api_key = "XXX"
|
||||
|
||||
@ -30,12 +30,12 @@ run_as_devin = false
|
||||
sandbox_container_image = "public.ecr.aws/i5g0m1f6/ml-bench" # Use the latest image from the ML-Bench repository
|
||||
|
||||
# TODO: Change these to the model you want to evaluate
|
||||
[eval_gpt4_1106_preview]
|
||||
[llm.eval_gpt4_1106_preview]
|
||||
model = "gpt-4-1106-preview"
|
||||
api_key = "XXX"
|
||||
temperature = 0.0
|
||||
|
||||
[eval_some_openai_compatible_model]
|
||||
[llm.eval_some_openai_compatible_model]
|
||||
model = "openai/MODEL_NAME"
|
||||
base_url = "https://OPENAI_COMPATIBLE_URL/v1"
|
||||
api_key = "XXX"
|
||||
|
||||
@ -57,12 +57,12 @@ enable_auto_lint = true
|
||||
max_budget_per_task = 4 # 4 USD
|
||||
|
||||
# TODO: Change these to the model you want to evaluate
|
||||
[eval_gpt4_1106_preview]
|
||||
[llm.eval_gpt4_1106_preview_llm]
|
||||
model = "gpt-4-1106-preview"
|
||||
api_key = "XXX"
|
||||
temperature = 0.0
|
||||
|
||||
[eval_some_openai_compatible_model]
|
||||
[llm.eval_some_openai_compatible_model_llm]
|
||||
model = "openai/MODEL_NAME"
|
||||
base_url = "https://OPENAI_COMPATIBLE_URL/v1"
|
||||
api_key = "XXX"
|
||||
@@ -86,7 +86,7 @@ If you see an error, please make sure your `config.toml` contains all

```bash
./evaluation/swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit]
# e.g., ./evaluation/swe_bench/scripts/run_infer.sh eval_gpt4_1106_preview HEAD CodeActAgent 300
# e.g., ./evaluation/swe_bench/scripts/run_infer.sh eval_gpt4_1106_preview_llm HEAD CodeActAgent 300
```

where `model_config` is mandatory, while `agent` and `eval_limit` are optional.
@@ -104,11 +104,11 @@ to `CodeActAgent`.
default, the script evaluates the entire SWE-bench_Lite test set (300 issues). Note:
in order to use `eval_limit`, you must also set `agent`.

Let's say you'd like to run 10 instances using `eval_gpt4_1106_preview` and CodeActAgent,
Let's say you'd like to run 10 instances using `eval_gpt4_1106_preview_llm` and CodeActAgent,
then your command would be:

```bash
./evaluation/swe_bench/scripts/run_infer.sh eval_gpt4_1106_preview HEAD CodeActAgent 10
./evaluation/swe_bench/scripts/run_infer.sh eval_gpt4_1106_preview_llm HEAD CodeActAgent 10
```

If you would like to specify a list of tasks you'd like to benchmark on, you could

@ -33,6 +33,7 @@ from opendevin.events.observation import (
|
||||
ErrorObservation,
|
||||
Observation,
|
||||
)
|
||||
from opendevin.llm.llm import LLM
|
||||
|
||||
MAX_ITERATIONS = config.max_iterations
|
||||
MAX_BUDGET_PER_TASK = config.max_budget_per_task
|
||||
@ -218,7 +219,9 @@ class AgentController:
|
||||
|
||||
async def start_delegate(self, action: AgentDelegateAction):
|
||||
agent_cls: Type[Agent] = Agent.get_cls(action.agent)
|
||||
agent = agent_cls(llm=self.agent.llm)
|
||||
llm_config = config.get_llm_config_from_agent(action.agent)
|
||||
llm = LLM(llm_config=llm_config)
|
||||
delegate_agent = agent_cls(llm=llm)
|
||||
state = State(
|
||||
inputs=action.inputs or {},
|
||||
iteration=0,
|
||||
@ -227,10 +230,12 @@ class AgentController:
|
||||
# metrics should be shared between parent and child
|
||||
metrics=self.state.metrics,
|
||||
)
|
||||
logger.info(f'[Agent Controller {self.id}]: start delegate')
|
||||
logger.info(
|
||||
f'[Agent Controller {self.id}]: start delegate, creating agent {delegate_agent.name} using LLM {llm}'
|
||||
)
|
||||
self.delegate = AgentController(
|
||||
sid=self.id + '-delegate',
|
||||
agent=agent,
|
||||
agent=delegate_agent,
|
||||
event_stream=self.event_stream,
|
||||
max_iterations=self.state.max_iterations,
|
||||
max_budget_per_task=self.max_budget_per_task,
|
||||
|
||||
@ -20,7 +20,7 @@ load_dotenv()
|
||||
|
||||
|
||||
@dataclass
|
||||
class LLMConfig(metaclass=Singleton):
|
||||
class LLMConfig:
|
||||
"""
|
||||
Configuration for the LLM model.
|
||||
|
||||
@ -101,19 +101,19 @@ class LLMConfig(metaclass=Singleton):
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentConfig(metaclass=Singleton):
|
||||
class AgentConfig:
|
||||
"""
|
||||
Configuration for the agent.
|
||||
|
||||
Attributes:
|
||||
name: The name of the agent.
|
||||
memory_enabled: Whether long-term memory (embeddings) is enabled.
|
||||
memory_max_threads: The maximum number of threads indexing at the same time for embeddings.
|
||||
llm_config: The name of the llm config to use. If specified, this will override global llm config.
|
||||
"""
|
||||
|
||||
name: str = 'CodeActAgent'
|
||||
memory_enabled: bool = False
|
||||
memory_max_threads: int = 2
|
||||
llm_config: str | None = None
|
||||
|
||||
def defaults_to_dict(self) -> dict:
|
||||
"""
|
||||
@ -180,8 +180,9 @@ class AppConfig(metaclass=Singleton):
|
||||
Configuration for the app.
|
||||
|
||||
Attributes:
|
||||
llm: The LLM configuration.
|
||||
agent: The agent configuration.
|
||||
llms: A dictionary of name -> LLM configuration. Default config is under 'llm' key.
|
||||
agents: A dictionary of name -> Agent configuration. Default config is under 'agent' key.
|
||||
default_agent: The name of the default agent to use.
|
||||
sandbox: The sandbox configuration.
|
||||
runtime: The runtime environment.
|
||||
file_store: The file store to use.
|
||||
@ -207,8 +208,9 @@ class AppConfig(metaclass=Singleton):
|
||||
file_uploads_allowed_extensions: List of allowed file extensions for uploads. ['.*'] means all extensions are allowed.
|
||||
"""
|
||||
|
||||
llm: LLMConfig = field(default_factory=LLMConfig)
|
||||
agent: AgentConfig = field(default_factory=AgentConfig)
|
||||
llms: dict = field(default_factory=dict)
|
||||
agents: dict = field(default_factory=dict)
|
||||
default_agent: str = 'CodeActAgent'
|
||||
sandbox: SandboxConfig = field(default_factory=SandboxConfig)
|
||||
runtime: str = 'server'
|
||||
file_store: str = 'memory'
|
||||
@ -243,6 +245,39 @@ class AppConfig(metaclass=Singleton):
|
||||
|
||||
defaults_dict: ClassVar[dict] = {}
|
||||
|
||||
def get_llm_config(self, name='llm') -> LLMConfig:
|
||||
"""
|
||||
llm is the name of the default config (for backward compatibility prior to 0.8)
|
||||
"""
|
||||
if name in self.llms:
|
||||
return self.llms[name]
|
||||
if name is not None and name != 'llm':
|
||||
logger.warning(f'llm config group {name} not found, using default config')
|
||||
if 'llm' not in self.llms:
|
||||
self.llms['llm'] = LLMConfig()
|
||||
return self.llms['llm']
|
||||
|
||||
def set_llm_config(self, value: LLMConfig, name='llm'):
|
||||
self.llms[name] = value
|
||||
|
||||
def get_agent_config(self, name='agent') -> AgentConfig:
|
||||
"""
|
||||
agent is the name of the default config (for backward compatibility prior to 0.8)
|
||||
"""
|
||||
if name in self.agents:
|
||||
return self.agents[name]
|
||||
if 'agent' not in self.agents:
|
||||
self.agents['agent'] = AgentConfig()
|
||||
return self.agents['agent']
|
||||
|
||||
def set_agent_config(self, value: AgentConfig, name='agent'):
|
||||
self.agents[name] = value
|
||||
|
||||
def get_llm_config_from_agent(self, name='agent') -> LLMConfig:
|
||||
agent_config: AgentConfig = self.get_agent_config(name)
|
||||
llm_config_name = agent_config.llm_config
|
||||
return self.get_llm_config(llm_config_name)
|
||||
|
||||
def __post_init__(self):
|
||||
"""
|
||||
Post-initialization hook, called when the instance is created with only default values.
|
||||
@ -346,11 +381,6 @@ def load_from_env(cfg: AppConfig, env_or_toml_dict: dict | MutableMapping[str, s
|
||||
if is_dataclass(field_type):
|
||||
# nested dataclass
|
||||
nested_sub_config = getattr(sub_config, field_name)
|
||||
|
||||
# the agent field: the env var for agent.name is just 'AGENT'
|
||||
if field_name == 'agent' and 'AGENT' in env_or_toml_dict:
|
||||
setattr(nested_sub_config, 'name', env_or_toml_dict[env_var_name])
|
||||
|
||||
set_attr_from_env(nested_sub_config, prefix=field_name + '_')
|
||||
elif env_var_name in env_or_toml_dict:
|
||||
# convert the env var to the correct type and set it
|
||||
@ -377,6 +407,13 @@ def load_from_env(cfg: AppConfig, env_or_toml_dict: dict | MutableMapping[str, s
|
||||
# Start processing from the root of the config object
|
||||
set_attr_from_env(cfg)
|
||||
|
||||
# load default LLM config from env
|
||||
default_llm_config = config.get_llm_config()
|
||||
set_attr_from_env(default_llm_config, 'LLM_')
|
||||
# load default agent config from env
|
||||
default_agent_config = config.get_agent_config()
|
||||
set_attr_from_env(default_agent_config, 'AGENT_')
|
||||
|
||||
|
||||
def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
|
||||
"""Load the config from the toml file. Supports both styles of config vars.
|
||||
@ -408,17 +445,45 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
|
||||
|
||||
core_config = toml_config['core']
|
||||
|
||||
# load llm configs and agent configs
|
||||
for key, value in toml_config.items():
|
||||
if isinstance(value, dict):
|
||||
try:
|
||||
if key is not None and key.lower() == 'agent':
|
||||
logger.info('Attempt to load default agent config from config toml')
|
||||
non_dict_fields = {
|
||||
k: v for k, v in value.items() if not isinstance(v, dict)
|
||||
}
|
||||
agent_config = AgentConfig(**non_dict_fields)
|
||||
cfg.set_agent_config(agent_config, 'agent')
|
||||
for nested_key, nested_value in value.items():
|
||||
if isinstance(nested_value, dict):
|
||||
logger.info(
|
||||
f'Attempt to load group {nested_key} from config toml as agent config'
|
||||
)
|
||||
agent_config = AgentConfig(**nested_value)
|
||||
cfg.set_agent_config(agent_config, nested_key)
|
||||
if key is not None and key.lower() == 'llm':
|
||||
logger.info('Attempt to load default LLM config from config toml')
|
||||
non_dict_fields = {
|
||||
k: v for k, v in value.items() if not isinstance(v, dict)
|
||||
}
|
||||
llm_config = LLMConfig(**non_dict_fields)
|
||||
cfg.set_llm_config(llm_config, 'llm')
|
||||
for nested_key, nested_value in value.items():
|
||||
if isinstance(nested_value, dict):
|
||||
logger.info(
|
||||
f'Attempt to load group {nested_key} from config toml as llm config'
|
||||
)
|
||||
llm_config = LLMConfig(**nested_value)
|
||||
cfg.set_llm_config(llm_config, nested_key)
|
||||
except (TypeError, KeyError) as e:
|
||||
logger.warning(
|
||||
f'Cannot parse config from toml, toml values have not been applied.\n Error: {e}',
|
||||
exc_info=False,
|
||||
)
|
||||
|
||||
try:
|
||||
# set llm config from the toml file
|
||||
llm_config = cfg.llm
|
||||
if 'llm' in toml_config:
|
||||
llm_config = LLMConfig(**toml_config['llm'])
|
||||
|
||||
# set agent config from the toml file
|
||||
agent_config = cfg.agent
|
||||
if 'agent' in toml_config:
|
||||
agent_config = AgentConfig(**toml_config['agent'])
|
||||
|
||||
# set sandbox config from the toml file
|
||||
sandbox_config = config.sandbox
|
||||
|
||||
@ -439,12 +504,7 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
|
||||
sandbox_config = SandboxConfig(**toml_config['sandbox'])
|
||||
|
||||
# update the config object with the new values
|
||||
AppConfig(
|
||||
llm=llm_config,
|
||||
agent=agent_config,
|
||||
sandbox=sandbox_config,
|
||||
**core_config,
|
||||
)
|
||||
AppConfig(sandbox=sandbox_config, **core_config)
|
||||
except (TypeError, KeyError) as e:
|
||||
logger.warning(
|
||||
f'Cannot parse config from toml, toml values have not been applied.\nError: {e}',
|
||||
@ -472,8 +532,9 @@ def finalize_config(cfg: AppConfig):
|
||||
parts = cfg.workspace_mount_rewrite.split(':')
|
||||
cfg.workspace_mount_path = base.replace(parts[0], parts[1])
|
||||
|
||||
if cfg.llm.embedding_base_url is None:
|
||||
cfg.llm.embedding_base_url = cfg.llm.base_url
|
||||
for llm in cfg.llms.values():
|
||||
if llm.embedding_base_url is None:
|
||||
llm.embedding_base_url = llm.base_url
|
||||
|
||||
if cfg.use_host_network and platform.system() == 'Darwin':
|
||||
logger.warning(
|
||||
@ -493,14 +554,16 @@ finalize_config(config)
|
||||
|
||||
|
||||
# Utility function for command line --group argument
|
||||
def get_llm_config_arg(llm_config_arg: str):
|
||||
def get_llm_config_arg(
|
||||
llm_config_arg: str, toml_file: str = 'config.toml'
|
||||
) -> LLMConfig | None:
|
||||
"""
|
||||
Get a group of llm settings from the config file.
|
||||
|
||||
A group in config.toml can look like this:
|
||||
|
||||
```
|
||||
[gpt-3.5-for-eval]
|
||||
[llm.gpt-3.5-for-eval]
|
||||
model = 'gpt-3.5-turbo'
|
||||
api_key = '...'
|
||||
temperature = 0.5
|
||||
@ -511,6 +574,8 @@ def get_llm_config_arg(llm_config_arg: str):
|
||||
The user-defined group name, like "gpt-3.5-for-eval", is the argument to this function. The function loads the LLMConfig object
with the settings of this group from the config file and returns it.
|
||||
|
||||
Note that the group must be under the "llm" group; in other words, the group name must start with "llm.".
|
||||
|
||||
Args:
|
||||
llm_config_arg: The group of llm settings to get from the config.toml file.
|
||||
|
||||
@ -520,12 +585,17 @@ def get_llm_config_arg(llm_config_arg: str):
|
||||
|
||||
# keep only the name, just in case
|
||||
llm_config_arg = llm_config_arg.strip('[]')
|
||||
|
||||
# truncate the prefix, just in case
|
||||
if llm_config_arg.startswith('llm.'):
|
||||
llm_config_arg = llm_config_arg[4:]
|
||||
|
||||
logger.info(f'Loading llm config from {llm_config_arg}')
|
||||
|
||||
# load the toml file
|
||||
try:
|
||||
with open('config.toml', 'r', encoding='utf-8') as toml_file:
|
||||
toml_config = toml.load(toml_file)
|
||||
with open(toml_file, 'r', encoding='utf-8') as toml_contents:
|
||||
toml_config = toml.load(toml_contents)
|
||||
except FileNotFoundError as e:
|
||||
logger.error(f'Config file not found: {e}')
|
||||
return None
|
||||
@ -534,8 +604,8 @@ def get_llm_config_arg(llm_config_arg: str):
|
||||
return None
|
||||
|
||||
# update the llm config with the specified section
|
||||
if llm_config_arg in toml_config:
|
||||
return LLMConfig(**toml_config[llm_config_arg])
|
||||
if 'llm' in toml_config and llm_config_arg in toml_config['llm']:
|
||||
return LLMConfig(**toml_config['llm'][llm_config_arg])
|
||||
logger.debug(f'Loading from toml failed for {llm_config_arg}')
|
||||
return None
|
||||
|
||||
@ -564,16 +634,9 @@ def get_parser() -> argparse.ArgumentParser:
|
||||
parser.add_argument(
|
||||
'-c',
|
||||
'--agent-cls',
|
||||
default=config.agent.name,
|
||||
default=config.default_agent,
|
||||
type=str,
|
||||
help='The agent class to use',
|
||||
)
|
||||
parser.add_argument(
|
||||
'-m',
|
||||
'--model-name',
|
||||
default=config.llm.model,
|
||||
type=str,
|
||||
help='The (litellm) model name to use',
|
||||
help='Name of the default agent to use',
|
||||
)
|
||||
parser.add_argument(
|
||||
'-i',
|
||||
@ -619,7 +682,7 @@ def get_parser() -> argparse.ArgumentParser:
|
||||
'--llm-config',
|
||||
default=None,
|
||||
type=str,
|
||||
help='The group of llm settings, e.g. a [llama3] section in the toml file. Overrides model if both are provided.',
|
||||
help='The group of llm settings, e.g. "llama3" for [llm.llama3] section in the toml file. Overrides model if both are provided.',
|
||||
)
|
||||
return parser
|
||||
|
||||
|
||||
@ -53,7 +53,7 @@ async def run_agent_controller(
|
||||
|
||||
# Logging
|
||||
logger.info(
|
||||
f'Running agent {type(agent)}, model {agent.llm.model_name}, with task: "{task_str}"'
|
||||
f'Running agent {agent.name}, model {agent.llm.model_name}, with task: "{task_str}"'
|
||||
)
|
||||
|
||||
# set up the event stream
|
||||
@ -163,7 +163,7 @@ if __name__ == '__main__':
|
||||
raise ValueError(f'Invalid toml file, cannot read {args.llm_config}')
|
||||
llm = LLM(llm_config=llm_config)
|
||||
else:
|
||||
llm = LLM(model=args.model_name)
|
||||
llm = LLM(llm_config=config.get_llm_config_from_agent(args.agent_cls))
|
||||
|
||||
# Create the agent
|
||||
AgentCls: Type[Agent] = Agent.get_cls(args.agent_cls)
|
||||
|
||||
@ -21,8 +21,10 @@ class Singleton(type):
|
||||
# used by pytest to reset the state of the singleton instances
|
||||
for instance_type, instance in cls._instances.items():
|
||||
print('resetting... ', instance_type)
|
||||
for field in dataclasses.fields(instance_type):
|
||||
if dataclasses.is_dataclass(field.type):
|
||||
setattr(instance, field.name, field.type())
|
||||
for field_info in dataclasses.fields(instance_type):
|
||||
if dataclasses.is_dataclass(field_info.type):
|
||||
setattr(instance, field_info.name, field_info.type())
|
||||
elif field_info.default_factory is not dataclasses.MISSING:
|
||||
setattr(instance, field_info.name, field_info.default_factory())
|
||||
else:
|
||||
setattr(instance, field.name, field.default)
|
||||
setattr(instance, field_info.name, field_info.default)
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
from dataclasses import asdict
|
||||
from datetime import datetime
|
||||
|
||||
from opendevin.core.config import config
|
||||
from opendevin.events import Event, EventSource
|
||||
from opendevin.events.observation.observation import Observation
|
||||
|
||||
@ -70,7 +69,7 @@ def event_to_dict(event: 'Event') -> dict:
|
||||
return d
|
||||
|
||||
|
||||
def event_to_memory(event: 'Event') -> dict:
|
||||
def event_to_memory(event: 'Event', max_message_chars: int) -> dict:
|
||||
d = event_to_dict(event)
|
||||
d.pop('id', None)
|
||||
d.pop('cause', None)
|
||||
@ -79,17 +78,14 @@ def event_to_memory(event: 'Event') -> dict:
|
||||
if 'extras' in d:
|
||||
remove_fields(d['extras'], DELETE_FROM_MEMORY_EXTRAS)
|
||||
if isinstance(event, Observation) and 'content' in d:
|
||||
d['content'] = truncate_content(d['content'])
|
||||
d['content'] = truncate_content(d['content'], max_message_chars)
|
||||
return d
|
||||
|
||||
|
||||
def truncate_content(content: str, max_chars: int = -1) -> str:
|
||||
def truncate_content(content: str, max_chars: int) -> str:
|
||||
"""
|
||||
Truncate the middle of the observation content if it is too long.
|
||||
"""
|
||||
if max_chars == -1:
|
||||
max_chars = config.llm.max_message_chars
|
||||
|
||||
if len(content) <= max_chars:
|
||||
return content
|
||||
|
||||
|
||||
@ -5,9 +5,10 @@ import boto3
|
||||
from opendevin.core.config import config
|
||||
from opendevin.core.logger import opendevin_logger as logger
|
||||
|
||||
AWS_ACCESS_KEY_ID = config.llm.aws_access_key_id
|
||||
AWS_SECRET_ACCESS_KEY = config.llm.aws_secret_access_key
|
||||
AWS_REGION_NAME = config.llm.aws_region_name
|
||||
# TODO: this assumes AWS-specific configs are under default 'llm' group
|
||||
AWS_ACCESS_KEY_ID = config.get_llm_config().aws_access_key_id
|
||||
AWS_SECRET_ACCESS_KEY = config.get_llm_config().aws_secret_access_key
|
||||
AWS_REGION_NAME = config.get_llm_config().aws_region_name
|
||||
|
||||
# It needs to be set as an environment variable, if the variable is configured in the Config file.
|
||||
if AWS_ACCESS_KEY_ID is not None:
|
||||
|
||||
@ -63,6 +63,8 @@ class LLM:
|
||||
llm_config=None,
|
||||
metrics=None,
|
||||
cost_metric_supported=True,
|
||||
input_cost_per_token=None,
|
||||
output_cost_per_token=None,
|
||||
):
|
||||
"""
|
||||
Initializes the LLM. If LLMConfig is passed, its values will be the fallback.
|
||||
@ -84,9 +86,11 @@ class LLM:
|
||||
llm_temperature (float, optional): The temperature for LLM sampling. Defaults to LLM_TEMPERATURE.
|
||||
metrics (Metrics, optional): The metrics object to use. Defaults to None.
|
||||
cost_metric_supported (bool, optional): Whether the cost metric is supported. Defaults to True.
|
||||
input_cost_per_token (float, optional): The cost per input token.
|
||||
output_cost_per_token (float, optional): The cost per output token.
|
||||
"""
|
||||
if llm_config is None:
|
||||
llm_config = config.llm
|
||||
llm_config = config.get_llm_config()
|
||||
model = model if model is not None else llm_config.model
|
||||
api_key = api_key if api_key is not None else llm_config.api_key
|
||||
base_url = base_url if base_url is not None else llm_config.base_url
|
||||
@ -118,6 +122,16 @@ class LLM:
|
||||
if max_output_tokens is not None
|
||||
else llm_config.max_output_tokens
|
||||
)
|
||||
input_cost_per_token = (
|
||||
input_cost_per_token
|
||||
if input_cost_per_token is not None
|
||||
else llm_config.input_cost_per_token
|
||||
)
|
||||
output_cost_per_token = (
|
||||
output_cost_per_token
|
||||
if output_cost_per_token is not None
|
||||
else llm_config.output_cost_per_token
|
||||
)
|
||||
metrics = metrics if metrics is not None else Metrics()
|
||||
|
||||
logger.info(f'Initializing LLM with model: {model}')
|
||||
@ -127,6 +141,8 @@ class LLM:
|
||||
self.api_version = api_version
|
||||
self.max_input_tokens = max_input_tokens
|
||||
self.max_output_tokens = max_output_tokens
|
||||
self.input_cost_per_token = input_cost_per_token
|
||||
self.output_cost_per_token = output_cost_per_token
|
||||
self.llm_timeout = llm_timeout
|
||||
self.custom_llm_provider = custom_llm_provider
|
||||
self.metrics = metrics
|
||||
@ -292,12 +308,12 @@ class LLM:
|
||||
|
||||
extra_kwargs = {}
|
||||
if (
|
||||
config.llm.input_cost_per_token is not None
|
||||
and config.llm.output_cost_per_token is not None
|
||||
self.input_cost_per_token is not None
|
||||
and self.output_cost_per_token is not None
|
||||
):
|
||||
cost_per_token = CostPerToken(
|
||||
input_cost_per_token=config.llm.input_cost_per_token,
|
||||
output_cost_per_token=config.llm.output_cost_per_token,
|
||||
input_cost_per_token=self.input_cost_per_token,
|
||||
output_cost_per_token=self.output_cost_per_token,
|
||||
)
|
||||
logger.info(f'Using custom cost per token: {cost_per_token}')
|
||||
extra_kwargs['custom_cost_per_token'] = cost_per_token
|
||||
|
||||
@ -13,13 +13,14 @@ from tenacity import (
|
||||
wait_random_exponential,
|
||||
)
|
||||
|
||||
from opendevin.core.config import config
|
||||
from opendevin.core.config import LLMConfig, config
|
||||
from opendevin.core.logger import opendevin_logger as logger
|
||||
from opendevin.core.utils import json
|
||||
|
||||
num_retries = config.llm.num_retries
|
||||
retry_min_wait = config.llm.retry_min_wait
|
||||
retry_max_wait = config.llm.retry_max_wait
|
||||
# TODO: this should depend on specific agent setting
|
||||
num_retries = config.get_llm_config().num_retries
|
||||
retry_min_wait = config.get_llm_config().retry_min_wait
|
||||
retry_max_wait = config.get_llm_config().retry_max_wait
|
||||
|
||||
# llama-index includes a retry decorator around openai.get_embeddings() function
|
||||
# it is initialized with hard-coded values and errors
|
||||
@ -62,7 +63,7 @@ class EmbeddingsLoader:
|
||||
"""Loader for embedding model initialization."""
|
||||
|
||||
@staticmethod
|
||||
def get_embedding_model(strategy: str):
|
||||
def get_embedding_model(strategy: str, llm_config: LLMConfig):
|
||||
supported_ollama_embed_models = [
|
||||
'llama2',
|
||||
'mxbai-embed-large',
|
||||
@ -75,7 +76,7 @@ class EmbeddingsLoader:
|
||||
|
||||
return OllamaEmbedding(
|
||||
model_name=strategy,
|
||||
base_url=config.llm.embedding_base_url,
|
||||
base_url=llm_config.embedding_base_url,
|
||||
ollama_additional_kwargs={'mirostat': 0},
|
||||
)
|
||||
elif strategy == 'openai':
|
||||
@ -83,17 +84,17 @@ class EmbeddingsLoader:
|
||||
|
||||
return OpenAIEmbedding(
|
||||
model='text-embedding-ada-002',
|
||||
api_key=config.llm.api_key,
|
||||
api_key=llm_config.api_key,
|
||||
)
|
||||
elif strategy == 'azureopenai':
|
||||
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
|
||||
|
||||
return AzureOpenAIEmbedding(
|
||||
model='text-embedding-ada-002',
|
||||
deployment_name=config.llm.embedding_deployment_name,
|
||||
api_key=config.llm.api_key,
|
||||
azure_endpoint=config.llm.base_url,
|
||||
api_version=config.llm.api_version,
|
||||
deployment_name=llm_config.embedding_deployment_name,
|
||||
api_key=llm_config.api_key,
|
||||
azure_endpoint=llm_config.base_url,
|
||||
api_version=llm_config.api_version,
|
||||
)
|
||||
elif (strategy is not None) and (strategy.lower() == 'none'):
|
||||
# TODO: this works but is not elegant enough. The incentive is when
|
||||
@ -106,24 +107,26 @@ class EmbeddingsLoader:
|
||||
return HuggingFaceEmbedding(model_name='BAAI/bge-small-en-v1.5')
|
||||
|
||||
|
||||
sema = threading.Semaphore(value=config.agent.memory_max_threads)
|
||||
|
||||
|
||||
class LongTermMemory:
|
||||
"""
|
||||
Handles storing information for the agent to access later, using chromadb.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, agent_config_name='agent'):
|
||||
"""
|
||||
Initialize the chromadb and set up ChromaVectorStore for later use.
|
||||
"""
|
||||
db = chromadb.Client(chromadb.Settings(anonymized_telemetry=False))
|
||||
self.collection = db.get_or_create_collection(name='memories')
|
||||
vector_store = ChromaVectorStore(chroma_collection=self.collection)
|
||||
embedding_strategy = config.llm.embedding_model
|
||||
embed_model = EmbeddingsLoader.get_embedding_model(embedding_strategy)
|
||||
agent_config = config.get_agent_config(agent_config_name)
|
||||
llm_config = config.get_llm_config(agent_config.llm_config)
|
||||
embedding_strategy = llm_config.embedding_model
|
||||
embed_model = EmbeddingsLoader.get_embedding_model(
|
||||
embedding_strategy, llm_config
|
||||
)
|
||||
self.index = VectorStoreIndex.from_vector_store(vector_store, embed_model)
|
||||
self.sema = threading.Semaphore(value=agent_config.memory_max_threads)
|
||||
self.thought_idx = 0
|
||||
self._add_threads = []
|
||||
|
||||
@ -158,7 +161,7 @@ class LongTermMemory:
|
||||
thread.start() # We add the doc concurrently so we don't have to wait ~500ms for the insert
|
||||
|
||||
def _add_doc(self, doc):
|
||||
with sema:
|
||||
with self.sema:
|
||||
self.index.insert(doc)
|
||||
|
||||
def search(self, query: str, k: int = 10):
|
||||
|
||||
@ -308,18 +308,22 @@ async def get_litellm_models():
|
||||
)
|
||||
bedrock_model_list = bedrock.list_foundation_models()
|
||||
model_list = litellm_model_list_without_bedrock + bedrock_model_list
|
||||
ollama_base_url = config.llm.ollama_base_url
|
||||
if config.llm.model.startswith('ollama'):
|
||||
if not ollama_base_url:
|
||||
ollama_base_url = config.llm.base_url
|
||||
if ollama_base_url:
|
||||
ollama_url = ollama_base_url.strip('/') + '/api/tags'
|
||||
try:
|
||||
ollama_models_list = requests.get(ollama_url, timeout=3).json()['models']
|
||||
for model in ollama_models_list:
|
||||
model_list.append('ollama/' + model['name'])
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.error(f'Error getting OLLAMA models: {e}', exc_info=True)
|
||||
for llm_config in config.llms.values():
|
||||
ollama_base_url = llm_config.ollama_base_url
|
||||
if llm_config.model.startswith('ollama'):
|
||||
if not ollama_base_url:
|
||||
ollama_base_url = llm_config.base_url
|
||||
if ollama_base_url:
|
||||
ollama_url = ollama_base_url.strip('/') + '/api/tags'
|
||||
try:
|
||||
ollama_models_list = requests.get(ollama_url, timeout=3).json()[
|
||||
'models'
|
||||
]
|
||||
for model in ollama_models_list:
|
||||
model_list.append('ollama/' + model['name'])
|
||||
break
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.error(f'Error getting OLLAMA models: {e}', exc_info=True)
|
||||
|
||||
return list(sorted(set(model_list)))
|
||||
|
||||
|
||||
@ -86,10 +86,11 @@ class AgentSession:
|
||||
for key, value in start_event.get('args', {}).items()
|
||||
if value != ''
|
||||
} # remove empty values, prevent FE from sending empty strings
|
||||
agent_cls = args.get(ConfigType.AGENT, config.agent.name)
|
||||
model = args.get(ConfigType.LLM_MODEL, config.llm.model)
|
||||
api_key = args.get(ConfigType.LLM_API_KEY, config.llm.api_key)
|
||||
api_base = config.llm.base_url
|
||||
agent_cls = args.get(ConfigType.AGENT, config.default_agent)
|
||||
llm_config = config.get_llm_config_from_agent(agent_cls)
|
||||
model = args.get(ConfigType.LLM_MODEL, llm_config.model)
|
||||
api_key = args.get(ConfigType.LLM_API_KEY, llm_config.api_key)
|
||||
api_base = llm_config.base_url
|
||||
max_iterations = args.get(ConfigType.MAX_ITERATIONS, config.max_iterations)
|
||||
|
||||
logger.info(f'Creating agent {agent_cls} using LLM {model}')
|
||||
|
||||
@ -49,7 +49,9 @@ def apply_prompt_and_get_mock_response(test_name: str, messages: str, id: int) -
|
||||
Note: this function blindly replaces existing prompt file with the given
|
||||
input without checking the contents.
|
||||
"""
|
||||
mock_dir = os.path.join(script_dir, 'mock', os.environ.get('AGENT'), test_name)
|
||||
mock_dir = os.path.join(
|
||||
script_dir, 'mock', os.environ.get('DEFAULT_AGENT'), test_name
|
||||
)
|
||||
prompt_file_path = os.path.join(mock_dir, f'prompt_{"{0:03}".format(id)}.log')
|
||||
resp_file_path = os.path.join(mock_dir, f'response_{"{0:03}".format(id)}.log')
|
||||
try:
|
||||
@ -82,7 +84,9 @@ def get_mock_response(test_name: str, messages: str, id: int) -> str:
|
||||
makes test code harder to understand.
|
||||
"""
|
||||
prompt = filter_out_symbols(messages)
|
||||
mock_dir = os.path.join(script_dir, 'mock', os.environ.get('AGENT'), test_name)
|
||||
mock_dir = os.path.join(
|
||||
script_dir, 'mock', os.environ.get('DEFAULT_AGENT'), test_name
|
||||
)
|
||||
prompt_file_path = os.path.join(mock_dir, f'prompt_{"{0:03}".format(id)}.log')
|
||||
resp_file_path = os.path.join(mock_dir, f'response_{"{0:03}".format(id)}.log')
|
||||
# Open the prompt file and compare its contents
|
||||
@ -130,7 +134,11 @@ def mock_user_response(*args, test_name, **kwargs):
|
||||
STDIN input for the agent to read.
|
||||
"""
|
||||
user_response_file = os.path.join(
|
||||
script_dir, 'mock', os.environ.get('AGENT'), test_name, 'user_responses.log'
|
||||
script_dir,
|
||||
'mock',
|
||||
os.environ.get('DEFAULT_AGENT'),
|
||||
test_name,
|
||||
'user_responses.log',
|
||||
)
|
||||
if not os.path.exists(user_response_file):
|
||||
return ''
|
||||
|
||||
@ -78,7 +78,7 @@ run_test() {
|
||||
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
|
||||
WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
|
||||
MAX_ITERATIONS=$MAX_ITERATIONS \
|
||||
AGENT=$agent \
|
||||
DEFAULT_AGENT=$agent \
|
||||
$pytest_cmd 2>&1 | tee $TMP_FILE
|
||||
|
||||
# Capture the exit code of pytest
|
||||
@ -148,7 +148,7 @@ regenerate_without_llm() {
|
||||
WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
|
||||
MAX_ITERATIONS=$MAX_ITERATIONS \
|
||||
FORCE_APPLY_PROMPTS=true \
|
||||
AGENT=$agent \
|
||||
DEFAULT_AGENT=$agent \
|
||||
poetry run pytest -s ./tests/integration/test_agent.py::$test_name
|
||||
set +x
|
||||
}
|
||||
|
||||
@ -29,16 +29,19 @@ print(f'workspace_mount_path_in_sandbox: {workspace_mount_path_in_sandbox}')
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.getenv('AGENT') == 'BrowsingAgent',
|
||||
os.getenv('DEFAULT_AGENT') == 'BrowsingAgent',
|
||||
reason='BrowsingAgent is a specialized agent',
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
(os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent')
|
||||
(
|
||||
os.getenv('DEFAULT_AGENT') == 'CodeActAgent'
|
||||
or os.getenv('DEFAULT_AGENT') == 'CodeActSWEAgent'
|
||||
)
|
||||
and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh',
|
||||
reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
os.getenv('AGENT') == 'ManagerAgent',
|
||||
os.getenv('DEFAULT_AGENT') == 'ManagerAgent',
|
||||
reason='Manager agent is not capable of finishing this in reasonable steps yet',
|
||||
)
|
||||
def test_write_simple_script():
|
||||
@ -46,7 +49,7 @@ def test_write_simple_script():
|
||||
args = parse_arguments()
|
||||
|
||||
# Create the agent
|
||||
agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
|
||||
agent = Agent.get_cls(args.agent_cls)(llm=LLM())
|
||||
|
||||
final_state: State | None = asyncio.run(
|
||||
run_agent_controller(agent, task, exit_on_message=True)
|
||||
@ -68,16 +71,20 @@ def test_write_simple_script():
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.getenv('AGENT') == 'BrowsingAgent',
|
||||
os.getenv('DEFAULT_AGENT') == 'BrowsingAgent',
|
||||
reason='BrowsingAgent is a specialized agent',
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
(os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent')
|
||||
(
|
||||
os.getenv('DEFAULT_AGENT') == 'CodeActAgent'
|
||||
or os.getenv('DEFAULT_AGENT') == 'CodeActSWEAgent'
|
||||
)
|
||||
and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh',
|
||||
reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
os.getenv('AGENT') == 'MonologueAgent' or os.getenv('AGENT') == 'PlannerAgent',
|
||||
os.getenv('DEFAULT_AGENT') == 'MonologueAgent'
|
||||
or os.getenv('DEFAULT_AGENT') == 'PlannerAgent',
|
||||
reason='We only keep basic tests for MonologueAgent and PlannerAgent',
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
@ -96,7 +103,7 @@ def test_edits():
|
||||
shutil.copy(os.path.join(source_dir, file), dest_file)
|
||||
|
||||
# Create the agent
|
||||
agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
|
||||
agent = Agent.get_cls(args.agent_cls)(llm=LLM())
|
||||
|
||||
# Execute the task
|
||||
task = 'Fix typos in bad.txt. Do not ask me for confirmation at any point.'
|
||||
@ -118,7 +125,8 @@ Enjoy!
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.getenv('AGENT') != 'CodeActAgent' and os.getenv('AGENT') != 'CodeActSWEAgent',
|
||||
os.getenv('DEFAULT_AGENT') != 'CodeActAgent'
|
||||
and os.getenv('DEFAULT_AGENT') != 'CodeActSWEAgent',
|
||||
reason='currently only CodeActAgent and CodeActSWEAgent have IPython (Jupyter) execution by default',
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
@ -129,7 +137,7 @@ def test_ipython():
|
||||
args = parse_arguments()
|
||||
|
||||
# Create the agent
|
||||
agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
|
||||
agent = Agent.get_cls(args.agent_cls)(llm=LLM())
|
||||
|
||||
# Execute the task
|
||||
task = "Use Jupyter IPython to write a text file containing 'hello world' to '/workspace/test.txt'. Do not ask me for confirmation at any point."
|
||||
@ -152,7 +160,7 @@ def test_ipython():
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.getenv('AGENT') != 'ManagerAgent',
|
||||
os.getenv('DEFAULT_AGENT') != 'ManagerAgent',
|
||||
reason='Currently, only ManagerAgent supports task rejection',
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
@ -163,7 +171,7 @@ def test_simple_task_rejection():
|
||||
args = parse_arguments()
|
||||
|
||||
# Create the agent
|
||||
agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
|
||||
agent = Agent.get_cls(args.agent_cls)(llm=LLM())
|
||||
|
||||
# Give an impossible task to do: cannot write a commit message because
|
||||
# the workspace is not a git repo
|
||||
@ -175,7 +183,8 @@ def test_simple_task_rejection():
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.getenv('AGENT') != 'CodeActAgent' and os.getenv('AGENT') != 'CodeActSWEAgent',
|
||||
os.getenv('DEFAULT_AGENT') != 'CodeActAgent'
|
||||
and os.getenv('DEFAULT_AGENT') != 'CodeActSWEAgent',
|
||||
reason='currently only CodeActAgent and CodeActSWEAgent have IPython (Jupyter) execution by default',
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
@ -186,7 +195,7 @@ def test_ipython_module():
|
||||
args = parse_arguments()
|
||||
|
||||
# Create the agent
|
||||
agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
|
||||
agent = Agent.get_cls(args.agent_cls)(llm=LLM())
|
||||
|
||||
# Execute the task
|
||||
task = "Install and import pymsgbox==1.0.9 and print it's version in /workspace/test.txt. Do not ask me for confirmation at any point."
|
||||
@ -210,11 +219,15 @@ def test_ipython_module():
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.getenv('AGENT') != 'BrowsingAgent' and os.getenv('AGENT') != 'CodeActAgent',
|
||||
os.getenv('DEFAULT_AGENT') != 'BrowsingAgent'
|
||||
and os.getenv('DEFAULT_AGENT') != 'CodeActAgent',
|
||||
reason='currently only BrowsingAgent and CodeActAgent are capable of searching the internet',
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
(os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent')
|
||||
(
|
||||
os.getenv('DEFAULT_AGENT') == 'CodeActAgent'
|
||||
or os.getenv('DEFAULT_AGENT') == 'CodeActSWEAgent'
|
||||
)
|
||||
and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh',
|
||||
reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
|
||||
)
|
||||
@ -222,7 +235,7 @@ def test_browse_internet(http_server):
|
||||
args = parse_arguments()
|
||||
|
||||
# Create the agent
|
||||
agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
|
||||
agent = Agent.get_cls(args.agent_cls)(llm=LLM())
|
||||
|
||||
# Execute the task
|
||||
task = 'Browse localhost:8000, and tell me the ultimate answer to life. Do not ask me for confirmation at any point.'
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
from opendevin.core.config import config
|
||||
from opendevin.events.action import (
|
||||
Action,
|
||||
AddTaskAction,
|
||||
@ -28,7 +29,9 @@ def serialization_deserialization(original_action_dict, cls):
|
||||
action_instance, cls
|
||||
), f'The action instance should be an instance of {cls.__name__}.'
|
||||
serialized_action_dict = event_to_dict(action_instance)
|
||||
serialized_action_memory = event_to_memory(action_instance)
|
||||
serialized_action_memory = event_to_memory(
|
||||
action_instance, config.get_llm_config().max_message_chars
|
||||
)
|
||||
serialized_action_dict.pop('message')
|
||||
assert (
|
||||
serialized_action_dict == original_action_dict
|
||||
|
||||
@ -10,7 +10,7 @@ def test_help_message(capsys):
captured = capsys.readouterr()
expected_help_message = """
usage: pytest [-h] [-d DIRECTORY] [-t TASK] [-f FILE] [-c AGENT_CLS]
[-m MODEL_NAME] [-i MAX_ITERATIONS] [-b MAX_BUDGET_PER_TASK]
[-i MAX_ITERATIONS] [-b MAX_BUDGET_PER_TASK]
[--eval-output-dir EVAL_OUTPUT_DIR]
[--eval-n-limit EVAL_N_LIMIT]
[--eval-num-workers EVAL_NUM_WORKERS] [--eval-note EVAL_NOTE]
@ -26,9 +26,7 @@ options:
-f FILE, --file FILE Path to a file containing the task. Overrides -t if
both are provided.
-c AGENT_CLS, --agent-cls AGENT_CLS
The agent class to use
-m MODEL_NAME, --model-name MODEL_NAME
The (litellm) model name to use
Name of the default agent to use
-i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS
The maximum number of iterations to run the agent
-b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK
@ -43,8 +41,9 @@ options:
--eval-note EVAL_NOTE
The note to add to the evaluation directory
-l LLM_CONFIG, --llm-config LLM_CONFIG
The group of llm settings, e.g. a [llama3] section in
the toml file. Overrides model if both are provided.
The group of llm settings, e.g. "llama3" for
[llm.llama3] section in the toml file. Overrides model
if both are provided.
"""

actual_lines = captured.out.strip().split('\n')

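The reworked help text above reflects the CLI change: the old -m/--model-name flag is gone, and -l/--llm-config now names an [llm.<name>] section of the TOML file. As a rough illustration (not part of this diff), such a section can be resolved programmatically with the get_llm_config_arg helper exercised further down in these tests; the "llama3" section name and the config.toml path are illustrative assumptions:

from opendevin.core.config import get_llm_config_arg

# Look up the [llm.llama3] section of config.toml by name, roughly what
# passing `-l llama3` on the command line asks for; the returned LLMConfig
# is then used in place of the default [llm] settings.
llm_config = get_llm_config_arg('llama3', 'config.toml')
print(llm_config.model)
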
@ -8,6 +8,7 @@ from opendevin.core.config import (
LLMConfig,
UndefinedString,
finalize_config,
get_llm_config_arg,
load_from_env,
load_from_toml,
)
@ -50,7 +51,7 @@ def test_compat_env_to_config(monkeypatch, setup_env):
monkeypatch.setenv('LLM_MODEL', 'gpt-4o')
monkeypatch.setenv('AGENT_MEMORY_MAX_THREADS', '4')
monkeypatch.setenv('AGENT_MEMORY_ENABLED', 'True')
monkeypatch.setenv('AGENT', 'CodeActAgent')
monkeypatch.setenv('DEFAULT_AGENT', 'CodeActAgent')
monkeypatch.setenv('SANDBOX_TYPE', 'local')
monkeypatch.setenv('SANDBOX_TIMEOUT', '10')

@ -58,14 +59,14 @@ def test_compat_env_to_config(monkeypatch, setup_env):
load_from_env(config, os.environ)

assert config.workspace_base == '/repos/opendevin/workspace'
assert isinstance(config.llm, LLMConfig)
assert config.llm.api_key == 'sk-proj-rgMV0...'
assert config.llm.model == 'gpt-4o'
assert isinstance(config.agent, AgentConfig)
assert isinstance(config.agent.memory_max_threads, int)
assert config.agent.memory_max_threads == 4
assert config.agent.memory_enabled is True
assert config.agent.name == 'CodeActAgent'
assert isinstance(config.get_llm_config(), LLMConfig)
assert config.get_llm_config().api_key == 'sk-proj-rgMV0...'
assert config.get_llm_config().model == 'gpt-4o'
assert isinstance(config.get_agent_config(), AgentConfig)
assert isinstance(config.get_agent_config().memory_max_threads, int)
assert config.get_agent_config().memory_max_threads == 4
assert config.get_agent_config().memory_enabled is True
assert config.default_agent == 'CodeActAgent'
assert config.sandbox.box_type == 'local'
assert config.sandbox.timeout == 10

@ -74,15 +75,15 @@ def test_load_from_old_style_env(monkeypatch, default_config):
# Test loading configuration from old-style environment variables using monkeypatch
monkeypatch.setenv('LLM_API_KEY', 'test-api-key')
monkeypatch.setenv('AGENT_MEMORY_ENABLED', 'True')
monkeypatch.setenv('AGENT_NAME', 'PlannerAgent')
monkeypatch.setenv('DEFAULT_AGENT', 'PlannerAgent')
monkeypatch.setenv('WORKSPACE_BASE', '/opt/files/workspace')
monkeypatch.setenv('SANDBOX_CONTAINER_IMAGE', 'custom_image')

load_from_env(default_config, os.environ)

assert default_config.llm.api_key == 'test-api-key'
assert default_config.agent.memory_enabled is True
assert default_config.agent.name == 'PlannerAgent'
assert default_config.get_llm_config().api_key == 'test-api-key'
assert default_config.get_agent_config().memory_enabled is True
assert default_config.default_agent == 'PlannerAgent'
assert default_config.workspace_base == '/opt/files/workspace'
assert (
default_config.workspace_mount_path is UndefinedString.UNDEFINED
@ -102,25 +103,52 @@ def test_load_from_new_style_toml(default_config, temp_toml_file):
model = "test-model"
api_key = "toml-api-key"

[llm.cheap]
model = "some-cheap-model"
api_key = "cheap-model-api-key"

[agent]
name = "TestAgent"
memory_enabled = true

[agent.BrowsingAgent]
llm_config = "cheap"
memory_enabled = false

[sandbox]
timeout = 1

[core]
workspace_base = "/opt/files2/workspace"
default_agent = "TestAgent"
sandbox_type = "local"
"""
)

load_from_toml(default_config, temp_toml_file)

assert default_config.llm.model == 'test-model'
assert default_config.llm.api_key == 'toml-api-key'
assert default_config.agent.name == 'TestAgent'
assert default_config.agent.memory_enabled is True
# default llm & agent configs
assert default_config.default_agent == 'TestAgent'
assert default_config.get_llm_config().model == 'test-model'
assert default_config.get_llm_config().api_key == 'toml-api-key'
assert default_config.get_agent_config().memory_enabled is True

# undefined agent config inherits default ones
assert (
default_config.get_llm_config_from_agent('CodeActAgent')
== default_config.get_llm_config()
)
assert default_config.get_agent_config('CodeActAgent').memory_enabled is True

# defined agent config overrides default ones
assert default_config.get_llm_config_from_agent(
'BrowsingAgent'
) == default_config.get_llm_config('cheap')
assert (
default_config.get_llm_config_from_agent('BrowsingAgent').model
== 'some-cheap-model'
)
assert default_config.get_agent_config('BrowsingAgent').memory_enabled is False

assert default_config.workspace_base == '/opt/files2/workspace'
assert default_config.sandbox.box_type == 'local'
assert default_config.sandbox.timeout == 1
@ -152,7 +180,6 @@ def test_compat_load_sandbox_from_toml(default_config, temp_toml_file):
model = "test-model"

[agent]
name = "TestAgent"
memory_enabled = true

[core]
@ -161,14 +188,16 @@ sandbox_type = "local"
sandbox_timeout = 500
sandbox_container_image = "node:14"
sandbox_user_id = 1001
default_agent = "TestAgent"
"""
)

load_from_toml(default_config, temp_toml_file)

assert default_config.llm.model == 'test-model'
assert default_config.agent.name == 'TestAgent'
assert default_config.agent.memory_enabled is True
assert default_config.get_llm_config().model == 'test-model'
assert default_config.get_llm_config_from_agent().model == 'test-model'
assert default_config.default_agent == 'TestAgent'
assert default_config.get_agent_config().memory_enabled is True
assert default_config.workspace_base == '/opt/files2/workspace'
assert default_config.sandbox.box_type == 'local'
assert default_config.sandbox.timeout == 500
@ -220,8 +249,10 @@ sandbox_user_id = 1001
load_from_env(default_config, os.environ)

assert os.environ.get('LLM_MODEL') is None
assert default_config.llm.model == 'test-model'
assert default_config.llm.api_key == 'env-api-key'
assert default_config.get_llm_config().model == 'test-model'
assert default_config.get_llm_config('llm').model == 'test-model'
assert default_config.get_llm_config_from_agent().model == 'test-model'
assert default_config.get_llm_config().api_key == 'env-api-key'

# after we set workspace_base to 'UNDEFINED' in the environment,
# workspace_base should be set to that
@ -271,7 +302,7 @@ user_id = 1001
assert default_config.workspace_mount_path is UndefinedString.UNDEFINED

# before load_from_env, values are set to the values from the toml file
assert default_config.llm.api_key == 'toml-api-key'
assert default_config.get_llm_config().api_key == 'toml-api-key'
assert default_config.sandbox.box_type == 'e2b'
assert default_config.sandbox.timeout == 500
assert default_config.sandbox.user_id == 1001
@ -280,8 +311,8 @@ user_id = 1001

# values from env override values from toml
assert os.environ.get('LLM_MODEL') is None
assert default_config.llm.model == 'test-model'
assert default_config.llm.api_key == 'env-api-key'
assert default_config.get_llm_config().model == 'test-model'
assert default_config.get_llm_config().api_key == 'env-api-key'

assert default_config.sandbox.box_type == 'local'
assert default_config.sandbox.timeout == 1000
@ -315,7 +346,7 @@ user_id = 1001
load_from_env(default_config, os.environ)
finalize_config(default_config)

assert default_config.llm.model == 'test-model'
assert default_config.get_llm_config().model == 'test-model'
assert default_config.sandbox.box_type == 'local'
assert default_config.sandbox.timeout == 1
assert default_config.sandbox.container_image == 'custom_image'
@ -328,16 +359,19 @@ def test_defaults_dict_after_updates(default_config):
assert (
initial_defaults['workspace_mount_path']['default'] is UndefinedString.UNDEFINED
)
assert initial_defaults['llm']['api_key']['default'] is None
assert initial_defaults['agent']['name']['default'] == 'CodeActAgent'
assert initial_defaults['default_agent']['default'] == 'CodeActAgent'

updated_config = AppConfig()
updated_config.llm.api_key = 'updated-api-key'
updated_config.agent.name = 'MonologueAgent'
updated_config.get_llm_config().api_key = 'updated-api-key'
updated_config.get_llm_config('llm').api_key = 'updated-api-key'
updated_config.get_llm_config_from_agent('agent').api_key = 'updated-api-key'
updated_config.get_llm_config_from_agent(
'MonologueAgent'
).api_key = 'updated-api-key'
updated_config.default_agent = 'MonologueAgent'

defaults_after_updates = updated_config.defaults_dict
assert defaults_after_updates['llm']['api_key']['default'] is None
assert defaults_after_updates['agent']['name']['default'] == 'CodeActAgent'
assert defaults_after_updates['default_agent']['default'] == 'CodeActAgent'
assert (
defaults_after_updates['workspace_mount_path']['default']
is UndefinedString.UNDEFINED
@ -363,10 +397,10 @@ def test_invalid_toml_format(monkeypatch, temp_toml_file, default_config):
load_from_env(default_config, os.environ)
default_config.ssh_password = None # prevent leak
default_config.jwt_secret = None # prevent leak
assert default_config.llm.model == 'gpt-5-turbo-1106'
assert default_config.llm.custom_llm_provider is None
if default_config.llm.api_key is not None: # prevent leak
pytest.fail('LLM API key should be empty.')
for llm in default_config.llms.values():
llm.api_key = None # prevent leak
assert default_config.get_llm_config().model == 'gpt-5-turbo-1106'
assert default_config.get_llm_config().custom_llm_provider is None
assert default_config.workspace_mount_path == '/home/user/project'


@ -413,9 +447,12 @@ def test_workspace_mount_rewrite(default_config, monkeypatch):


def test_embedding_base_url_default(default_config):
default_config.llm.base_url = 'https://api.exampleapi.com'
default_config.get_llm_config().base_url = 'https://api.exampleapi.com'
finalize_config(default_config)
assert default_config.llm.embedding_base_url == 'https://api.exampleapi.com'
assert (
default_config.get_llm_config().embedding_base_url
== 'https://api.exampleapi.com'
)


def test_cache_dir_creation(default_config, tmpdir):
@ -461,9 +498,7 @@ def test_api_keys_repr_str():

# Test AgentConfig
# No attrs in AgentConfig have 'key' or 'token' in their name
agent_config = AgentConfig(
name='my_agent', memory_enabled=True, memory_max_threads=4
)
agent_config = AgentConfig(memory_enabled=True, memory_max_threads=4)
for attr_name in dir(AgentConfig):
if not attr_name.startswith('__'):
assert (
@ -475,8 +510,8 @@ def test_api_keys_repr_str():

# Test AppConfig
app_config = AppConfig(
llm=llm_config,
agent=agent_config,
llms={'llm': llm_config},
agents={'agent': agent_config},
e2b_api_key='my_e2b_api_key',
jwt_secret='my_jwt_secret',
ssh_password='my_ssh_password',
@ -519,3 +554,28 @@ max_budget_per_task = 4.0

assert config.max_iterations == 100
assert config.max_budget_per_task == 4.0


def test_get_llm_config_arg(temp_toml_file):
temp_toml = """
[core]
max_iterations = 100
max_budget_per_task = 4.0

[llm.gpt3]
model="gpt-3.5-turbo"
api_key="redacted"
embedding_model="openai"

[llm.gpt4o]
model="gpt-4o"
api_key="redacted"
embedding_model="openai"
"""

with open(temp_toml_file, 'w') as f:
f.write(temp_toml)

llm_config = get_llm_config_arg('gpt3', temp_toml_file)
assert llm_config.model == 'gpt-3.5-turbo'
assert llm_config.embedding_model == 'openai'

@ -76,9 +76,6 @@ def test_llm_config_attributes_masking(test_handler):
assert 'AKIAIOSFODNN7EXAMPLE' not in log_output
assert 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' not in log_output

# reset the LLMConfig
LLMConfig.reset()


def test_app_config_attributes_masking(test_handler):
logger, stream = test_handler

@ -1,3 +1,4 @@
from opendevin.core.config import config
from opendevin.events.observation import (
CmdOutputObservation,
Observation,
@ -18,7 +19,9 @@ def serialization_deserialization(original_observation_dict, cls):
observation_instance, cls
), 'The observation instance should be an instance of CmdOutputObservation.'
serialized_observation_dict = event_to_dict(observation_instance)
serialized_observation_memory = event_to_memory(observation_instance)
serialized_observation_memory = event_to_memory(
observation_instance, config.get_llm_config().max_message_chars
)
assert (
serialized_observation_dict == original_observation_dict
), 'The serialized observation should match the original observation dict.'

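Taken together, the config tests above spell out the lookup rule this PR introduces: an [agent.<AgentName>] section may point at a named [llm.<name>] section through its llm_config key, and agents without such a section fall back to the default [llm] block. Below is a small self-contained sketch of that behaviour (not part of this diff), reusing the helpers the tests call; the temporary file, the model names, and the AppConfig import path are assumptions based on the test code:

import tempfile

from opendevin.core.config import AppConfig, load_from_toml

# Illustrative config: a default LLM plus a cheaper one reserved for BrowsingAgent.
TOML_TEXT = """
[llm]
model = "test-model"

[llm.cheap]
model = "some-cheap-model"

[agent.BrowsingAgent]
llm_config = "cheap"
"""

with tempfile.NamedTemporaryFile('w', suffix='.toml', delete=False) as f:
    f.write(TOML_TEXT)
    toml_path = f.name

cfg = AppConfig()
load_from_toml(cfg, toml_path)

# BrowsingAgent resolves to the named "cheap" LLM config ...
assert cfg.get_llm_config_from_agent('BrowsingAgent').model == 'some-cheap-model'
# ... while agents without an [agent.<Name>] section inherit the default [llm] block.
assert cfg.get_llm_config_from_agent('CodeActAgent').model == 'test-model'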