Customize LLM config per agent (#2756)

Currently, OpenDevin uses a global singleton LLM config and a global singleton agent config. This PR allows users to configure an LLM config for each agent. One potentially useful scenario is to use a cheaper LLM for repo exploration / code search, and a more powerful LLM for the actual problem solving (CodeActAgent).

Partially solves #2075 (web GUI improvement is not the goal of this PR)
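
For illustration, here is a minimal sketch (not code from this PR) of how the per-agent lookup is meant to be used, assuming a `config.toml` with an `[llm.gpt3]` group and an `[agent.RepoExplorerAgent]` group whose `llm_config = 'gpt3'`, as in the updated `config.template.toml`:

```python
from opendevin.core.config import config
from opendevin.llm.llm import LLM

# Resolve the LLM config mapped to a given agent. Agents without a dedicated
# [agent.<name>] group (or without an llm_config field) fall back to the
# default [llm] group.
explorer_llm_config = config.get_llm_config_from_agent('RepoExplorerAgent')  # -> [llm.gpt3]
solver_llm_config = config.get_llm_config_from_agent('CodeActAgent')         # -> default [llm]

# Each agent can then be driven by its own LLM instance.
explorer_llm = LLM(llm_config=explorer_llm_config)
solver_llm = LLM(llm_config=solver_llm_config)
```

The defaults remain the `[llm]` and `[agent]` groups, so existing single-LLM setups keep working unchanged.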
Boxuan Li 2024-07-09 22:05:54 -07:00 committed by GitHub
parent 23e2d01cf5
commit c68478f470
35 changed files with 522 additions and 227 deletions

View File

@ -31,7 +31,7 @@ jobs:
- name: Run tests
run: |
set -e
poetry run python opendevin/core/main.py -t "do a flip" -m ollama/not-a-model -d ./workspace/ -c DummyAgent
poetry run python opendevin/core/main.py -t "do a flip" -d ./workspace/ -c DummyAgent
- name: Check exit code
run: |
if [ $? -ne 0 ]; then

View File

@ -8,6 +8,7 @@ from agenthub.codeact_agent.prompt import (
)
from opendevin.controller.agent import Agent
from opendevin.controller.state.state import State
from opendevin.core.config import config
from opendevin.events.action import (
Action,
AgentDelegateAction,
@ -60,8 +61,11 @@ def get_action_message(action: Action) -> dict[str, str] | None:
def get_observation_message(obs) -> dict[str, str] | None:
max_message_chars = config.get_llm_config_from_agent(
'CodeActAgent'
).max_message_chars
if isinstance(obs, CmdOutputObservation):
content = 'OBSERVATION:\n' + truncate_content(obs.content)
content = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars)
content += (
f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]'
)
@ -76,10 +80,12 @@ def get_observation_message(obs) -> dict[str, str] | None:
'![image](data:image/png;base64, ...) already displayed to user'
)
content = '\n'.join(splitted)
content = truncate_content(content)
content = truncate_content(content, max_message_chars)
return {'role': 'user', 'content': content}
elif isinstance(obs, AgentDelegateObservation):
content = 'OBSERVATION:\n' + truncate_content(str(obs.outputs))
content = 'OBSERVATION:\n' + truncate_content(
str(obs.outputs), max_message_chars
)
return {'role': 'user', 'content': content}
return None

View File

@ -7,6 +7,7 @@ from agenthub.codeact_swe_agent.prompt import (
from agenthub.codeact_swe_agent.response_parser import CodeActSWEResponseParser
from opendevin.controller.agent import Agent
from opendevin.controller.state.state import State
from opendevin.core.config import config
from opendevin.events.action import (
Action,
AgentFinishAction,
@ -52,8 +53,11 @@ def get_action_message(action: Action) -> dict[str, str] | None:
def get_observation_message(obs) -> dict[str, str] | None:
max_message_chars = config.get_llm_config_from_agent(
'CodeActSWEAgent'
).max_message_chars
if isinstance(obs, CmdOutputObservation):
content = 'OBSERVATION:\n' + truncate_content(obs.content)
content = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars)
content += (
f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]'
)
@ -68,7 +72,7 @@ def get_observation_message(obs) -> dict[str, str] | None:
'![image](data:image/png;base64, ...) already displayed to user'
)
content = '\n'.join(splitted)
content = truncate_content(content)
content = truncate_content(content, max_message_chars)
return {'role': 'user', 'content': content}
return None

View File

@ -2,6 +2,7 @@ from jinja2 import BaseLoader, Environment
from opendevin.controller.agent import Agent
from opendevin.controller.state.state import State
from opendevin.core.config import config
from opendevin.core.utils import json
from opendevin.events.action import Action
from opendevin.events.serialization.action import action_from_dict
@ -32,6 +33,9 @@ def history_to_json(history: ShortTermHistory, max_events=20, **kwargs):
"""
Serialize and simplify history to str format
"""
# TODO: get agent specific llm config
llm_config = config.get_llm_config()
max_message_chars = llm_config.max_message_chars
processed_history = []
event_count = 0
@ -39,7 +43,7 @@ def history_to_json(history: ShortTermHistory, max_events=20, **kwargs):
for event in history.get_events(reverse=True):
if event_count >= max_events:
break
processed_history.append(event_to_memory(event))
processed_history.append(event_to_memory(event, max_message_chars))
event_count += 1
# history is in reverse order, let's fix it

View File

@ -29,7 +29,7 @@ from opendevin.llm.llm import LLM
from opendevin.memory.condenser import MemoryCondenser
from opendevin.runtime.tools import RuntimeTool
if config.agent.memory_enabled:
if config.get_agent_config('MonologueAgent').memory_enabled:
from opendevin.memory.memory import LongTermMemory
@ -78,7 +78,7 @@ class MonologueAgent(Agent):
raise AgentNoInstructionError()
self.initial_thoughts = []
if config.agent.memory_enabled:
if config.get_agent_config('MonologueAgent').memory_enabled:
self.memory = LongTermMemory()
else:
self.memory = None
@ -89,6 +89,9 @@ class MonologueAgent(Agent):
self._initialized = True
def _add_initial_thoughts(self, task):
max_message_chars = config.get_llm_config_from_agent(
'MonologueAgent'
).max_message_chars
previous_action = ''
for thought in INITIAL_THOUGHTS:
thought = thought.replace('$TASK', task)
@ -106,7 +109,9 @@ class MonologueAgent(Agent):
observation = BrowserOutputObservation(
content=thought, url='', screenshot=''
)
self.initial_thoughts.append(event_to_memory(observation))
self.initial_thoughts.append(
event_to_memory(observation, max_message_chars)
)
previous_action = ''
else:
action: Action = NullAction()
@ -133,7 +138,7 @@ class MonologueAgent(Agent):
previous_action = ActionType.BROWSE
else:
action = MessageAction(thought)
self.initial_thoughts.append(event_to_memory(action))
self.initial_thoughts.append(event_to_memory(action, max_message_chars))
def step(self, state: State) -> Action:
"""
@ -145,7 +150,9 @@ class MonologueAgent(Agent):
Returns:
- Action: The next action to take based on LLM response
"""
max_message_chars = config.get_llm_config_from_agent(
'MonologueAgent'
).max_message_chars
goal = state.get_current_user_intent()
self._initialize(goal)
@ -153,7 +160,7 @@ class MonologueAgent(Agent):
# add the events from state.history
for event in state.history.get_events():
recent_events.append(event_to_memory(event))
recent_events.append(event_to_memory(event, max_message_chars))
# add the last messages to long term memory
if self.memory is not None:
@ -163,9 +170,11 @@ class MonologueAgent(Agent):
# this should still work
# we will need to do this differently: find out if there really is an action or an observation in this step
if last_action:
self.memory.add_event(event_to_memory(last_action))
self.memory.add_event(event_to_memory(last_action, max_message_chars))
if last_observation:
self.memory.add_event(event_to_memory(last_observation))
self.memory.add_event(
event_to_memory(last_observation, max_message_chars)
)
# the action prompt with initial thoughts and recent events
prompt = prompts.get_request_action_prompt(

View File

@ -1,4 +1,5 @@
from opendevin.controller.state.state import State
from opendevin.core.config import config
from opendevin.core.logger import opendevin_logger as logger
from opendevin.core.schema import ActionType
from opendevin.core.utils import json
@ -128,6 +129,9 @@ def get_prompt(state: State) -> str:
Returns:
- str: The formatted string prompt with historical values
"""
max_message_chars = config.get_llm_config_from_agent(
'PlannerAgent'
).max_message_chars
# the plan
plan_str = json.dumps(state.root_task.to_dict(), indent=2)
@ -142,7 +146,7 @@ def get_prompt(state: State) -> str:
break
if latest_action == NullAction() and isinstance(event, Action):
latest_action = event
history_dicts.append(event_to_memory(event))
history_dicts.append(event_to_memory(event, max_message_chars))
# history_dicts is in reverse order, lets fix it
history_dicts.reverse()
@ -160,7 +164,7 @@ def get_prompt(state: State) -> str:
plan_status = "You're not currently working on any tasks. Your next action MUST be to mark a task as in_progress."
# the hint, based on the last action
hint = get_hint(event_to_memory(latest_action).get('action', ''))
hint = get_hint(event_to_memory(latest_action, max_message_chars).get('action', ''))
logger.info('HINT:\n' + hint, extra={'msg_type': 'DETAIL'})
# the last relevant user message (the task)

View File

@ -79,8 +79,12 @@ persist_sandbox = false
# Use host network
#use_host_network = false
# Name of the default agent
#default_agent = "CodeActAgent"
#################################### LLM #####################################
# Configuration for the LLM model
# Configuration for LLM models (group name starts with 'llm')
# use 'llm' for the default LLM config
##############################################################################
[llm]
# AWS access key ID
@ -149,8 +153,18 @@ model = "gpt-4o"
# Top p for the API
#top_p = 0.5
[llm.gpt3]
# API key to use
api_key = "your-api-key"
# Model to use
model = "gpt-3.5"
#################################### Agent ###################################
# Configuration for the agent
# Configuration for agents (group name starts with 'agent')
# Use 'agent' for the default agent config
# otherwise, group name must be `agent.<agent_name>` (case-sensitive), e.g.
# agent.CodeActAgent
##############################################################################
[agent]
# Memory enabled
@ -159,8 +173,13 @@ model = "gpt-4o"
# Memory maximum threads
#memory_max_threads = 2
# Name of the agent
#name = "CodeActAgent"
# LLM config group to use
#llm_config = 'llm'
[agent.RepoExplorerAgent]
# Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
# useful when an agent doesn't demand high quality but uses a lot of tokens
llm_config = 'gpt3'
#################################### Sandbox ###################################
# Configuration for the sandbox

View File

@ -0,0 +1,75 @@
---
sidebar_position: 8
---
# Changelog
## 0.8 (release date: ??)
### Config breaking changes
In this release we introduced a few breaking changes to backend configurations.
If you have only been using OpenDevin via the frontend (web GUI), no action is needed.
Here's a list of the config breaking changes. They only apply to users who use the OpenDevin CLI via `main.py`. For more details, see [#2756](https://github.com/OpenDevin/OpenDevin/pull/2756).
#### Removal of --model-name option from main.py
Please note that the `--model-name` (`-m`) option no longer exists. You should set up the LLM
configs in `config.toml` or via environment variables.
#### LLM config groups must be subgroups of 'llm'
Prior to release 0.8, you could use an arbitrary name for an LLM config group in `config.toml`, e.g.
```toml
[gpt-4o]
model="gpt-4o"
api_key="<your_api_key>"
```
and then use the `--llm-config` CLI argument to specify the desired LLM config group
by name. This no longer works. Instead, the config group must be a subgroup of `llm`,
e.g.:
```toml
[llm.gpt-4o]
model="gpt-4o"
api_key="<your_api_key>"
```
If you already have a config group named `llm`, there is no need to change it; it will be used
as the default LLM config group.
#### 'agent' group no longer contains 'name' field
Prior to release 0.8, you may have had a config group named `agent` that
looks like this:
```toml
[agent]
name="CodeActAgent"
memory_max_threads=2
```
Note that the `name` field has been removed. Instead, you should put a `default_agent` field
under the `core` group, e.g.
```toml
[core]
# other configs
default_agent='CodeActAgent'
[agent]
llm_config='llm'
memory_max_threads=2
[agent.CodeActAgent]
llm_config='gpt-4o'
```
Note that, similar to `llm` subgroups, you can also define `agent` subgroups.
Moreover, an agent can be associated with a specific LLM config group. For more
details, see the examples in `config.template.toml`.
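
As a rough illustration, assuming the `[llm.gpt-4o]` group from the earlier snippet together with the `[core]` and `[agent]` groups above, the lookup resolves like this:

```python
from opendevin.core.config import config

# CodeActAgent has a dedicated [agent.CodeActAgent] group, so its llm_config
# ('gpt-4o') wins over the default [agent] group.
assert config.get_llm_config_from_agent('CodeActAgent').model == 'gpt-4o'

# An agent without a dedicated group inherits the default [agent] group and
# therefore resolves to the default [llm] config.
default_llm = config.get_llm_config_from_agent('SomeOtherAgent')
assert default_llm == config.get_llm_config()
```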

View File

@ -53,14 +53,14 @@ api_key = "sk-XXX"
In this section, for the purpose of building an evaluation task, we don't use the standard OpenDevin web-based GUI, but rather run OpenDevin backend from CLI.
For example, you can run the following, which performs the specified task `-t`, with a particular model `-m` and agent `-c`, for a maximum number of iterations `-i`:
For example, you can run the following, which performs the specified task `-t`, with a particular model config `-l` and agent `-c`, for a maximum number of iterations `-i`:
```bash
poetry run python ./opendevin/core/main.py \
-i 10 \
-t "Write me a bash script that print hello world." \
-c CodeActAgent \
-m gpt-4o-2024-05-13
-l llm
```
After running the script, you will observe the following:

View File

@ -29,12 +29,12 @@ enable_auto_lint = true
box_type = "ssh"
timeout = 120
[eval_gpt35_turbo]
[llm.eval_gpt35_turbo]
model = "gpt-3.5-turbo"
api_key = "sk-123"
temperature = 0.0
[eval_gpt4o]
[llm.eval_gpt4o]
model = "gpt-4o"
api_key = "sk-123"
temperature = 0.0

View File

@ -21,12 +21,12 @@ ssh_hostname = "localhost"
enable_auto_lint = true
# TODO: Change these to the model you want to evaluate
[eval_gpt4_1106_preview]
[llm.eval_gpt4_1106_preview]
model = "gpt-4-1106-preview"
api_key = "XXX"
temperature = 0.0
[eval_some_openai_compatible_model]
[llm.eval_some_openai_compatible_model]
model = "openai/MODEL_NAME"
base_url = "https://OPENAI_COMPATIBLE_URL/v1"
api_key = "XXX"

View File

@ -39,12 +39,12 @@ ssh_hostname = "localhost"
enable_auto_lint = true
# TODO: Change these to the model you want to evaluate
[eval_gpt4_1106_preview]
[llm.eval_gpt4_1106_preview]
model = "gpt-4-1106-preview"
api_key = "XXX"
temperature = 0.0
[eval_azure_openai_compatible_model]
[llm.eval_azure_openai_compatible_model]
model = "AZURE_OPENAI_EXACT_DEPLOYMENT_MODEL_NAME"
base_url = "AZURE_OPENAI_ENDPOINT"
api_key = "AZURE_ENDPOINT_API_KEY"

View File

@ -21,12 +21,12 @@ ssh_hostname = "localhost"
enable_auto_lint = true
# TODO: Change these to the model you want to evaluate
[eval_gpt4_1106_preview]
[llm.eval_gpt4_1106_preview]
model = "gpt-4-1106-preview"
api_key = "XXX"
temperature = 0.0
[eval_some_openai_compatible_model]
[llm.eval_some_openai_compatible_model]
model = "openai/MODEL_NAME"
base_url = "https://OPENAI_COMPATIBLE_URL/v1"
api_key = "XXX"

View File

@ -16,12 +16,12 @@ ssh_hostname = "localhost"
enable_auto_lint = true
# TODO: Change these to the model you want to evaluate
[eval_gpt4_1106_preview]
[llm.eval_gpt4_1106_preview_llm]
model = "gpt-4-1106-preview"
api_key = "XXX"
temperature = 0.0
[eval_some_openai_compatible_model]
[llm.eval_some_openai_compatible_model_llm]
model = "openai/MODEL_NAME"
base_url = "https://OPENAI_COMPATIBLE_URL/v1"
api_key = "XXX"
@ -29,9 +29,9 @@ temperature = 0.0
```
## Run Inference on logic_reasoning
The following code will run inference on the first example of the ProntoQA dataset with model gpt-4o,
The following code will run inference on the first example of the ProntoQA dataset,
using OpenDevin 0.6.2 version.
```bash
./evaluation/logic_reasoning/scripts/run_infer.sh ProntoQA gpt-4o 0.6.2 1
./evaluation/logic_reasoning/scripts/run_infer.sh ProntoQA eval_gpt4_1106_preview_llm 0.6.2 1
```

View File

@ -23,12 +23,12 @@ box_type = "ssh"
timeout = 120
# TODO: Change these to the model you want to evaluate
[eval_gpt4_1106_preview]
[llm.eval_gpt4_1106_preview]
model = "gpt-4-1106-preview"
api_key = "XXX"
temperature = 0.0
[eval_some_openai_compatible_model]
[llm.eval_some_openai_compatible_model]
model = "openai/MODEL_NAME"
base_url = "https://OPENAI_COMPATIBLE_URL/v1"
api_key = "XXX"

View File

@ -30,12 +30,12 @@ run_as_devin = false
sandbox_container_image = "public.ecr.aws/i5g0m1f6/ml-bench" # Use the latest image from the ML-Bench repository
# TODO: Change these to the model you want to evaluate
[eval_gpt4_1106_preview]
[llm.eval_gpt4_1106_preview]
model = "gpt-4-1106-preview"
api_key = "XXX"
temperature = 0.0
[eval_some_openai_compatible_model]
[llm.eval_some_openai_compatible_model]
model = "openai/MODEL_NAME"
base_url = "https://OPENAI_COMPATIBLE_URL/v1"
api_key = "XXX"

View File

@ -57,12 +57,12 @@ enable_auto_lint = true
max_budget_per_task = 4 # 4 USD
# TODO: Change these to the model you want to evaluate
[eval_gpt4_1106_preview]
[llm.eval_gpt4_1106_preview_llm]
model = "gpt-4-1106-preview"
api_key = "XXX"
temperature = 0.0
[eval_some_openai_compatible_model]
[llm.eval_some_openai_compatible_model_llm]
model = "openai/MODEL_NAME"
base_url = "https://OPENAI_COMPATIBLE_URL/v1"
api_key = "XXX"
@ -86,7 +86,7 @@ If you see an error, please make sure your `config.toml` contains all
```bash
./evaluation/swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit]
# e.g., ./evaluation/swe_bench/scripts/run_infer.sh eval_gpt4_1106_preview HEAD CodeActAgent 300
# e.g., ./evaluation/swe_bench/scripts/run_infer.sh eval_gpt4_1106_preview_llm HEAD CodeActAgent 300
```
where `model_config` is mandatory, while `agent` and `eval_limit` are optional.
@ -104,11 +104,11 @@ to `CodeActAgent`.
default, the script evaluates the entire SWE-bench_Lite test set (300 issues). Note:
in order to use `eval_limit`, you must also set `agent`.
Let's say you'd like to run 10 instances using `eval_gpt4_1106_preview` and CodeActAgent,
Let's say you'd like to run 10 instances using `eval_gpt4_1106_preview_llm` and CodeActAgent,
then your command would be:
```bash
./evaluation/swe_bench/scripts/run_infer.sh eval_gpt4_1106_preview HEAD CodeActAgent 10
./evaluation/swe_bench/scripts/run_infer.sh eval_gpt4_1106_preview_llm HEAD CodeActAgent 10
```
If you would like to specify a list of tasks you'd like to benchmark on, you could

View File

@ -33,6 +33,7 @@ from opendevin.events.observation import (
ErrorObservation,
Observation,
)
from opendevin.llm.llm import LLM
MAX_ITERATIONS = config.max_iterations
MAX_BUDGET_PER_TASK = config.max_budget_per_task
@ -218,7 +219,9 @@ class AgentController:
async def start_delegate(self, action: AgentDelegateAction):
agent_cls: Type[Agent] = Agent.get_cls(action.agent)
agent = agent_cls(llm=self.agent.llm)
llm_config = config.get_llm_config_from_agent(action.agent)
llm = LLM(llm_config=llm_config)
delegate_agent = agent_cls(llm=llm)
state = State(
inputs=action.inputs or {},
iteration=0,
@ -227,10 +230,12 @@ class AgentController:
# metrics should be shared between parent and child
metrics=self.state.metrics,
)
logger.info(f'[Agent Controller {self.id}]: start delegate')
logger.info(
f'[Agent Controller {self.id}]: start delegate, creating agent {delegate_agent.name} using LLM {llm}'
)
self.delegate = AgentController(
sid=self.id + '-delegate',
agent=agent,
agent=delegate_agent,
event_stream=self.event_stream,
max_iterations=self.state.max_iterations,
max_budget_per_task=self.max_budget_per_task,

View File

@ -20,7 +20,7 @@ load_dotenv()
@dataclass
class LLMConfig(metaclass=Singleton):
class LLMConfig:
"""
Configuration for the LLM model.
@ -101,19 +101,19 @@ class LLMConfig(metaclass=Singleton):
@dataclass
class AgentConfig(metaclass=Singleton):
class AgentConfig:
"""
Configuration for the agent.
Attributes:
name: The name of the agent.
memory_enabled: Whether long-term memory (embeddings) is enabled.
memory_max_threads: The maximum number of threads indexing at the same time for embeddings.
llm_config: The name of the llm config to use. If specified, this will override global llm config.
"""
name: str = 'CodeActAgent'
memory_enabled: bool = False
memory_max_threads: int = 2
llm_config: str | None = None
def defaults_to_dict(self) -> dict:
"""
@ -180,8 +180,9 @@ class AppConfig(metaclass=Singleton):
Configuration for the app.
Attributes:
llm: The LLM configuration.
agent: The agent configuration.
llms: A dictionary of name -> LLM configuration. Default config is under 'llm' key.
agents: A dictionary of name -> Agent configuration. Default config is under 'agent' key.
default_agent: The name of the default agent to use.
sandbox: The sandbox configuration.
runtime: The runtime environment.
file_store: The file store to use.
@ -207,8 +208,9 @@ class AppConfig(metaclass=Singleton):
file_uploads_allowed_extensions: List of allowed file extensions for uploads. ['.*'] means all extensions are allowed.
"""
llm: LLMConfig = field(default_factory=LLMConfig)
agent: AgentConfig = field(default_factory=AgentConfig)
llms: dict = field(default_factory=dict)
agents: dict = field(default_factory=dict)
default_agent: str = 'CodeActAgent'
sandbox: SandboxConfig = field(default_factory=SandboxConfig)
runtime: str = 'server'
file_store: str = 'memory'
@ -243,6 +245,39 @@ class AppConfig(metaclass=Singleton):
defaults_dict: ClassVar[dict] = {}
def get_llm_config(self, name='llm') -> LLMConfig:
"""
llm is the name for default config (for backward compatibility prior to 0.8)
"""
if name in self.llms:
return self.llms[name]
if name is not None and name != 'llm':
logger.warning(f'llm config group {name} not found, using default config')
if 'llm' not in self.llms:
self.llms['llm'] = LLMConfig()
return self.llms['llm']
def set_llm_config(self, value: LLMConfig, name='llm'):
self.llms[name] = value
def get_agent_config(self, name='agent') -> AgentConfig:
"""
agent is the name for default config (for backward compatibility prior to 0.8)
"""
if name in self.agents:
return self.agents[name]
if 'agent' not in self.agents:
self.agents['agent'] = AgentConfig()
return self.agents['agent']
def set_agent_config(self, value: AgentConfig, name='agent'):
self.agents[name] = value
def get_llm_config_from_agent(self, name='agent') -> LLMConfig:
agent_config: AgentConfig = self.get_agent_config(name)
llm_config_name = agent_config.llm_config
return self.get_llm_config(llm_config_name)
def __post_init__(self):
"""
Post-initialization hook, called when the instance is created with only default values.
@ -346,11 +381,6 @@ def load_from_env(cfg: AppConfig, env_or_toml_dict: dict | MutableMapping[str, s
if is_dataclass(field_type):
# nested dataclass
nested_sub_config = getattr(sub_config, field_name)
# the agent field: the env var for agent.name is just 'AGENT'
if field_name == 'agent' and 'AGENT' in env_or_toml_dict:
setattr(nested_sub_config, 'name', env_or_toml_dict[env_var_name])
set_attr_from_env(nested_sub_config, prefix=field_name + '_')
elif env_var_name in env_or_toml_dict:
# convert the env var to the correct type and set it
@ -377,6 +407,13 @@ def load_from_env(cfg: AppConfig, env_or_toml_dict: dict | MutableMapping[str, s
# Start processing from the root of the config object
set_attr_from_env(cfg)
# load default LLM config from env
default_llm_config = config.get_llm_config()
set_attr_from_env(default_llm_config, 'LLM_')
# load default agent config from env
default_agent_config = config.get_agent_config()
set_attr_from_env(default_agent_config, 'AGENT_')
def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
"""Load the config from the toml file. Supports both styles of config vars.
@ -408,17 +445,45 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
core_config = toml_config['core']
# load llm configs and agent configs
for key, value in toml_config.items():
if isinstance(value, dict):
try:
if key is not None and key.lower() == 'agent':
logger.info('Attempt to load default agent config from config toml')
non_dict_fields = {
k: v for k, v in value.items() if not isinstance(v, dict)
}
agent_config = AgentConfig(**non_dict_fields)
cfg.set_agent_config(agent_config, 'agent')
for nested_key, nested_value in value.items():
if isinstance(nested_value, dict):
logger.info(
f'Attempt to load group {nested_key} from config toml as agent config'
)
agent_config = AgentConfig(**nested_value)
cfg.set_agent_config(agent_config, nested_key)
if key is not None and key.lower() == 'llm':
logger.info('Attempt to load default LLM config from config toml')
non_dict_fields = {
k: v for k, v in value.items() if not isinstance(v, dict)
}
llm_config = LLMConfig(**non_dict_fields)
cfg.set_llm_config(llm_config, 'llm')
for nested_key, nested_value in value.items():
if isinstance(nested_value, dict):
logger.info(
f'Attempt to load group {nested_key} from config toml as llm config'
)
llm_config = LLMConfig(**nested_value)
cfg.set_llm_config(llm_config, nested_key)
except (TypeError, KeyError) as e:
logger.warning(
f'Cannot parse config from toml, toml values have not been applied.\n Error: {e}',
exc_info=False,
)
try:
# set llm config from the toml file
llm_config = cfg.llm
if 'llm' in toml_config:
llm_config = LLMConfig(**toml_config['llm'])
# set agent config from the toml file
agent_config = cfg.agent
if 'agent' in toml_config:
agent_config = AgentConfig(**toml_config['agent'])
# set sandbox config from the toml file
sandbox_config = config.sandbox
@ -439,12 +504,7 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
sandbox_config = SandboxConfig(**toml_config['sandbox'])
# update the config object with the new values
AppConfig(
llm=llm_config,
agent=agent_config,
sandbox=sandbox_config,
**core_config,
)
AppConfig(sandbox=sandbox_config, **core_config)
except (TypeError, KeyError) as e:
logger.warning(
f'Cannot parse config from toml, toml values have not been applied.\nError: {e}',
@ -472,8 +532,9 @@ def finalize_config(cfg: AppConfig):
parts = cfg.workspace_mount_rewrite.split(':')
cfg.workspace_mount_path = base.replace(parts[0], parts[1])
if cfg.llm.embedding_base_url is None:
cfg.llm.embedding_base_url = cfg.llm.base_url
for llm in cfg.llms.values():
if llm.embedding_base_url is None:
llm.embedding_base_url = llm.base_url
if cfg.use_host_network and platform.system() == 'Darwin':
logger.warning(
@ -493,14 +554,16 @@ finalize_config(config)
# Utility function for command line --group argument
def get_llm_config_arg(llm_config_arg: str):
def get_llm_config_arg(
llm_config_arg: str, toml_file: str = 'config.toml'
) -> LLMConfig | None:
"""
Get a group of llm settings from the config file.
A group in config.toml can look like this:
```
[gpt-3.5-for-eval]
[llm.gpt-3.5-for-eval]
model = 'gpt-3.5-turbo'
api_key = '...'
temperature = 0.5
@ -511,6 +574,8 @@ def get_llm_config_arg(llm_config_arg: str):
The user-defined group name, like "gpt-3.5-for-eval", is the argument to this function. The function will load the LLMConfig object
with the settings of this group, from the config file, and set it as the LLMConfig object for the app.
Note that the group must be under "llm" group, or in other words, the group name must start with "llm.".
Args:
llm_config_arg: The group of llm settings to get from the config.toml file.
@ -520,12 +585,17 @@ def get_llm_config_arg(llm_config_arg: str):
# keep only the name, just in case
llm_config_arg = llm_config_arg.strip('[]')
# truncate the prefix, just in case
if llm_config_arg.startswith('llm.'):
llm_config_arg = llm_config_arg[4:]
logger.info(f'Loading llm config from {llm_config_arg}')
# load the toml file
try:
with open('config.toml', 'r', encoding='utf-8') as toml_file:
toml_config = toml.load(toml_file)
with open(toml_file, 'r', encoding='utf-8') as toml_contents:
toml_config = toml.load(toml_contents)
except FileNotFoundError as e:
logger.error(f'Config file not found: {e}')
return None
@ -534,8 +604,8 @@ def get_llm_config_arg(llm_config_arg: str):
return None
# update the llm config with the specified section
if llm_config_arg in toml_config:
return LLMConfig(**toml_config[llm_config_arg])
if 'llm' in toml_config and llm_config_arg in toml_config['llm']:
return LLMConfig(**toml_config['llm'][llm_config_arg])
logger.debug(f'Loading from toml failed for {llm_config_arg}')
return None
@ -564,16 +634,9 @@ def get_parser() -> argparse.ArgumentParser:
parser.add_argument(
'-c',
'--agent-cls',
default=config.agent.name,
default=config.default_agent,
type=str,
help='The agent class to use',
)
parser.add_argument(
'-m',
'--model-name',
default=config.llm.model,
type=str,
help='The (litellm) model name to use',
help='Name of the default agent to use',
)
parser.add_argument(
'-i',
@ -619,7 +682,7 @@ def get_parser() -> argparse.ArgumentParser:
'--llm-config',
default=None,
type=str,
help='The group of llm settings, e.g. a [llama3] section in the toml file. Overrides model if both are provided.',
help='The group of llm settings, e.g. "llama3" for [llm.llama3] section in the toml file. Overrides model if both are provided.',
)
return parser

View File

@ -53,7 +53,7 @@ async def run_agent_controller(
# Logging
logger.info(
f'Running agent {type(agent)}, model {agent.llm.model_name}, with task: "{task_str}"'
f'Running agent {agent.name}, model {agent.llm.model_name}, with task: "{task_str}"'
)
# set up the event stream
@ -163,7 +163,7 @@ if __name__ == '__main__':
raise ValueError(f'Invalid toml file, cannot read {args.llm_config}')
llm = LLM(llm_config=llm_config)
else:
llm = LLM(model=args.model_name)
llm = LLM(llm_config=config.get_llm_config_from_agent(args.agent_cls))
# Create the agent
AgentCls: Type[Agent] = Agent.get_cls(args.agent_cls)

View File

@ -21,8 +21,10 @@ class Singleton(type):
# used by pytest to reset the state of the singleton instances
for instance_type, instance in cls._instances.items():
print('resetting... ', instance_type)
for field in dataclasses.fields(instance_type):
if dataclasses.is_dataclass(field.type):
setattr(instance, field.name, field.type())
for field_info in dataclasses.fields(instance_type):
if dataclasses.is_dataclass(field_info.type):
setattr(instance, field_info.name, field_info.type())
elif field_info.default_factory is not dataclasses.MISSING:
setattr(instance, field_info.name, field_info.default_factory())
else:
setattr(instance, field.name, field.default)
setattr(instance, field_info.name, field_info.default)

View File

@ -1,7 +1,6 @@
from dataclasses import asdict
from datetime import datetime
from opendevin.core.config import config
from opendevin.events import Event, EventSource
from opendevin.events.observation.observation import Observation
@ -70,7 +69,7 @@ def event_to_dict(event: 'Event') -> dict:
return d
def event_to_memory(event: 'Event') -> dict:
def event_to_memory(event: 'Event', max_message_chars: int) -> dict:
d = event_to_dict(event)
d.pop('id', None)
d.pop('cause', None)
@ -79,17 +78,14 @@ def event_to_memory(event: 'Event') -> dict:
if 'extras' in d:
remove_fields(d['extras'], DELETE_FROM_MEMORY_EXTRAS)
if isinstance(event, Observation) and 'content' in d:
d['content'] = truncate_content(d['content'])
d['content'] = truncate_content(d['content'], max_message_chars)
return d
def truncate_content(content: str, max_chars: int = -1) -> str:
def truncate_content(content: str, max_chars: int) -> str:
"""
Truncate the middle of the observation content if it is too long.
"""
if max_chars == -1:
max_chars = config.llm.max_message_chars
if len(content) <= max_chars:
return content

View File

@ -5,9 +5,10 @@ import boto3
from opendevin.core.config import config
from opendevin.core.logger import opendevin_logger as logger
AWS_ACCESS_KEY_ID = config.llm.aws_access_key_id
AWS_SECRET_ACCESS_KEY = config.llm.aws_secret_access_key
AWS_REGION_NAME = config.llm.aws_region_name
# TODO: this assumes AWS-specific configs are under default 'llm' group
AWS_ACCESS_KEY_ID = config.get_llm_config().aws_access_key_id
AWS_SECRET_ACCESS_KEY = config.get_llm_config().aws_secret_access_key
AWS_REGION_NAME = config.get_llm_config().aws_region_name
# It needs to be set as an environment variable, if the variable is configured in the Config file.
if AWS_ACCESS_KEY_ID is not None:

View File

@ -63,6 +63,8 @@ class LLM:
llm_config=None,
metrics=None,
cost_metric_supported=True,
input_cost_per_token=None,
output_cost_per_token=None,
):
"""
Initializes the LLM. If LLMConfig is passed, its values will be the fallback.
@ -84,9 +86,11 @@ class LLM:
llm_temperature (float, optional): The temperature for LLM sampling. Defaults to LLM_TEMPERATURE.
metrics (Metrics, optional): The metrics object to use. Defaults to None.
cost_metric_supported (bool, optional): Whether the cost metric is supported. Defaults to True.
input_cost_per_token (float, optional): The cost per input token.
output_cost_per_token (float, optional): The cost per output token.
"""
if llm_config is None:
llm_config = config.llm
llm_config = config.get_llm_config()
model = model if model is not None else llm_config.model
api_key = api_key if api_key is not None else llm_config.api_key
base_url = base_url if base_url is not None else llm_config.base_url
@ -118,6 +122,16 @@ class LLM:
if max_output_tokens is not None
else llm_config.max_output_tokens
)
input_cost_per_token = (
input_cost_per_token
if input_cost_per_token is not None
else llm_config.input_cost_per_token
)
output_cost_per_token = (
output_cost_per_token
if output_cost_per_token is not None
else llm_config.output_cost_per_token
)
metrics = metrics if metrics is not None else Metrics()
logger.info(f'Initializing LLM with model: {model}')
@ -127,6 +141,8 @@ class LLM:
self.api_version = api_version
self.max_input_tokens = max_input_tokens
self.max_output_tokens = max_output_tokens
self.input_cost_per_token = input_cost_per_token
self.output_cost_per_token = output_cost_per_token
self.llm_timeout = llm_timeout
self.custom_llm_provider = custom_llm_provider
self.metrics = metrics
@ -292,12 +308,12 @@ class LLM:
extra_kwargs = {}
if (
config.llm.input_cost_per_token is not None
and config.llm.output_cost_per_token is not None
self.input_cost_per_token is not None
and self.output_cost_per_token is not None
):
cost_per_token = CostPerToken(
input_cost_per_token=config.llm.input_cost_per_token,
output_cost_per_token=config.llm.output_cost_per_token,
input_cost_per_token=self.input_cost_per_token,
output_cost_per_token=self.output_cost_per_token,
)
logger.info(f'Using custom cost per token: {cost_per_token}')
extra_kwargs['custom_cost_per_token'] = cost_per_token

View File

@ -13,13 +13,14 @@ from tenacity import (
wait_random_exponential,
)
from opendevin.core.config import config
from opendevin.core.config import LLMConfig, config
from opendevin.core.logger import opendevin_logger as logger
from opendevin.core.utils import json
num_retries = config.llm.num_retries
retry_min_wait = config.llm.retry_min_wait
retry_max_wait = config.llm.retry_max_wait
# TODO: this should depend on specific agent setting
num_retries = config.get_llm_config().num_retries
retry_min_wait = config.get_llm_config().retry_min_wait
retry_max_wait = config.get_llm_config().retry_max_wait
# llama-index includes a retry decorator around openai.get_embeddings() function
# it is initialized with hard-coded values and errors
@ -62,7 +63,7 @@ class EmbeddingsLoader:
"""Loader for embedding model initialization."""
@staticmethod
def get_embedding_model(strategy: str):
def get_embedding_model(strategy: str, llm_config: LLMConfig):
supported_ollama_embed_models = [
'llama2',
'mxbai-embed-large',
@ -75,7 +76,7 @@ class EmbeddingsLoader:
return OllamaEmbedding(
model_name=strategy,
base_url=config.llm.embedding_base_url,
base_url=llm_config.embedding_base_url,
ollama_additional_kwargs={'mirostat': 0},
)
elif strategy == 'openai':
@ -83,17 +84,17 @@ class EmbeddingsLoader:
return OpenAIEmbedding(
model='text-embedding-ada-002',
api_key=config.llm.api_key,
api_key=llm_config.api_key,
)
elif strategy == 'azureopenai':
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
return AzureOpenAIEmbedding(
model='text-embedding-ada-002',
deployment_name=config.llm.embedding_deployment_name,
api_key=config.llm.api_key,
azure_endpoint=config.llm.base_url,
api_version=config.llm.api_version,
deployment_name=llm_config.embedding_deployment_name,
api_key=llm_config.api_key,
azure_endpoint=llm_config.base_url,
api_version=llm_config.api_version,
)
elif (strategy is not None) and (strategy.lower() == 'none'):
# TODO: this works but is not elegant enough. The incentive is when
@ -106,24 +107,26 @@ class EmbeddingsLoader:
return HuggingFaceEmbedding(model_name='BAAI/bge-small-en-v1.5')
sema = threading.Semaphore(value=config.agent.memory_max_threads)
class LongTermMemory:
"""
Handles storing information for the agent to access later, using chromadb.
"""
def __init__(self):
def __init__(self, agent_config_name='agent'):
"""
Initialize the chromadb and set up ChromaVectorStore for later use.
"""
db = chromadb.Client(chromadb.Settings(anonymized_telemetry=False))
self.collection = db.get_or_create_collection(name='memories')
vector_store = ChromaVectorStore(chroma_collection=self.collection)
embedding_strategy = config.llm.embedding_model
embed_model = EmbeddingsLoader.get_embedding_model(embedding_strategy)
agent_config = config.get_agent_config(agent_config_name)
llm_config = config.get_llm_config(agent_config.llm_config)
embedding_strategy = llm_config.embedding_model
embed_model = EmbeddingsLoader.get_embedding_model(
embedding_strategy, llm_config
)
self.index = VectorStoreIndex.from_vector_store(vector_store, embed_model)
self.sema = threading.Semaphore(value=agent_config.memory_max_threads)
self.thought_idx = 0
self._add_threads = []
@ -158,7 +161,7 @@ class LongTermMemory:
thread.start() # We add the doc concurrently so we don't have to wait ~500ms for the insert
def _add_doc(self, doc):
with sema:
with self.sema:
self.index.insert(doc)
def search(self, query: str, k: int = 10):

View File

@ -308,18 +308,22 @@ async def get_litellm_models():
)
bedrock_model_list = bedrock.list_foundation_models()
model_list = litellm_model_list_without_bedrock + bedrock_model_list
ollama_base_url = config.llm.ollama_base_url
if config.llm.model.startswith('ollama'):
if not ollama_base_url:
ollama_base_url = config.llm.base_url
if ollama_base_url:
ollama_url = ollama_base_url.strip('/') + '/api/tags'
try:
ollama_models_list = requests.get(ollama_url, timeout=3).json()['models']
for model in ollama_models_list:
model_list.append('ollama/' + model['name'])
except requests.exceptions.RequestException as e:
logger.error(f'Error getting OLLAMA models: {e}', exc_info=True)
for llm_config in config.llms.values():
ollama_base_url = llm_config.ollama_base_url
if llm_config.model.startswith('ollama'):
if not ollama_base_url:
ollama_base_url = llm_config.base_url
if ollama_base_url:
ollama_url = ollama_base_url.strip('/') + '/api/tags'
try:
ollama_models_list = requests.get(ollama_url, timeout=3).json()[
'models'
]
for model in ollama_models_list:
model_list.append('ollama/' + model['name'])
break
except requests.exceptions.RequestException as e:
logger.error(f'Error getting OLLAMA models: {e}', exc_info=True)
return list(sorted(set(model_list)))

View File

@ -86,10 +86,11 @@ class AgentSession:
for key, value in start_event.get('args', {}).items()
if value != ''
} # remove empty values, prevent FE from sending empty strings
agent_cls = args.get(ConfigType.AGENT, config.agent.name)
model = args.get(ConfigType.LLM_MODEL, config.llm.model)
api_key = args.get(ConfigType.LLM_API_KEY, config.llm.api_key)
api_base = config.llm.base_url
agent_cls = args.get(ConfigType.AGENT, config.default_agent)
llm_config = config.get_llm_config_from_agent(agent_cls)
model = args.get(ConfigType.LLM_MODEL, llm_config.model)
api_key = args.get(ConfigType.LLM_API_KEY, llm_config.api_key)
api_base = llm_config.base_url
max_iterations = args.get(ConfigType.MAX_ITERATIONS, config.max_iterations)
logger.info(f'Creating agent {agent_cls} using LLM {model}')

View File

@ -49,7 +49,9 @@ def apply_prompt_and_get_mock_response(test_name: str, messages: str, id: int) -
Note: this function blindly replaces existing prompt file with the given
input without checking the contents.
"""
mock_dir = os.path.join(script_dir, 'mock', os.environ.get('AGENT'), test_name)
mock_dir = os.path.join(
script_dir, 'mock', os.environ.get('DEFAULT_AGENT'), test_name
)
prompt_file_path = os.path.join(mock_dir, f'prompt_{"{0:03}".format(id)}.log')
resp_file_path = os.path.join(mock_dir, f'response_{"{0:03}".format(id)}.log')
try:
@ -82,7 +84,9 @@ def get_mock_response(test_name: str, messages: str, id: int) -> str:
makes test code harder to understand.
"""
prompt = filter_out_symbols(messages)
mock_dir = os.path.join(script_dir, 'mock', os.environ.get('AGENT'), test_name)
mock_dir = os.path.join(
script_dir, 'mock', os.environ.get('DEFAULT_AGENT'), test_name
)
prompt_file_path = os.path.join(mock_dir, f'prompt_{"{0:03}".format(id)}.log')
resp_file_path = os.path.join(mock_dir, f'response_{"{0:03}".format(id)}.log')
# Open the prompt file and compare its contents
@ -130,7 +134,11 @@ def mock_user_response(*args, test_name, **kwargs):
STDIN input for the agent to read.
"""
user_response_file = os.path.join(
script_dir, 'mock', os.environ.get('AGENT'), test_name, 'user_responses.log'
script_dir,
'mock',
os.environ.get('DEFAULT_AGENT'),
test_name,
'user_responses.log',
)
if not os.path.exists(user_response_file):
return ''

View File

@ -78,7 +78,7 @@ run_test() {
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
MAX_ITERATIONS=$MAX_ITERATIONS \
AGENT=$agent \
DEFAULT_AGENT=$agent \
$pytest_cmd 2>&1 | tee $TMP_FILE
# Capture the exit code of pytest
@ -148,7 +148,7 @@ regenerate_without_llm() {
WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
MAX_ITERATIONS=$MAX_ITERATIONS \
FORCE_APPLY_PROMPTS=true \
AGENT=$agent \
DEFAULT_AGENT=$agent \
poetry run pytest -s ./tests/integration/test_agent.py::$test_name
set +x
}

View File

@ -29,16 +29,19 @@ print(f'workspace_mount_path_in_sandbox: {workspace_mount_path_in_sandbox}')
@pytest.mark.skipif(
os.getenv('AGENT') == 'BrowsingAgent',
os.getenv('DEFAULT_AGENT') == 'BrowsingAgent',
reason='BrowsingAgent is a specialized agent',
)
@pytest.mark.skipif(
(os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent')
(
os.getenv('DEFAULT_AGENT') == 'CodeActAgent'
or os.getenv('DEFAULT_AGENT') == 'CodeActSWEAgent'
)
and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh',
reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
)
@pytest.mark.skipif(
os.getenv('AGENT') == 'ManagerAgent',
os.getenv('DEFAULT_AGENT') == 'ManagerAgent',
reason='Manager agent is not capable of finishing this in reasonable steps yet',
)
def test_write_simple_script():
@ -46,7 +49,7 @@ def test_write_simple_script():
args = parse_arguments()
# Create the agent
agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
agent = Agent.get_cls(args.agent_cls)(llm=LLM())
final_state: State | None = asyncio.run(
run_agent_controller(agent, task, exit_on_message=True)
@ -68,16 +71,20 @@ def test_write_simple_script():
@pytest.mark.skipif(
os.getenv('AGENT') == 'BrowsingAgent',
os.getenv('DEFAULT_AGENT') == 'BrowsingAgent',
reason='BrowsingAgent is a specialized agent',
)
@pytest.mark.skipif(
(os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent')
(
os.getenv('DEFAULT_AGENT') == 'CodeActAgent'
or os.getenv('DEFAULT_AGENT') == 'CodeActSWEAgent'
)
and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh',
reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
)
@pytest.mark.skipif(
os.getenv('AGENT') == 'MonologueAgent' or os.getenv('AGENT') == 'PlannerAgent',
os.getenv('DEFAULT_AGENT') == 'MonologueAgent'
or os.getenv('DEFAULT_AGENT') == 'PlannerAgent',
reason='We only keep basic tests for MonologueAgent and PlannerAgent',
)
@pytest.mark.skipif(
@ -96,7 +103,7 @@ def test_edits():
shutil.copy(os.path.join(source_dir, file), dest_file)
# Create the agent
agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
agent = Agent.get_cls(args.agent_cls)(llm=LLM())
# Execute the task
task = 'Fix typos in bad.txt. Do not ask me for confirmation at any point.'
@ -118,7 +125,8 @@ Enjoy!
@pytest.mark.skipif(
os.getenv('AGENT') != 'CodeActAgent' and os.getenv('AGENT') != 'CodeActSWEAgent',
os.getenv('DEFAULT_AGENT') != 'CodeActAgent'
and os.getenv('DEFAULT_AGENT') != 'CodeActSWEAgent',
reason='currently only CodeActAgent and CodeActSWEAgent have IPython (Jupyter) execution by default',
)
@pytest.mark.skipif(
@ -129,7 +137,7 @@ def test_ipython():
args = parse_arguments()
# Create the agent
agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
agent = Agent.get_cls(args.agent_cls)(llm=LLM())
# Execute the task
task = "Use Jupyter IPython to write a text file containing 'hello world' to '/workspace/test.txt'. Do not ask me for confirmation at any point."
@ -152,7 +160,7 @@ def test_ipython():
@pytest.mark.skipif(
os.getenv('AGENT') != 'ManagerAgent',
os.getenv('DEFAULT_AGENT') != 'ManagerAgent',
reason='Currently, only ManagerAgent supports task rejection',
)
@pytest.mark.skipif(
@ -163,7 +171,7 @@ def test_simple_task_rejection():
args = parse_arguments()
# Create the agent
agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
agent = Agent.get_cls(args.agent_cls)(llm=LLM())
# Give an impossible task to do: cannot write a commit message because
# the workspace is not a git repo
@ -175,7 +183,8 @@ def test_simple_task_rejection():
@pytest.mark.skipif(
os.getenv('AGENT') != 'CodeActAgent' and os.getenv('AGENT') != 'CodeActSWEAgent',
os.getenv('DEFAULT_AGENT') != 'CodeActAgent'
and os.getenv('DEFAULT_AGENT') != 'CodeActSWEAgent',
reason='currently only CodeActAgent and CodeActSWEAgent have IPython (Jupyter) execution by default',
)
@pytest.mark.skipif(
@ -186,7 +195,7 @@ def test_ipython_module():
args = parse_arguments()
# Create the agent
agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
agent = Agent.get_cls(args.agent_cls)(llm=LLM())
# Execute the task
task = "Install and import pymsgbox==1.0.9 and print it's version in /workspace/test.txt. Do not ask me for confirmation at any point."
@ -210,11 +219,15 @@ def test_ipython_module():
@pytest.mark.skipif(
os.getenv('AGENT') != 'BrowsingAgent' and os.getenv('AGENT') != 'CodeActAgent',
os.getenv('DEFAULT_AGENT') != 'BrowsingAgent'
and os.getenv('DEFAULT_AGENT') != 'CodeActAgent',
reason='currently only BrowsingAgent and CodeActAgent are capable of searching the internet',
)
@pytest.mark.skipif(
(os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent')
(
os.getenv('DEFAULT_AGENT') == 'CodeActAgent'
or os.getenv('DEFAULT_AGENT') == 'CodeActSWEAgent'
)
and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh',
reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
)
@ -222,7 +235,7 @@ def test_browse_internet(http_server):
args = parse_arguments()
# Create the agent
agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
agent = Agent.get_cls(args.agent_cls)(llm=LLM())
# Execute the task
task = 'Browse localhost:8000, and tell me the ultimate answer to life. Do not ask me for confirmation at any point.'

View File

@ -1,3 +1,4 @@
from opendevin.core.config import config
from opendevin.events.action import (
Action,
AddTaskAction,
@ -28,7 +29,9 @@ def serialization_deserialization(original_action_dict, cls):
action_instance, cls
), f'The action instance should be an instance of {cls.__name__}.'
serialized_action_dict = event_to_dict(action_instance)
serialized_action_memory = event_to_memory(action_instance)
serialized_action_memory = event_to_memory(
action_instance, config.get_llm_config().max_message_chars
)
serialized_action_dict.pop('message')
assert (
serialized_action_dict == original_action_dict

View File

@ -10,7 +10,7 @@ def test_help_message(capsys):
captured = capsys.readouterr()
expected_help_message = """
usage: pytest [-h] [-d DIRECTORY] [-t TASK] [-f FILE] [-c AGENT_CLS]
[-m MODEL_NAME] [-i MAX_ITERATIONS] [-b MAX_BUDGET_PER_TASK]
[-i MAX_ITERATIONS] [-b MAX_BUDGET_PER_TASK]
[--eval-output-dir EVAL_OUTPUT_DIR]
[--eval-n-limit EVAL_N_LIMIT]
[--eval-num-workers EVAL_NUM_WORKERS] [--eval-note EVAL_NOTE]
@ -26,9 +26,7 @@ options:
-f FILE, --file FILE Path to a file containing the task. Overrides -t if
both are provided.
-c AGENT_CLS, --agent-cls AGENT_CLS
The agent class to use
-m MODEL_NAME, --model-name MODEL_NAME
The (litellm) model name to use
Name of the default agent to use
-i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS
The maximum number of iterations to run the agent
-b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK
@ -43,8 +41,9 @@ options:
--eval-note EVAL_NOTE
The note to add to the evaluation directory
-l LLM_CONFIG, --llm-config LLM_CONFIG
The group of llm settings, e.g. a [llama3] section in
the toml file. Overrides model if both are provided.
The group of llm settings, e.g. "llama3" for
[llm.llama3] section in the toml file. Overrides model
if both are provided.
"""
actual_lines = captured.out.strip().split('\n')

View File

@ -8,6 +8,7 @@ from opendevin.core.config import (
LLMConfig,
UndefinedString,
finalize_config,
get_llm_config_arg,
load_from_env,
load_from_toml,
)
@ -50,7 +51,7 @@ def test_compat_env_to_config(monkeypatch, setup_env):
monkeypatch.setenv('LLM_MODEL', 'gpt-4o')
monkeypatch.setenv('AGENT_MEMORY_MAX_THREADS', '4')
monkeypatch.setenv('AGENT_MEMORY_ENABLED', 'True')
monkeypatch.setenv('AGENT', 'CodeActAgent')
monkeypatch.setenv('DEFAULT_AGENT', 'CodeActAgent')
monkeypatch.setenv('SANDBOX_TYPE', 'local')
monkeypatch.setenv('SANDBOX_TIMEOUT', '10')
@ -58,14 +59,14 @@ def test_compat_env_to_config(monkeypatch, setup_env):
load_from_env(config, os.environ)
assert config.workspace_base == '/repos/opendevin/workspace'
assert isinstance(config.llm, LLMConfig)
assert config.llm.api_key == 'sk-proj-rgMV0...'
assert config.llm.model == 'gpt-4o'
assert isinstance(config.agent, AgentConfig)
assert isinstance(config.agent.memory_max_threads, int)
assert config.agent.memory_max_threads == 4
assert config.agent.memory_enabled is True
assert config.agent.name == 'CodeActAgent'
assert isinstance(config.get_llm_config(), LLMConfig)
assert config.get_llm_config().api_key == 'sk-proj-rgMV0...'
assert config.get_llm_config().model == 'gpt-4o'
assert isinstance(config.get_agent_config(), AgentConfig)
assert isinstance(config.get_agent_config().memory_max_threads, int)
assert config.get_agent_config().memory_max_threads == 4
assert config.get_agent_config().memory_enabled is True
assert config.default_agent == 'CodeActAgent'
assert config.sandbox.box_type == 'local'
assert config.sandbox.timeout == 10
@ -74,15 +75,15 @@ def test_load_from_old_style_env(monkeypatch, default_config):
# Test loading configuration from old-style environment variables using monkeypatch
monkeypatch.setenv('LLM_API_KEY', 'test-api-key')
monkeypatch.setenv('AGENT_MEMORY_ENABLED', 'True')
monkeypatch.setenv('AGENT_NAME', 'PlannerAgent')
monkeypatch.setenv('DEFAULT_AGENT', 'PlannerAgent')
monkeypatch.setenv('WORKSPACE_BASE', '/opt/files/workspace')
monkeypatch.setenv('SANDBOX_CONTAINER_IMAGE', 'custom_image')
load_from_env(default_config, os.environ)
assert default_config.llm.api_key == 'test-api-key'
assert default_config.agent.memory_enabled is True
assert default_config.agent.name == 'PlannerAgent'
assert default_config.get_llm_config().api_key == 'test-api-key'
assert default_config.get_agent_config().memory_enabled is True
assert default_config.default_agent == 'PlannerAgent'
assert default_config.workspace_base == '/opt/files/workspace'
assert (
default_config.workspace_mount_path is UndefinedString.UNDEFINED
@ -102,25 +103,52 @@ def test_load_from_new_style_toml(default_config, temp_toml_file):
model = "test-model"
api_key = "toml-api-key"
[llm.cheap]
model = "some-cheap-model"
api_key = "cheap-model-api-key"
[agent]
name = "TestAgent"
memory_enabled = true
[agent.BrowsingAgent]
llm_config = "cheap"
memory_enabled = false
[sandbox]
timeout = 1
[core]
workspace_base = "/opt/files2/workspace"
default_agent = "TestAgent"
sandbox_type = "local"
"""
)
load_from_toml(default_config, temp_toml_file)
assert default_config.llm.model == 'test-model'
assert default_config.llm.api_key == 'toml-api-key'
assert default_config.agent.name == 'TestAgent'
assert default_config.agent.memory_enabled is True
# default llm & agent configs
assert default_config.default_agent == 'TestAgent'
assert default_config.get_llm_config().model == 'test-model'
assert default_config.get_llm_config().api_key == 'toml-api-key'
assert default_config.get_agent_config().memory_enabled is True
# undefined agent config inherits default ones
assert (
default_config.get_llm_config_from_agent('CodeActAgent')
== default_config.get_llm_config()
)
assert default_config.get_agent_config('CodeActAgent').memory_enabled is True
# defined agent config overrides default ones
assert default_config.get_llm_config_from_agent(
'BrowsingAgent'
) == default_config.get_llm_config('cheap')
assert (
default_config.get_llm_config_from_agent('BrowsingAgent').model
== 'some-cheap-model'
)
assert default_config.get_agent_config('BrowsingAgent').memory_enabled is False
assert default_config.workspace_base == '/opt/files2/workspace'
assert default_config.sandbox.box_type == 'local'
assert default_config.sandbox.timeout == 1
@ -152,7 +180,6 @@ def test_compat_load_sandbox_from_toml(default_config, temp_toml_file):
model = "test-model"
[agent]
name = "TestAgent"
memory_enabled = true
[core]
@ -161,14 +188,16 @@ sandbox_type = "local"
sandbox_timeout = 500
sandbox_container_image = "node:14"
sandbox_user_id = 1001
default_agent = "TestAgent"
"""
)
load_from_toml(default_config, temp_toml_file)
assert default_config.llm.model == 'test-model'
assert default_config.agent.name == 'TestAgent'
assert default_config.agent.memory_enabled is True
assert default_config.get_llm_config().model == 'test-model'
assert default_config.get_llm_config_from_agent().model == 'test-model'
assert default_config.default_agent == 'TestAgent'
assert default_config.get_agent_config().memory_enabled is True
assert default_config.workspace_base == '/opt/files2/workspace'
assert default_config.sandbox.box_type == 'local'
assert default_config.sandbox.timeout == 500
@ -220,8 +249,10 @@ sandbox_user_id = 1001
load_from_env(default_config, os.environ)
assert os.environ.get('LLM_MODEL') is None
assert default_config.llm.model == 'test-model'
assert default_config.llm.api_key == 'env-api-key'
assert default_config.get_llm_config().model == 'test-model'
assert default_config.get_llm_config('llm').model == 'test-model'
assert default_config.get_llm_config_from_agent().model == 'test-model'
assert default_config.get_llm_config().api_key == 'env-api-key'
# after we set workspace_base to 'UNDEFINED' in the environment,
# workspace_base should be set to that
@ -271,7 +302,7 @@ user_id = 1001
assert default_config.workspace_mount_path is UndefinedString.UNDEFINED
# before load_from_env, values are set to the values from the toml file
assert default_config.llm.api_key == 'toml-api-key'
assert default_config.get_llm_config().api_key == 'toml-api-key'
assert default_config.sandbox.box_type == 'e2b'
assert default_config.sandbox.timeout == 500
assert default_config.sandbox.user_id == 1001
@ -280,8 +311,8 @@ user_id = 1001
# values from env override values from toml
assert os.environ.get('LLM_MODEL') is None
assert default_config.llm.model == 'test-model'
assert default_config.llm.api_key == 'env-api-key'
assert default_config.get_llm_config().model == 'test-model'
assert default_config.get_llm_config().api_key == 'env-api-key'
assert default_config.sandbox.box_type == 'local'
assert default_config.sandbox.timeout == 1000
@ -315,7 +346,7 @@ user_id = 1001
load_from_env(default_config, os.environ)
finalize_config(default_config)
assert default_config.llm.model == 'test-model'
assert default_config.get_llm_config().model == 'test-model'
assert default_config.sandbox.box_type == 'local'
assert default_config.sandbox.timeout == 1
assert default_config.sandbox.container_image == 'custom_image'
@ -328,16 +359,19 @@ def test_defaults_dict_after_updates(default_config):
assert (
initial_defaults['workspace_mount_path']['default'] is UndefinedString.UNDEFINED
)
assert initial_defaults['llm']['api_key']['default'] is None
assert initial_defaults['agent']['name']['default'] == 'CodeActAgent'
assert initial_defaults['default_agent']['default'] == 'CodeActAgent'
updated_config = AppConfig()
updated_config.llm.api_key = 'updated-api-key'
updated_config.agent.name = 'MonologueAgent'
updated_config.get_llm_config().api_key = 'updated-api-key'
updated_config.get_llm_config('llm').api_key = 'updated-api-key'
updated_config.get_llm_config_from_agent('agent').api_key = 'updated-api-key'
updated_config.get_llm_config_from_agent(
'MonologueAgent'
).api_key = 'updated-api-key'
updated_config.default_agent = 'MonologueAgent'
defaults_after_updates = updated_config.defaults_dict
assert defaults_after_updates['llm']['api_key']['default'] is None
assert defaults_after_updates['agent']['name']['default'] == 'CodeActAgent'
assert defaults_after_updates['default_agent']['default'] == 'CodeActAgent'
assert (
defaults_after_updates['workspace_mount_path']['default']
is UndefinedString.UNDEFINED
@ -363,10 +397,10 @@ def test_invalid_toml_format(monkeypatch, temp_toml_file, default_config):
load_from_env(default_config, os.environ)
default_config.ssh_password = None # prevent leak
default_config.jwt_secret = None # prevent leak
assert default_config.llm.model == 'gpt-5-turbo-1106'
assert default_config.llm.custom_llm_provider is None
if default_config.llm.api_key is not None: # prevent leak
pytest.fail('LLM API key should be empty.')
for llm in default_config.llms.values():
llm.api_key = None # prevent leak
assert default_config.get_llm_config().model == 'gpt-5-turbo-1106'
assert default_config.get_llm_config().custom_llm_provider is None
assert default_config.workspace_mount_path == '/home/user/project'
@ -413,9 +447,12 @@ def test_workspace_mount_rewrite(default_config, monkeypatch):
def test_embedding_base_url_default(default_config):
default_config.llm.base_url = 'https://api.exampleapi.com'
default_config.get_llm_config().base_url = 'https://api.exampleapi.com'
finalize_config(default_config)
assert default_config.llm.embedding_base_url == 'https://api.exampleapi.com'
assert (
default_config.get_llm_config().embedding_base_url
== 'https://api.exampleapi.com'
)
def test_cache_dir_creation(default_config, tmpdir):
@ -461,9 +498,7 @@ def test_api_keys_repr_str():
# Test AgentConfig
# No attrs in AgentConfig have 'key' or 'token' in their name
agent_config = AgentConfig(
name='my_agent', memory_enabled=True, memory_max_threads=4
)
agent_config = AgentConfig(memory_enabled=True, memory_max_threads=4)
for attr_name in dir(AgentConfig):
if not attr_name.startswith('__'):
assert (
@ -475,8 +510,8 @@ def test_api_keys_repr_str():
# Test AppConfig
app_config = AppConfig(
llm=llm_config,
agent=agent_config,
llms={'llm': llm_config},
agents={'agent': agent_config},
e2b_api_key='my_e2b_api_key',
jwt_secret='my_jwt_secret',
ssh_password='my_ssh_password',
@ -519,3 +554,28 @@ max_budget_per_task = 4.0
assert config.max_iterations == 100
assert config.max_budget_per_task == 4.0
def test_get_llm_config_arg(temp_toml_file):
temp_toml = """
[core]
max_iterations = 100
max_budget_per_task = 4.0
[llm.gpt3]
model="gpt-3.5-turbo"
api_key="redacted"
embedding_model="openai"
[llm.gpt4o]
model="gpt-4o"
api_key="redacted"
embedding_model="openai"
"""
with open(temp_toml_file, 'w') as f:
f.write(temp_toml)
llm_config = get_llm_config_arg('gpt3', temp_toml_file)
assert llm_config.model == 'gpt-3.5-turbo'
assert llm_config.embedding_model == 'openai'

View File

@ -76,9 +76,6 @@ def test_llm_config_attributes_masking(test_handler):
assert 'AKIAIOSFODNN7EXAMPLE' not in log_output
assert 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' not in log_output
# reset the LLMConfig
LLMConfig.reset()
def test_app_config_attributes_masking(test_handler):
logger, stream = test_handler

View File

@ -1,3 +1,4 @@
from opendevin.core.config import config
from opendevin.events.observation import (
CmdOutputObservation,
Observation,
@ -18,7 +19,9 @@ def serialization_deserialization(original_observation_dict, cls):
observation_instance, cls
), 'The observation instance should be an instance of CmdOutputObservation.'
serialized_observation_dict = event_to_dict(observation_instance)
serialized_observation_memory = event_to_memory(observation_instance)
serialized_observation_memory = event_to_memory(
observation_instance, config.get_llm_config().max_message_chars
)
assert (
serialized_observation_dict == original_observation_dict
), 'The serialized observation should match the original observation dict.'