From 0cda5f64af6596d5167aea7b60eeca3d05d12eba Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Tue, 30 Apr 2024 12:52:00 -0400 Subject: [PATCH] Add integration test with dummy agent (#1316) * first pass at dummy * add assertion to dummy * add dummy workflow * beef up tests * try and fix huggingface issue * remove newlines * rename test * move to pytest * Revert " move to pytest" This reverts commit de8121c400028399451de94ebd2681eedc6dee5b. * fix lint * delint * Update .github/workflows/dummy-agent-test.yml Co-authored-by: Boxuan Li --------- Co-authored-by: Boxuan Li --- .github/workflows/dummy-agent-test.yml | 21 ++++ agenthub/__init__.py | 7 +- agenthub/dummy_agent/__init__.py | 5 + agenthub/dummy_agent/agent.py | 119 ++++++++++++++++-- .../python/agenthub/dummy_agent/agent.md | 5 +- opendevin/action/agent.py | 2 +- opendevin/sandbox/docker/exec_box.py | 5 +- 7 files changed, 145 insertions(+), 19 deletions(-) create mode 100644 .github/workflows/dummy-agent-test.yml diff --git a/.github/workflows/dummy-agent-test.yml b/.github/workflows/dummy-agent-test.yml new file mode 100644 index 0000000000..0a853a7b4b --- /dev/null +++ b/.github/workflows/dummy-agent-test.yml @@ -0,0 +1,21 @@ +name: Run e2e test with dummy agent + +on: [push] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Set up environment + run: | + curl -sSL https://install.python-poetry.org | python3 - + poetry install --without evaluation + wget https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json -P /tmp/llama_index/models--BAAI--bge-small-en-v1.5/snapshots/5c38ec7c405ec4b44b94cc5a9bb96e735b38267a/1_Pooling/ + - name: Run tests + run: | + poetry run python opendevin/main.py -t "do a flip" -m ollama/not-a-model -d ./workspace/ -c DummyAgent diff --git a/agenthub/__init__.py b/agenthub/__init__.py index 74b457793d..8c7c4027e0 100644 --- a/agenthub/__init__.py +++ b/agenthub/__init__.py @@ -8,17 +8,20 @@ from .micro.registry import all_microagents load_dotenv() -# Import agents after environment variables are loaded + from . import ( # noqa: E402 SWE_agent, codeact_agent, delegator_agent, + dummy_agent, monologue_agent, planner_agent, ) __all__ = ['monologue_agent', 'codeact_agent', - 'planner_agent', 'SWE_agent', 'delegator_agent'] + 'planner_agent', 'SWE_agent', + 'delegator_agent', + 'dummy_agent'] for agent in all_microagents.values(): name = agent['name'] diff --git a/agenthub/dummy_agent/__init__.py b/agenthub/dummy_agent/__init__.py index e69de29bb2..1c8698ccd1 100644 --- a/agenthub/dummy_agent/__init__.py +++ b/agenthub/dummy_agent/__init__.py @@ -0,0 +1,5 @@ +from opendevin.agent import Agent + +from .agent import DummyAgent + +Agent.register('DummyAgent', DummyAgent) diff --git a/agenthub/dummy_agent/agent.py b/agenthub/dummy_agent/agent.py index c0908ddeb3..64fe3bfe91 100644 --- a/agenthub/dummy_agent/agent.py +++ b/agenthub/dummy_agent/agent.py @@ -1,23 +1,118 @@ -"""Module for a Dummy agent.""" +import time +from typing import List, TypedDict -from typing import List - -from opendevin.action import Action -from opendevin.action.base import NullAction +from opendevin.action import ( + Action, + AddTaskAction, + AgentFinishAction, + AgentRecallAction, + AgentThinkAction, + BrowseURLAction, + CmdRunAction, + FileReadAction, + FileWriteAction, + ModifyTaskAction, +) from opendevin.agent import Agent -from opendevin.controller.agent_controller import AgentController -from opendevin.observation.base import NullObservation, Observation +from opendevin.llm.llm import LLM +from opendevin.observation import ( + AgentRecallObservation, + CmdOutputObservation, + FileReadObservation, + FileWriteObservation, + NullObservation, + Observation, +) from opendevin.state import State +""" +FIXME: There are a few problems this surfaced +* FileWrites seem to add an unintended newline at the end of the file +* command_id is sometimes a number, sometimes a string +* Why isn't the output of the background command split between two steps? +* Browser not working +""" + +ActionObs = TypedDict('ActionObs', {'action': Action, 'observations': List[Observation]}) + +BACKGROUND_CMD = 'echo "This is in the background" && sleep .1 && echo "This too"' + class DummyAgent(Agent): - """A dummy agent that does nothing but can be used in testing.""" + """ + The DummyAgent is used for e2e testing. It just sends the same set of actions deterministically, + without making any LLM calls. + """ - async def run(self, controller: AgentController) -> Observation: - return NullObservation('') + def __init__(self, llm: LLM): + super().__init__(llm) + self.steps: List[ActionObs] = [{ + 'action': AddTaskAction(parent='0', goal='check the current directory'), + 'observations': [NullObservation('')], + }, { + 'action': AddTaskAction(parent='0.0', goal='run ls'), + 'observations': [NullObservation('')], + }, { + 'action': ModifyTaskAction(id='0.0', state='in_progress'), + 'observations': [NullObservation('')], + }, { + 'action': AgentThinkAction(thought='Time to get started!'), + 'observations': [NullObservation('')], + }, { + 'action': CmdRunAction(command='echo "foo"'), + 'observations': [CmdOutputObservation('foo', command_id=-1, command='echo "foo"')], + }, { + 'action': FileWriteAction(content='echo "Hello, World!"', path='hello.sh'), + 'observations': [FileWriteObservation('', path='hello.sh')], + }, { + 'action': FileReadAction(path='hello.sh'), + 'observations': [FileReadObservation('echo "Hello, World!"\n', path='hello.sh')], + }, { + 'action': CmdRunAction(command='bash hello.sh'), + 'observations': [CmdOutputObservation('Hello, World!', command_id=-1, command='bash hello.sh')], + }, { + 'action': CmdRunAction(command=BACKGROUND_CMD, background=True), + 'observations': [ + CmdOutputObservation('Background command started. To stop it, send a `kill` action with id 42', command_id='42', command=BACKGROUND_CMD), # type: ignore[arg-type] + CmdOutputObservation('This is in the background\nThis too\n', command_id='42', command=BACKGROUND_CMD), # type: ignore[arg-type] + ] + }, { + 'action': AgentRecallAction(query='who am I?'), + 'observations': [ + AgentRecallObservation('', memories=['I am a computer.']), + # CmdOutputObservation('This too\n', command_id='42', command=BACKGROUND_CMD), + ], + }, { + 'action': BrowseURLAction(url='https://google.com'), + 'observations': [ + # BrowserOutputObservation('', url='https://google.com', screenshot=""), + ], + }, { + 'action': AgentFinishAction(), + 'observations': [], + }] def step(self, state: State) -> Action: - return NullAction('') + time.sleep(0.1) + if state.iteration > 0: + prev_step = self.steps[state.iteration - 1] + if 'observations' in prev_step: + expected_observations = prev_step['observations'] + hist_start = len(state.history) - len(expected_observations) + for i in range(len(expected_observations)): + hist_obs = state.history[hist_start + i][1].to_dict() + expected_obs = expected_observations[i].to_dict() + if 'command_id' in hist_obs['extras'] and hist_obs['extras']['command_id'] != -1: + del hist_obs['extras']['command_id'] + hist_obs['content'] = '' + if 'command_id' in expected_obs['extras'] and expected_obs['extras']['command_id'] != -1: + del expected_obs['extras']['command_id'] + expected_obs['content'] = '' + if hist_obs != expected_obs: + print('\nactual', hist_obs) + print('\nexpect', expected_obs) + assert hist_obs == expected_obs, f'Expected observation {expected_obs}, got {hist_obs}' + return self.steps[state.iteration]['action'] def search_memory(self, query: str) -> List[str]: - return [] + return ['I am a computer.'] diff --git a/docs/modules/python/agenthub/dummy_agent/agent.md b/docs/modules/python/agenthub/dummy_agent/agent.md index c783e7061f..e2738fb8b6 100644 --- a/docs/modules/python/agenthub/dummy_agent/agent.md +++ b/docs/modules/python/agenthub/dummy_agent/agent.md @@ -3,13 +3,12 @@ sidebar_label: agent title: agenthub.dummy_agent.agent --- -Module for a Dummy agent. - ## DummyAgent Objects ```python class DummyAgent(Agent) ``` -A dummy agent that does nothing but can be used in testing. +The DummyAgent is used for e2e testing. It just sends the same set of actions deterministically, +without making any LLM calls. diff --git a/opendevin/action/agent.py b/opendevin/action/agent.py index 11461f6f82..4efff3db8d 100644 --- a/opendevin/action/agent.py +++ b/opendevin/action/agent.py @@ -22,7 +22,7 @@ class AgentRecallAction(ExecutableAction): async def run(self, controller: 'AgentController') -> AgentRecallObservation: return AgentRecallObservation( - content='Recalling memories...', + content='', memories=controller.agent.search_memory(self.query), ) diff --git a/opendevin/sandbox/docker/exec_box.py b/opendevin/sandbox/docker/exec_box.py index c53c7d76db..c5cd1f5afc 100644 --- a/opendevin/sandbox/docker/exec_box.py +++ b/opendevin/sandbox/docker/exec_box.py @@ -122,7 +122,10 @@ class DockerExecBox(Sandbox): self.container.exec_run( f'kill -9 {pid}', workdir=SANDBOX_WORKSPACE_DIR) return -1, f'Command: "{cmd}" timed out' - return exit_code, logs.decode('utf-8').strip() + logs_out = logs.decode('utf-8') + if logs_out.endswith('\n'): + logs_out = logs_out[:-1] + return exit_code, logs_out def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False): # mkdir -p sandbox_dest if it doesn't exist