Add integration test with dummy agent (#1316)

* first pass at dummy

* add assertion to dummy

* add dummy workflow

* beef up tests

* try and fix huggingface issue

* remove newlines

* rename test

* move to pytest

* Revert " move to pytest"

This reverts commit de8121c400028399451de94ebd2681eedc6dee5b.

* fix lint

* delint

* Update .github/workflows/dummy-agent-test.yml

Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>

---------

Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>
This commit is contained in:
Robert Brennan 2024-04-30 12:52:00 -04:00 committed by GitHub
parent eb1c3d8790
commit 0cda5f64af
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 145 additions and 19 deletions

21
.github/workflows/dummy-agent-test.yml vendored Normal file
View File

@ -0,0 +1,21 @@
# End-to-end smoke test: drives opendevin/main.py with the scripted DummyAgent.
name: Run e2e test with dummy agent
on: [push]
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Set up environment
        # Installs Poetry, then the project dependencies minus the `evaluation` group.
        # The wget pre-places the pooling config of BAAI/bge-small-en-v1.5 under the
        # /tmp/llama_index model directory.
        # NOTE(review): presumably a workaround for a Hugging Face download problem
        # in CI - confirm, and note that the snapshot hash is hard-coded.
        run: |
          curl -sSL https://install.python-poetry.org | python3 -
          poetry install --without evaluation
          wget https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json -P /tmp/llama_index/models--BAAI--bge-small-en-v1.5/snapshots/5c38ec7c405ec4b44b94cc5a9bb96e735b38267a/1_Pooling/
      - name: Run tests
        # `ollama/not-a-model` is a placeholder model name.
        # NOTE(review): presumably safe because DummyAgent replays fixed actions
        # without calling the LLM - confirm against agenthub/dummy_agent/agent.py.
        run: |
          poetry run python opendevin/main.py -t "do a flip" -m ollama/not-a-model -d ./workspace/ -c DummyAgent

View File

@ -8,17 +8,20 @@ from .micro.registry import all_microagents
load_dotenv()
# Import agents after environment variables are loaded
from . import ( # noqa: E402
SWE_agent,
codeact_agent,
delegator_agent,
dummy_agent,
monologue_agent,
planner_agent,
)
__all__ = ['monologue_agent', 'codeact_agent',
'planner_agent', 'SWE_agent', 'delegator_agent']
'planner_agent', 'SWE_agent',
'delegator_agent',
'dummy_agent']
for agent in all_microagents.values():
name = agent['name']

View File

@ -0,0 +1,5 @@
from opendevin.agent import Agent
from .agent import DummyAgent
# Register the class under the name 'DummyAgent'.
# NOTE(review): presumably this is the name an agent is selected by
# (e.g. a `-c DummyAgent` command-line option) - confirm against main.py.
Agent.register('DummyAgent', DummyAgent)

View File

@ -1,23 +1,118 @@
"""Module for a Dummy agent."""
import time
from typing import List, TypedDict
from typing import List
from opendevin.action import Action
from opendevin.action.base import NullAction
from opendevin.action import (
Action,
AddTaskAction,
AgentFinishAction,
AgentRecallAction,
AgentThinkAction,
BrowseURLAction,
CmdRunAction,
FileReadAction,
FileWriteAction,
ModifyTaskAction,
)
from opendevin.agent import Agent
from opendevin.controller.agent_controller import AgentController
from opendevin.observation.base import NullObservation, Observation
from opendevin.llm.llm import LLM
from opendevin.observation import (
AgentRecallObservation,
CmdOutputObservation,
FileReadObservation,
FileWriteObservation,
NullObservation,
Observation,
)
from opendevin.state import State
"""
FIXME: There are a few problems this surfaced
* FileWrites seem to add an unintended newline at the end of the file
* command_id is sometimes a number, sometimes a string
* Why isn't the output of the background command split between two steps?
* Browser not working
"""
# One scripted step: the action the agent emits, plus the observations that
# are expected to be in the history once that action has been handled.
ActionObs = TypedDict('ActionObs', {'action': Action, 'observations': List[Observation]})
# Shell command used for the background-command step; it prints two lines
# with a short sleep in between.
BACKGROUND_CMD = 'echo "This is in the background" && sleep .1 && echo "This too"'
class DummyAgent(Agent):
    """
    The DummyAgent is used for e2e testing. It just sends the same set of actions deterministically,
    without making any LLM calls.

    Each entry of ``self.steps`` pairs the action returned at that iteration
    with the observations expected afterwards. ``step`` checks the previous
    entry's expectations against the newest entries of ``state.history``
    before handing out the next action.
    """

    def __init__(self, llm: LLM):
        """Build the fixed script of actions and their expected observations.

        Args:
            llm: Forwarded to ``Agent.__init__``; this class does not call it.
        """
        super().__init__(llm)
        self.steps: List[ActionObs] = [{
            'action': AddTaskAction(parent='0', goal='check the current directory'),
            'observations': [NullObservation('')],
        }, {
            'action': AddTaskAction(parent='0.0', goal='run ls'),
            'observations': [NullObservation('')],
        }, {
            'action': ModifyTaskAction(id='0.0', state='in_progress'),
            'observations': [NullObservation('')],
        }, {
            'action': AgentThinkAction(thought='Time to get started!'),
            'observations': [NullObservation('')],
        }, {
            'action': CmdRunAction(command='echo "foo"'),
            'observations': [CmdOutputObservation('foo', command_id=-1, command='echo "foo"')],
        }, {
            'action': FileWriteAction(content='echo "Hello, World!"', path='hello.sh'),
            'observations': [FileWriteObservation('', path='hello.sh')],
        }, {
            'action': FileReadAction(path='hello.sh'),
            'observations': [FileReadObservation('echo "Hello, World!"\n', path='hello.sh')],
        }, {
            'action': CmdRunAction(command='bash hello.sh'),
            'observations': [CmdOutputObservation('Hello, World!', command_id=-1, command='bash hello.sh')],
        }, {
            'action': CmdRunAction(command=BACKGROUND_CMD, background=True),
            'observations': [
                CmdOutputObservation('Background command started. To stop it, send a `kill` action with id 42', command_id='42', command=BACKGROUND_CMD),  # type: ignore[arg-type]
                CmdOutputObservation('This is in the background\nThis too\n', command_id='42', command=BACKGROUND_CMD),  # type: ignore[arg-type]
            ]
        }, {
            'action': AgentRecallAction(query='who am I?'),
            'observations': [
                AgentRecallObservation('', memories=['I am a computer.']),
                # CmdOutputObservation('This too\n', command_id='42', command=BACKGROUND_CMD),
            ],
        }, {
            'action': BrowseURLAction(url='https://google.com'),
            'observations': [
                # BrowserOutputObservation('<html></html>', url='https://google.com', screenshot=""),
            ],
        }, {
            'action': AgentFinishAction(),
            'observations': [],
        }]

    def step(self, state: State) -> Action:
        """Verify the previous step's observations, then return the next action.

        Args:
            state: Supplies ``iteration`` (the index into the script) and
                ``history`` (entries whose second element is an observation).

        Returns:
            The action scripted for ``state.iteration``.

        Raises:
            AssertionError: If the newest history entries do not match what
                the previous step expected, or there are too few of them.
            IndexError: If ``state.iteration`` is past the end of the script.
        """
        # NOTE(review): presumably this pause gives the background command
        # time to produce its output before the next comparison - confirm.
        time.sleep(0.1)
        if state.iteration > 0:
            previous = self.steps[state.iteration - 1]
            self._check_observations(state, previous['observations'])
        return self.steps[state.iteration]['action']

    def _check_observations(self, state: State, expected_observations: List[Observation]) -> None:
        """Compare the tail of ``state.history`` with ``expected_observations``."""
        if not expected_observations:
            return
        if len(state.history) < len(expected_observations):
            # Without this guard the negative start index would wrap around
            # and silently compare against unrelated history entries.
            raise AssertionError(
                f'Expected {len(expected_observations)} observations, '
                f'but history only has {len(state.history)} entries'
            )
        recent_entries = state.history[-len(expected_observations):]
        for entry, expected in zip(recent_entries, expected_observations):
            hist_obs = self._mask_run_specific(entry[1].to_dict())
            expected_obs = self._mask_run_specific(expected.to_dict())
            if hist_obs != expected_obs:
                print('\nactual', hist_obs)
                print('\nexpect', expected_obs)
                # Raised explicitly (rather than via `assert`) so the check
                # still runs when Python is started with -O.
                raise AssertionError(f'Expected observation {expected_obs}, got {hist_obs}')

    @staticmethod
    def _mask_run_specific(obs_dict: dict) -> dict:
        """Drop fields that are not compared for observations with a real command id.

        Any ``command_id`` other than ``-1`` is removed and ``content`` is
        blanked; the scripted 'Background command started' message embeds the
        id in its content, so neither field is compared for such entries.
        """
        extras = obs_dict['extras']
        if extras.get('command_id', -1) != -1:
            del extras['command_id']
            obs_dict['content'] = ''
        return obs_dict

    def search_memory(self, query: str) -> List[str]:
        """Return the single canned memory, regardless of ``query``."""
        return ['I am a computer.']

View File

@ -3,13 +3,12 @@ sidebar_label: agent
title: agenthub.dummy_agent.agent
---
Module for a Dummy agent.
## DummyAgent Objects
```python
class DummyAgent(Agent)
```
A dummy agent that does nothing but can be used in testing.
The DummyAgent is used for e2e testing. It just sends the same set of actions deterministically,
without making any LLM calls.

View File

@ -22,7 +22,7 @@ class AgentRecallAction(ExecutableAction):
async def run(self, controller: 'AgentController') -> AgentRecallObservation:
    """Look up memories for ``self.query`` through the controller's agent.

    Args:
        controller: Its ``agent.search_memory`` is called with the query.

    Returns:
        An ``AgentRecallObservation`` with empty ``content`` and the
        returned memories.
    """
    # `content` was previously passed twice (the old 'Recalling memories...'
    # value and the new empty one), which is a syntax error; only the empty
    # value is kept.
    return AgentRecallObservation(
        content='',
        memories=controller.agent.search_memory(self.query),
    )

View File

@ -122,7 +122,10 @@ class DockerExecBox(Sandbox):
self.container.exec_run(
f'kill -9 {pid}', workdir=SANDBOX_WORKSPACE_DIR)
return -1, f'Command: "{cmd}" timed out'
return exit_code, logs.decode('utf-8').strip()
logs_out = logs.decode('utf-8')
if logs_out.endswith('\n'):
logs_out = logs_out[:-1]
return exit_code, logs_out
def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
# mkdir -p sandbox_dest if it doesn't exist