mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Add integration test with dummy agent (#1316)
* first pass at dummy
* add assertion to dummy
* add dummy workflow
* beef up tests
* try and fix huggingface issue
* remove newlines
* rename test
* move to pytest
* Revert " move to pytest"
  This reverts commit de8121c400028399451de94ebd2681eedc6dee5b.
* fix lint
* delint
* Update .github/workflows/dummy-agent-test.yml
  Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>
---------
Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>
This commit is contained in:
parent
eb1c3d8790
commit
0cda5f64af
21
.github/workflows/dummy-agent-test.yml
vendored
Normal file
21
.github/workflows/dummy-agent-test.yml
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
name: Run e2e test with dummy agent
|
||||
|
||||
on: [push]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
- name: Set up environment
|
||||
run: |
|
||||
curl -sSL https://install.python-poetry.org | python3 -
|
||||
poetry install --without evaluation
|
||||
wget https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json -P /tmp/llama_index/models--BAAI--bge-small-en-v1.5/snapshots/5c38ec7c405ec4b44b94cc5a9bb96e735b38267a/1_Pooling/
|
||||
- name: Run tests
|
||||
run: |
|
||||
poetry run python opendevin/main.py -t "do a flip" -m ollama/not-a-model -d ./workspace/ -c DummyAgent
|
||||
@ -8,17 +8,20 @@ from .micro.registry import all_microagents
|
||||
load_dotenv()
|
||||
|
||||
|
||||
# Import agents after environment variables are loaded
|
||||
|
||||
from . import ( # noqa: E402
|
||||
SWE_agent,
|
||||
codeact_agent,
|
||||
delegator_agent,
|
||||
dummy_agent,
|
||||
monologue_agent,
|
||||
planner_agent,
|
||||
)
|
||||
|
||||
__all__ = ['monologue_agent', 'codeact_agent',
|
||||
'planner_agent', 'SWE_agent', 'delegator_agent']
|
||||
'planner_agent', 'SWE_agent',
|
||||
'delegator_agent',
|
||||
'dummy_agent']
|
||||
|
||||
for agent in all_microagents.values():
|
||||
name = agent['name']
|
||||
|
||||
@ -0,0 +1,5 @@
|
||||
from opendevin.agent import Agent
|
||||
|
||||
from .agent import DummyAgent
|
||||
|
||||
Agent.register('DummyAgent', DummyAgent)
|
||||
@ -1,23 +1,118 @@
|
||||
"""Module for a Dummy agent."""
|
||||
import time
|
||||
from typing import List, TypedDict
|
||||
|
||||
from typing import List
|
||||
|
||||
from opendevin.action import Action
|
||||
from opendevin.action.base import NullAction
|
||||
from opendevin.action import (
|
||||
Action,
|
||||
AddTaskAction,
|
||||
AgentFinishAction,
|
||||
AgentRecallAction,
|
||||
AgentThinkAction,
|
||||
BrowseURLAction,
|
||||
CmdRunAction,
|
||||
FileReadAction,
|
||||
FileWriteAction,
|
||||
ModifyTaskAction,
|
||||
)
|
||||
from opendevin.agent import Agent
|
||||
from opendevin.controller.agent_controller import AgentController
|
||||
from opendevin.observation.base import NullObservation, Observation
|
||||
from opendevin.llm.llm import LLM
|
||||
from opendevin.observation import (
|
||||
AgentRecallObservation,
|
||||
CmdOutputObservation,
|
||||
FileReadObservation,
|
||||
FileWriteObservation,
|
||||
NullObservation,
|
||||
Observation,
|
||||
)
|
||||
from opendevin.state import State
|
||||
|
||||
"""
|
||||
FIXME: There are a few problems this surfaced
|
||||
* FileWrites seem to add an unintended newline at the end of the file
|
||||
* command_id is sometimes a number, sometimes a string
|
||||
* Why isn't the output of the background command split between two steps?
|
||||
* Browser not working
|
||||
"""
|
||||
|
||||
ActionObs = TypedDict('ActionObs', {'action': Action, 'observations': List[Observation]})
|
||||
|
||||
BACKGROUND_CMD = 'echo "This is in the background" && sleep .1 && echo "This too"'
|
||||
|
||||
|
||||
class DummyAgent(Agent):
|
||||
"""A dummy agent that does nothing but can be used in testing."""
|
||||
"""
|
||||
The DummyAgent is used for e2e testing. It just sends the same set of actions deterministically,
|
||||
without making any LLM calls.
|
||||
"""
|
||||
|
||||
async def run(self, controller: AgentController) -> Observation:
|
||||
return NullObservation('')
|
||||
def __init__(self, llm: LLM):
|
||||
super().__init__(llm)
|
||||
self.steps: List[ActionObs] = [{
|
||||
'action': AddTaskAction(parent='0', goal='check the current directory'),
|
||||
'observations': [NullObservation('')],
|
||||
}, {
|
||||
'action': AddTaskAction(parent='0.0', goal='run ls'),
|
||||
'observations': [NullObservation('')],
|
||||
}, {
|
||||
'action': ModifyTaskAction(id='0.0', state='in_progress'),
|
||||
'observations': [NullObservation('')],
|
||||
}, {
|
||||
'action': AgentThinkAction(thought='Time to get started!'),
|
||||
'observations': [NullObservation('')],
|
||||
}, {
|
||||
'action': CmdRunAction(command='echo "foo"'),
|
||||
'observations': [CmdOutputObservation('foo', command_id=-1, command='echo "foo"')],
|
||||
}, {
|
||||
'action': FileWriteAction(content='echo "Hello, World!"', path='hello.sh'),
|
||||
'observations': [FileWriteObservation('', path='hello.sh')],
|
||||
}, {
|
||||
'action': FileReadAction(path='hello.sh'),
|
||||
'observations': [FileReadObservation('echo "Hello, World!"\n', path='hello.sh')],
|
||||
}, {
|
||||
'action': CmdRunAction(command='bash hello.sh'),
|
||||
'observations': [CmdOutputObservation('Hello, World!', command_id=-1, command='bash hello.sh')],
|
||||
}, {
|
||||
'action': CmdRunAction(command=BACKGROUND_CMD, background=True),
|
||||
'observations': [
|
||||
CmdOutputObservation('Background command started. To stop it, send a `kill` action with id 42', command_id='42', command=BACKGROUND_CMD), # type: ignore[arg-type]
|
||||
CmdOutputObservation('This is in the background\nThis too\n', command_id='42', command=BACKGROUND_CMD), # type: ignore[arg-type]
|
||||
]
|
||||
}, {
|
||||
'action': AgentRecallAction(query='who am I?'),
|
||||
'observations': [
|
||||
AgentRecallObservation('', memories=['I am a computer.']),
|
||||
# CmdOutputObservation('This too\n', command_id='42', command=BACKGROUND_CMD),
|
||||
],
|
||||
}, {
|
||||
'action': BrowseURLAction(url='https://google.com'),
|
||||
'observations': [
|
||||
# BrowserOutputObservation('<html></html>', url='https://google.com', screenshot=""),
|
||||
],
|
||||
}, {
|
||||
'action': AgentFinishAction(),
|
||||
'observations': [],
|
||||
}]
|
||||
|
||||
def step(self, state: State) -> Action:
|
||||
return NullAction('')
|
||||
time.sleep(0.1)
|
||||
if state.iteration > 0:
|
||||
prev_step = self.steps[state.iteration - 1]
|
||||
if 'observations' in prev_step:
|
||||
expected_observations = prev_step['observations']
|
||||
hist_start = len(state.history) - len(expected_observations)
|
||||
for i in range(len(expected_observations)):
|
||||
hist_obs = state.history[hist_start + i][1].to_dict()
|
||||
expected_obs = expected_observations[i].to_dict()
|
||||
if 'command_id' in hist_obs['extras'] and hist_obs['extras']['command_id'] != -1:
|
||||
del hist_obs['extras']['command_id']
|
||||
hist_obs['content'] = ''
|
||||
if 'command_id' in expected_obs['extras'] and expected_obs['extras']['command_id'] != -1:
|
||||
del expected_obs['extras']['command_id']
|
||||
expected_obs['content'] = ''
|
||||
if hist_obs != expected_obs:
|
||||
print('\nactual', hist_obs)
|
||||
print('\nexpect', expected_obs)
|
||||
assert hist_obs == expected_obs, f'Expected observation {expected_obs}, got {hist_obs}'
|
||||
return self.steps[state.iteration]['action']
|
||||
|
||||
def search_memory(self, query: str) -> List[str]:
|
||||
return []
|
||||
return ['I am a computer.']
|
||||
|
||||
@ -3,13 +3,12 @@ sidebar_label: agent
|
||||
title: agenthub.dummy_agent.agent
|
||||
---
|
||||
|
||||
Module for a Dummy agent.
|
||||
|
||||
## DummyAgent Objects
|
||||
|
||||
```python
|
||||
class DummyAgent(Agent)
|
||||
```
|
||||
|
||||
A dummy agent that does nothing but can be used in testing.
|
||||
The DummyAgent is used for e2e testing. It just sends the same set of actions deterministically,
|
||||
without making any LLM calls.
|
||||
|
||||
|
||||
@ -22,7 +22,7 @@ class AgentRecallAction(ExecutableAction):
|
||||
|
||||
async def run(self, controller: 'AgentController') -> AgentRecallObservation:
|
||||
return AgentRecallObservation(
|
||||
content='Recalling memories...',
|
||||
content='',
|
||||
memories=controller.agent.search_memory(self.query),
|
||||
)
|
||||
|
||||
|
||||
@ -122,7 +122,10 @@ class DockerExecBox(Sandbox):
|
||||
self.container.exec_run(
|
||||
f'kill -9 {pid}', workdir=SANDBOX_WORKSPACE_DIR)
|
||||
return -1, f'Command: "{cmd}" timed out'
|
||||
return exit_code, logs.decode('utf-8').strip()
|
||||
logs_out = logs.decode('utf-8')
|
||||
if logs_out.endswith('\n'):
|
||||
logs_out = logs_out[:-1]
|
||||
return exit_code, logs_out
|
||||
|
||||
def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
|
||||
# mkdir -p sandbox_dest if it doesn't exist
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user