From 0cda5f64af6596d5167aea7b60eeca3d05d12eba Mon Sep 17 00:00:00 2001
From: Robert Brennan <accounts@rbren.io>
Date: Tue, 30 Apr 2024 12:52:00 -0400
Subject: [PATCH] Add integration test with dummy agent (#1316)

* first pass at dummy

* add assertion to dummy

* add dummy workflow

* beef up tests

* try and fix huggingface issue

* remove newlines

* rename test

* move to pytest

* Revert " move to pytest"

This reverts commit de8121c400028399451de94ebd2681eedc6dee5b.

* fix lint

* delint

* Update .github/workflows/dummy-agent-test.yml

Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>

---------

Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>
---
 .github/workflows/dummy-agent-test.yml        |  21 ++++
 agenthub/__init__.py                          |   7 +-
 agenthub/dummy_agent/__init__.py              |   5 +
 agenthub/dummy_agent/agent.py                 | 119 ++++++++++++++++--
 .../python/agenthub/dummy_agent/agent.md      |   5 +-
 opendevin/action/agent.py                     |   2 +-
 opendevin/sandbox/docker/exec_box.py          |   5 +-
 7 files changed, 145 insertions(+), 19 deletions(-)
 create mode 100644 .github/workflows/dummy-agent-test.yml

diff --git a/.github/workflows/dummy-agent-test.yml b/.github/workflows/dummy-agent-test.yml
new file mode 100644
index 0000000000..0a853a7b4b
--- /dev/null
+++ b/.github/workflows/dummy-agent-test.yml
@@ -0,0 +1,21 @@
+name: Run e2e test with dummy agent
+
+on: [push]  # no branch or path filter is given, so this runs on every push
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Set up environment  # installs Poetry and deps; the wget pre-seeds a bge-small-en-v1.5 config file under /tmp/llama_index
+        run: |
+          curl -sSL https://install.python-poetry.org | python3 -
+          poetry install --without evaluation
+          wget https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json -P /tmp/llama_index/models--BAAI--bge-small-en-v1.5/snapshots/5c38ec7c405ec4b44b94cc5a9bb96e735b38267a/1_Pooling/
+      - name: Run tests  # DummyAgent makes no LLM calls, so the model name is presumably just a placeholder
+        run: |
+          poetry run python opendevin/main.py -t "do a flip" -m ollama/not-a-model -d ./workspace/ -c DummyAgent
diff --git a/agenthub/__init__.py b/agenthub/__init__.py
index 74b457793d..8c7c4027e0 100644
--- a/agenthub/__init__.py
+++ b/agenthub/__init__.py
@@ -8,17 +8,20 @@ from .micro.registry import all_microagents
 load_dotenv()
 
 
-# Import agents after environment variables are loaded
+
 from . import (  # noqa: E402
     SWE_agent,
     codeact_agent,
     delegator_agent,
+    dummy_agent,
     monologue_agent,
     planner_agent,
 )
 
 __all__ = ['monologue_agent', 'codeact_agent',
-           'planner_agent', 'SWE_agent', 'delegator_agent']
+           'planner_agent', 'SWE_agent',
+           'delegator_agent',
+           'dummy_agent']
 
 for agent in all_microagents.values():
     name = agent['name']
diff --git a/agenthub/dummy_agent/__init__.py b/agenthub/dummy_agent/__init__.py
index e69de29bb2..1c8698ccd1 100644
--- a/agenthub/dummy_agent/__init__.py
+++ b/agenthub/dummy_agent/__init__.py
@@ -0,0 +1,5 @@
+from opendevin.agent import Agent
+
+from .agent import DummyAgent
+
+Agent.register('DummyAgent', DummyAgent)
diff --git a/agenthub/dummy_agent/agent.py b/agenthub/dummy_agent/agent.py
index c0908ddeb3..64fe3bfe91 100644
--- a/agenthub/dummy_agent/agent.py
+++ b/agenthub/dummy_agent/agent.py
@@ -1,23 +1,118 @@
-"""Module for a Dummy agent."""
+import time
+from typing import List, TypedDict
 
-from typing import List
-
-from opendevin.action import Action
-from opendevin.action.base import NullAction
+from opendevin.action import (
+    Action,
+    AddTaskAction,
+    AgentFinishAction,
+    AgentRecallAction,
+    AgentThinkAction,
+    BrowseURLAction,
+    CmdRunAction,
+    FileReadAction,
+    FileWriteAction,
+    ModifyTaskAction,
+)
 from opendevin.agent import Agent
-from opendevin.controller.agent_controller import AgentController
-from opendevin.observation.base import NullObservation, Observation
+from opendevin.llm.llm import LLM
+from opendevin.observation import (
+    AgentRecallObservation,
+    CmdOutputObservation,
+    FileReadObservation,
+    FileWriteObservation,
+    NullObservation,
+    Observation,
+)
 from opendevin.state import State
 
+"""
+FIXME: Writing this agent surfaced a few problems:
+* FileWrites seem to add an unintended newline at the end of the file
+* command_id is sometimes a number, sometimes a string
+* Why isn't the output of the background command split between two steps?
+* Browser not working
+"""
+
+ActionObs = TypedDict('ActionObs', {'action': Action, 'observations': List[Observation]})
+
+BACKGROUND_CMD = 'echo "This is in the background" && sleep .1 && echo "This too"'
+
 
 class DummyAgent(Agent):
-    """A dummy agent that does nothing but can be used in testing."""
+    """
+    The DummyAgent is used for e2e testing. It just sends the same set of actions deterministically,
+    without making any LLM calls.
+    """
 
-    async def run(self, controller: AgentController) -> Observation:
-        return NullObservation('')
+    def __init__(self, llm: LLM):
+        super().__init__(llm)
+        self.steps: List[ActionObs] = [{  # scripted run: step() returns each 'action' in order and checks its 'observations' on the next call
+            'action': AddTaskAction(parent='0', goal='check the current directory'),
+            'observations': [NullObservation('')],
+        }, {
+            'action': AddTaskAction(parent='0.0', goal='run ls'),
+            'observations': [NullObservation('')],
+        }, {
+            'action': ModifyTaskAction(id='0.0', state='in_progress'),
+            'observations': [NullObservation('')],
+        }, {
+            'action': AgentThinkAction(thought='Time to get started!'),
+            'observations': [NullObservation('')],
+        }, {
+            'action': CmdRunAction(command='echo "foo"'),
+            'observations': [CmdOutputObservation('foo', command_id=-1, command='echo "foo"')],
+        }, {
+            'action': FileWriteAction(content='echo "Hello, World!"', path='hello.sh'),
+            'observations': [FileWriteObservation('', path='hello.sh')],
+        }, {
+            'action': FileReadAction(path='hello.sh'),
+            'observations': [FileReadObservation('echo "Hello, World!"\n', path='hello.sh')],  # trailing '\n' is not in the written content (see the FileWrites FIXME)
+        }, {
+            'action': CmdRunAction(command='bash hello.sh'),
+            'observations': [CmdOutputObservation('Hello, World!', command_id=-1, command='bash hello.sh')],
+        }, {
+            'action': CmdRunAction(command=BACKGROUND_CMD, background=True),
+            'observations': [  # command_id is not -1 here, so step() compares these with command_id and content ignored
+                CmdOutputObservation('Background command started. To stop it, send a `kill` action with id 42', command_id='42', command=BACKGROUND_CMD),  # type: ignore[arg-type]
+                CmdOutputObservation('This is in the background\nThis too\n', command_id='42', command=BACKGROUND_CMD),  # type: ignore[arg-type]
+            ]
+        }, {
+            'action': AgentRecallAction(query='who am I?'),
+            'observations': [
+                AgentRecallObservation('', memories=['I am a computer.']),
+                # CmdOutputObservation('This too\n', command_id='42', command=BACKGROUND_CMD),
+            ],
+        }, {
+            'action': BrowseURLAction(url='https://google.com'),
+            'observations': [  # empty list: step() checks nothing for this action (see the browser FIXME)
+                # BrowserOutputObservation('<html></html>', url='https://google.com', screenshot=""),
+            ],
+        }, {
+            'action': AgentFinishAction(),
+            'observations': [],
+        }]
 
     def step(self, state: State) -> Action:
-        return NullAction('')
+        """Check the previous step's observations, then return the scripted action for this iteration."""
+        time.sleep(0.1)
+        if state.iteration > 0:
+            expected_observations = self.steps[state.iteration - 1]['observations']
+            hist_start = len(state.history) - len(expected_observations)
+            # A negative start would silently wrap around and compare unrelated history entries.
+            if hist_start < 0:
+                raise AssertionError(f'Expected {len(expected_observations)} observations, history has {len(state.history)}')
+            for i, expected in enumerate(expected_observations):
+                hist_obs = state.history[hist_start + i][1].to_dict()
+                expected_obs = expected.to_dict()
+                # Observations with a command_id other than -1 are compared with the id and content ignored.
+                for obs in (hist_obs, expected_obs):
+                    if obs['extras'].get('command_id', -1) != -1:
+                        del obs['extras']['command_id']
+                        obs['content'] = ''
+                # Raise explicitly: a bare `assert` is stripped under `python -O`, which would disable this check.
+                if hist_obs != expected_obs:
+                    raise AssertionError(f'Expected observation {expected_obs}, got {hist_obs}')
+        return self.steps[state.iteration]['action']
 
     def search_memory(self, query: str) -> List[str]:
-        return []
+        return ['I am a computer.']  # must equal the memories expected after the AgentRecallAction step
diff --git a/docs/modules/python/agenthub/dummy_agent/agent.md b/docs/modules/python/agenthub/dummy_agent/agent.md
index c783e7061f..e2738fb8b6 100644
--- a/docs/modules/python/agenthub/dummy_agent/agent.md
+++ b/docs/modules/python/agenthub/dummy_agent/agent.md
@@ -3,13 +3,12 @@ sidebar_label: agent
 title: agenthub.dummy_agent.agent
 ---
 
-Module for a Dummy agent.
-
 ## DummyAgent Objects
 
 ```python
 class DummyAgent(Agent)
 ```
 
-A dummy agent that does nothing but can be used in testing.
+The DummyAgent is used for e2e testing. It just sends the same set of actions deterministically,
+without making any LLM calls.
 
diff --git a/opendevin/action/agent.py b/opendevin/action/agent.py
index 11461f6f82..4efff3db8d 100644
--- a/opendevin/action/agent.py
+++ b/opendevin/action/agent.py
@@ -22,7 +22,7 @@ class AgentRecallAction(ExecutableAction):
 
     async def run(self, controller: 'AgentController') -> AgentRecallObservation:
         return AgentRecallObservation(
-            content='Recalling memories...',
+            content='',
             memories=controller.agent.search_memory(self.query),
         )
 
diff --git a/opendevin/sandbox/docker/exec_box.py b/opendevin/sandbox/docker/exec_box.py
index c53c7d76db..c5cd1f5afc 100644
--- a/opendevin/sandbox/docker/exec_box.py
+++ b/opendevin/sandbox/docker/exec_box.py
@@ -122,7 +122,10 @@ class DockerExecBox(Sandbox):
                     self.container.exec_run(
                         f'kill -9 {pid}', workdir=SANDBOX_WORKSPACE_DIR)
                 return -1, f'Command: "{cmd}" timed out'
-        return exit_code, logs.decode('utf-8').strip()
+        logs_out = logs.decode('utf-8')
+        if logs_out.endswith('\n'):
+            logs_out = logs_out[:-1]
+        return exit_code, logs_out
 
     def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
         # mkdir -p sandbox_dest if it doesn't exist