Microagents and Delegation (#1238)

* basic microagent structure * start on jinja * add instructions parser * add action instructions * add history instructions * fix a few issues * fix a few issues * fix issues * fix agent encoding * fix up anon class * prompt to fix errors * less debug info when errors happen * add another traceback * add output to finish * fix math prompt * fix pg prompt * fix up json prompt * fix math prompt * fix math prompt * fix repo prompt * fix up repo explorer * update lock * revert changes to agent_controller * refactor microagent registration a bit * create delegate action * delegation working * add finish action to manager * fix tests * rename microagents registry * rename fn * logspam * add metadata to manager agent * fix message * move repo_explorer * add delegator agent * rename agent_definition * fix up input-output plumbing * fix tests * Update agenthub/micro/math_agent/agent.yaml Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk> * Update agenthub/delegator_agent/prompt.py Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk> * Update agenthub/delegator_agent/prompt.py Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk> * remove prompt.py * fix lint * Update agenthub/micro/postgres_agent/agent.yaml Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk> * Update agenthub/micro/postgres_agent/agent.yaml Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk> * fix error --------- Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-12-26 05:48:36 +08:00 · 2024-04-24 17:46:14 -04:00 · 2024-04-24 17:46:14 -04:00 · 1e95fa435d
commit 1e95fa435d
parent 8828d9836d
45 changed files with 1185 additions and 625 deletions
--- a/agenthub/init.py
+++ b/agenthub/init.py
@ -1,12 +1,28 @@
+from .micro.registry import all_microagents
+from .micro.agent import MicroAgent
+from opendevin.agent import Agent

 from dotenv import load_dotenv
 load_dotenv()

+
 # Import agents after environment variables are loaded
 from . import monologue_agent  # noqa: E402
-from . import codeact_agent  # noqa: E402
-from . import planner_agent  # noqa: E402
-from . import SWE_agent      # noqa: E402
+from . import codeact_agent    # noqa: E402
+from . import planner_agent    # noqa: E402
+from . import SWE_agent        # noqa: E402
+from . import delegator_agent  # noqa: E402

 __all__ = ['monologue_agent', 'codeact_agent',
-           'planner_agent', 'SWE_agent']
+           'planner_agent', 'SWE_agent', 'delegator_agent']
+
+# Turn every microagent definition found in the registry into its own
+# MicroAgent subclass and register it under the agent's name.
+for agent in all_microagents.values():
+    name = agent['name']
+    prompt = agent['prompt']
+
+    # Build the subclass dynamically: the class is named after the agent and
+    # carries the prompt text and the full definition as class attributes.
+    anon_class = type(name, (MicroAgent,), {
+        'prompt': prompt,
+        'agent_definition': agent,
+    })
+
+    # Register the generated class with Agent under the agent's name.
+    Agent.register(name, anon_class)
--- a/agenthub/delegator_agent/init.py
+++ b/agenthub/delegator_agent/init.py
@ -0,0 +1,4 @@
+from opendevin.agent import Agent
+from .agent import DelegatorAgent
+
+Agent.register('DelegatorAgent', DelegatorAgent)
--- a/agenthub/delegator_agent/agent.py
+++ b/agenthub/delegator_agent/agent.py
@ -0,0 +1,73 @@
+from typing import List
+
+from opendevin.agent import Agent
+from opendevin.action import AgentFinishAction, AgentDelegateAction
+from opendevin.observation import AgentDelegateObservation
+from opendevin.llm.llm import LLM
+from opendevin.state import State
+from opendevin.action import Action
+
+
+class DelegatorAgent(Agent):
+    """
+    Drives a fixed study -> code -> verify pipeline by delegating to other agents.
+
+    `step` does not query the LLM. Each call looks at which delegate ran last and
+    hands the task to the next one: StudyRepoForTaskAgent first, then Coder, then
+    Verifier. If the Verifier does not report `completed`, the task goes back to
+    the Coder together with the Verifier's summary.
+    """
+    # Name of the pipeline stage that was delegated to most recently:
+    # '' (nothing yet), 'study', 'coder' or 'verifier'.
+    current_delegate: str = ''
+
+    def __init__(self, llm: LLM):
+        """
+        Initialize the Delegator Agent with an LLM
+
+        Parameters:
+        - llm (LLM): The llm to be used by this agent
+        """
+        super().__init__(llm)
+
+    def step(self, state: State) -> Action:
+        """
+        Picks the next delegate based on the stage that just finished.
+
+        Parameters:
+        - state (State): The current state; its plan supplies the task and the last
+          history entry supplies the outputs of the previous delegate
+
+        Returns:
+        - AgentDelegateAction: To start the next stage of the pipeline
+        - AgentFinishAction: If the Verifier reported a truthy `completed` output
+
+        Raises:
+        - Exception: If a delegate has already run but the last observation is not
+          an AgentDelegateObservation, or if the stage name is unknown
+        """
+        if self.current_delegate == '':
+            self.current_delegate = 'study'
+            return AgentDelegateAction(agent='StudyRepoForTaskAgent', inputs={
+                'task': state.plan.main_goal
+            })
+
+        # An empty history is reported the same way as a non-delegate observation
+        # instead of surfacing as a bare IndexError.
+        last_observation = state.history[-1][1] if state.history else None
+        if not isinstance(last_observation, AgentDelegateObservation):
+            raise Exception('Last observation is not an AgentDelegateObservation')
+        outputs = last_observation.outputs
+
+        if self.current_delegate == 'study':
+            self.current_delegate = 'coder'
+            return AgentDelegateAction(agent='Coder', inputs={
+                'task': state.plan.main_goal,
+                # The delegate's outputs come from an LLM, so `summary` may be
+                # missing; fall back to an empty summary rather than crashing.
+                'summary': outputs.get('summary', ''),
+            })
+        elif self.current_delegate == 'coder':
+            self.current_delegate = 'verifier'
+            return AgentDelegateAction(agent='Verifier', inputs={
+                'task': state.plan.main_goal,
+            })
+        elif self.current_delegate == 'verifier':
+            if outputs.get('completed'):
+                return AgentFinishAction()
+            # Verification failed: send the task back to the Coder with the
+            # Verifier's explanation, if it gave one.
+            self.current_delegate = 'coder'
+            return AgentDelegateAction(agent='Coder', inputs={
+                'task': state.plan.main_goal,
+                'summary': outputs.get('summary', ''),
+            })
+        else:
+            raise Exception('Invalid delegate state')
+
+    def search_memory(self, query: str) -> List[str]:
+        """Returns an empty list; this agent keeps no searchable memory."""
+        return []
--- a/agenthub/micro/_instructions/actions/add_task.md
+++ b/agenthub/micro/_instructions/actions/add_task.md
@ -0,0 +1,4 @@
+* `add_task` - add a task to your plan. Arguments:
+  * `parent` - the ID of the parent task
+  * `goal` - the goal of the task
+  * `subtasks` - a list of subtasks, each of which is a map with a `goal` key.
--- a/agenthub/micro/_instructions/actions/browse.md
+++ b/agenthub/micro/_instructions/actions/browse.md
@ -0,0 +1,2 @@
+* `browse` - opens a web page. Arguments:
+  * `url` - the URL to open
--- a/agenthub/micro/_instructions/actions/delegate.md
+++ b/agenthub/micro/_instructions/actions/delegate.md
@ -0,0 +1,3 @@
+* `delegate` - send a task to another agent from the list provided. Arguments:
+  * `agent` - the agent to which the task is delegated. MUST match a name in the list of agents provided.
+  * `inputs` - a dictionary of input parameters to the agent, as specified in the list
--- a/agenthub/micro/_instructions/actions/finish.md
+++ b/agenthub/micro/_instructions/actions/finish.md
@ -0,0 +1,2 @@
+* `finish` - if ALL of your tasks and subtasks have been verified or abandoned, and you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working. Arguments:
+  * `outputs` - a dictionary representing the outputs of your task, if any
--- a/agenthub/micro/_instructions/actions/kill.md
+++ b/agenthub/micro/_instructions/actions/kill.md
@ -0,0 +1,2 @@
+* `kill` - kills a background command. Arguments:
+  * `id` - the ID of the background command to kill
--- a/agenthub/micro/_instructions/actions/modify_task.md
+++ b/agenthub/micro/_instructions/actions/modify_task.md
@ -0,0 +1,3 @@
+* `modify_task` - change the state of a task. Arguments:
+  * `id` - the ID of the task to modify
+  * `state` - set to 'in_progress' to start the task, 'completed' to finish it, 'verified' to assert that it was successful, 'abandoned' to give up on it permanently, or `open` to stop working on it for now.
--- a/agenthub/micro/_instructions/actions/read.md
+++ b/agenthub/micro/_instructions/actions/read.md
@ -0,0 +1,2 @@
+* `read` - reads the content of a file. Arguments:
+  * `path` - the path of the file to read
--- a/agenthub/micro/_instructions/actions/run.md
+++ b/agenthub/micro/_instructions/actions/run.md
@ -0,0 +1,3 @@
+* `run` - runs a command on the command line in a Linux shell. Arguments:
+  * `command` - the command to run
+  * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
--- a/agenthub/micro/_instructions/actions/think.md
+++ b/agenthub/micro/_instructions/actions/think.md
@ -0,0 +1,2 @@
+* `think` - make a plan, set a goal, or record your thoughts. Arguments:
+  * `thought` - the thought to record
--- a/agenthub/micro/_instructions/actions/write.md
+++ b/agenthub/micro/_instructions/actions/write.md
@ -0,0 +1,3 @@
+* `write` - writes the content to a file. Arguments:
+  * `path` - the path of the file to write
+  * `content` - the content to write to the file
--- a/agenthub/micro/_instructions/format/action.md
+++ b/agenthub/micro/_instructions/format/action.md
@ -0,0 +1,5 @@
+Your response MUST be in JSON format. It must be an object, and it must contain two fields:
+* `action`, which is one of the actions specified here
+* `args`, which is a map of key-value pairs, specifying the arguments for that action
+
+You MUST NOT include any other text besides the JSON response
--- a/agenthub/micro/_instructions/history_truncated.md
+++ b/agenthub/micro/_instructions/history_truncated.md
@ -0,0 +1,4 @@
+Here is a recent history of actions you've taken in service of this plan,
+as well as observations you've made. This only includes the MOST RECENT
+actions and observations--more may have happened before that.
+They are time-ordered, with your most recent action at the bottom.
--- a/agenthub/micro/agent.py
+++ b/agenthub/micro/agent.py
@ -0,0 +1,82 @@
+import json
+from typing import List, Dict
+
+from jinja2 import Environment, BaseLoader
+
+from opendevin.agent import Agent
+from opendevin.llm.llm import LLM
+from opendevin.state import State
+from opendevin.action import Action, action_from_dict
+from opendevin.exceptions import LLMOutputError
+
+from .instructions import instructions
+from .registry import all_microagents
+
+
+def parse_response(orig_response: str) -> Action:
+    """
+    Extracts the JSON object from an LLM reply and converts it to an Action
+
+    Everything before the first `{` and after the last `}` is discarded before
+    the remaining text is decoded.
+
+    Parameters:
+    - orig_response (str): The raw text returned by the LLM
+
+    Returns:
+    - Action: The result of passing the decoded object to action_from_dict
+
+    Raises:
+    - LLMOutputError: If the extracted text is not valid JSON
+    """
+    first_brace = orig_response.find('{')
+    past_last_brace = orig_response.rfind('}') + 1
+    candidate = orig_response[first_brace:past_last_brace]
+    try:
+        decoded = json.loads(candidate)
+    except json.JSONDecodeError as e:
+        raise LLMOutputError(
+            'Invalid JSON in response. Please make sure the response is a valid JSON object'
+        ) from e
+    return action_from_dict(decoded)
+
+
+def my_encoder(obj):
+    """
+    Fallback encoder for objects that json cannot serialize natively
+
+    Parameters:
+    - obj (Object): The object to convert
+
+    Returns:
+    - dict: The result of obj.to_dict() when the object has that method
+    - None: When the object has no to_dict attribute
+    """
+    try:
+        converter = obj.to_dict
+    except AttributeError:
+        # No to_dict available: nothing to convert with.
+        return None
+    return converter()
+
+
+def to_json(obj, **kwargs):
+    """
+    Serialize an object to a JSON string
+
+    Objects that json cannot handle natively are passed through my_encoder.
+    Extra keyword arguments are forwarded to json.dumps.
+    """
+    serialized = json.dumps(obj, default=my_encoder, **kwargs)
+    return serialized
+
+
+class MicroAgent(Agent):
+    """
+    An agent whose behaviour is defined by a Jinja prompt template and a
+    definition dict, both supplied as class attributes.
+    """
+    # Jinja template source rendered on every step.
+    prompt = ''
+    # Agent definition; it must contain at least a `name` key.
+    agent_definition: Dict = {}
+
+    def __init__(self, llm: LLM):
+        """
+        Reads the agent definition and compiles the prompt template
+
+        Parameters:
+        - llm (LLM): The llm to be used by this agent
+
+        Raises:
+        - ValueError: If the agent definition has no `name`
+        """
+        super().__init__(llm)
+        definition = self.agent_definition
+        if 'name' not in definition:
+            raise ValueError('Agent definition must contain a name')
+        self.name = definition['name']
+        self.description = definition.get('description', '')
+        self.inputs = definition.get('inputs', [])
+        self.outputs = definition.get('outputs', [])
+        self.examples = definition.get('examples', [])
+        self.prompt_template = Environment(loader=BaseLoader).from_string(self.prompt)
+        # Possible delegates are all registered microagents except this one.
+        self.delegates = all_microagents.copy()
+        del self.delegates[self.name]
+
+    def step(self, state: State) -> Action:
+        """
+        Renders the prompt, sends it to the LLM and parses the reply into an Action
+
+        Parameters:
+        - state (State): The current state, made available to the template
+
+        Returns:
+        - Action: The action parsed from the LLM reply
+        """
+        rendered = self.prompt_template.render(
+            state=state,
+            instructions=instructions,
+            to_json=to_json,
+            delegates=self.delegates)
+        completion = self.llm.completion(
+            messages=[{'content': rendered, 'role': 'user'}])
+        reply = completion['choices'][0]['message']['content']
+        # Count both the prompt and the reply toward the character total.
+        state.num_of_chars += len(rendered) + len(reply)
+        return parse_response(reply)
+
+    def search_memory(self, query: str) -> List[str]:
+        """Returns an empty list; microagents keep no searchable memory."""
+        return []
--- a/agenthub/micro/coder/agent.yaml
+++ b/agenthub/micro/coder/agent.yaml
@ -0,0 +1,6 @@
+name: Coder
+description: Given a particular task, and a detailed description of the codebase, accomplishes the task
+inputs:
+  - task: string
+  # The Coder prompt reads this input as `state.inputs.summary`, so the
+  # declared name must be `summary`.
+  - summary: string
+outputs: []
--- a/agenthub/micro/coder/prompt.md
+++ b/agenthub/micro/coder/prompt.md
@ -0,0 +1,25 @@
+# Task
+You are a software engineer. You've inherited an existing codebase, which you
+need to modify to complete this task:
+
+{{ state.plan.main_goal }}
+
+Here's a summary of the codebase, as it relates to this task:
+
+{{ state.inputs.summary }}
+
+## Available Actions
+{{ instructions.actions.run }}
+{{ instructions.actions.write }}
+{{ instructions.actions.read }}
+{{ instructions.actions.think }}
+{{ instructions.actions.finish }}
+
+Do NOT finish until you have completed the tasks.
+
+## History
+{{ instructions.history_truncated }}
+{{ to_json(state.history[-10:]) }}
+
+## Format
+{{ instructions.format.action }}
--- a/agenthub/micro/instructions.py
+++ b/agenthub/micro/instructions.py
@ -0,0 +1,20 @@
+from typing import Dict
+import os
+
+# Nested dict of instruction snippets, mirroring the layout of `_instructions`:
+# directories become nested dicts and each file becomes a string keyed by its
+# name without extension (e.g. instructions['actions']['run']).
+instructions: Dict = {}
+
+base_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), '_instructions')
+for root, dirs, files in os.walk(base_dir):
+    if not files:
+        continue
+    rel_base = os.path.relpath(root, base_dir)
+    # relpath() yields '.' for base_dir itself. Files located there belong at
+    # the top level of `instructions` (templates read e.g.
+    # `instructions.history_truncated`), not under a '.' key.
+    keys = [] if rel_base == os.curdir else rel_base.split(os.sep)
+    obj = instructions
+    for key in keys:
+        obj = obj.setdefault(key, {})
+    for file in files:
+        without_ext = os.path.splitext(file)[0]
+        with open(os.path.join(root, file), 'r', encoding='utf-8') as f:
+            obj[without_ext] = f.read()
--- a/agenthub/micro/manager/agent.yaml
+++ b/agenthub/micro/manager/agent.yaml
@ -0,0 +1,5 @@
+name: Manager
+description: Delegates tasks to microagents based on their area of expertise
+generates: Action
+inputs:
+  task: string
--- a/agenthub/micro/manager/prompt.md
+++ b/agenthub/micro/manager/prompt.md
@ -0,0 +1,27 @@
+# Task
+You are in charge of accomplishing the following task:
+{{ state.plan.main_goal }}
+
+In order to accomplish this goal, you must delegate tasks to one or more agents, who
+can do the actual work. A description of each agent is provided below. You MUST
+select one of the delegates below to move towards accomplishing the task, and you MUST
+provide the correct inputs for the delegate you select.
+
+## Agents
+{% for name, details in delegates.items() %}
+### {{ name }}
+{{ details.description }}
+#### Inputs
+{{ to_json(details.inputs) }}
+{% endfor %}
+
+## History
+{{ instructions.history_truncated }}
+{{ to_json(state.history[-10:]) }}
+
+## Available Actions
+{{ instructions.actions.delegate }}
+{{ instructions.actions.finish }}
+
+## Format
+{{ instructions.format.action }}
--- a/agenthub/micro/math_agent/agent.yaml
+++ b/agenthub/micro/math_agent/agent.yaml
@ -0,0 +1,25 @@
+name: MathAgent
+description: "Solves simple and complex math problems using python"
+generates: Action
+container: python:3.12.3-bookworm
+inputs:
+  task: string
+outputs:
+  answer: string
+examples:
+  - input:
+      task: "What is 2 + 2?"
+    output:
+      answer: "4"
+  - input:
+      task: "What is the area of a circle with radius 7.324 inches?"
+    output:
+      answer: "168.518 square inches"
+  - input:
+      task: "What day of the week is 2099-01-01?"
+    output:
+      answer: "Saturday"
+  - input:
+      task: "What is the integral of sin(x^2) evaluated from -1 to 1?"
+    output:
+      answer: "0.603848"
--- a/agenthub/micro/math_agent/prompt.md
+++ b/agenthub/micro/math_agent/prompt.md
@ -0,0 +1,23 @@
+# Task
+You are a brilliant mathematician and programmer. You've been given the following problem to solve:
+
+{{ state.plan.main_goal }}
+
+Please write a python script that solves this problem, and prints the answer to stdout.
+ONLY print the answer to stdout, nothing else.
+You should then run the python script with `python3`,
+and call the `finish` action with `outputs.answer` set to the answer.
+
+## History
+{{ instructions.history_truncated }}
+{{ to_json(state.history[-10:]) }}
+
+If the last item in the history is an error, you should try to fix it.
+
+## Available Actions
+{{ instructions.actions.write }}
+{{ instructions.actions.run }}
+{{ instructions.actions.finish }}
+
+## Format
+{{ instructions.format.action }}
--- a/agenthub/micro/postgres_agent/agent.yaml
+++ b/agenthub/micro/postgres_agent/agent.yaml
@ -0,0 +1,6 @@
+name: PostgresAgent
+description: Writes and maintains PostgreSQL migrations
+generates: Action
+inputs:
+  - task: string
+outputs: []
--- a/agenthub/micro/postgres_agent/prompt.md
+++ b/agenthub/micro/postgres_agent/prompt.md
@ -0,0 +1,24 @@
+# Task
+You are a database engineer. You are working on an existing project, and have been given
+the following task:
+
+{{ state.plan.main_goal }}
+
+You must:
+* Investigate the existing migrations to understand the current schema
+* Write a new migration to accomplish the task above
+* Test that the migrations work properly
+
+## Actions
+You may take any of the following actions:
+{{ instructions.actions.think }}
+{{ instructions.actions.read }}
+{{ instructions.actions.write }}
+{{ instructions.actions.run }}
+
+## History
+{{ instructions.history_truncated }}
+{{ to_json(state.history[-10:]) }}
+
+## Format
+{{ instructions.format.action }}
--- a/agenthub/micro/registry.py
+++ b/agenthub/micro/registry.py
@ -0,0 +1,24 @@
+import os
+import yaml
+
+# Maps each microagent's name to its definition (the parsed agent.yaml plus a
+# `prompt` key holding the contents of prompt.md).
+all_microagents = {}
+
+micro_dir = os.path.dirname(__file__)
+# sorted() keeps the registration order independent of the file system.
+for entry in sorted(os.listdir(micro_dir)):
+    base = os.path.join(micro_dir, entry)
+    # Only sub-directories define agents; names starting with `_` are skipped.
+    if os.path.isfile(base) or entry.startswith('_'):
+        continue
+    prompt_file = os.path.join(base, 'prompt.md')
+    agent_file = os.path.join(base, 'agent.yaml')
+    if not os.path.isfile(prompt_file) or not os.path.isfile(agent_file):
+        raise Exception(
+            f'Missing prompt or agent file in {base}. Please create them.')
+    with open(prompt_file, 'r', encoding='utf-8') as f:
+        prompt = f.read()
+    with open(agent_file, 'r', encoding='utf-8') as f:
+        agent = yaml.safe_load(f)
+    # safe_load() returns None for an empty file; report that as a missing
+    # name rather than failing on the membership test.
+    if not isinstance(agent, dict) or 'name' not in agent:
+        raise Exception(f'Missing name in {agent_file}')
+    agent['prompt'] = prompt
+    all_microagents[agent['name']] = agent
--- a/agenthub/micro/repo_explorer/agent.yaml
+++ b/agenthub/micro/repo_explorer/agent.yaml
@ -0,0 +1,5 @@
+name: RepoExplorer
+description: Generates a detailed summary of an existing codebase
+inputs: []
+outputs:
+  - summary: string
--- a/agenthub/micro/repo_explorer/prompt.md
+++ b/agenthub/micro/repo_explorer/prompt.md
@ -0,0 +1,26 @@
+# Task
+You are a software engineer. You've inherited an existing codebase, which you're
+learning about for the first time. Your goal is to produce a detailed summary
+of the codebase, including:
+* The overall purpose of the project
+* The directory structure
+* The main components of the codebase
+* How the components fit together
+
+## Available Actions
+{{ instructions.actions.run }}
+{{ instructions.actions.read }}
+{{ instructions.actions.think }}
+{{ instructions.actions.finish }}
+
+You should ONLY `run` commands that have no side-effects, like `ls` and `grep`.
+
+Do NOT finish until you have a complete understanding of the codebase.
+When you're done, put your summary into the output of the `finish` action.
+
+## History
+{{ instructions.history_truncated }}
+{{ to_json(state.history[-10:]) }}
+
+## Format
+{{ instructions.format.action }}
--- a/agenthub/micro/study_repo_for_task/agent.yaml
+++ b/agenthub/micro/study_repo_for_task/agent.yaml
@ -0,0 +1,6 @@
+name: StudyRepoForTaskAgent
+description: Given a particular task, finds and describes all relevant parts of the codebase
+inputs:
+  - task: string
+outputs:
+  - summary: string
--- a/agenthub/micro/study_repo_for_task/prompt.md
+++ b/agenthub/micro/study_repo_for_task/prompt.md
@ -0,0 +1,25 @@
+# Task
+You are a software engineer. You've inherited an existing codebase, which you're
+learning about for the first time. You need to study the codebase to find all
+the information needed to complete this task:
+
+{{ state.plan.main_goal }}
+
+## Available Actions
+{{ instructions.actions.run }}
+{{ instructions.actions.read }}
+{{ instructions.actions.think }}
+{{ instructions.actions.finish }}
+
+You must ONLY `run` commands that have no side-effects, like `ls` and `grep`.
+
+Do NOT finish until you have a complete understanding of which parts of the
+codebase are relevant to the task, including particular files, functions, and classes.
+When you're done, put your summary in `outputs.summary` in the `finish` action.
+
+## History
+{{ instructions.history_truncated }}
+{{ to_json(state.history[-10:]) }}
+
+## Format
+{{ instructions.format.action }}
--- a/agenthub/micro/verifier/agent.yaml
+++ b/agenthub/micro/verifier/agent.yaml
@ -0,0 +1,7 @@
+name: Verifier
+description: Given a particular task, verifies that the task has been completed
+inputs:
+  - task: string
+outputs:
+  - completed: boolean
+  - summary: string
--- a/agenthub/micro/verifier/prompt.md
+++ b/agenthub/micro/verifier/prompt.md
@ -0,0 +1,27 @@
+# Task
+You are a quality assurance engineer. Another engineer has made changes to the
+codebase which are supposed to solve this task:
+
+{{ state.plan.main_goal }}
+
+Your goal is to verify that the changes are correct and bug-free.
+
+## Available Actions
+{{ instructions.actions.run }}
+{{ instructions.actions.read }}
+{{ instructions.actions.think }}
+{{ instructions.actions.finish }}
+
+You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts.
+
+Do NOT finish until you know whether the task is complete.
+When you're done, add a `completed` boolean to the `outputs` of the `finish` action.
+If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action
+explaining what the problem is.
+
+## History
+{{ instructions.history_truncated }}
+{{ to_json(state.history[-10:]) }}
+
+## Format
+{{ instructions.format.action }}
--- a/opendevin/action/init.py
+++ b/opendevin/action/init.py
@ -8,6 +8,7 @@ from .agent import (
    AgentFinishAction,
    AgentEchoAction,
    AgentSummarizeAction,
+    AgentDelegateAction,
 )
 from .tasks import AddTaskAction, ModifyTaskAction
 from ..exceptions import AgentMalformedActionError
@ -21,6 +22,7 @@ actions = (
    AgentRecallAction,
    AgentThinkAction,
    AgentFinishAction,
+    AgentDelegateAction,
    AddTaskAction,
    ModifyTaskAction,
 )
@ -58,6 +60,7 @@ __all__ = [
    'AgentRecallAction',
    'AgentThinkAction',
    'AgentFinishAction',
+    'AgentDelegateAction',
    'AgentEchoAction',
    'AgentSummarizeAction',
    'AddTaskAction',
--- a/opendevin/action/agent.py
+++ b/opendevin/action/agent.py
@ -1,9 +1,10 @@
-from dataclasses import dataclass
-from typing import TYPE_CHECKING
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Dict

 from opendevin.observation import (
    AgentRecallObservation,
    AgentMessageObservation,
+    NullObservation,
    Observation,
 )
 from opendevin.schema import ActionType
@ -67,6 +68,7 @@ class AgentSummarizeAction(NotExecutableAction):

@dataclass
 class AgentFinishAction(NotExecutableAction):
+    outputs: Dict = field(default_factory=dict)
    action: str = ActionType.FINISH

    async def run(self, controller: 'AgentController') -> 'Observation':
@ -75,3 +77,18 @@ class AgentFinishAction(NotExecutableAction):
    @property
    def message(self) -> str:
        return "All done! What's next on the agenda?"
+
+
+@dataclass
+class AgentDelegateAction(ExecutableAction):
+    """Action that asks the controller to start another agent as a delegate."""
+    # Name of the agent to delegate to.
+    agent: str
+    # Inputs handed to the delegate.
+    inputs: dict
+    action: str = ActionType.DELEGATE
+
+    async def run(self, controller: 'AgentController') -> 'Observation':
+        """Starts the delegate on the controller and returns an empty NullObservation."""
+        await controller.start_delegate(self)
+        placeholder = NullObservation('')
+        return placeholder
+
+    @property
+    def message(self) -> str:
+        """Human-readable description of this action."""
+        text = f"I'm asking {self.agent} for help with this task."
+        return text
--- a/opendevin/controller/action_manager.py
+++ b/opendevin/controller/action_manager.py
@ -21,18 +21,17 @@ class ActionManager:
    def __init__(
            self,
            sid: str,
-            container_image: str | None = None,
    ):
        sandbox_type = config.get(ConfigType.SANDBOX_TYPE).lower()
        if sandbox_type == 'exec':
            self.sandbox = DockerExecBox(
-                sid=(sid or 'default'), container_image=container_image
+                sid=(sid or 'default'),
            )
        elif sandbox_type == 'local':
            self.sandbox = LocalBox()
        elif sandbox_type == 'ssh':
            self.sandbox = DockerSSHBox(
-                sid=(sid or 'default'), container_image=container_image
+                sid=(sid or 'default')
            )
        elif sandbox_type == 'e2b':
            self.sandbox = E2BBox()
--- a/opendevin/controller/agent_controller.py
+++ b/opendevin/controller/agent_controller.py
@ -1,17 +1,23 @@
 import asyncio
-from typing import Callable, List
+from typing import Callable, List, Type


 from opendevin import config
 from opendevin.action import (
    Action,
    AgentFinishAction,
+    AgentDelegateAction,
    NullAction,
 )
+from opendevin.observation import (
+    Observation,
+    AgentErrorObservation,
+    AgentDelegateObservation,
+    NullObservation,
+)
 from opendevin.agent import Agent
 from opendevin.exceptions import AgentMalformedActionError, AgentNoActionError, MaxCharsExceedError
 from opendevin.logger import opendevin_logger as logger
-from opendevin.observation import AgentErrorObservation, NullObservation, Observation
 from opendevin.plan import Plan
 from opendevin.state import State

@ -30,6 +36,7 @@ class AgentController:
    action_manager: ActionManager
    callbacks: List[Callable]

+    delegate: 'AgentController | None' = None
    state: State | None = None

    _task_state: TaskState = TaskState.INIT
@ -38,16 +45,16 @@ class AgentController:
    def __init__(
        self,
        agent: Agent,
-        sid: str = '',
+        inputs: dict = {},
+        sid: str = 'default',
        max_iterations: int = MAX_ITERATIONS,
        max_chars: int = MAX_CHARS,
-        container_image: str | None = None,
        callbacks: List[Callable] = [],
    ):
        self.id = sid
        self.agent = agent
        self.max_iterations = max_iterations
-        self.action_manager = ActionManager(self.id, container_image)
+        self.action_manager = ActionManager(self.id)
        self.max_chars = max_chars
        self.callbacks = callbacks
        # Initialize agent-required plugins for sandbox (if any)
@ -116,15 +123,19 @@ class AgentController:
                await self.set_task_state_to(TaskState.STOPPED)
                break

+    async def setup_task(self, task: str, inputs: dict | None = None):
+        """Sets up the agent controller with a task.
+
+        Marks the task as running, notifies the callbacks, and creates a fresh
+        State for the task without running any step.
+
+        Parameters:
+        - task (str): The task used to build the plan
+        - inputs (dict | None): Inputs stored on the state; None means no inputs
+        """
+        self._task_state = TaskState.RUNNING
+        await self.notify_task_state_changed()
+        self.state = State(Plan(task))
+        # A new dict per call, so states never share one mutable default.
+        self.state.inputs = {} if inputs is None else inputs
+
    async def start(self, task: str):
        """Starts the agent controller with a task.
        If task already run before, it will continue from the last step.
        """
-        self._task_state = TaskState.RUNNING
-        await self.notify_task_state_changed()
-
-        self.state = State(Plan(task))
-
+        await self.setup_task(task)
        await self._run()

    async def resume(self):
@ -156,9 +167,32 @@ class AgentController:
    async def notify_task_state_changed(self):
        await self._run_callbacks(TaskStateChangedAction(self._task_state))

-    async def step(self, i: int):
+    async def start_delegate(self, action: AgentDelegateAction):
+        """Creates a child controller for the requested agent and sets up its task.
+
+        The delegate reuses this controller's LLM, limits and callbacks. Its task
+        is the `task` entry of the action's inputs, or '' when that is missing
+        or empty. The delegate is only set up here, not stepped.
+        """
+        delegate_cls: Type[Agent] = Agent.get_cls(action.agent)
+        delegate_agent = delegate_cls(llm=self.agent.llm)
+        delegate_sid = self.id + '-delegate'
+        self.delegate = AgentController(
+            agent=delegate_agent,
+            sid=delegate_sid,
+            max_iterations=self.max_iterations,
+            max_chars=self.max_chars,
+            callbacks=self.callbacks,
+        )
+        delegate_task = action.inputs.get('task') or ''
+        await self.delegate.setup_task(delegate_task, action.inputs)
+
+    async def step(self, i: int) -> bool:
        if self.state is None:
-            return
+            raise ValueError('No task to run')
+        if self.delegate is not None:
+            delegate_done = await self.delegate.step(i)
+            if delegate_done:
+                outputs = self.delegate.state.outputs if self.delegate.state else {}
+                obs: Observation = AgentDelegateObservation(content='', outputs=outputs)
+                self.add_history(NullAction(), obs)
+                self.delegate = None
+                self.delegateAction = None
+            return False
+
        logger.info(f'STEP {i}', extra={'msg_type': 'STEP'})
        logger.info(self.state.plan.main_goal, extra={'msg_type': 'PLAN'})
        if self.state.num_of_chars > self.max_chars:
@ -187,6 +221,7 @@ class AgentController:

        finished = isinstance(action, AgentFinishAction)
        if finished:
+            self.state.outputs = action.outputs  # type: ignore[attr-defined]
            logger.info(action, extra={'msg_type': 'INFO'})
            return True

@ -198,6 +233,7 @@ class AgentController:

        self.add_history(action, observation)
        await self._run_callbacks(observation)
+        return False

    async def _run_callbacks(self, event):
        if event is None:
--- a/opendevin/observation/init.py
+++ b/opendevin/observation/init.py
@ -4,6 +4,7 @@ from .browse import BrowserOutputObservation
 from .files import FileReadObservation, FileWriteObservation
 from .message import UserMessageObservation, AgentMessageObservation
 from .recall import AgentRecallObservation
+from .delegate import AgentDelegateObservation
 from .error import AgentErrorObservation

 observations = (
@ -14,6 +15,7 @@ observations = (
    UserMessageObservation,
    AgentMessageObservation,
    AgentRecallObservation,
+    AgentDelegateObservation,
    AgentErrorObservation,
 )

--- a/opendevin/observation/delegate.py
+++ b/opendevin/observation/delegate.py
@ -0,0 +1,19 @@
+from dataclasses import dataclass
+
+from .base import Observation
+from opendevin.schema import ObservationType
+
+
+@dataclass
+class AgentDelegateObservation(Observation):
+    """
+    This data class represents the result of a task delegated to another agent.
+    """
+
+    # Outputs reported by the delegate.
+    outputs: dict
+    observation: str = ObservationType.DELEGATE
+
+    @property
+    def message(self) -> str:
+        # This observation has no user-facing message.
+        return ''
--- a/opendevin/schema/action.py
+++ b/opendevin/schema/action.py
@ -42,6 +42,10 @@ class ActionTypeSchema(BaseModel):
    """Allows the agent to make a plan, set a goal, or record thoughts
    """

+    DELEGATE: str = Field(default='delegate')
+    """Delegates a task to another agent.
+    """
+
    FINISH: str = Field(default='finish')
    """If you're absolutely certain that you've completed your task and have tested your work,
    use the finish action to stop working.
--- a/opendevin/schema/observation.py
+++ b/opendevin/schema/observation.py
@ -28,6 +28,10 @@ class ObservationTypeSchema(BaseModel):
    """A message from the user
    """

+    DELEGATE: str = Field(default='delegate')
+    """The result of a task delegated to another agent
+    """
+
    MESSAGE: str = Field(default='message')

    ERROR: str = Field(default='error')
--- a/opendevin/server/agent/agent.py
+++ b/opendevin/server/agent/agent.py
@ -138,7 +138,6 @@ class AgentUnit:
        model = self.get_arg_or_default(args, ConfigType.LLM_MODEL)
        api_key = config.get(ConfigType.LLM_API_KEY)
        api_base = config.get(ConfigType.LLM_BASE_URL)
-        container_image = config.get(ConfigType.SANDBOX_CONTAINER_IMAGE)
        max_iterations = self.get_arg_or_default(args, ConfigType.MAX_ITERATIONS)
        max_chars = self.get_arg_or_default(args, ConfigType.MAX_CHARS)

@ -150,7 +149,6 @@ class AgentUnit:
                agent=Agent.get_cls(agent_cls)(llm),
                max_iterations=int(max_iterations),
                max_chars=int(max_chars),
-                container_image=container_image,
                callbacks=[self.on_agent_event],
            )
        except Exception as e:
--- a/opendevin/state.py
+++ b/opendevin/state.py
@ -1,5 +1,5 @@
 from dataclasses import dataclass, field
-from typing import List, Tuple
+from typing import List, Tuple, Dict

 from opendevin.plan import Plan

@ -23,3 +23,5 @@ class State:
    history: List[Tuple[Action, Observation]] = field(default_factory=list)
    updated_info: List[Tuple[Action, Observation]
                       ] = field(default_factory=list)
+    inputs: Dict = field(default_factory=dict)
+    outputs: Dict = field(default_factory=dict)
--- a/poetry.lock
+++ b/poetry.lock
--- a/pyproject.toml
+++ b/pyproject.toml
@ -25,6 +25,7 @@ json-repair = "*"
 playwright = "*"
 e2b = "^0.14.13"
 pexpect = "*"
+jinja2 = "^3.1.3"

 [tool.poetry.group.llama-index.dependencies]
 llama-index = "*"
--- a/tests/test_action_serialization.py
+++ b/tests/test_action_serialization.py
@ -46,7 +46,7 @@ def test_agent_recall_action_serialization_deserialization():
 def test_agent_finish_action_serialization_deserialization():
    original_action_dict = {
        'action': 'finish',
-        'args': {}
+        'args': {'outputs': {}},
    }
    serialization_deserialization(original_action_dict, AgentFinishAction)