Implement Planning (#267)

* add outline of agent * add plan class * add initial prompt * plumb plan through a bit * refactor state management * move task into state * fix errors * add prompt parsing * add task actions * better serialization * more serialization hacks * fix fn * fix recursion error * refine prompt * better description of run * update prompt * tighter planning mechanism * prompt tweaks * fix merge * fix lint issues * add error handling for tasks * add graphic for plans * remove base_path from file actions * rename subtask to task * better planning * prompt updates for verification * remove verify field * ruff * mypy * fix actions
2025-12-26 05:48:36 +08:00 · 2024-03-29 11:47:29 -04:00 · 2024-03-29 11:47:29 -04:00 · a6f0c066b5
commit a6f0c066b5
parent 32a3a0259a
12 changed files with 454 additions and 13 deletions
--- a/agenthub/init.py
+++ b/agenthub/init.py
@ -4,5 +4,6 @@ load_dotenv()
 # Import agents after environment variables are loaded
 from . import langchains_agent # noqa: E402
 from . import codeact_agent # noqa: E402
+from . import planner_agent # noqa: E402

-__all__ = ['langchains_agent', 'codeact_agent']
+__all__ = ['langchains_agent', 'codeact_agent', 'planner_agent']
--- a/agenthub/codeact_agent/codeact_agent.py
+++ b/agenthub/codeact_agent/codeact_agent.py
@ -70,12 +70,12 @@ class CodeActAgent(Agent):

    def step(self, state: State) -> Action:
        if len(self.messages) == 0:
-            assert state.task, "Expecting instruction to be set"
+            assert state.plan.main_goal, "Expecting instruction to be set"
            self.messages = [
                {"role": "system", "content": SYSTEM_MESSAGE},
-                {"role": "user", "content": state.task},
+                {"role": "user", "content": state.plan.main_goal},
            ]
-            print(colored("===USER:===\n" + state.task, "green"))
+            print(colored("===USER:===\n" + state.plan.main_goal, "green"))
        updated_info = state.updated_info
        if updated_info:
            for prev_action, obs in updated_info:
--- a/agenthub/langchains_agent/langchains_agent.py
+++ b/agenthub/langchains_agent/langchains_agent.py
@ -119,7 +119,7 @@ class LangchainsAgent(Agent):
        self._initialized = True

    def step(self, state: State) -> Action:
-        self._initialize(state.task)
+        self._initialize(state.plan.main_goal)
        # TODO: make langchains agent use Action & Observation
        # completly from ground up

@ -163,7 +163,7 @@ class LangchainsAgent(Agent):
        state.updated_info = []

        prompt = prompts.get_request_action_prompt(
-            state.task,
+            state.plan.main_goal,
            self.monologue.get_thoughts(),
            state.background_commands_obs,
        )
--- a/agenthub/planner_agent/init.py
+++ b/agenthub/planner_agent/init.py
@ -0,0 +1,4 @@
+from opendevin.agent import Agent
+from .agent import PlannerAgent
+
+# Register PlannerAgent with the Agent class under the name "PlannerAgent".
+Agent.register("PlannerAgent", PlannerAgent)
--- a/agenthub/planner_agent/agent.py
+++ b/agenthub/planner_agent/agent.py
@ -0,0 +1,26 @@
+from typing import List
+from .prompt import get_prompt, parse_response
+
+from opendevin.agent import Agent
+from opendevin.action import AgentFinishAction
+from opendevin.llm.llm import LLM
+from opendevin.state import State
+from opendevin.action import Action
+
+class PlannerAgent(Agent):
+    """Agent that asks the LLM for one plan-aware action per step.
+
+    Each step renders the current plan and history into a single user
+    message, sends it to the LLM, and parses the reply into an Action.
+    """
+
+    def __init__(self, llm: LLM):
+        """Delegate construction to the base Agent with the given LLM."""
+        super().__init__(llm)
+
+    def step(self, state: State) -> Action:
+        """Return the next action for ``state``.
+
+        When the root task of the plan is already completed, verified, or
+        abandoned, AgentFinishAction is returned without calling the LLM.
+        """
+        root_state = state.plan.task.state
+        if root_state in ('completed', 'verified', 'abandoned'):
+            return AgentFinishAction()
+
+        user_message = {"content": get_prompt(state.plan, state.history), "role": "user"}
+        completion = self.llm.completion(messages=[user_message])
+        reply_text = completion['choices'][0]['message']['content']
+        return parse_response(reply_text)
+
+    def search_memory(self, query: str) -> List[str]:
+        """Always return an empty list, whatever ``query`` is."""
+        return []
+
--- a/agenthub/planner_agent/prompt.py
+++ b/agenthub/planner_agent/prompt.py
@ -0,0 +1,242 @@
+import json
+from typing import List, Tuple, Dict, Type
+
+from opendevin.controller.agent_controller import print_with_indent
+from opendevin.plan import Plan
+from opendevin.action import Action
+from opendevin.observation import Observation
+
+from opendevin.action import (
+    NullAction,
+    CmdRunAction,
+    CmdKillAction,
+    BrowseURLAction,
+    FileReadAction,
+    FileWriteAction,
+    AgentRecallAction,
+    AgentThinkAction,
+    AgentFinishAction,
+    AgentSummarizeAction,
+    AddTaskAction,
+    ModifyTaskAction,
+)
+
+from opendevin.observation import (
+    NullObservation,
+)
+
+# Maps the `action` name used in the LLM's JSON reply to the Action class
+# that parse_response() instantiates with the reply's `args`.
+ACTION_TYPE_TO_CLASS: Dict[str, Type[Action]] = {
+    "run": CmdRunAction,
+    "kill": CmdKillAction,
+    "browse": BrowseURLAction,
+    "read": FileReadAction,
+    "write": FileWriteAction,
+    "recall": AgentRecallAction,
+    "think": AgentThinkAction,
+    "summarize": AgentSummarizeAction,
+    "finish": AgentFinishAction,
+    "add_task": AddTaskAction,
+    "modify_task": ModifyTaskAction,
+}
+
+# Number of trailing (action, observation) history pairs that get_prompt()
+# includes in the prompt.
+HISTORY_SIZE = 10
+
+# Prompt template filled in by get_prompt() using %-formatting with a dict.
+# Placeholders: %(task)s, %(plan)s, %(plan_status)s, %(history)s, %(hint)s.
+# Any literal percent sign added to this text must be written as `%%`.
+prompt = """
+# Task
+You're a diligent software engineer AI. You can't see, draw, or interact with a
+browser, but you can read and write files, and you can run commands, and you can think.
+
+You've been given the following task:
+
+%(task)s
+
+## Plan
+As you complete this task, you're building a plan and keeping
+track of your progress. Here's a JSON representation of your plan:
+```json
+%(plan)s
+```
+
+%(plan_status)s
+
+You're responsible for managing this plan and the status of tasks in
+it, by using the `add_task` and `modify_task` actions described below.
+
+If the History below contradicts the state of any of these tasks, you
+MUST modify the task using the `modify_task` action described below.
+
+Be sure NOT to duplicate any tasks. Do NOT use the `add_task` action for
+a task that's already represented. Every task must be represented only once.
+
+Tasks that are sequential MUST be siblings. They must be added in order
+to their parent task.
+
+If you mark a task as 'completed', 'verified', or 'abandoned',
+all non-abandoned subtasks will be marked the same way.
+So before closing a task this way, you MUST not only be sure that it has
+been completed successfully--you must ALSO be sure that all its subtasks
+are ready to be marked the same way.
+
+If, and only if, ALL tasks have already been marked verified,
+you MUST respond with the `finish` action.
+
+## History
+Here is a recent history of actions you've taken in service of this plan,
+as well as observations you've made. This only includes the MOST RECENT
+ten actions--more happened before that.
+```json
+%(history)s
+```
+
+Your most recent action is at the bottom of that history.
+
+## Action
+What is your next thought or action? Your response must be in JSON format.
+
+It must be an object, and it must contain two fields:
+* `action`, which is one of the actions below
+* `args`, which is a map of key-value pairs, specifying the arguments for that action
+
+* `read` - reads the contents of a file. Arguments:
+  * `path` - the path of the file to read
+* `write` - writes the contents to a file. Arguments:
+  * `path` - the path of the file to write
+  * `contents` - the contents to write to the file
+* `run` - runs a command on the command line in a Linux shell. Arguments:
+  * `command` - the command to run
+  * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
+* `kill` - kills a background command
+  * `id` - the ID of the background command to kill
+* `browse` - opens a web page. Arguments:
+  * `url` - the URL to open
+* `think` - make a plan, set a goal, or record your thoughts. Arguments:
+  * `thought` - the thought to record
+* `add_task` - add a task to your plan. Arguments:
+  * `parent` - the ID of the parent task
+  * `goal` - the goal of the task
+  * `subtasks` - a list of subtasks, each of which is a map with a `goal` key.
+* `modify_task` - change the state of a task. Arguments:
+  * `id` - the ID of the task to modify
+  * `state` - set to 'in_progress' to start the task, 'completed' to finish it, 'verified' to assert that it was successful, 'abandoned' to give up on it permanently, or `open` to stop working on it for now.
+* `finish` - if ALL of your tasks and subtasks have been verified or abandoned, and you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
+
+You MUST take time to think in between read, write, run, browse, and recall actions.
+You should never act twice in a row without thinking. But if your last several
+actions are all `think` actions, you should consider taking a different action.
+
+What is your next thought or action? Again, you must reply with JSON, and only with JSON.
+
+%(hint)s
+"""
+
+def get_prompt(plan: Plan, history: List[Tuple[Action, Observation]]):
+    """Render the planner prompt for the given plan and history.
+
+    The last HISTORY_SIZE (action, observation) pairs are serialized to JSON,
+    skipping NullAction and NullObservation entries. A status line and a hint
+    are derived from the plan's current task and the most recent non-null
+    action, and the hint is echoed through print_with_indent.
+
+    Returns the module-level ``prompt`` template with every placeholder filled.
+    """
+    # Hint to show after each kind of action while a task is in progress.
+    # Action ids missing from this table get an empty hint.
+    hint_after_action = {
+        "": "You haven't taken any actions yet. Start by using `ls` to check out what files you're working with.",
+        "run": "You should think about the command you just ran, what output it gave, and how that affects your plan.",
+        "read": "You should think about the file you just read, what you learned from it, and how that affects your plan.",
+        "write": "You just changed a file. You should think about how it affects your plan.",
+        "browse": "You should think about the page you just visited, and what you learned from it.",
+        "think": "Look at your last thought in the history above. What does it suggest? Don't think anymore--take action.",
+        "recall": "You should think about the information you just recalled, and how it should affect your plan.",
+        "add_task": "You should think about the next action to take.",
+        "modify_task": "You should think about the next action to take.",
+    }
+
+    plan_json = json.dumps(plan.task.to_dict(), indent=2)
+
+    entries = []
+    last_action: Action = NullAction()
+    for act, obs in history[-HISTORY_SIZE:]:
+        if not isinstance(act, NullAction):
+            act_entry = act.to_dict()
+            act_entry["action"] = convert_action(act_entry["action"])
+            entries.append(act_entry)
+            last_action = act
+        if not isinstance(obs, NullObservation):
+            obs_entry = obs.to_dict()
+            obs_entry["observation"] = convert_observation(obs_entry["observation"])
+            entries.append(obs_entry)
+    history_json = json.dumps(entries, indent=2)
+
+    active = plan.get_current_task()
+    if active is None:
+        plan_status = "You're not currently working on any tasks. Your next action MUST be to mark a task as in_progress."
+    else:
+        plan_status = f"You're currently working on this task:\n{active.goal}."
+        if not active.subtasks:
+            plan_status += "\nIf it's not achievable AND verifiable with a SINGLE action, you MUST break it down into subtasks NOW."
+
+    last_action_id = convert_action(last_action.to_dict()["action"])
+
+    if active is None:
+        # With nothing in progress, the hint repeats the status line.
+        hint = plan_status
+    else:
+        hint = hint_after_action.get(last_action_id, "")
+
+    print_with_indent("HINT:\n" + hint)
+    return prompt % {
+        'task': plan.main_goal,
+        'plan': plan_json,
+        'history': history_json,
+        'hint': hint,
+        'plan_status': plan_status,
+    }
+
+def parse_response(response: str) -> Action:
+    """Parse the LLM's reply into an Action.
+
+    The JSON object is taken from the first ``{`` to the last ``}`` of
+    ``response``, so text surrounding the object is ignored.
+
+    Raises:
+        json.JSONDecodeError: if the extracted text is not valid JSON.
+        KeyError: if the object has no ``action`` key, or names an action
+            that is not in ACTION_TYPE_TO_CLASS.
+        TypeError: if ``args`` does not match the action's constructor.
+    """
+    json_start = response.find("{")
+    json_end = response.rfind("}") + 1
+    action_dict = json.loads(response[json_start:json_end])
+
+    # Copy so the rename below never mutates the parsed reply; `or {}` also
+    # tolerates an explicit `"args": null`.
+    args_dict = dict(action_dict.get("args") or {})
+    if 'content' in args_dict and 'contents' not in args_dict:
+        # The LLM gets confused here. Might as well be robust.
+        # The rename must happen inside `args`, because only `args` is
+        # passed to the Action constructor.
+        args_dict['contents'] = args_dict.pop('content')
+
+    action_type = action_dict["action"]
+    if action_type not in ACTION_TYPE_TO_CLASS:
+        raise KeyError(f"Unknown action type: {action_type!r}")
+    return ACTION_TYPE_TO_CLASS[action_type](**args_dict)
+
+def convert_action(action):
+    """Translate an Action class name into its short prompt name.
+
+    Names without a known translation are returned unchanged.
+    """
+    short_names = {
+        "CmdRunAction": "run",
+        "CmdKillAction": "kill",
+        "BrowseURLAction": "browse",
+        "FileReadAction": "read",
+        "FileWriteAction": "write",
+        "AgentFinishAction": "finish",
+        "AgentRecallAction": "recall",
+        "AgentThinkAction": "think",
+        "AgentSummarizeAction": "summarize",
+        "AddTaskAction": "add_task",
+        "ModifyTaskAction": "modify_task",
+    }
+    return short_names.get(action, action)
+
+def convert_observation(observation):
+    """Translate an Observation class name into its short prompt name.
+
+    Names without a known translation are returned unchanged.
+    """
+    short_names = {
+        "UserMessageObservation": "chat",
+        "AgentMessageObservation": "chat",
+        "CmdOutputObservation": "run",
+        "FileReadObservation": "read",
+    }
+    return short_names.get(observation, observation)
--- a/opendevin/action/init.py
+++ b/opendevin/action/init.py
@ -3,6 +3,7 @@ from .bash import CmdRunAction, CmdKillAction
 from .browse import BrowseURLAction
 from .fileop import FileReadAction, FileWriteAction
 from .agent import AgentRecallAction, AgentThinkAction, AgentFinishAction, AgentEchoAction, AgentSummarizeAction
+from .tasks import AddTaskAction, ModifyTaskAction

 actions = (
    CmdKillAction,
@ -38,4 +39,6 @@ __all__ = [
    "AgentFinishAction",
    "AgentEchoAction",
    "AgentSummarizeAction",
+    "AddTaskAction",
+    "ModifyTaskAction"
 ]
--- a/opendevin/action/tasks.py
+++ b/opendevin/action/tasks.py
@ -0,0 +1,25 @@
+from dataclasses import dataclass, field
+
+from .base import NotExecutableAction
+
+@dataclass
+class AddTaskAction(NotExecutableAction):
+    """Action asking for a new task to be added under an existing one."""
+    # ID of the task the new task is added under.
+    parent: str
+    # Goal text of the new task.
+    goal: str
+    # Optional nested subtasks; a fresh list per instance.
+    subtasks: list = field(default_factory=list)
+    action: str = "add_task"
+
+    @property
+    def message(self) -> str:
+        """Short human-readable summary of this action."""
+        return "Added task: {}".format(self.goal)
+
+@dataclass
+class ModifyTaskAction(NotExecutableAction):
+    """Action asking for an existing task to be moved to a new state."""
+    # ID of the task to change.
+    id: str
+    # State to set on that task.
+    state: str
+    action: str = "modify_task"
+
+    @property
+    def message(self) -> str:
+        """Short human-readable summary of this action."""
+        return "Set task {} to {}".format(self.id, self.state)
+
--- a/opendevin/controller/init.py
+++ b/opendevin/controller/init.py
@ -4,4 +4,4 @@ from .command_manager import CommandManager
 __all__ = [
    'AgentController',
    'CommandManager'
-]
+]
--- a/opendevin/controller/agent_controller.py
+++ b/opendevin/controller/agent_controller.py
@ -1,13 +1,17 @@
+
 import asyncio
 from typing import List, Callable
 import traceback

+from opendevin.plan import Plan
 from opendevin.state import State
 from opendevin.agent import Agent
 from opendevin.action import (
    Action,
    NullAction,
    AgentFinishAction,
+    AddTaskAction,
+    ModifyTaskAction
 )
 from opendevin.observation import (
    Observation,
@ -15,10 +19,8 @@ from opendevin.observation import (
    NullObservation
 )

-
 from .command_manager import CommandManager

-
 def print_with_indent(text: str):
    print("\t"+text.replace("\n","\n\t"), flush=True)

@ -51,10 +53,10 @@ class AgentController:
        self.state.history.append((action, observation))
        self.state.updated_info.append((action, observation))

-
    async def start_loop(self, task: str):
        finished = False
-        self.state = State(task)
+        plan = Plan(task)
+        self.state = State(plan)
        for i in range(self.max_iterations):
            try:
                finished = await self.step(i)
@ -70,6 +72,9 @@ class AgentController:
    async def step(self, i: int):
        print("\n\n==============", flush=True)
        print("STEP", i, flush=True)
+        print_with_indent("\nPLAN:\n")
+        print_with_indent(self.state.plan.__str__())
+
        log_obs = self.command_manager.get_background_obs()
        for obs in log_obs:
            self.add_history(NullAction(), obs)
@ -92,10 +97,26 @@ class AgentController:

        await self._run_callbacks(action)

-        if isinstance(action, AgentFinishAction):
+        finished = isinstance(action, AgentFinishAction)
+        if finished:
            print_with_indent("\nFINISHED")
            return True

+        if isinstance(action, AddTaskAction):
+            try:
+                self.state.plan.add_subtask(action.parent, action.goal, action.subtasks)
+            except Exception as e:
+                observation = AgentErrorObservation(str(e))
+                print_with_indent("\nADD TASK ERROR:\n%s" % observation)
+                traceback.print_exc()
+        elif isinstance(action, ModifyTaskAction):
+            try:
+                self.state.plan.set_subtask_state(action.id, action.state)
+            except Exception as e:
+                observation = AgentErrorObservation(str(e))
+                print_with_indent("\nMODIFY TASK ERROR:\n%s" % observation)
+                traceback.print_exc()
+
        if action.executable:
            try:
                observation = action.run(self)
--- a/opendevin/plan.py
+++ b/opendevin/plan.py
@ -0,0 +1,117 @@
+from typing import List
+
+# Task states. Task.set_state() rejects any value that is not in STATES.
+OPEN_STATE = 'open'
+COMPLETED_STATE = 'completed'
+ABANDONED_STATE = 'abandoned'
+IN_PROGRESS_STATE = 'in_progress'
+VERIFIED_STATE = 'verified'
+STATES = [OPEN_STATE, COMPLETED_STATE, ABANDONED_STATE, IN_PROGRESS_STATE, VERIFIED_STATE]
+
+class Task:
+    """A node in the plan tree: a goal, a state, and an ordered list of subtasks.
+
+    IDs are dotted paths of sibling indexes: the root is '0', its first
+    child is '0.0', that child's second child is '0.0.1', and so on.
+    """
+    id: str
+    goal: str
+    parent: "Task | None"
+    subtasks: List["Task"]
+
+    def __init__(self, parent: "Task | None", goal: str, state: str=OPEN_STATE, subtasks: "List | None" = None):
+        """Create a task and, recursively, any subtasks described by ``subtasks``.
+
+        Args:
+            parent: the parent task, or None for the root task.
+            goal: description of what this task should achieve.
+            state: accepted for interface compatibility; see the note below.
+            subtasks: Task instances (appended as they are) or dicts with
+                ``goal`` and optional ``state`` / ``subtasks`` keys.
+                None and an empty list both mean no subtasks.
+        """
+        if parent is None:
+            self.id = '0'
+        else:
+            # The id is the index this task gets once it is appended to
+            # parent.subtasks, i.e. the parent's current child count.
+            self.id = parent.id + '.' + str(len(parent.subtasks))
+        self.parent = parent
+        self.goal = goal
+        self.subtasks = []
+        for subtask in (subtasks or []):
+            if isinstance(subtask, Task):
+                self.subtasks.append(subtask)
+            else:
+                # Build the child before appending so its id reflects its index.
+                child = Task(self, subtask.get('goal'), subtask.get('state'), subtask.get('subtasks'))
+                self.subtasks.append(child)
+
+        # NOTE(review): `state` is never applied; every new task starts open,
+        # as in the original code. Confirm whether that is intended.
+        self.state = OPEN_STATE
+
+    def to_string(self, indent=""):
+        """Return an indented, emoji-annotated rendering of this task and its subtasks."""
+        emoji_by_state = {
+            VERIFIED_STATE: '✅',
+            COMPLETED_STATE: '🟢',
+            ABANDONED_STATE: '❌',
+            IN_PROGRESS_STATE: '💪',
+            OPEN_STATE: '🔵',
+        }
+        emoji = emoji_by_state.get(self.state, '')
+        lines = [indent + emoji + ' ' + self.id + ' ' + self.goal + '\n']
+        lines.extend(subtask.to_string(indent + '    ') for subtask in self.subtasks)
+        return ''.join(lines)
+
+    def to_dict(self):
+        """Return a JSON-serializable dict of this task and its subtasks."""
+        return {
+            'id': self.id,
+            'goal': self.goal,
+            'state': self.state,
+            'subtasks': [t.to_dict() for t in self.subtasks]
+        }
+
+    def set_state(self, state):
+        """Set this task's state and propagate it through the tree.
+
+        Completed, abandoned, and verified cascade down to every subtask that
+        is not already abandoned. In-progress cascades up to every ancestor.
+
+        Raises:
+            ValueError: if ``state`` is not one of STATES.
+        """
+        if state not in STATES:
+            # f-string rather than `+` so a non-string state still raises
+            # ValueError instead of TypeError.
+            raise ValueError(f'Invalid state:{state}')
+        self.state = state
+        if state in (COMPLETED_STATE, ABANDONED_STATE, VERIFIED_STATE):
+            for subtask in self.subtasks:
+                if subtask.state != ABANDONED_STATE:
+                    subtask.set_state(state)
+        elif state == IN_PROGRESS_STATE:
+            if self.parent is not None:
+                self.parent.set_state(state)
+
+    def get_current_task(self) -> "Task | None":
+        """Return the deepest in-progress task along the first in-progress branch.
+
+        Returns None when neither this task nor any direct subtask is in progress.
+        """
+        for subtask in self.subtasks:
+            if subtask.state == IN_PROGRESS_STATE:
+                return subtask.get_current_task()
+        if self.state == IN_PROGRESS_STATE:
+            return self
+        return None
+
+class Plan:
+    """A tree of tasks rooted at a single main goal."""
+    main_goal: str
+    task: Task
+
+    def __init__(self, task: str):
+        """Create a plan whose root task (id '0') has ``task`` as its goal."""
+        self.main_goal = task
+        self.task = Task(parent=None, goal=task, subtasks=[])
+
+    def __str__(self):
+        """Return the indented text rendering of the whole task tree."""
+        return self.task.to_string()
+
+    def get_task_by_id(self, id: str) -> Task:
+        """Look up a task by its dotted id, e.g. '0.2.1'.
+
+        Raises:
+            ValueError: if the id has a non-integer part, does not start
+                with 0, or points at a subtask that does not exist.
+        """
+        try:
+            parts = [int(p) for p in id.split('.')]
+        except ValueError as err:
+            raise ValueError('Invalid task id, non-integer:' + id) from err
+        if parts[0] != 0:
+            raise ValueError('Invalid task id, must start with 0:' + id)
+        task = self.task
+        for part in parts[1:]:
+            # Reject negative parts too: Python would otherwise index from
+            # the end and silently return the wrong task.
+            if part < 0 or part >= len(task.subtasks):
+                raise ValueError('Task does not exist:' + id)
+            task = task.subtasks[part]
+        return task
+
+    def add_subtask(self, parent_id: str, goal: str, subtasks: "List | None" = None):
+        """Append a new task under ``parent_id``.
+
+        Args:
+            parent_id: dotted id of the task to add the new task under.
+            goal: goal text of the new task.
+            subtasks: optional nested subtasks, passed through to Task.
+
+        Raises:
+            ValueError: if ``parent_id`` does not identify an existing task.
+        """
+        parent = self.get_task_by_id(parent_id)
+        child = Task(parent=parent, goal=goal, subtasks=subtasks or [])
+        parent.subtasks.append(child)
+
+    def set_subtask_state(self, id: str, state: str):
+        """Set the state of the task with the given id.
+
+        Raises:
+            ValueError: if the id or the state is invalid.
+        """
+        task = self.get_task_by_id(id)
+        task.set_state(state)
+
+    def get_current_task(self):
+        """Return the task currently in progress, or None if there is none."""
+        return self.task.get_current_task()
+
--- a/opendevin/state.py
+++ b/opendevin/state.py
@ -1,6 +1,8 @@
 from dataclasses import dataclass, field
 from typing import List, Tuple

+from opendevin.plan import Plan
+
 from opendevin.action import (
    Action,
 )
@ -11,7 +13,7 @@ from opendevin.observation import (

@dataclass
 class State:
-    task: str
+    plan: Plan
    iteration: int = 0
    background_commands_obs: List[CmdOutputObservation] = field(default_factory=list)
    history: List[Tuple[Action, Observation]] = field(default_factory=list)