remove old microagents (#6964)
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
parent 49a29c19cb
commit b3cac69121
.github/workflows/integration-runner.yml (vendored): 70 lines changed
@@ -117,68 +117,6 @@ jobs:
          echo "EOF" >> $GITHUB_ENV

      # -------------------------------------------------------------
      # Run DelegatorAgent tests for Haiku, limited to t01 and t02
      - name: Wait a little bit (again)
        run: sleep 5

      - name: Configure config.toml for testing DelegatorAgent (Haiku)
        env:
          LLM_MODEL: "litellm_proxy/claude-3-5-haiku-20241022"
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
          MAX_ITERATIONS: 30
        run: |
          echo "[llm.eval]" > config.toml
          echo "model = \"$LLM_MODEL\"" >> config.toml
          echo "api_key = \"$LLM_API_KEY\"" >> config.toml
          echo "base_url = \"$LLM_BASE_URL\"" >> config.toml
          echo "temperature = 0.0" >> config.toml

      - name: Run integration test evaluation for DelegatorAgent (Haiku)
        env:
          SANDBOX_FORCE_REBUILD_RUNTIME: True
        run: |
          poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD DelegatorAgent '' 30 $N_PROCESSES "t01_fix_simple_typo,t02_add_bash_hello" 'delegator_haiku_run'

          # Find and export the delegator test results
          REPORT_FILE_DELEGATOR_HAIKU=$(find evaluation/evaluation_outputs/outputs/integration_tests/DelegatorAgent/*haiku*_maxiter_30_N* -name "report.md" -type f | head -n 1)
          echo "REPORT_FILE_DELEGATOR_HAIKU: $REPORT_FILE_DELEGATOR_HAIKU"
          echo "INTEGRATION_TEST_REPORT_DELEGATOR_HAIKU<<EOF" >> $GITHUB_ENV
          cat $REPORT_FILE_DELEGATOR_HAIKU >> $GITHUB_ENV
          echo >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

      # -------------------------------------------------------------
      # Run DelegatorAgent tests for DeepSeek, limited to t01 and t02
      - name: Wait a little bit (again)
        run: sleep 5

      - name: Configure config.toml for testing DelegatorAgent (DeepSeek)
        env:
          LLM_MODEL: "litellm_proxy/deepseek-chat"
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
          MAX_ITERATIONS: 30
        run: |
          echo "[llm.eval]" > config.toml
          echo "model = \"$LLM_MODEL\"" >> config.toml
          echo "api_key = \"$LLM_API_KEY\"" >> config.toml
          echo "base_url = \"$LLM_BASE_URL\"" >> config.toml
          echo "temperature = 0.0" >> config.toml
      - name: Run integration test evaluation for DelegatorAgent (DeepSeek)
        env:
          SANDBOX_FORCE_REBUILD_RUNTIME: True
        run: |
          poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD DelegatorAgent '' 30 $N_PROCESSES "t01_fix_simple_typo,t02_add_bash_hello" 'delegator_deepseek_run'

          # Find and export the delegator test results
          REPORT_FILE_DELEGATOR_DEEPSEEK=$(find evaluation/evaluation_outputs/outputs/integration_tests/DelegatorAgent/deepseek*_maxiter_30_N* -name "report.md" -type f | head -n 1)
          echo "REPORT_FILE_DELEGATOR_DEEPSEEK: $REPORT_FILE_DELEGATOR_DEEPSEEK"
          echo "INTEGRATION_TEST_REPORT_DELEGATOR_DEEPSEEK<<EOF" >> $GITHUB_ENV
          cat $REPORT_FILE_DELEGATOR_DEEPSEEK >> $GITHUB_ENV
          echo >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV
      # -------------------------------------------------------------
      # Run VisualBrowsingAgent tests for DeepSeek, limited to t05 and t06
      - name: Wait a little bit (again)
        run: sleep 5
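For reference, each deleted "Configure config.toml" step only materializes a five-line TOML section from the step's `env` block. A minimal Python sketch of the equivalent file generation (the key and base URL values here are illustrative placeholders, not the workflow's secrets):

```python
# Sketch: reproduce the [llm.eval] section the deleted workflow steps write.
# api_key/base_url come from repository secrets in CI; placeholders here.
values = {
    'model': 'litellm_proxy/claude-3-5-haiku-20241022',
    'api_key': 'dummy-key',
    'base_url': 'https://llm.example.invalid',
    'temperature': 0.0,
}

with open('config.toml', 'w') as f:
    f.write('[llm.eval]\n')
    for key, value in values.items():
        if isinstance(value, str):
            f.write(f'{key} = "{value}"\n')
        else:
            f.write(f'{key} = {value}\n')
```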
@@ -213,7 +151,7 @@ jobs:
        run: |
          TIMESTAMP=$(date +'%y-%m-%d-%H-%M')
          cd evaluation/evaluation_outputs/outputs # Change to the outputs directory
-          tar -czvf ../../../integration_tests_${TIMESTAMP}.tar.gz integration_tests/CodeActAgent/* integration_tests/DelegatorAgent/* integration_tests/VisualBrowsingAgent/* # Only include the actual result directories
+          tar -czvf ../../../integration_tests_${TIMESTAMP}.tar.gz integration_tests/CodeActAgent/* integration_tests/VisualBrowsingAgent/* # Only include the actual result directories

      - name: Upload evaluation results as artifact
        uses: actions/upload-artifact@v4
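The packaging step can be mirrored outside CI; a sketch using Python's `tarfile`, assuming the outputs layout matches the `find` patterns above:

```python
import tarfile
import time
from pathlib import Path

# Sketch: bundle only the per-agent result directories, like the tar step above.
timestamp = time.strftime('%y-%m-%d-%H-%M')
outputs = Path('evaluation/evaluation_outputs/outputs')

with tarfile.open(f'integration_tests_{timestamp}.tar.gz', 'w:gz') as archive:
    for agent_dir in ('CodeActAgent', 'VisualBrowsingAgent'):
        for result in sorted((outputs / 'integration_tests' / agent_dir).glob('*')):
            # keep archive paths relative to the outputs directory
            archive.add(result, arcname=str(result.relative_to(outputs)))
```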
@@ -254,12 +192,6 @@ jobs:
            **Integration Tests Report (DeepSeek)**
            DeepSeek LLM Test Results:
            ${{ env.INTEGRATION_TEST_REPORT_DEEPSEEK }}
            ---
            **Integration Tests Report Delegator (Haiku)**
            ${{ env.INTEGRATION_TEST_REPORT_DELEGATOR_HAIKU }}
            ---
            **Integration Tests Report Delegator (DeepSeek)**
            ${{ env.INTEGRATION_TEST_REPORT_DELEGATOR_DEEPSEEK }}
            ---
            **Integration Tests Report VisualBrowsing (DeepSeek)**
            ${{ env.INTEGRATION_TEST_REPORT_VISUALBROWSING_DEEPSEEK }}
@@ -44,14 +44,14 @@ sudo apt-get install -y \
1. Find the latest Swift version for Debian:

   Go to the [Swift.org download page](https://www.swift.org/download/) to find the latest Swift version compatible with Debian 12 (Bookworm).

   Look for a tarball named something like `swift-<VERSION>-RELEASE-debian12.tar.gz` (e.g., `swift-6.0.3-RELEASE-debian12.tar.gz`).

   The URL pattern is typically:
   ```
   https://download.swift.org/swift-<VERSION>-release/debian12/swift-<VERSION>-RELEASE/swift-<VERSION>-RELEASE-debian12.tar.gz
   ```

   Where `<VERSION>` is the Swift version number (e.g., `6.0.3`).

2. Download the Swift binary for Debian 12:

@@ -80,4 +80,4 @@ Verify that Swift is correctly installed by running:

 ```bash
 swift --version
 ```
-```
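As a quick sanity check on that URL pattern, a small Python sketch that assembles the tarball URL from a version string (the version is illustrative; check swift.org for the current release):

```python
# Sketch: build the Swift tarball URL documented above.
version = '6.0.3'  # illustrative

url = (
    f'https://download.swift.org/swift-{version}-release/debian12/'
    f'swift-{version}-RELEASE/swift-{version}-RELEASE-debian12.tar.gz'
)
print(url)
```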
@@ -1,9 +1,5 @@
from dotenv import load_dotenv

from openhands.agenthub.micro.agent import MicroAgent
from openhands.agenthub.micro.registry import all_microagents
from openhands.controller.agent import Agent

load_dotenv()

@@ -14,26 +10,13 @@ from openhands.agenthub import ( # noqa: E402
    dummy_agent,
    visualbrowsing_agent,
)
from openhands.controller.agent import Agent  # noqa: E402

__all__ = [
    'Agent',
    'codeact_agent',
    'delegator_agent',
    'dummy_agent',
    'browsing_agent',
    'visualbrowsing_agent',
]

for agent in all_microagents.values():
    name = agent['name']
    prompt = agent['prompt']

    anon_class = type(
        name,
        (MicroAgent,),
        {
            'prompt': prompt,
            'agent_definition': agent,
        },
    )

    Agent.register(name, anon_class)
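The deleted registration loop leans on `type(name, bases, namespace)` to mint one `MicroAgent` subclass per definition. A self-contained sketch of the same trick, with toy stand-ins rather than the OpenHands classes:

```python
# Sketch: dynamic subclass creation, as in the removed registration loop.
class Base:
    prompt = ''

registry: dict[str, type] = {}
definition = {'name': 'EchoAgent', 'prompt': 'Repeat the task back.'}

cls = type(
    definition['name'],   # class name
    (Base,),              # base classes
    {'prompt': definition['prompt'], 'agent_definition': definition},
)
registry[definition['name']] = cls

assert registry['EchoAgent']().prompt == 'Repeat the task back.'
```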
@@ -1,17 +0,0 @@
## Introduction

This package contains definitions of micro-agents. A micro-agent is defined
in the following structure:

```
[AgentName]
├── agent.yaml
└── prompt.md
```

Note that `prompt.md` can use jinja2 template syntax. During runtime, `prompt.md`
is loaded and rendered, and used together with `agent.yaml` to initialize a
micro-agent.

Micro-agents can be used independently. You can also use `ManagerAgent`, which knows
how to coordinate the agents and collaboratively finish a task.
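A minimal sketch of loading one such directory into a single definition dict (the directory name is illustrative; the real loader is `registry.py`, shown further down in this diff):

```python
import yaml

# Sketch: fold one [AgentName] directory into a definition dict.
with open('SomeAgent/agent.yaml') as f:
    agent = yaml.safe_load(f)   # name, description, inputs, outputs, ...
with open('SomeAgent/prompt.md') as f:
    agent['prompt'] = f.read()  # jinja2 template, rendered at step time

print(agent['name'])
```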
@@ -1,2 +0,0 @@
* `browse` - opens a web page. Arguments:
  * `url` - the URL to open

@@ -1,3 +0,0 @@
* `delegate` - send a task to another agent from the list provided. Arguments:
  * `agent` - the agent to which the task is delegated. MUST match a name in the list of agents provided.
  * `inputs` - a dictionary of input parameters to the agent, as specified in the list

@@ -1,2 +0,0 @@
* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
  * `outputs` - a dictionary representing the outputs of your task, if any

@@ -1,3 +0,0 @@
* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
  * `content` - the thought to record
  * `wait_for_response` - set to `true` to wait for the user to respond before proceeding

@@ -1,2 +0,0 @@
* `read` - reads the content of a file. Arguments:
  * `path` - the path of the file to read

@@ -1,2 +0,0 @@
* `reject` - reject the task. Arguments:
  * `outputs` - a dictionary with only a `reason` attribute

@@ -1,2 +0,0 @@
* `run` - runs a command on the command line in a Linux shell. Arguments:
  * `command` - the command to run

@@ -1,3 +0,0 @@
* `write` - writes the content to a file. Arguments:
  * `path` - the path of the file to write
  * `content` - the content to write to the file

@@ -1,5 +0,0 @@
Your response MUST be in JSON format. It must be an object, and it must contain two fields:
* `action`, which is one of the actions specified here
* `args`, which is a map of key-value pairs, specifying the arguments for that action

You MUST NOT include any other text besides the JSON response
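Every micro-agent response therefore reduces to one two-field object. A quick parsing sketch with the standard `json` module (the deleted `agent.py` instead goes through `openhands.io.json` and `action_from_dict`):

```python
import json

# Sketch: the action/args schema mandated by the format instruction above.
raw = '{"action": "run", "args": {"command": "ls"}}'

response = json.loads(raw)
assert set(response) == {'action', 'args'}
print(response['action'], response['args'])
```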
@@ -1,4 +0,0 @@
Here is a recent history of actions you've taken in service of this plan,
as well as observations you've made. This only includes the MOST RECENT
actions and observations--more may have happened before that.
They are time-ordered, with your most recent action at the bottom.
@@ -1,88 +0,0 @@
from jinja2 import BaseLoader, Environment

from openhands.agenthub.micro.instructions import instructions
from openhands.agenthub.micro.registry import all_microagents
from openhands.controller.agent import Agent
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
from openhands.core.message import ImageContent, Message, TextContent
from openhands.events.action import Action
from openhands.events.event import Event
from openhands.events.serialization.action import action_from_dict
from openhands.events.serialization.event import event_to_memory
from openhands.io import json
from openhands.llm.llm import LLM


def parse_response(orig_response: str) -> Action:
    # attempt to load the JSON dict from the response
    action_dict = json.loads(orig_response)

    # load the action from the dict
    return action_from_dict(action_dict)


def to_json(obj: object, **kwargs: dict) -> str:
    """Serialize an object to str format"""
    return json.dumps(obj, **kwargs)


class MicroAgent(Agent):
    VERSION = '1.0'
    prompt = ''
    agent_definition: dict = {}

    def history_to_json(
        self, history: list[Event], max_events: int = 20, **kwargs: dict
    ) -> str:
        """
        Serialize and simplify history to str format
        """
        processed_history = []
        event_count = 0

        for event in reversed(history):
            if event_count >= max_events:
                break
            processed_history.append(
                event_to_memory(event, self.llm.config.max_message_chars)
            )
            event_count += 1

        # history is in reverse order, let's fix it
        processed_history.reverse()

        # everything starts with a message
        # the first message is already in the prompt as the task
        # TODO: so we don't need to include it in the history

        return json.dumps(processed_history, **kwargs)

    def __init__(self, llm: LLM, config: AgentConfig):
        super().__init__(llm, config)
        if 'name' not in self.agent_definition:
            raise ValueError('Agent definition must contain a name')
        self.prompt_template = Environment(loader=BaseLoader()).from_string(self.prompt)
        self.delegates = all_microagents.copy()
        del self.delegates[self.agent_definition['name']]

    def step(self, state: State) -> Action:
        last_user_message, last_image_urls = state.get_current_user_intent()
        prompt = self.prompt_template.render(
            state=state,
            instructions=instructions,
            to_json=to_json,
            history_to_json=self.history_to_json,
            delegates=self.delegates,
            latest_user_message=last_user_message,
        )
        content: list[TextContent | ImageContent] = [TextContent(text=prompt)]
        if self.llm.vision_is_active() and last_image_urls:
            content.append(ImageContent(image_urls=last_image_urls))
        message = Message(role='user', content=content)
        resp = self.llm.completion(
            messages=self.llm.format_messages_for_llm(message),
        )
        action_resp = resp['choices'][0]['message']['content']
        action = parse_response(action_resp)
        return action
@@ -1,6 +0,0 @@
name: CoderAgent
description: Given a particular task, and a detailed description of the codebase, accomplishes the task
inputs:
  task: string
  summary: string
outputs: {}
@@ -1,27 +0,0 @@
# Task
You are a software engineer. You've inherited an existing codebase, which you
need to modify to complete this task:

{{ state.inputs.task }}

{% if state.inputs.summary %}
Here's a summary of the codebase, as it relates to this task:

{{ state.inputs.summary }}
{% endif %}

## Available Actions
{{ instructions.actions.run }}
{{ instructions.actions.write }}
{{ instructions.actions.read }}
{{ instructions.actions.message }}
{{ instructions.actions.finish }}

Do NOT finish until you have completed the tasks.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}

## Format
{{ instructions.format.action }}
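Prompt files like this are jinja2 templates that the deleted `MicroAgent.step` renders with the state, instructions, and helpers. A toy rendering sketch using the same `Environment(loader=BaseLoader())` setup (the template string is a stand-in, not the real prompt.md):

```python
from jinja2 import BaseLoader, Environment

# Sketch: render a micro-agent-style template the way agent.py did.
template = Environment(loader=BaseLoader()).from_string(
    '# Task\n{{ task }}\n\n## Available Actions\n{{ actions }}'
)
print(template.render(task='Fix the typo in README.md', actions='* `run` ...'))
```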
@@ -1,25 +0,0 @@
## Introduction

CommitWriterAgent can help write a git commit message. Example:

```bash
WORKSPACE_MOUNT_PATH="`PWD`" \
poetry run python openhands/core/main.py -t "dummy task" -c CommitWriterAgent -d ./
```

This agent is special in the sense that it doesn't need a task. Once called,
it attempts to read the diff in the git staging area and write a good commit
message.

## Future work

### Feedback loop

The commit message could be (optionally) shown to the customer or
other agents, so that CommitWriterAgent could gather feedback to further
improve the commit message.

### Task rejection

When the agent cannot compile a commit message (e.g., not a git repository), it
should reject the task with an explanation.
@@ -1,6 +0,0 @@
name: CommitWriterAgent
description: "Write a git commit message for files in the git staging area"
inputs: {}
outputs:
  answer: string
  reason: string
@@ -1,33 +0,0 @@
# Task
You are a responsible software engineer and always write good commit messages.

Please analyze the diff in the staging area, understand the context and content
of the updates from the diff only. Identify key elements like:
- Which files are affected?
- What types of changes were made (e.g., new features, bug fixes, refactoring, documentation, testing)?

Then you should generate a commit message that succinctly summarizes the staged
changes. The commit message should include:
- A summary line that clearly states the purpose of the changes.
- Optionally, a detailed description if the changes are complex or need further explanation.

You should first use `git status` to check whether it's a valid git repo and there
is a diff in the staging area. If not, please call the `reject` action.

If it is a valid git repo and there is a diff in the staging area, you should find
the diff using `git diff --cached`, compile a commit message, and call the `finish`
action with `outputs.answer` set to the answer.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}

If the last item in the history is an error, you should try to fix it.

## Available Actions
{{ instructions.actions.run }}
{{ instructions.actions.reject }}
{{ instructions.actions.finish }}

## Format
{{ instructions.format.action }}
@@ -1,22 +0,0 @@
import os

instructions: dict = {}

base_dir = os.path.dirname(os.path.abspath(__file__)) + '/_instructions'
for root, dirs, files in os.walk(base_dir):
    if len(files) == 0:
        continue
    if root == base_dir:
        obj = instructions
    else:
        rel_base = os.path.relpath(root, base_dir)
        keys = rel_base.split('/')
        obj = instructions
        for key in keys:
            if key not in obj:
                obj[key] = {}
            obj = obj[key]
    for file in files:
        without_ext = os.path.splitext(file)[0]
        with open(os.path.join(root, file), 'r') as f:
            obj[without_ext] = f.read()
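The loader above turns the `_instructions` tree into nested dicts keyed by subdirectory names and file stems. A sketch of the shape it produces (paths and strings illustrative):

```python
# Sketch: for a tree like
#   _instructions/actions/run.md
#   _instructions/format/action.md
# the loader yields roughly:
instructions = {
    'actions': {'run': '* `run` - runs a command ...'},
    'format': {'action': 'Your response MUST be in JSON format. ...'},
}

# which is what prompt templates dereference, e.g. {{ instructions.actions.run }}
print(instructions['actions']['run'])
```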
@@ -1,8 +0,0 @@
name: ManagerAgent
description: Delegates tasks to microagents based on their area of expertise
generates: Action
inputs:
  task: string
outputs:
  summary: string  # if finished
  reason: string  # if rejected
@@ -1,42 +0,0 @@
# Task
You are in charge of accomplishing the following task:
{% set goal = latest_user_message if latest_user_message is not none else state.inputs.task %}
{{ goal }}

In order to accomplish this goal, you must delegate tasks to one or more agents, who
can do the actual work. A description of each agent is provided below. You MUST
select one of the delegates below to move towards accomplishing the task, and you MUST
provide the correct inputs for the delegate you select.

Note: the delegated agent either returns "finish" or "reject".
- If the action is "finish", but the full task is not done yet, you should
continue to delegate to one of the agents below until the full task is finished.
- If the action is "reject", it means the delegated agent is not capable of the
task you sent. You should revisit the input you sent to the delegate, and consider
whether any other delegate would be able to solve the task. If you cannot find
a proper delegate agent, or the delegate attempts keep failing, call the `reject`
action. In the `reason` attribute, make sure you include your attempts (e.g., what agent
you have delegated to, and why they failed).

## Agents
{% for name, details in delegates.items() %}
### {{ name }}
{{ details.description }}
#### Inputs
{{ to_json(details.inputs) }}
{% endfor %}

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}

If the last item in the history is an error, you should try to fix it. If you
cannot fix it, call the `reject` action.

## Available Actions
{{ instructions.actions.delegate }}
{{ instructions.actions.finish }}
{{ instructions.actions.reject }}

## Format
{{ instructions.format.action }}
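Combined with the `delegate` instruction earlier in this diff, a manager step ultimately resolves to JSON of the following shape; a sketch with illustrative values, not recorded agent output:

```python
import json

# Sketch: the delegate action a ManagerAgent response parses into.
delegate_action = {
    'action': 'delegate',
    'args': {
        'agent': 'CoderAgent',  # must match a name in the delegates list
        'inputs': {'task': 'Fix the typo in README.md', 'summary': ''},
    },
}
print(json.dumps(delegate_action, indent=2))
```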
@@ -1,24 +0,0 @@
name: MathAgent
description: "Solves simple and complex math problems using python"
container: python:3.12.3-bookworm
inputs:
  task: string
outputs:
  answer: string
examples:
  - inputs:
      task: "What is 2 + 2?"
    outputs:
      answer: "4"
  - inputs:
      task: "What is the area of a circle with radius 7.324 inches?"
    outputs:
      answer: "168.518 square inches"
  - inputs:
      task: "What day of the week is 2099-01-01?"
    outputs:
      answer: "Saturday"
  - inputs:
      task: "What is the integral of sin(x^2) evaluated from -1 to 1?"
    outputs:
      answer: "0.603848"
@@ -1,23 +0,0 @@
# Task
You are a brilliant mathematician and programmer. You've been given the following problem to solve:

`{{ state.inputs.task }}`

Please write a python script that solves this problem, and prints the answer to stdout.
ONLY print the answer to stdout, nothing else.
You should then run the python script with `python3`,
and call the `finish` action with `outputs.answer` set to the answer.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}

If the last item in the history is an error, you should try to fix it.

## Available Actions
{{ instructions.actions.write }}
{{ instructions.actions.run }}
{{ instructions.actions.finish }}

## Format
{{ instructions.format.action }}
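For illustration, the kind of single-purpose script this prompt asks MathAgent to write, using one of the tasks from the agent.yaml examples above (the script is a sketch, not recorded agent output):

```python
import math

# Sketch: MathAgent-style script for "What is the area of a circle with
# radius 7.324 inches?". Only the answer goes to stdout, per the prompt.
radius = 7.324
area = math.pi * radius**2
print(f'{area:.3f} square inches')
```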
@@ -1,5 +0,0 @@
name: PostgresAgent
description: Writes and maintains PostgreSQL migrations
inputs:
  task: string
outputs: {}
@@ -1,24 +0,0 @@
# Task
You are a database engineer. You are working on an existing Postgres project, and have been given
the following task:

{{ state.inputs.task }}

You must:
* Investigate the existing migrations to understand the current schema
* Write a new migration to accomplish the task above
* Test that the migrations work properly

## Actions
You may take any of the following actions:
{{ instructions.actions.message }}
{{ instructions.actions.read }}
{{ instructions.actions.write }}
{{ instructions.actions.run }}

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}

## Format
{{ instructions.format.action }}
@@ -1,27 +0,0 @@
import os

import yaml

all_microagents = {}

# Get the list of directories and sort them to preserve determinism
dirs = sorted(os.listdir(os.path.dirname(__file__)))

for dir in dirs:
    base = os.path.dirname(__file__) + '/' + dir
    if os.path.isfile(base):
        continue
    if dir.startswith('_'):
        continue
    promptFile = base + '/prompt.md'
    agentFile = base + '/agent.yaml'
    if not os.path.isfile(promptFile) or not os.path.isfile(agentFile):
        raise Exception(f'Missing prompt or agent file in {base}. Please create them.')
    with open(promptFile, 'r') as f:
        prompt = f.read()
    with open(agentFile, 'r') as f:
        agent = yaml.safe_load(f)
    if 'name' not in agent:
        raise Exception(f'Missing name in {agentFile}')
    agent['prompt'] = prompt
    all_microagents[agent['name']] = agent
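With the registry populated, consumers just iterate the dict, as the package `__init__.py` earlier in this diff did. A usage sketch (runnable only against a pre-commit checkout, since this module is deleted here):

```python
# Sketch: list the registered micro-agents (pre-commit tree only).
from openhands.agenthub.micro.registry import all_microagents

for name, definition in sorted(all_microagents.items()):
    print(f"{name}: {definition['description']}")
```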
@@ -1,5 +0,0 @@
name: RepoExplorerAgent
description: Generates a detailed summary of an existing codebase
inputs: {}
outputs:
  summary: string
@@ -1,26 +0,0 @@
# Task
You are a software engineer. You've inherited an existing codebase, which you're
learning about for the first time. Your goal is to produce a detailed summary
of the codebase, including:
* The overall purpose of the project
* The directory structure
* The main components of the codebase
* How the components fit together

## Available Actions
{{ instructions.actions.run }}
{{ instructions.actions.read }}
{{ instructions.actions.message }}
{{ instructions.actions.finish }}

You should ONLY `run` commands that have no side-effects, like `ls` and `grep`.

Do NOT finish until you have a complete understanding of the codebase.
When you're done, put your summary into the output of the `finish` action.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}

## Format
{{ instructions.format.action }}
@@ -1,6 +0,0 @@
name: StudyRepoForTaskAgent
description: Given a particular task, finds and describes all relevant parts of the codebase
inputs:
  task: string
outputs:
  summary: string
@@ -1,62 +0,0 @@
# Task
You are a software architect. Your team has inherited an existing codebase, and
needs to finish a project:

{{ state.inputs.task }}

As an architect, you need to study the codebase to find all the information that
might be helpful for your software engineering team.

## Available Actions
{{ instructions.actions.run }}
{{ instructions.actions.read }}
{{ instructions.actions.message }}
{{ instructions.actions.finish }}

You must ONLY `run` commands that have no side-effects, like `ls` and `grep`. You
MUST NOT modify or write to any file.

Do NOT finish until you have a complete understanding of which parts of the
codebase are relevant to the project, including particular files, functions, and classes.
When you're done, put your summary in `outputs.summary` in the `finish` action.
Remember, your task is to explore and study the current repository, not actually
implement the solution. If the codebase is empty, you should call the `finish` action.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}

## Format
{{ instructions.format.action }}

## Examples

Here is an example of how you can interact with the environment for task solving:

--- START OF EXAMPLE ---

USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000?

ASSISTANT:
{
  "action": "run",
  "args": {
    "command": "ls"
  }
}

USER:
OBSERVATION:
[]

ASSISTANT:
{
  "action": "finish",
  "args": {
    "outputs": {
      "summary": "The codebase appears to be empty. Engineers should start everything from scratch."
    }
  }
}

--- END OF EXAMPLE ---
@@ -1,6 +0,0 @@
name: TypoFixerAgent
description: Fixes typos in files in the current working directory
inputs:
  task: string
outputs:
  summary: string
@@ -1,54 +0,0 @@
# Task
You are a proofreader tasked with fixing typos in the files in your current working directory.

{% if state.inputs.task %}
Specifically, your task is:
{{ state.inputs.task }}
{% endif %}

To achieve this goal, you should:

1. Scan the files for typos
2. Overwrite the files with the typos fixed
3. Provide a summary of the typos fixed

## Available Actions
{{ instructions.actions.read }}
{{ instructions.actions.write }}
{{ instructions.actions.run }}
{{ instructions.actions.message }}
{{ instructions.actions.finish }}

To complete this task:
1. Use the `read` action to read the contents of the files in your current working directory. Make sure to provide the file path in the format `'./file_name.ext'`.
2. Use the `message` action to analyze the contents and identify typos.
3. Use the `write` action to create new versions of the files with the typos fixed.
   - Overwrite the original files with the corrected content. Make sure to provide the file path in the format `'./file_name.ext'`.
4. Use the `message` action to generate a summary of the typos fixed, including the original and fixed versions of each typo, and the file(s) they were found in.
5. Use the `finish` action to return the summary in the `outputs.summary` field.

Do NOT finish until you have fixed all the typos and generated a summary.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=10) }}

## Format
{{ instructions.format.action }}

For example, if you want to use the read action to read the contents of a file named example.txt, your response should look like this:
{
  "action": "read",
  "args": {
    "path": "./example.txt"
  }
}

Similarly, if you want to use the write action to write content to a file named output.txt, your response should look like this:
{
  "action": "write",
  "args": {
    "path": "./output.txt",
    "content": "This is the content to be written to the file."
  }
}
@@ -1,7 +0,0 @@
name: VerifierAgent
description: Given a particular task, verifies that the task has been completed
inputs:
  task: string
outputs:
  completed: boolean
  summary: string
@@ -1,28 +0,0 @@
# Task
You are a quality assurance engineer. Another engineer has made changes to the
codebase which are supposed to solve this task:

{{ state.inputs.task }}

Note the changes might have already been applied in-line. You should focus on
validating if the task is solved, nothing else.

## Available Actions
{{ instructions.actions.run }}
{{ instructions.actions.read }}
{{ instructions.actions.message }}
{{ instructions.actions.finish }}

You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts.

Do NOT finish until you know whether the task is complete and correct.
When you're done, add a `completed` boolean to the `outputs` of the `finish` action.
If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action
explaining what the problem is.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}

## Format
{{ instructions.format.action }}
@@ -61,7 +61,7 @@ def create_runtime(
     if agent:
         agent_cls = type(agent)
     else:
-        agent_cls = openhands.agenthub.Agent.get_cls(config.default_agent)
+        agent_cls = Agent.get_cls(config.default_agent)

     # runtime and tools
     runtime_cls = get_runtime_cls(config.runtime)
@@ -23,7 +23,14 @@ TOP_KEYS = [
     'tool_call_metadata',
     'llm_metrics',
 ]
-UNDERSCORE_KEYS = ['id', 'timestamp', 'source', 'cause', 'tool_call_metadata', 'llm_metrics']
+UNDERSCORE_KEYS = [
+    'id',
+    'timestamp',
+    'source',
+    'cause',
+    'tool_call_metadata',
+    'llm_metrics',
+]

 DELETE_FROM_TRAJECTORY_EXTRAS = {
     'screenshot',
@@ -62,8 +69,13 @@ def event_from_dict(data) -> 'Event':
             metrics.accumulated_cost = value.get('accumulated_cost', 0.0)
             for cost in value.get('costs', []):
                 metrics._costs.append(Cost(**cost))
-            metrics.response_latencies = [ResponseLatency(**latency) for latency in value.get('response_latencies', [])]
-            metrics.token_usages = [TokenUsage(**usage) for usage in value.get('token_usages', [])]
+            metrics.response_latencies = [
+                ResponseLatency(**latency)
+                for latency in value.get('response_latencies', [])
+            ]
+            metrics.token_usages = [
+                TokenUsage(**usage) for usage in value.get('token_usages', [])
+            ]
             value = metrics
         setattr(evt, '_' + key, value)
     return evt

@@ -16,7 +16,10 @@ class DummyAgent:
         self.llm = type(
             'DummyLLM',
             (),
-            {'metrics': Metrics()},
+            {
+                'metrics': Metrics(),
+                'config': type('DummyConfig', (), {'max_message_chars': 10000})(),
+            },
         )()

     def reset(self):
@@ -1,135 +0,0 @@
import json
import os
from unittest.mock import MagicMock

import pytest
import yaml
from pytest import TempPathFactory

from openhands.agenthub.micro.registry import all_microagents
from openhands.controller.agent import Agent
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
from openhands.events.action import MessageAction
from openhands.events.stream import EventStream
from openhands.storage import get_file_store


@pytest.fixture
def temp_dir(tmp_path_factory: TempPathFactory) -> str:
    return str(tmp_path_factory.mktemp('test_micro_agents'))


@pytest.fixture
def event_stream(temp_dir):
    file_store = get_file_store('local', temp_dir)
    event_stream = EventStream('asdf', file_store)
    yield event_stream


@pytest.fixture
def agent_configs():
    return {
        'CoderAgent': AgentConfig(enable_prompt_extensions=True),
        'BrowsingAgent': AgentConfig(enable_prompt_extensions=True),
    }


def test_all_agents_are_loaded():
    assert all_microagents is not None
    assert len(all_microagents) > 1

    base = os.path.join('openhands', 'agenthub', 'micro')
    full_path = os.path.dirname(__file__) + '/../../' + base
    agent_names = set()
    for root, _, files in os.walk(full_path):
        for file in files:
            if file == 'agent.yaml':
                file_path = os.path.join(root, file)
                with open(file_path, 'r') as yaml_file:
                    data = yaml.safe_load(yaml_file)
                    agent_names.add(data['name'])
    assert agent_names == set(all_microagents.keys())


def test_coder_agent_with_summary(event_stream: EventStream, agent_configs: dict):
    """Coder agent should render code summary as part of prompt."""
    mock_llm = MagicMock()
    content = json.dumps({'action': 'finish', 'args': {}})
    mock_llm.completion.return_value = {'choices': [{'message': {'content': content}}]}
    mock_llm.format_messages_for_llm.return_value = [
        {
            'role': 'user',
            'content': "This is a dummy task. This is a dummy summary about this repo. Here's a summary of the codebase, as it relates to this task.",
        }
    ]

    coder_agent = Agent.get_cls('CoderAgent')(
        llm=mock_llm, config=agent_configs['CoderAgent']
    )
    assert coder_agent is not None

    # give it some history
    task = 'This is a dummy task'
    history = list()
    history.append(MessageAction(content=task))

    summary = 'This is a dummy summary about this repo'
    state = State(history=history, inputs={'summary': summary})
    coder_agent.step(state)

    mock_llm.completion.assert_called_once()
    _, kwargs = mock_llm.completion.call_args
    prompt_element = kwargs['messages'][0]['content']
    if isinstance(prompt_element, dict):
        prompt = prompt_element['content']
    else:
        prompt = prompt_element
    assert task in prompt
    assert "Here's a summary of the codebase, as it relates to this task" in prompt
    assert summary in prompt


def test_coder_agent_without_summary(event_stream: EventStream, agent_configs: dict):
    """When there's no codebase_summary available, there shouldn't be any prompt about 'code summary'.

    This test verifies that the prompt doesn't include code summary text when no summary is provided.
    """
    mock_llm = MagicMock()
    content = json.dumps({'action': 'finish', 'args': {}})
    mock_llm.completion.return_value = {'choices': [{'message': {'content': content}}]}
    mock_llm.format_messages_for_llm.return_value = [
        {
            'role': 'user',
            'content': [
                {
                    'type': 'text',
                    'text': "This is a dummy task. This is a dummy summary about this repo. Here's a summary of the codebase, as it relates to this task.",
                }
            ],
        }
    ]

    coder_agent = Agent.get_cls('CoderAgent')(
        llm=mock_llm, config=agent_configs['CoderAgent']
    )
    assert coder_agent is not None

    # give it some history
    task = 'This is a dummy task'
    history = list()
    history.append(MessageAction(content=task))

    # set state without codebase summary
    state = State(history=history)
    coder_agent.step(state)

    mock_llm.completion.assert_called_once()
    _, kwargs = mock_llm.completion.call_args
    prompt_element = kwargs['messages'][0]['content']
    if isinstance(prompt_element, dict):
        prompt = prompt_element['content']
    else:
        prompt = prompt_element
    print(f'\n{prompt_element}\n')
    assert "Here's a summary of the codebase, as it relates to this task" not in prompt
@@ -1,92 +0,0 @@
import pytest

from openhands.agenthub.micro.agent import parse_response as parse_response_micro
from openhands.core.exceptions import LLMResponseError
from openhands.events.action import (
    FileWriteAction,
    MessageAction,
)
from openhands.io import loads as custom_loads


@pytest.mark.parametrize(
    'parse_response_module',
    [parse_response_micro],
)
def test_parse_single_complete_json(parse_response_module):
    input_response = """
    {
        "action": "message",
        "args": {
            "content": "The following typos were fixed:\\n* 'futur' -> 'future'\\n* 'imagin' -> 'imagine'\\n* 'techological' -> 'technological'\\n* 'responsability' -> 'responsibility'\\nThe corrected file is ./short_essay.txt."
        }
    }
    """
    expected = MessageAction(
        "The following typos were fixed:\n* 'futur' -> 'future'\n* 'imagin' -> 'imagine'\n* 'techological' -> 'technological'\n* 'responsability' -> 'responsibility'\nThe corrected file is ./short_essay.txt."
    )
    result = parse_response_module(input_response)
    assert result == expected


@pytest.mark.parametrize(
    'parse_response_module',
    [parse_response_micro],
)
def test_parse_json_with_surrounding_text(parse_response_module):
    input_response = """
    Some initial text that is not JSON formatted.
    {
        "action": "write",
        "args": {
            "path": "./updated_file.txt",
            "content": "Updated text content here..."
        }
    }
    Some trailing text that is also not JSON formatted.
    """
    expected = FileWriteAction(
        path='./updated_file.txt', content='Updated text content here...'
    )
    result = parse_response_module(input_response)
    assert result == expected


@pytest.mark.parametrize(
    'parse_response_module',
    [parse_response_micro],
)
def test_parse_first_of_multiple_jsons(parse_response_module):
    input_response = """
    I will firstly do
    {
        "action": "write",
        "args": {
            "path": "./short_essay.txt",
            "content": "Text content here..."
        }
    }
    Then I will continue with
    {
        "action": "think",
        "args": {
            "thought": "This should not be parsed."
        }
    }
    """
    expected = FileWriteAction(path='./short_essay.txt', content='Text content here...')
    result = parse_response_module(input_response)
    assert result == expected


def test_invalid_json_raises_error():
    # This should fail if repair_json is able to fix this faulty JSON
    input_response = '{"action": "write", "args": { "path": "./short_essay.txt", "content": "Missing closing brace" }'
    with pytest.raises(LLMResponseError):
        custom_loads(input_response)


def test_no_json_found():
    input_response = 'This is just a string with no JSON object.'
    with pytest.raises(LLMResponseError):
        custom_loads(input_response)