remove old microagents (#6964)

Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: openhands <openhands@all-hands.dev>
Robert Brennan 2025-03-12 16:18:19 -04:00 committed by GitHub
parent 49a29c19cb
commit b3cac69121
42 changed files with 27 additions and 923 deletions

View File

@ -117,68 +117,6 @@ jobs:
echo "EOF" >> $GITHUB_ENV
# -------------------------------------------------------------
# Run DelegatorAgent tests for Haiku, limited to t01 and t02
- name: Wait a little bit (again)
run: sleep 5
- name: Configure config.toml for testing DelegatorAgent (Haiku)
env:
LLM_MODEL: "litellm_proxy/claude-3-5-haiku-20241022"
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
MAX_ITERATIONS: 30
run: |
echo "[llm.eval]" > config.toml
echo "model = \"$LLM_MODEL\"" >> config.toml
echo "api_key = \"$LLM_API_KEY\"" >> config.toml
echo "base_url = \"$LLM_BASE_URL\"" >> config.toml
echo "temperature = 0.0" >> config.toml
- name: Run integration test evaluation for DelegatorAgent (Haiku)
env:
SANDBOX_FORCE_REBUILD_RUNTIME: True
run: |
poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD DelegatorAgent '' 30 $N_PROCESSES "t01_fix_simple_typo,t02_add_bash_hello" 'delegator_haiku_run'
# Find and export the delegator test results
REPORT_FILE_DELEGATOR_HAIKU=$(find evaluation/evaluation_outputs/outputs/integration_tests/DelegatorAgent/*haiku*_maxiter_30_N* -name "report.md" -type f | head -n 1)
echo "REPORT_FILE_DELEGATOR_HAIKU: $REPORT_FILE_DELEGATOR_HAIKU"
echo "INTEGRATION_TEST_REPORT_DELEGATOR_HAIKU<<EOF" >> $GITHUB_ENV
cat $REPORT_FILE_DELEGATOR_HAIKU >> $GITHUB_ENV
echo >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV
# -------------------------------------------------------------
# Run DelegatorAgent tests for DeepSeek, limited to t01 and t02
- name: Wait a little bit (again)
run: sleep 5
- name: Configure config.toml for testing DelegatorAgent (DeepSeek)
env:
LLM_MODEL: "litellm_proxy/deepseek-chat"
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
MAX_ITERATIONS: 30
run: |
echo "[llm.eval]" > config.toml
echo "model = \"$LLM_MODEL\"" >> config.toml
echo "api_key = \"$LLM_API_KEY\"" >> config.toml
echo "base_url = \"$LLM_BASE_URL\"" >> config.toml
echo "temperature = 0.0" >> config.toml
- name: Run integration test evaluation for DelegatorAgent (DeepSeek)
env:
SANDBOX_FORCE_REBUILD_RUNTIME: True
run: |
poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD DelegatorAgent '' 30 $N_PROCESSES "t01_fix_simple_typo,t02_add_bash_hello" 'delegator_deepseek_run'
# Find and export the delegator test results
REPORT_FILE_DELEGATOR_DEEPSEEK=$(find evaluation/evaluation_outputs/outputs/integration_tests/DelegatorAgent/deepseek*_maxiter_30_N* -name "report.md" -type f | head -n 1)
echo "REPORT_FILE_DELEGATOR_DEEPSEEK: $REPORT_FILE_DELEGATOR_DEEPSEEK"
echo "INTEGRATION_TEST_REPORT_DELEGATOR_DEEPSEEK<<EOF" >> $GITHUB_ENV
cat $REPORT_FILE_DELEGATOR_DEEPSEEK >> $GITHUB_ENV
echo >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV
# -------------------------------------------------------------
# Run VisualBrowsingAgent tests for DeepSeek, limited to t05 and t06
- name: Wait a little bit (again)
run: sleep 5
@ -213,7 +151,7 @@ jobs:
        run: |
          TIMESTAMP=$(date +'%y-%m-%d-%H-%M')
          cd evaluation/evaluation_outputs/outputs # Change to the outputs directory
          tar -czvf ../../../integration_tests_${TIMESTAMP}.tar.gz integration_tests/CodeActAgent/* integration_tests/DelegatorAgent/* integration_tests/VisualBrowsingAgent/* # Only include the actual result directories
          tar -czvf ../../../integration_tests_${TIMESTAMP}.tar.gz integration_tests/CodeActAgent/* integration_tests/VisualBrowsingAgent/* # Only include the actual result directories

      - name: Upload evaluation results as artifact
        uses: actions/upload-artifact@v4
@ -254,12 +192,6 @@ jobs:
            **Integration Tests Report (DeepSeek)**
            DeepSeek LLM Test Results:
            ${{ env.INTEGRATION_TEST_REPORT_DEEPSEEK }}
            ---
            **Integration Tests Report Delegator (Haiku)**
            ${{ env.INTEGRATION_TEST_REPORT_DELEGATOR_HAIKU }}
            ---
            **Integration Tests Report Delegator (DeepSeek)**
            ${{ env.INTEGRATION_TEST_REPORT_DELEGATOR_DEEPSEEK }}
            ---
            **Integration Tests Report VisualBrowsing (DeepSeek)**
            ${{ env.INTEGRATION_TEST_REPORT_VISUALBROWSING_DEEPSEEK }}

View File

@ -44,14 +44,14 @@ sudo apt-get install -y \
1. Find the latest Swift version for Debian:

   Go to the [Swift.org download page](https://www.swift.org/download/) to find the latest Swift version compatible with Debian 12 (Bookworm).
   Look for a tarball named something like `swift-<VERSION>-RELEASE-debian12.tar.gz` (e.g., `swift-6.0.3-RELEASE-debian12.tar.gz`).

   The URL pattern is typically:

   ```
   https://download.swift.org/swift-<VERSION>-release/debian12/swift-<VERSION>-RELEASE/swift-<VERSION>-RELEASE-debian12.tar.gz
   ```

   Where `<VERSION>` is the Swift version number (e.g., `6.0.3`).

2. Download the Swift binary for Debian 12:
@ -80,4 +80,4 @@ Verify that Swift is correctly installed by running:
```bash
swift --version
```

View File

@ -1,9 +1,5 @@
from dotenv import load_dotenv

from openhands.agenthub.micro.agent import MicroAgent
from openhands.agenthub.micro.registry import all_microagents
from openhands.controller.agent import Agent

load_dotenv()
@ -14,26 +10,13 @@ from openhands.agenthub import ( # noqa: E402
    dummy_agent,
    visualbrowsing_agent,
)
from openhands.controller.agent import Agent  # noqa: E402

__all__ = [
    'Agent',
    'codeact_agent',
    'delegator_agent',
    'dummy_agent',
    'browsing_agent',
    'visualbrowsing_agent',
]

for agent in all_microagents.values():
    name = agent['name']
    prompt = agent['prompt']
    anon_class = type(
        name,
        (MicroAgent,),
        {
            'prompt': prompt,
            'agent_definition': agent,
        },
    )
    Agent.register(name, anon_class)

View File

@ -1,17 +0,0 @@
## Introduction
This package contains definitions of micro-agents. A micro-agent is defined
in the following structure:
```
[AgentName]
├── agent.yaml
└── prompt.md
```
Note that `prompt.md` may use Jinja2 template syntax. At runtime, `prompt.md`
is loaded, rendered, and used together with `agent.yaml` to initialize a
micro-agent.

Micro-agents can be used independently. You can also use `ManagerAgent`, which
knows how to coordinate multiple micro-agents to finish a task collaboratively.
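For illustration, a minimal sketch of how a definition in this structure could be loaded and its prompt rendered. The `CoderAgent` directory, the task value, the stub state, and the empty instructions dict are assumptions for the example; the real agent passes richer context.

```python
# A sketch, not the project's actual loader: read agent.yaml, then render
# prompt.md with Jinja2.
from types import SimpleNamespace

import yaml
from jinja2 import BaseLoader, Environment

with open('CoderAgent/agent.yaml') as f:
    agent = yaml.safe_load(f)  # name, description, inputs, outputs

with open('CoderAgent/prompt.md') as f:
    template = Environment(loader=BaseLoader()).from_string(f.read())

state = SimpleNamespace(inputs={'task': 'Fix the typo in README.md'}, history=[])
print(
    template.render(
        state=state,
        instructions={},  # the real code injects loaded instruction snippets
        history_to_json=lambda history, **kwargs: '[]',  # stub for the sketch
    )
)
```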

View File

@ -1,2 +0,0 @@
* `browse` - opens a web page. Arguments:
  * `url` - the URL to open

View File

@ -1,3 +0,0 @@
* `delegate` - send a task to another agent from the list provided. Arguments:
  * `agent` - the agent to which the task is delegated. MUST match a name in the list of agents provided.
  * `inputs` - a dictionary of input parameters to the agent, as specified in the list

View File

@ -1,2 +0,0 @@
* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
  * `outputs` - a dictionary representing the outputs of your task, if any

View File

@ -1,3 +0,0 @@
* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
  * `content` - the thought to record
  * `wait_for_response` - set to `true` to wait for the user to respond before proceeding

View File

@ -1,2 +0,0 @@
* `read` - reads the content of a file. Arguments:
  * `path` - the path of the file to read

View File

@ -1,2 +0,0 @@
* `reject` - reject the task. Arguments:
  * `outputs` - a dictionary with only a `reason` attribute

View File

@ -1,2 +0,0 @@
* `run` - runs a command on the command line in a Linux shell. Arguments:
  * `command` - the command to run

View File

@ -1,3 +0,0 @@
* `write` - writes the content to a file. Arguments:
  * `path` - the path of the file to write
  * `content` - the content to write to the file

View File

@ -1,5 +0,0 @@
Your response MUST be in JSON format. It must be an object, and it must contain two fields:
* `action`, which is one of the actions specified here
* `args`, which is a map of key-value pairs, specifying the arguments for that action
You MUST NOT include any other text besides the JSON response
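As a hedged illustration of this contract (the sample response string is invented), a well-formed reply is bare JSON that parses into exactly these two fields:

```python
# Sketch: a conforming response is bare JSON with an action and its args.
import json

response = '{"action": "run", "args": {"command": "ls"}}'
parsed = json.loads(response)
assert parsed['action'] == 'run'          # one of the specified actions
assert isinstance(parsed['args'], dict)   # key-value arguments for the action
```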

View File

@ -1,4 +0,0 @@
Here is a recent history of actions you've taken in service of this plan,
as well as observations you've made. This only includes the MOST RECENT
actions and observations--more may have happened before that.
They are time-ordered, with your most recent action at the bottom.

View File

@ -1,88 +0,0 @@
from jinja2 import BaseLoader, Environment

from openhands.agenthub.micro.instructions import instructions
from openhands.agenthub.micro.registry import all_microagents
from openhands.controller.agent import Agent
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
from openhands.core.message import ImageContent, Message, TextContent
from openhands.events.action import Action
from openhands.events.event import Event
from openhands.events.serialization.action import action_from_dict
from openhands.events.serialization.event import event_to_memory
from openhands.io import json
from openhands.llm.llm import LLM


def parse_response(orig_response: str) -> Action:
    # attempt to load the JSON dict from the response
    action_dict = json.loads(orig_response)
    # load the action from the dict
    return action_from_dict(action_dict)


def to_json(obj: object, **kwargs: dict) -> str:
    """Serialize an object to str format"""
    return json.dumps(obj, **kwargs)


class MicroAgent(Agent):
    VERSION = '1.0'

    prompt = ''
    agent_definition: dict = {}

    def history_to_json(
        self, history: list[Event], max_events: int = 20, **kwargs: dict
    ) -> str:
        """
        Serialize and simplify history to str format
        """
        processed_history = []
        event_count = 0
        for event in reversed(history):
            if event_count >= max_events:
                break
            processed_history.append(
                event_to_memory(event, self.llm.config.max_message_chars)
            )
            event_count += 1

        # history is in reverse order, let's fix it
        processed_history.reverse()

        # everything starts with a message
        # the first message is already in the prompt as the task
        # TODO: so we don't need to include it in the history
        return json.dumps(processed_history, **kwargs)

    def __init__(self, llm: LLM, config: AgentConfig):
        super().__init__(llm, config)
        if 'name' not in self.agent_definition:
            raise ValueError('Agent definition must contain a name')
        self.prompt_template = Environment(loader=BaseLoader()).from_string(self.prompt)
        self.delegates = all_microagents.copy()
        del self.delegates[self.agent_definition['name']]

    def step(self, state: State) -> Action:
        last_user_message, last_image_urls = state.get_current_user_intent()
        prompt = self.prompt_template.render(
            state=state,
            instructions=instructions,
            to_json=to_json,
            history_to_json=self.history_to_json,
            delegates=self.delegates,
            latest_user_message=last_user_message,
        )
        content: list[TextContent | ImageContent] = [TextContent(text=prompt)]
        if self.llm.vision_is_active() and last_image_urls:
            content.append(ImageContent(image_urls=last_image_urls))
        message = Message(role='user', content=content)
        resp = self.llm.completion(
            messages=self.llm.format_messages_for_llm(message),
        )
        action_resp = resp['choices'][0]['message']['content']
        action = parse_response(action_resp)
        return action
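A quick sketch of how `parse_response` above behaves on a well-formed reply; the sample string is invented, and the `message` action maps to `MessageAction` via `action_from_dict`:

```python
# Sketch: a well-formed model response becomes a typed Action object.
from openhands.agenthub.micro.agent import parse_response

action = parse_response('{"action": "message", "args": {"content": "hi"}}')
print(type(action).__name__)  # MessageAction
```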

View File

@ -1,6 +0,0 @@
name: CoderAgent
description: Given a particular task, and a detailed description of the codebase, accomplishes the task
inputs:
  task: string
  summary: string
outputs: {}

View File

@ -1,27 +0,0 @@
# Task
You are a software engineer. You've inherited an existing codebase, which you
need to modify to complete this task:
{{ state.inputs.task }}
{% if state.inputs.summary %}
Here's a summary of the codebase, as it relates to this task:
{{ state.inputs.summary }}
{% endif %}
## Available Actions
{{ instructions.actions.run }}
{{ instructions.actions.write }}
{{ instructions.actions.read }}
{{ instructions.actions.message }}
{{ instructions.actions.finish }}
Do NOT finish until you have completed the task.
## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}
## Format
{{ instructions.format.action }}

View File

@ -1,25 +0,0 @@
## Introduction
CommitWriterAgent helps write a git commit message. Example:
```bash
WORKSPACE_MOUNT_PATH="$(pwd)" \
poetry run python openhands/core/main.py -t "dummy task" -c CommitWriterAgent -d ./
```
This agent is special in that it doesn't need a task. Once called, it reads the
diff in the git staging area and writes a good commit message for it.
## Future work
### Feedback loop
The commit message could optionally be shown to the user or to other agents,
so that CommitWriterAgent can gather feedback and further improve the commit
message.
### Task rejection
When the agent cannot compile a commit message (e.g., the working directory is
not a git repository), it should reject the task with an explanation.

View File

@ -1,6 +0,0 @@
name: CommitWriterAgent
description: "Write a git commit message for files in the git staging area"
inputs: {}
outputs:
  answer: string
  reason: string

View File

@ -1,33 +0,0 @@
# Task
You are a responsible software engineer and always write good commit messages.
Please analyze the diff in the staging area, understand the context and content
of the updates from the diff only. Identify key elements like:
- Which files are affected?
- What types of changes were made (e.g., new features, bug fixes, refactoring, documentation, testing)?
Then you should generate a commit message that succinctly summarizes the staged
changes. The commit message should include:
- A summary line that clearly states the purpose of the changes.
- Optionally, a detailed description if the changes are complex or need further explanation.
You should first use `git status` to check whether it's a valid git repo and there
is a diff in the staging area. If not, please call the `reject` action.
If it is a valid git repo and there is a diff in the staging area, you should find
the diff using `git diff --cached`, compile a commit message, and call the `finish`
action with `outputs.answer` set to the answer.
## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}
If the last item in the history is an error, you should try to fix it.
## Available Actions
{{ instructions.actions.run }}
{{ instructions.actions.reject }}
{{ instructions.actions.finish }}
## Format
{{ instructions.format.action }}

View File

@ -1,22 +0,0 @@
import os

instructions: dict = {}

base_dir = os.path.dirname(os.path.abspath(__file__)) + '/_instructions'
for root, dirs, files in os.walk(base_dir):
    if len(files) == 0:
        continue
    if root == base_dir:
        obj = instructions
    else:
        rel_base = os.path.relpath(root, base_dir)
        keys = rel_base.split('/')
        obj = instructions
        for key in keys:
            if key not in obj:
                obj[key] = {}
            obj = obj[key]
    for file in files:
        without_ext = os.path.splitext(file)[0]
        with open(os.path.join(root, file), 'r') as f:
            obj[without_ext] = f.read()
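For reference, a small sketch of how the resulting nested dict is consumed by the prompt templates above; the keys mirror the `_instructions` directory layout, so `actions/run.md` becomes `instructions['actions']['run']`:

```python
# Sketch: keys mirror the _instructions directory layout.
from openhands.agenthub.micro.instructions import instructions

print(instructions['actions']['run'])    # contents of _instructions/actions/run.md
print(instructions['format']['action'])  # contents of _instructions/format/action.md
```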

View File

@ -1,8 +0,0 @@
name: ManagerAgent
description: Delegates tasks to microagents based on their area of expertise
generates: Action
inputs:
  task: string
outputs:
  summary: string # if finished
  reason: string # if rejected

View File

@ -1,42 +0,0 @@
# Task
You are in charge of accomplishing the following task:
{% set goal = latest_user_message if latest_user_message is not none else state.inputs.task %}
{{ goal }}
In order to accomplish this goal, you must delegate tasks to one or more agents, who
can do the actual work. A description of each agent is provided below. You MUST
select one of the delegates below to move towards accomplishing the task, and you MUST
provide the correct inputs for the delegate you select.
Note: the delegated agent either returns "finish" or "reject".
- If the action is "finish", but the full task is not done yet, you should
continue to delegate to one of the agents below until the full task is finished.
- If the action is "reject", it means the delegated agent is not capable of the
task you sent it. You should revisit the input you sent to the delegate, and consider
whether any other delegate would be able to solve the task. If you cannot find
a proper delegate agent, or the delegate attempts keep failing, call the `reject`
action. In the `reason` attribute, make sure you include your attempts (e.g., which
agents you delegated to, and why they failed).
## Agents
{% for name, details in delegates.items() %}
### {{ name }}
{{ details.description }}
#### Inputs
{{ to_json(details.inputs) }}
{% endfor %}
## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}
If the last item in the history is an error, you should try to fix it. If you
cannot fix it, call the `reject` action.
## Available Actions
{{ instructions.actions.delegate }}
{{ instructions.actions.finish }}
{{ instructions.actions.reject }}
## Format
{{ instructions.format.action }}

View File

@ -1,24 +0,0 @@
name: MathAgent
description: "Solves simple and complex math problems using python"
container: python:3.12.3-bookworm
inputs:
  task: string
outputs:
  answer: string
examples:
  - inputs:
      task: "What is 2 + 2?"
    outputs:
      answer: "4"
  - inputs:
      task: "What is the area of a circle with radius 7.324 inches?"
    outputs:
      answer: "168.518 square inches"
  - inputs:
      task: "What day of the week is 2099-01-01?"
    outputs:
      answer: "Thursday"
  - inputs:
      task: "What is the integral of sin(x^2) evaluated from -1 to 1?"
    outputs:
      answer: "0.620537"

View File

@ -1,23 +0,0 @@
# Task
You are a brilliant mathematician and programmer. You've been given the following problem to solve:
`{{ state.inputs.task }}`
Please write a python script that solves this problem, and prints the answer to stdout.
ONLY print the answer to stdout, nothing else.
You should then run the python script with `python3`,
and call the `finish` action with `outputs.answer` set to the answer.
## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}
If the last item in the history is an error, you should try to fix it.
## Available Actions
{{ instructions.actions.write }}
{{ instructions.actions.run }}
{{ instructions.actions.finish }}
## Format
{{ instructions.format.action }}

View File

@ -1,5 +0,0 @@
name: PostgresAgent
description: Writes and maintains PostgreSQL migrations
inputs:
  task: string
outputs: {}

View File

@ -1,24 +0,0 @@
# Task
You are a database engineer. You are working on an existing Postgres project, and have been given
the following task:
{{ state.inputs.task }}
You must:
* Investigate the existing migrations to understand the current schema
* Write a new migration to accomplish the task above
* Test that the migrations work properly
## Actions
You may take any of the following actions:
{{ instructions.actions.message }}
{{ instructions.actions.read }}
{{ instructions.actions.write }}
{{ instructions.actions.run }}
## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}
## Format
{{ instructions.format.action }}

View File

@ -1,27 +0,0 @@
import os

import yaml

all_microagents = {}

# Get the list of directories and sort them to preserve determinism
dirs = sorted(os.listdir(os.path.dirname(__file__)))

for dir in dirs:
    base = os.path.dirname(__file__) + '/' + dir
    if os.path.isfile(base):
        continue
    if dir.startswith('_'):
        continue
    promptFile = base + '/prompt.md'
    agentFile = base + '/agent.yaml'
    if not os.path.isfile(promptFile) or not os.path.isfile(agentFile):
        raise Exception(f'Missing prompt or agent file in {base}. Please create them.')
    with open(promptFile, 'r') as f:
        prompt = f.read()
    with open(agentFile, 'r') as f:
        agent = yaml.safe_load(f)
    if 'name' not in agent:
        raise Exception(f'Missing name in {agentFile}')
    agent['prompt'] = prompt
    all_microagents[agent['name']] = agent
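A brief usage sketch: `all_microagents` maps each agent name to its parsed `agent.yaml` dict, with the raw prompt template attached under `'prompt'`. The `CoderAgent` key assumes that agent's directory exists.

```python
# Sketch: look up a registered micro-agent definition by name.
from openhands.agenthub.micro.registry import all_microagents

coder = all_microagents['CoderAgent']  # assumes CoderAgent is present
print(coder['description'])
print(coder['prompt'][:80])            # start of its prompt.md template
```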

View File

@ -1,5 +0,0 @@
name: RepoExplorerAgent
description: Generates a detailed summary of an existing codebase
inputs: {}
outputs:
  summary: string

View File

@ -1,26 +0,0 @@
# Task
You are a software engineer. You've inherited an existing codebase, which you're
learning about for the first time. Your goal is to produce a detailed summary
of the codebase, including:
* The overall purpose of the project
* The directory structure
* The main components of the codebase
* How the components fit together
## Available Actions
{{ instructions.actions.run }}
{{ instructions.actions.read }}
{{ instructions.actions.message }}
{{ instructions.actions.finish }}
You should ONLY `run` commands that have no side-effects, like `ls` and `grep`.
Do NOT finish until you have a complete understanding of the codebase.
When you're done, put your summary into the output of the `finish` action.
## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}
## Format
{{ instructions.format.action }}

View File

@ -1,6 +0,0 @@
name: StudyRepoForTaskAgent
description: Given a particular task, finds and describes all relevant parts of the codebase
inputs:
  task: string
outputs:
  summary: string

View File

@ -1,62 +0,0 @@
# Task
You are a software architect. Your team has inherited an existing codebase and
needs to finish a project:
{{ state.inputs.task }}
As an architect, you need to study the codebase to find all the information that
might be helpful for your software engineering team.
## Available Actions
{{ instructions.actions.run }}
{{ instructions.actions.read }}
{{ instructions.actions.message }}
{{ instructions.actions.finish }}
You must ONLY `run` commands that have no side-effects, like `ls` and `grep`. You
MUST NOT modify or write to any file.
Do NOT finish until you have a complete understanding of which parts of the
codebase are relevant to the project, including particular files, functions, and classes.
When you're done, put your summary in `outputs.summary` in the `finish` action.
Remember, your task is to explore and study the current repository, not actually
implement the solution. If the codebase is empty, you should call the `finish` action.
## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}
## Format
{{ instructions.format.action }}
## Examples
Here is an example of how you can interact with the environment for task solving:
--- START OF EXAMPLE ---
USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000?
ASSISTANT:
{
  "action": "run",
  "args": {
    "command": "ls"
  }
}

USER:
OBSERVATION:
[]

ASSISTANT:
{
  "action": "finish",
  "args": {
    "outputs": {
      "summary": "The codebase appears to be empty. Engineers should start everything from scratch."
    }
  }
}
--- END OF EXAMPLE ---

View File

@ -1,6 +0,0 @@
name: TypoFixerAgent
description: Fixes typos in files in the current working directory
inputs:
  task: string
outputs:
  summary: string

View File

@ -1,54 +0,0 @@
# Task
You are a proofreader tasked with fixing typos in the files in your current working directory.
{% if state.inputs.task %}
Specifically, your task is:
{{ state.inputs.task }}
{% endif %}
To achieve this goal, you should:
1. Scan the files for typos
2. Overwrite the files with the typos fixed
3. Provide a summary of the typos fixed
## Available Actions
{{ instructions.actions.read }}
{{ instructions.actions.write }}
{{ instructions.actions.run }}
{{ instructions.actions.message }}
{{ instructions.actions.finish }}
To complete this task:
1. Use the `read` action to read the contents of the files in your current working directory. Make sure to provide the file path in the format `'./file_name.ext'`.
2. Use the `message` action to analyze the contents and identify typos.
3. Use the `write` action to create new versions of the files with the typos fixed.
- Overwrite the original files with the corrected content. Make sure to provide the file path in the format `'./file_name.ext'`.
4. Use the `message` action to generate a summary of the typos fixed, including the original and fixed versions of each typo, and the file(s) they were found in.
5. Use the `finish` action to return the summary in the `outputs.summary` field.
Do NOT finish until you have fixed all the typos and generated a summary.
## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=10) }}
## Format
{{ instructions.format.action }}
For example, if you want to use the read action to read the contents of a file named example.txt, your response should look like this:
{
  "action": "read",
  "args": {
    "path": "./example.txt"
  }
}

Similarly, if you want to use the write action to write content to a file named output.txt, your response should look like this:

{
  "action": "write",
  "args": {
    "path": "./output.txt",
    "content": "This is the content to be written to the file."
  }
}

View File

@ -1,7 +0,0 @@
name: VerifierAgent
description: Given a particular task, verifies that the task has been completed
inputs:
  task: string
outputs:
  completed: boolean
  summary: string

View File

@ -1,28 +0,0 @@
# Task
You are a quality assurance engineer. Another engineer has made changes to the
codebase which are supposed to solve this task:
{{ state.inputs.task }}
Note that the changes might have already been applied in-line. You should focus on
validating whether the task is solved, nothing else.
## Available Actions
{{ instructions.actions.run }}
{{ instructions.actions.read }}
{{ instructions.actions.message }}
{{ instructions.actions.finish }}
You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts.
Do NOT finish until you know whether the task is complete and correct.
When you're done, add a `completed` boolean to the `outputs` of the `finish` action.
If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action
explaining what the problem is.
## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}
## Format
{{ instructions.format.action }}

View File

@ -61,7 +61,7 @@ def create_runtime(
    if agent:
        agent_cls = type(agent)
    else:
        agent_cls = openhands.agenthub.Agent.get_cls(config.default_agent)
        agent_cls = Agent.get_cls(config.default_agent)

    # runtime and tools
    runtime_cls = get_runtime_cls(config.runtime)

View File

@ -23,7 +23,14 @@ TOP_KEYS = [
    'tool_call_metadata',
    'llm_metrics',
]

UNDERSCORE_KEYS = ['id', 'timestamp', 'source', 'cause', 'tool_call_metadata', 'llm_metrics']
UNDERSCORE_KEYS = [
    'id',
    'timestamp',
    'source',
    'cause',
    'tool_call_metadata',
    'llm_metrics',
]

DELETE_FROM_TRAJECTORY_EXTRAS = {
    'screenshot',
@ -62,8 +69,13 @@ def event_from_dict(data) -> 'Event':
            metrics.accumulated_cost = value.get('accumulated_cost', 0.0)
            for cost in value.get('costs', []):
                metrics._costs.append(Cost(**cost))
            metrics.response_latencies = [ResponseLatency(**latency) for latency in value.get('response_latencies', [])]
            metrics.token_usages = [TokenUsage(**usage) for usage in value.get('token_usages', [])]
            metrics.response_latencies = [
                ResponseLatency(**latency)
                for latency in value.get('response_latencies', [])
            ]
            metrics.token_usages = [
                TokenUsage(**usage) for usage in value.get('token_usages', [])
            ]
            value = metrics
        setattr(evt, '_' + key, value)
    return evt

View File

@ -16,7 +16,10 @@ class DummyAgent:
        self.llm = type(
            'DummyLLM',
            (),
            {'metrics': Metrics()},
            {
                'metrics': Metrics(),
                'config': type('DummyConfig', (), {'max_message_chars': 10000})(),
            },
        )()

    def reset(self):

View File

@ -1,135 +0,0 @@
import json
import os
from unittest.mock import MagicMock

import pytest
import yaml
from pytest import TempPathFactory

from openhands.agenthub.micro.registry import all_microagents
from openhands.controller.agent import Agent
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
from openhands.events.action import MessageAction
from openhands.events.stream import EventStream
from openhands.storage import get_file_store


@pytest.fixture
def temp_dir(tmp_path_factory: TempPathFactory) -> str:
    return str(tmp_path_factory.mktemp('test_micro_agents'))


@pytest.fixture
def event_stream(temp_dir):
    file_store = get_file_store('local', temp_dir)
    event_stream = EventStream('asdf', file_store)
    yield event_stream


@pytest.fixture
def agent_configs():
    return {
        'CoderAgent': AgentConfig(enable_prompt_extensions=True),
        'BrowsingAgent': AgentConfig(enable_prompt_extensions=True),
    }


def test_all_agents_are_loaded():
    assert all_microagents is not None
    assert len(all_microagents) > 1

    base = os.path.join('openhands', 'agenthub', 'micro')
    full_path = os.path.dirname(__file__) + '/../../' + base
    agent_names = set()
    for root, _, files in os.walk(full_path):
        for file in files:
            if file == 'agent.yaml':
                file_path = os.path.join(root, file)
                with open(file_path, 'r') as yaml_file:
                    data = yaml.safe_load(yaml_file)
                    agent_names.add(data['name'])
    assert agent_names == set(all_microagents.keys())


def test_coder_agent_with_summary(event_stream: EventStream, agent_configs: dict):
    """Coder agent should render code summary as part of prompt."""
    mock_llm = MagicMock()
    content = json.dumps({'action': 'finish', 'args': {}})
    mock_llm.completion.return_value = {'choices': [{'message': {'content': content}}]}
    mock_llm.format_messages_for_llm.return_value = [
        {
            'role': 'user',
            'content': "This is a dummy task. This is a dummy summary about this repo. Here's a summary of the codebase, as it relates to this task.",
        }
    ]

    coder_agent = Agent.get_cls('CoderAgent')(
        llm=mock_llm, config=agent_configs['CoderAgent']
    )
    assert coder_agent is not None

    # give it some history
    task = 'This is a dummy task'
    history = list()
    history.append(MessageAction(content=task))

    summary = 'This is a dummy summary about this repo'
    state = State(history=history, inputs={'summary': summary})
    coder_agent.step(state)

    mock_llm.completion.assert_called_once()
    _, kwargs = mock_llm.completion.call_args
    prompt_element = kwargs['messages'][0]['content']
    if isinstance(prompt_element, dict):
        prompt = prompt_element['content']
    else:
        prompt = prompt_element
    assert task in prompt
    assert "Here's a summary of the codebase, as it relates to this task" in prompt
    assert summary in prompt


def test_coder_agent_without_summary(event_stream: EventStream, agent_configs: dict):
    """When there's no codebase_summary available, there shouldn't be any prompt about 'code summary'.

    This test verifies that the prompt doesn't include code summary text when no summary is provided.
    """
    mock_llm = MagicMock()
    content = json.dumps({'action': 'finish', 'args': {}})
    mock_llm.completion.return_value = {'choices': [{'message': {'content': content}}]}
    mock_llm.format_messages_for_llm.return_value = [
        {
            'role': 'user',
            'content': [
                {
                    'type': 'text',
                    'text': "This is a dummy task. This is a dummy summary about this repo. Here's a summary of the codebase, as it relates to this task.",
                }
            ],
        }
    ]

    coder_agent = Agent.get_cls('CoderAgent')(
        llm=mock_llm, config=agent_configs['CoderAgent']
    )
    assert coder_agent is not None

    # give it some history
    task = 'This is a dummy task'
    history = list()
    history.append(MessageAction(content=task))

    # set state without codebase summary
    state = State(history=history)
    coder_agent.step(state)

    mock_llm.completion.assert_called_once()
    _, kwargs = mock_llm.completion.call_args
    prompt_element = kwargs['messages'][0]['content']
    if isinstance(prompt_element, dict):
        prompt = prompt_element['content']
    else:
        prompt = prompt_element
    print(f'\n{prompt_element}\n')
    assert "Here's a summary of the codebase, as it relates to this task" not in prompt

View File

@ -1,92 +0,0 @@
import pytest

from openhands.agenthub.micro.agent import parse_response as parse_response_micro
from openhands.core.exceptions import LLMResponseError
from openhands.events.action import (
    FileWriteAction,
    MessageAction,
)
from openhands.io import loads as custom_loads


@pytest.mark.parametrize(
    'parse_response_module',
    [parse_response_micro],
)
def test_parse_single_complete_json(parse_response_module):
    input_response = """
    {
        "action": "message",
        "args": {
            "content": "The following typos were fixed:\\n* 'futur' -> 'future'\\n* 'imagin' -> 'imagine'\\n* 'techological' -> 'technological'\\n* 'responsability' -> 'responsibility'\\nThe corrected file is ./short_essay.txt."
        }
    }
    """
    expected = MessageAction(
        "The following typos were fixed:\n* 'futur' -> 'future'\n* 'imagin' -> 'imagine'\n* 'techological' -> 'technological'\n* 'responsability' -> 'responsibility'\nThe corrected file is ./short_essay.txt."
    )
    result = parse_response_module(input_response)
    assert result == expected


@pytest.mark.parametrize(
    'parse_response_module',
    [parse_response_micro],
)
def test_parse_json_with_surrounding_text(parse_response_module):
    input_response = """
    Some initial text that is not JSON formatted.
    {
        "action": "write",
        "args": {
            "path": "./updated_file.txt",
            "content": "Updated text content here..."
        }
    }
    Some trailing text that is also not JSON formatted.
    """
    expected = FileWriteAction(
        path='./updated_file.txt', content='Updated text content here...'
    )
    result = parse_response_module(input_response)
    assert result == expected


@pytest.mark.parametrize(
    'parse_response_module',
    [parse_response_micro],
)
def test_parse_first_of_multiple_jsons(parse_response_module):
    input_response = """
    I will firstly do
    {
        "action": "write",
        "args": {
            "path": "./short_essay.txt",
            "content": "Text content here..."
        }
    }
    Then I will continue with
    {
        "action": "think",
        "args": {
            "thought": "This should not be parsed."
        }
    }
    """
    expected = FileWriteAction(path='./short_essay.txt', content='Text content here...')
    result = parse_response_module(input_response)
    assert result == expected


def test_invalid_json_raises_error():
    # This should fail if repair_json is able to fix this faulty JSON
    input_response = '{"action": "write", "args": { "path": "./short_essay.txt", "content": "Missing closing brace" }'
    with pytest.raises(LLMResponseError):
        custom_loads(input_response)


def test_no_json_found():
    input_response = 'This is just a string with no JSON object.'
    with pytest.raises(LLMResponseError):
        custom_loads(input_response)