mirror of
https://github.com/OpenHands/OpenHands.git
synced 2026-03-22 13:47:19 +08:00
* Fix AgentRejectAction handling * Add ManagerAgent to integration tests * Fix regenerate.sh * Fix merge * Update README for micro-agents * Add test reject to regenerate.sh * regenerate.sh: Add support for running a specific test and/or agent * Refine reject schema, and allow ManagerAgent to handle reject * Add test artifacts for test_simple_task_rejection * Fix manager agent tests * Fix README * test_simple_task_rejection: check final agent state * Integration test: exit if mock prompt not found * Update test_simple_task_rejection tests * Fix test_edits test artifacts after prompt update * Fix ManagerAgent test_edits * WIP * Fix tests * update test_edits for ManagerAgent * Skip local sandbox for reject test * Fix test comparison
175 lines
6.4 KiB
Python
175 lines
6.4 KiB
Python
import asyncio
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
|
|
import pytest
|
|
|
|
from opendevin.controller.state.state import State
|
|
from opendevin.core.main import main
|
|
from opendevin.core.schema import AgentState
|
|
from opendevin.events.action import (
|
|
AgentFinishAction,
|
|
AgentRejectAction,
|
|
)
|
|
|
|
# Directory the tests below inspect for files produced by the agent; taken
# from the WORKSPACE_BASE environment variable (None when it is not set).
workspace_base = os.environ.get('WORKSPACE_BASE')
|
|
|
|
|
|
@pytest.mark.skipif(
    os.getenv('AGENT') == 'BrowsingAgent',
    reason='BrowsingAgent is a specialized agent',
)
@pytest.mark.skipif(
    # Default SANDBOX_TYPE to '' so that an unset variable skips the test
    # instead of raising AttributeError (None.lower()) while this module is
    # being imported, which would break collection of every test in the file.
    os.getenv('AGENT') == 'CodeActAgent'
    and os.getenv('SANDBOX_TYPE', '').lower() != 'ssh',
    reason='CodeActAgent only supports ssh sandbox which is stateful',
)
@pytest.mark.skipif(
    os.getenv('AGENT') == 'ManagerAgent',
    reason='Manager agent is not capable of finishing this in reasonable steps yet',
)
def test_write_simple_script():
    """Ask the agent to write 'hello.sh' and check the script prints 'hello'.

    The test passes when the run ends in AgentState.STOPPED, 'hello.sh'
    exists under ``workspace_base``, and running it with bash writes
    'hello' (ignoring surrounding whitespace) to stdout.
    """
    task = "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point."
    final_state: State = asyncio.run(main(task, exit_on_message=True))
    assert final_state.agent_state == AgentState.STOPPED

    # Verify the script file exists
    script_path = os.path.join(workspace_base, 'hello.sh')
    assert os.path.exists(script_path), 'The file "hello.sh" does not exist'

    # Run the script and capture the output
    result = subprocess.run(['bash', script_path], capture_output=True, text=True)

    # Verify the output from the script
    assert (
        result.stdout.strip() == 'hello'
    ), f'Expected output "hello", but got "{result.stdout.strip()}"'
|
|
|
|
|
|
@pytest.mark.skipif(
    os.getenv('AGENT') == 'BrowsingAgent',
    reason='BrowsingAgent is a specialized agent',
)
@pytest.mark.skipif(
    # Default SANDBOX_TYPE to '' so that an unset variable skips the test
    # instead of raising AttributeError (None.lower()) while this module is
    # being imported, which would break collection of every test in the file.
    os.getenv('AGENT') == 'CodeActAgent'
    and os.getenv('SANDBOX_TYPE', '').lower() != 'ssh',
    reason='CodeActAgent only supports ssh sandbox which is stateful',
)
@pytest.mark.skipif(
    os.getenv('AGENT') == 'MonologueAgent' or os.getenv('AGENT') == 'PlannerAgent',
    reason='We only keep basic tests for MonologueAgent and PlannerAgent',
)
@pytest.mark.skipif(
    os.getenv('SANDBOX_TYPE') == 'local',
    reason='local sandbox shows environment-dependent absolute path for pwd command',
)
def test_edits():
    """Ask the agent to fix the typos in bad.txt and compare the result.

    Every entry of the 'workspace/test_edits/' directory next to this file
    is copied into ``workspace_base`` (replacing any existing file of the
    same name) before the task runs. The test passes when the run ends in
    AgentState.STOPPED and the stripped content of bad.txt equals the
    stripped expected text.
    """
    # Move workspace artifacts to workspace_base location
    source_dir = os.path.join(os.path.dirname(__file__), 'workspace/test_edits/')
    for name in os.listdir(source_dir):
        dest_file = os.path.join(workspace_base, name)
        # Drop any leftover from a previous run before copying the fixture.
        if os.path.exists(dest_file):
            os.remove(dest_file)
        shutil.copy(os.path.join(source_dir, name), dest_file)

    # Execute the task
    task = 'Fix typos in bad.txt. Do not ask me for confirmation at any point.'
    final_state: State = asyncio.run(main(task, exit_on_message=True))
    assert final_state.agent_state == AgentState.STOPPED

    # Verify bad.txt has been fixed
    text = """This is a stupid typo.
Really?
No more typos!
Enjoy!
"""
    with open(os.path.join(workspace_base, 'bad.txt'), 'r') as f:
        content = f.read()
    assert content.strip() == text.strip()
|
|
|
|
|
|
@pytest.mark.skipif(
    os.getenv('AGENT') != 'CodeActAgent',
    reason='currently only CodeActAgent defaults to have IPython (Jupyter) execution',
)
@pytest.mark.skipif(
    os.getenv('SANDBOX_TYPE') != 'ssh',
    reason='Currently, only ssh sandbox supports stateful tasks',
)
def test_ipython():
    """Have the agent write 'hello world' to test.txt through IPython.

    Passes when the run ends in AgentState.STOPPED and test.txt under
    ``workspace_base`` exists with stripped content 'hello world'.
    """
    # Run the agent on the task
    prompt = "Use Jupyter IPython to write a text file containing 'hello world' to '/workspace/test.txt'. Do not ask me for confirmation at any point."
    outcome: State = asyncio.run(main(prompt, exit_on_message=True))
    assert outcome.agent_state == AgentState.STOPPED

    # The output file must be present in the workspace
    output_path = os.path.join(workspace_base, 'test.txt')
    assert os.path.exists(output_path), 'The file "test.txt" does not exist'

    # Its content, ignoring surrounding whitespace, must be the expected text
    with open(output_path, 'r') as handle:
        written = handle.read().strip()
    assert (
        written == 'hello world'
    ), f'Expected content "hello world", but got "{written}"'
|
|
|
|
|
|
@pytest.mark.skipif(
    os.getenv('AGENT') != 'ManagerAgent',
    reason='Currently, only ManagerAgent supports task rejection',
)
@pytest.mark.skipif(
    os.getenv('SANDBOX_TYPE') == 'local',
    reason='FIXME: local sandbox does not capture stderr',
)
def test_simple_task_rejection():
    """Give the agent a task it cannot complete and expect a rejection.

    Passes when the run ends in AgentState.STOPPED and the first element
    of the last history entry is an AgentRejectAction.
    """
    # The workspace is not a git repo, so there is no staging area to
    # describe and the commit-message task is impossible.
    prompt = 'Write a git commit message for the current staging area. Do not ask me for confirmation at any point.'
    outcome: State = asyncio.run(main(prompt))
    assert outcome.agent_state == AgentState.STOPPED

    last_entry = outcome.history[-1]
    assert isinstance(last_entry[0], AgentRejectAction)
|
|
|
|
|
|
@pytest.mark.skipif(
    os.getenv('AGENT') != 'CodeActAgent',
    reason='currently only CodeActAgent defaults to have IPython (Jupyter) execution',
)
@pytest.mark.skipif(
    os.getenv('SANDBOX_TYPE') != 'ssh',
    reason='Currently, only ssh sandbox supports stateful tasks',
)
def test_ipython_module():
    """Have the agent install pymsgbox 1.0.9 and record its version.

    Passes when the run ends in AgentState.STOPPED and test.txt under
    ``workspace_base`` exists with stripped content '1.0.9'.
    """
    # Run the agent on the task
    prompt = "Install and import pymsgbox==1.0.9 and print it's version in /workspace/test.txt. Do not ask me for confirmation at any point."
    outcome: State = asyncio.run(main(prompt, exit_on_message=True))
    assert outcome.agent_state == AgentState.STOPPED

    # The output file must be present in the workspace
    output_path = os.path.join(workspace_base, 'test.txt')
    assert os.path.exists(output_path), 'The file "test.txt" does not exist'

    # Its content, ignoring surrounding whitespace, must be the version string
    with open(output_path, 'r') as handle:
        written = handle.read().strip()
    assert (
        written == '1.0.9'
    ), f'Expected content "1.0.9", but got "{written}"'
|
|
|
|
|
|
@pytest.mark.skipif(
    os.getenv('AGENT') != 'BrowsingAgent' and os.getenv('AGENT') != 'CodeActAgent',
    reason='currently only BrowsingAgent and CodeActAgent are capable of searching the internet',
)
@pytest.mark.skipif(
    # Default SANDBOX_TYPE to '' so that an unset variable skips the test
    # instead of raising AttributeError (None.lower()) while this module is
    # being imported, which would break collection of every test in the file.
    os.getenv('AGENT') == 'CodeActAgent'
    and os.getenv('SANDBOX_TYPE', '').lower() != 'ssh',
    reason='CodeActAgent only supports ssh sandbox which is stateful',
)
def test_browse_internet(http_server):
    """Ask the agent to browse localhost:8000 and report what it finds.

    Args:
        http_server: pytest fixture requested by name; it is not used in
            the body (presumably it serves the page on localhost:8000 —
            confirm against the fixture definition).

    Passes when the run ends in AgentState.STOPPED, the first element of
    the last history entry is an AgentFinishAction, and the string form of
    the history contains 'OpenDevin is all you need!'.
    """
    # Execute the task
    task = 'Browse localhost:8000, and tell me the ultimate answer to life. Do not ask me for confirmation at any point.'
    final_state: State = asyncio.run(main(task, exit_on_message=True))
    assert final_state.agent_state == AgentState.STOPPED
    assert isinstance(final_state.history[-1][0], AgentFinishAction)
    assert 'OpenDevin is all you need!' in str(final_state.history)
|