mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
tests: more Agentskills tests; updated .gitignore (#2307)
* added tests related to backticks
* updated .gitignore
* added extra linter test for #2210
* hotfix for integration test

---------

Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
This commit is contained in:
parent
6aba337416
commit
b431fce938
13
.gitignore
vendored
13
.gitignore
vendored
@ -161,9 +161,14 @@ cython_debug/
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
.idea/
|
||||
.vscode/
|
||||
.cursorignore
|
||||
|
||||
# evaluation
|
||||
evaluation/evaluation_outputs
|
||||
evaluation/outputs
|
||||
evaluation/swe_bench/eval_workspace*
|
||||
evaluation/SWE-bench/data
|
||||
evaluation/webarena/scripts/webarena_env.sh
|
||||
|
||||
# frontend
|
||||
|
||||
@ -176,6 +181,8 @@ frontend/yarn.lock
|
||||
|
||||
# testing
|
||||
frontend/coverage
|
||||
test_results*
|
||||
/_test_files_tmp/
|
||||
|
||||
# production
|
||||
frontend/build
|
||||
@ -204,9 +211,3 @@ cache
|
||||
# configuration
|
||||
config.toml
|
||||
config.toml.bak
|
||||
evaluation/swe_bench/eval_workspace*
|
||||
evaluation/outputs
|
||||
evaluation/evaluation_outputs
|
||||
test_results*
|
||||
/_test_files_tmp/
|
||||
evaluation/webarena/scripts/webarena_env.sh
|
||||
|
||||
@ -117,10 +117,7 @@ class BrowsingAgent(Agent):
|
||||
error_prefix = ''
|
||||
last_obs = None
|
||||
last_action = None
|
||||
if len(state.history) == 1:
|
||||
# initialize and retrieve the first observation by issuing an noop OP
|
||||
# TODO: need more elegant way of doing this
|
||||
return BrowseInteractiveAction(browser_actions='noop()')
|
||||
|
||||
for prev_action, obs in state.history:
|
||||
if isinstance(prev_action, BrowseInteractiveAction):
|
||||
prev_actions.append(prev_action.browser_actions)
|
||||
@ -133,7 +130,7 @@ class BrowsingAgent(Agent):
|
||||
# agent has responded, task finish.
|
||||
return AgentFinishAction(outputs={'content': prev_action.content})
|
||||
|
||||
prev_action_str = '\n'.join(prev_actions[1:])
|
||||
prev_action_str = '\n'.join(prev_actions)
|
||||
# if the final BrowserInteractiveAction exec BrowserGym's send_msg_to_user,
|
||||
# we should also send a message back to the user in OpenDevin and call it a day
|
||||
if (
|
||||
|
||||
@ -34,6 +34,9 @@ WINDOW = 100
|
||||
|
||||
ENABLE_AUTO_LINT = os.getenv('ENABLE_AUTO_LINT', 'false').lower() == 'true'
|
||||
|
||||
# This is also used in unit tests!
|
||||
MSG_FILE_UPDATED = '[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]'
|
||||
|
||||
# OPENAI
|
||||
OPENAI_API_KEY = os.getenv(
|
||||
'OPENAI_API_KEY', os.getenv('SANDBOX_ENV_OPENAI_API_KEY', '')
|
||||
@ -311,6 +314,7 @@ def edit_file(start: int, end: int, content: str) -> None:
|
||||
|
||||
lint_error = _lint_file(CURRENT_FILE)
|
||||
if lint_error:
|
||||
# only change any literal strings here in combination with unit tests!
|
||||
print(
|
||||
'[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]'
|
||||
)
|
||||
@ -351,9 +355,7 @@ def edit_file(start: int, end: int, content: str) -> None:
|
||||
f'[File: {os.path.abspath(CURRENT_FILE)} ({n_total_lines} lines total after edit)]'
|
||||
)
|
||||
_print_window(CURRENT_FILE, CURRENT_LINE, WINDOW)
|
||||
print(
|
||||
'[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]'
|
||||
)
|
||||
print(MSG_FILE_UPDATED)
|
||||
|
||||
|
||||
@update_pwd_decorator
|
||||
|
||||
@ -114,7 +114,7 @@ Don't execute multiple actions at once if you need feedback from the page.
|
||||
----------
|
||||
|
||||
# Current Accessibility Tree:
|
||||
RootWebArea '', focused
|
||||
|
||||
|
||||
# Previous Actions
|
||||
|
||||
|
||||
@ -121,7 +121,7 @@ RootWebArea 'The Ultimate Answer', focused
|
||||
[10] button 'Click me', clickable
|
||||
|
||||
# Previous Actions
|
||||
noop()
|
||||
goto('http://localhost:8000')
|
||||
|
||||
Here is an example with chain of thought of a valid action when clicking on a button:
|
||||
"
|
||||
|
||||
@ -122,7 +122,7 @@ RootWebArea 'The Ultimate Answer', focused
|
||||
StaticText 'The answer is OpenDevin is all you need!'
|
||||
|
||||
# Previous Actions
|
||||
noop()
|
||||
goto('http://localhost:8000')
|
||||
click("10")
|
||||
|
||||
Here is an example with chain of thought of a valid action when clicking on a button:
|
||||
|
||||
@ -6,6 +6,8 @@ import docx
|
||||
import pytest
|
||||
|
||||
from opendevin.runtime.plugins.agent_skills.agentskills import (
|
||||
MSG_FILE_UPDATED,
|
||||
_print_window,
|
||||
create_file,
|
||||
edit_file,
|
||||
find_file,
|
||||
@ -274,6 +276,127 @@ def test_scroll_down_edge(tmp_path):
|
||||
assert result.split('\n') == expected.split('\n')
|
||||
|
||||
|
||||
def test_print_window_internal(tmp_path):
    """Exercise `_print_window` directly on a file whose lines contain backticks.

    A 100-line file is written, then a window of size 2 around line 50 is
    printed to stdout (``return_str=False``) and compared with the exact
    expected text.
    """
    target = tmp_path / 'a.txt'
    create_file(str(target))
    open_file(str(target))

    # Overwrite the freshly created file with 100 numbered, backtick-quoted lines.
    with open(target, 'w') as handle:
        handle.writelines(f'Line `{number}`\n' for number in range(1, 101))

    # Capture everything _print_window writes to stdout.
    with io.StringIO() as captured, contextlib.redirect_stdout(captured):
        _print_window(str(target), 50, 2, return_str=False)
        printed = captured.getvalue()

    assert printed == (
        '(49 more lines above)\n'
        '50|Line `50`\n'
        '51|Line `51`\n'
        '(49 more lines below)\n'
    )
|
||||
|
||||
|
||||
def test_edit_file_window(tmp_path, monkeypatch):
    """Verify the report printed when the linter rejects an edit.

    With auto-lint switched on, line 9 of a small Python file is replaced by
    the same statement indented one level too deep. The test asserts the exact
    stdout of `edit_file`: the syntax-error banner, the flake8-style error
    location, a window showing the proposed edit, a window showing the
    original code, and the closing "NOT been applied" guidance.
    """
    # Patch the module attribute directly: setting the environment variable via
    # monkeypatch does NOT work here.
    monkeypatch.setattr(
        'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', True
    )

    # Indentation inside this fixture is significant: function bodies use four
    # spaces so that an eight-space replacement line is an "unexpected indent".
    content = """def any_int(a, b, c):
    return isinstance(a, int) and isinstance(b, int) and isinstance(c, int)

def test_any_int():
    assert any_int(1, 2, 3) == True
    assert any_int(1.5, 2, 3) == False
    assert any_int(1, 2.5, 3) == False
    assert any_int(1, 2, 3.5) == False
    assert any_int(1.0, 2, 3) == False
    assert any_int(1, 2.0, 3) == False
    assert any_int(1, 2, 3.0) == False
    assert any_int(0, 0, 0) == True
    assert any_int(-1, -2, -3) == True
    assert any_int(1, -2, 3) == True
    assert any_int(1.5, -2, 3) == False
    assert any_int(1, -2.5, 3) == False

def check(any_int):
    # Check some simple cases
    assert any_int(2, 3, 1)==True, "This prints if this assert fails 1 (good for debugging!)"
    assert any_int(2.5, 2, 3)==False, "This prints if this assert fails 2 (good for debugging!)"
    assert any_int(1.5, 5, 3.5)==False, "This prints if this assert fails 3 (good for debugging!)"
    assert any_int(2, 6, 2)==False, "This prints if this assert fails 4 (good for debugging!)"
    assert any_int(4, 2, 2)==True, "This prints if this assert fails 5 (good for debugging!)"
    assert any_int(2.2, 2.2, 2.2)==False, "This prints if this assert fails 6 (good for debugging!)"
    assert any_int(-4, 6, 2)==True, "This prints if this assert fails 7 (good for debugging!)"

    # Check some edge cases that are easy to work out by hand.
    assert any_int(2,1,1)==True, "This prints if this assert fails 8 (also good for debugging!)"
    assert any_int(3,4,7)==True, "This prints if this assert fails 9 (also good for debugging!)"
    assert any_int(3.0,4,7)==False, "This prints if this assert fails 10 (also good for debugging!)"

check(any_int)"""

    temp_file_path = tmp_path / 'error-test.py'
    temp_file_path.write_text(content)

    open_file(str(temp_file_path))

    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            # Eight leading spaces: one indentation level deeper than its neighbours.
            edit_file(
                start=9, end=9, content='        assert any_int(1.0, 2, 3) == False'
            )
        result = buf.getvalue()
        expected = (
            '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n'
            'ERRORS:\n'
            + str(temp_file_path)
            + ':9:9: '
            + 'E999 IndentationError: unexpected indent\n'
            '[This is how your edit would have looked if applied]\n'
            '-------------------------------------------------\n'
            '(5 more lines above)\n'
            '6|    assert any_int(1.5, 2, 3) == False\n'
            '7|    assert any_int(1, 2.5, 3) == False\n'
            '8|    assert any_int(1, 2, 3.5) == False\n'
            '9|        assert any_int(1.0, 2, 3) == False\n'
            '10|    assert any_int(1, 2.0, 3) == False\n'
            '11|    assert any_int(1, 2, 3.0) == False\n'
            '12|    assert any_int(0, 0, 0) == True\n'
            '13|    assert any_int(-1, -2, -3) == True\n'
            '14|    assert any_int(1, -2, 3) == True\n'
            '15|    assert any_int(1.5, -2, 3) == False\n'
            '(18 more lines below)\n'
            '-------------------------------------------------\n'
            '\n'
            '[This is the original code before your edit]\n'
            '-------------------------------------------------\n'
            '(5 more lines above)\n'
            '6|    assert any_int(1.5, 2, 3) == False\n'
            '7|    assert any_int(1, 2.5, 3) == False\n'
            '8|    assert any_int(1, 2, 3.5) == False\n'
            '9|    assert any_int(1.0, 2, 3) == False\n'
            '10|    assert any_int(1, 2.0, 3) == False\n'
            '11|    assert any_int(1, 2, 3.0) == False\n'
            '12|    assert any_int(0, 0, 0) == True\n'
            '13|    assert any_int(-1, -2, -3) == True\n'
            '14|    assert any_int(1, -2, 3) == True\n'
            '15|    assert any_int(1.5, -2, 3) == False\n'
            '(18 more lines below)\n'
            '-------------------------------------------------\n'
            'Your changes have NOT been applied. Please fix your edit command and try again.\n'
            'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n'
            'DO NOT re-run the same failed edit command. Running it again will lead to the same error.\n'
        )
        assert result == expected
|
||||
|
||||
|
||||
def test_edit_file(tmp_path):
|
||||
temp_file_path = tmp_path / 'a.txt'
|
||||
content = 'Line 1\nLine 2\nLine 3\nLine 4\nLine 5'
|
||||
@ -289,8 +412,7 @@ def test_edit_file(tmp_path):
|
||||
f'[File: {temp_file_path} (3 lines total after edit)]\n'
|
||||
'1|REPLACE TEXT\n'
|
||||
'2|Line 4\n'
|
||||
'3|Line 5\n'
|
||||
'[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]\n'
|
||||
'3|Line 5\n' + MSG_FILE_UPDATED + '\n'
|
||||
)
|
||||
assert result.split('\n') == expected.split('\n')
|
||||
|
||||
@ -313,8 +435,7 @@ def test_edit_file_from_scratch(tmp_path):
|
||||
result = buf.getvalue()
|
||||
expected = (
|
||||
f'[File: {temp_file_path} (1 lines total after edit)]\n'
|
||||
'1|REPLACE TEXT\n'
|
||||
'[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]\n'
|
||||
'1|REPLACE TEXT\n' + MSG_FILE_UPDATED + '\n'
|
||||
)
|
||||
assert result.split('\n') == expected.split('\n')
|
||||
|
||||
@ -324,6 +445,65 @@ def test_edit_file_from_scratch(tmp_path):
|
||||
assert lines[0].rstrip() == 'REPLACE TEXT'
|
||||
|
||||
|
||||
def test_edit_file_from_scratch_multiline_with_backticks_and_second_edit(tmp_path):
    """Edit a new file twice with backtick-wrapped lines and check both results.

    Each edit's stdout must match the expected window plus MSG_FILE_UPDATED,
    the file on disk must hold the three new lines, and no backtick may show
    up backslash-escaped in the printed output.
    """
    temp_file_path = tmp_path / 'a.txt'
    create_file(str(temp_file_path))
    open_file(str(temp_file_path))

    def run_edit(start, end, text):
        # Apply the edit and hand back everything it printed.
        with io.StringIO() as captured, contextlib.redirect_stdout(captured):
            edit_file(start, end, text)
            return captured.getvalue()

    def stored_lines():
        # Current on-disk contents, one entry per line, right-stripped.
        with open(temp_file_path, 'r') as handle:
            return [raw.rstrip() for raw in handle.readlines()]

    # First edit: fill the empty file with three lines.
    result = run_edit(1, 1, '`REPLACE TEXT1`\n`REPLACE TEXT2`\n`REPLACE TEXT3`')
    expected = (
        f'[File: {temp_file_path} (3 lines total after edit)]\n'
        '1|`REPLACE TEXT1`\n'
        '2|`REPLACE TEXT2`\n'
        '3|`REPLACE TEXT3`\n' + MSG_FILE_UPDATED + '\n'
    )
    assert result.split('\n') == expected.split('\n')

    lines = stored_lines()
    assert len(lines) == 3
    assert lines[0] == '`REPLACE TEXT1`'
    assert lines[1] == '`REPLACE TEXT2`'
    assert lines[2] == '`REPLACE TEXT3`'

    # Check that no backticks are escaped in the edit_file call
    assert '\\`' not in result

    # Second edit: replace all three lines.
    second_result = run_edit(
        1, 3, '`REPLACED TEXT1`\n`REPLACED TEXT2`\n`REPLACED TEXT3`'
    )
    second_expected = (
        f'[File: {temp_file_path} (3 lines total after edit)]\n'
        '1|`REPLACED TEXT1`\n'
        '2|`REPLACED TEXT2`\n'
        '3|`REPLACED TEXT3`\n' + MSG_FILE_UPDATED + '\n'
    )
    assert second_result.split('\n') == second_expected.split('\n')

    lines = stored_lines()
    assert len(lines) == 3
    assert lines[0] == '`REPLACED TEXT1`'
    assert lines[1] == '`REPLACED TEXT2`'
    assert lines[2] == '`REPLACED TEXT3`'

    # Check that no backticks are escaped in the second edit_file call
    assert '\\`' not in second_result
|
||||
|
||||
|
||||
def test_edit_file_from_scratch_multiline(tmp_path):
|
||||
temp_file_path = tmp_path / 'a.txt'
|
||||
create_file(str(temp_file_path))
|
||||
@ -341,8 +521,7 @@ def test_edit_file_from_scratch_multiline(tmp_path):
|
||||
f'[File: {temp_file_path} (3 lines total after edit)]\n'
|
||||
'1|REPLACE TEXT1\n'
|
||||
'2|REPLACE TEXT2\n'
|
||||
'3|REPLACE TEXT3\n'
|
||||
'[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]\n'
|
||||
'3|REPLACE TEXT3\n' + MSG_FILE_UPDATED + '\n'
|
||||
)
|
||||
assert result.split('\n') == expected.split('\n')
|
||||
|
||||
@ -550,8 +729,7 @@ def test_edit_lint_file_pass(tmp_path, monkeypatch):
|
||||
'1|\n'
|
||||
f'[File: {file_path} (2 lines total after edit)]\n'
|
||||
"1|print('hello')\n"
|
||||
'2|\n'
|
||||
'[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]\n'
|
||||
'2|\n' + MSG_FILE_UPDATED + '\n'
|
||||
)
|
||||
assert result.split('\n') == expected.split('\n')
|
||||
|
||||
@ -663,7 +841,7 @@ def test_lint_file_disabled_undefined_name(tmp_path, monkeypatch, capsys):
|
||||
file_path = tmp_path / 'test_file.py'
|
||||
file_path.write_text('\n')
|
||||
|
||||
# Set environment variable to enable linting
|
||||
# Set environment variable to disable linting
|
||||
monkeypatch.setattr(
|
||||
'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', False
|
||||
)
|
||||
@ -678,8 +856,7 @@ def test_lint_file_disabled_undefined_name(tmp_path, monkeypatch, capsys):
|
||||
'1|\n'
|
||||
f'[File: {file_path} (2 lines total after edit)]\n'
|
||||
'1|undefined_name()\n'
|
||||
'2|\n'
|
||||
'[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]\n'
|
||||
'2|\n' + MSG_FILE_UPDATED + '\n'
|
||||
)
|
||||
assert result.split('\n') == expected.split('\n')
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user