From 823298e0d0a5e7bc8ecf694f5ddb015b0d846318 Mon Sep 17 00:00:00 2001 From: tobitege Date: Sun, 16 Jun 2024 21:06:46 +0200 Subject: [PATCH] fix: Agentskills enhancements (#2384) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * avoid repeat logging of unneeded messages * refactored append/edit_file (tests next) * agentskills and unit test fixes * testing * more changes and test prompts * smaller changes * final test fixes * remove dead code from test_agent.py * reverting unneeded changes * updated tests, more tweaks to skills * refactor (#2442) * chores: fix DelegatorAgent description (#2446) * change * change comments * fix * stopped container to prevent port issues. (#2447) * chore: remove useless browsing code in CodeActSWEAgent (#2438) * remove useless * fix integration test * Regenerate test_ipython_module artifacts for CodeActSWEAgent --------- Co-authored-by: Boxuan Li * Merge remote-tracking branch 'upstream/main' into agent-fileops * unneeded tweak * * fix edit_file to not introduce extra newline * updated docstrings with more details for LLM * fix legacy typo in prompts causing ]] instead of ] * several mock files regenerated * Regen'ed CodeActSWEAgent integration tests * fix _print_window signature; explicit exception type in _is_valid_path * splitlines with named param --------- Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> Co-authored-by: மனோஜ்குமார் பழனிச்சாமி Co-authored-by: Boxuan Li --- agenthub/codeact_agent/codeact_agent.py | 2 +- agenthub/codeact_agent/prompt.py | 5 +- .../codeact_swe_agent/codeact_swe_agent.py | 2 +- containers/README.md | 5 +- .../plugins/agent_skills/agentskills.py | 414 ++++++------ tests/integration/README.md | 73 ++- .../test_browse_internet/prompt_001.log | 15 +- .../test_browse_internet/prompt_005.log | 15 +- .../CodeActAgent/test_edits/prompt_001.log | 15 +- .../CodeActAgent/test_edits/prompt_002.log | 27 +- .../CodeActAgent/test_edits/prompt_003.log 
| 45 +- .../CodeActAgent/test_edits/prompt_004.log | 391 ++++++++++++ .../CodeActAgent/test_edits/response_001.log | 4 +- .../CodeActAgent/test_edits/response_002.log | 8 +- .../CodeActAgent/test_edits/response_003.log | 12 +- .../CodeActAgent/test_edits/response_004.log | 1 + .../CodeActAgent/test_ipython/prompt_001.log | 13 +- .../CodeActAgent/test_ipython/prompt_002.log | 12 +- .../test_ipython_module/prompt_001.log | 15 +- .../test_ipython_module/prompt_002.log | 15 +- .../test_ipython_module/prompt_003.log | 15 +- .../test_write_simple_script/prompt_001.log | 15 +- .../test_write_simple_script/prompt_002.log | 17 +- .../test_write_simple_script/prompt_003.log | 19 +- .../CodeActSWEAgent/test_edits/prompt_001.log | 12 +- .../CodeActSWEAgent/test_edits/prompt_002.log | 12 +- .../CodeActSWEAgent/test_edits/prompt_003.log | 19 +- .../test_ipython/prompt_001.log | 13 +- .../test_ipython/prompt_002.log | 13 +- .../test_ipython_module/prompt_001.log | 12 +- .../test_ipython_module/prompt_002.log | 29 +- .../test_ipython_module/prompt_003.log | 49 +- .../test_ipython_module/prompt_004.log | 588 ++++++++++++++++++ .../test_ipython_module/response_001.log | 12 +- .../test_ipython_module/response_002.log | 17 +- .../test_ipython_module/response_003.log | 5 +- .../test_ipython_module/response_004.log | 1 + .../test_write_simple_script/prompt_001.log | 12 +- .../test_write_simple_script/prompt_002.log | 16 +- .../test_write_simple_script/prompt_003.log | 18 +- .../test_write_simple_script/response_001.log | 3 +- tests/integration/regenerate.sh | 4 + tests/integration/test_agent.py | 26 +- tests/unit/test_agent_skill.py | 165 +++-- tests/unit/test_micro_agents.py | 6 +- 45 files changed, 1645 insertions(+), 542 deletions(-) create mode 100644 tests/integration/mock/CodeActAgent/test_edits/prompt_004.log create mode 100644 tests/integration/mock/CodeActAgent/test_edits/response_004.log create mode 100644 
tests/integration/mock/CodeActSWEAgent/test_ipython_module/prompt_004.log create mode 100644 tests/integration/mock/CodeActSWEAgent/test_ipython_module/response_004.log diff --git a/agenthub/codeact_agent/codeact_agent.py b/agenthub/codeact_agent/codeact_agent.py index d6aeae31ca..37586162e7 100644 --- a/agenthub/codeact_agent/codeact_agent.py +++ b/agenthub/codeact_agent/codeact_agent.py @@ -63,7 +63,7 @@ def get_observation_message(obs) -> dict[str, str] | None: if isinstance(obs, CmdOutputObservation): content = 'OBSERVATION:\n' + truncate_observation(obs.content) content += ( - f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]]' + f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]' ) return {'role': 'user', 'content': content} elif isinstance(obs, IPythonRunCellObservation): diff --git a/agenthub/codeact_agent/prompt.py b/agenthub/codeact_agent/prompt.py index 4411c17c5d..da0c899200 100644 --- a/agenthub/codeact_agent/prompt.py +++ b/agenthub/codeact_agent/prompt.py @@ -37,9 +37,10 @@ For example, to push a branch `my_branch` to the GitHub repo `owner/repo`: If $GITHUB_TOKEN is not set, ask the user to set it.""" SYSTEM_SUFFIX = """Responses should be concise. -The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. -Include ONLY ONE , , or per response, unless the assistant is finished with the task or need more input or action from the user in order to proceed. +The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , , or per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. IMPORTANT: Execute code using , , or whenever possible. +When handling files, try to use full paths and pwd to avoid errors. 
""" diff --git a/agenthub/codeact_swe_agent/codeact_swe_agent.py b/agenthub/codeact_swe_agent/codeact_swe_agent.py index a875635e47..23a1ec644c 100644 --- a/agenthub/codeact_swe_agent/codeact_swe_agent.py +++ b/agenthub/codeact_swe_agent/codeact_swe_agent.py @@ -54,7 +54,7 @@ def get_observation_message(obs) -> dict[str, str] | None: if isinstance(obs, CmdOutputObservation): content = 'OBSERVATION:\n' + truncate_observation(obs.content) content += ( - f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]]' + f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]' ) return {'role': 'user', 'content': content} elif isinstance(obs, IPythonRunCellObservation): diff --git a/containers/README.md b/containers/README.md index 1f66cbd6cf..526bfee938 100644 --- a/containers/README.md +++ b/containers/README.md @@ -1,11 +1,12 @@ # Docker Containers + Each folder here contains a Dockerfile, and a config.sh describing how to build -the image and where to push it. These are images are built and pushed in GitHub Actions +the images and where to push them. These images are built and pushed in GitHub Actions by the `ghcr.yml` workflow. ## Building Manually -``` +```bash docker build -f containers/app/Dockerfile -t opendevin . docker build -f containers/sandbox/Dockerfile -t sandbox . ``` diff --git a/opendevin/runtime/plugins/agent_skills/agentskills.py b/opendevin/runtime/plugins/agent_skills/agentskills.py index a9645ad505..d415fd9e4e 100644 --- a/opendevin/runtime/plugins/agent_skills/agentskills.py +++ b/opendevin/runtime/plugins/agent_skills/agentskills.py @@ -4,7 +4,7 @@ agentskills.py This module provides various file manipulation skills for the OpenDevin agent. Functions: -- open_file(path, line_number=None): Opens a file and optionally moves to a specific line. +- open_file(path: str, line_number: int | None = 1, context_lines: int = 100): Opens a file and optionally moves to a specific line. 
- goto_line(line_number): Moves the window to show the specified line number. - scroll_down(): Moves the window down by the number of lines specified in WINDOW. - scroll_up(): Moves the window up by the number of lines specified in WINDOW. @@ -19,6 +19,7 @@ Functions: import base64 import functools import os +import shutil import subprocess import tempfile from inspect import signature @@ -30,7 +31,7 @@ from openai import OpenAI from pptx import Presentation from pylatexenc.latex2text import LatexNodes2Text -CURRENT_FILE = None +CURRENT_FILE: str | None = None CURRENT_LINE = 1 WINDOW = 100 @@ -68,6 +69,53 @@ def update_pwd_decorator(func): return wrapper +def _is_valid_filename(file_name) -> bool: + if not file_name or not isinstance(file_name, str) or not file_name.strip(): + return False + invalid_chars = '<>:"/\\|?*' + if os.name == 'nt': # Windows + invalid_chars = '<>:"/\\|?*' + elif os.name == 'posix': # Unix-like systems + invalid_chars = '\0' + + for char in invalid_chars: + if char in file_name: + return False + return True + + +def _is_valid_path(path) -> bool: + if not path or not isinstance(path, str): + return False + try: + return os.path.exists(os.path.normpath(path)) + except PermissionError: + return False + + +def _create_paths(file_name) -> bool: + try: + dirname = os.path.dirname(file_name) + if dirname: + os.makedirs(dirname, exist_ok=True) + return True + except PermissionError: + return False + + +def _check_current_file(file_path: str | None = None) -> bool: + global CURRENT_FILE + if not file_path: + file_path = CURRENT_FILE + if not file_path or not os.path.isfile(file_path): + raise ValueError('No file open. 
Use the open_file function first.') + return True + + +def _clamp(value, min_value, max_value): + return max(min_value, min(value, max_value)) + + def _lint_file(file_path: str) -> tuple[Optional[str], Optional[int]]: """ Lint the file at the given path and return a tuple with a boolean indicating if there are errors, @@ -100,7 +148,7 @@ def _lint_file(file_path: str) -> tuple[Optional[str], Optional[int]]: error_message = result.stdout.decode().strip() lint_error = 'ERRORS:\n' + error_message first_error_line = None - for line in error_message.split('\n'): + for line in error_message.splitlines(True): if line.strip(): # The format of the error message is: ::: parts = line.split(':') @@ -118,27 +166,45 @@ def _lint_file(file_path: str) -> tuple[Optional[str], Optional[int]]: return None, None -def _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=False): - if CURRENT_FILE is None: - raise FileNotFoundError('No file open. Use the open_file function first.') - with open(CURRENT_FILE, 'r') as file: - lines = file.readlines() - start = max(0, CURRENT_LINE - WINDOW // 2) - end = min(len(lines), CURRENT_LINE + WINDOW // 2) +def _print_window(file_path, targeted_line, WINDOW, return_str=False): + global CURRENT_LINE + _check_current_file(file_path) + with open(file_path) as file: + content = file.read() + + # Ensure the content ends with a newline character + if not content.endswith('\n'): + content += '\n' + + lines = content.splitlines(True) # Keep all line ending characters + total_lines = len(lines) + + # cover edge cases + CURRENT_LINE = _clamp(targeted_line, 1, total_lines) + half_window = max(1, WINDOW // 2) + + # Ensure at least one line above and below the targeted line + start = max(1, CURRENT_LINE - half_window) + end = min(total_lines, CURRENT_LINE + half_window) + + # Adjust start and end to ensure at least one line above and below + if start == 1: + end = min(total_lines, start + WINDOW - 1) + if end == total_lines: + start = max(1, end - WINDOW + 
1) + output = '' - # only display this when there's line above - if start > 0: - n_above_lines = start - output += f'({n_above_lines} more lines above)\n' - for i in range(start, end): - _new_line = f'{i + 1}|{lines[i]}' + # only display this when there's at least one line above + if start > 1: + output += f'({start - 1} more lines above)\n' + for i in range(start, end + 1): + _new_line = f'{i}|{lines[i-1]}' if not _new_line.endswith('\n'): _new_line += '\n' output += _new_line - if end < len(lines): - n_below_lines = len(lines) - end - output += f'({n_below_lines} more lines below)\n' + if end < total_lines: + output += f'({total_lines - end} more lines below)\n' output = output.rstrip() if return_str: @@ -147,37 +213,43 @@ def _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=False): print(output) -def _cur_file_header(CURRENT_FILE, total_lines): +def _cur_file_header(CURRENT_FILE, total_lines) -> str: + if not CURRENT_FILE: + return '' return f'[File: {os.path.abspath(CURRENT_FILE)} ({total_lines} lines total)]\n' @update_pwd_decorator -def open_file(path: str, line_number: Optional[int] = None) -> None: +def open_file( + path: str, line_number: int | None = 1, context_lines: int | None = 100 +) -> None: """ Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. 
""" - global CURRENT_FILE, CURRENT_LINE + global CURRENT_FILE, CURRENT_LINE, WINDOW + if not os.path.isfile(path): raise FileNotFoundError(f'File {path} not found') CURRENT_FILE = os.path.abspath(path) with open(CURRENT_FILE) as file: - total_lines = sum(1 for _ in file) + total_lines = max(1, sum(1 for _ in file)) - if line_number is not None: - if ( - not isinstance(line_number, int) - or line_number < 1 - or line_number > total_lines - ): - raise ValueError(f'Line number must be between 1 and {total_lines}') - CURRENT_LINE = line_number - else: - CURRENT_LINE = 1 + if not isinstance(line_number, int) or line_number < 1 or line_number > total_lines: + raise ValueError(f'Line number must be between 1 and {total_lines}') + CURRENT_LINE = line_number + + # Override WINDOW with context_lines + if context_lines is None or context_lines < 1: + context_lines = 100 + WINDOW = _clamp(context_lines, 1, 2000) output = _cur_file_header(CURRENT_FILE, total_lines) output += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True) @@ -193,15 +265,14 @@ def goto_line(line_number: int) -> None: line_number: int: The line number to move to. """ global CURRENT_FILE, CURRENT_LINE, WINDOW - if CURRENT_FILE is None: - raise FileNotFoundError('No file open. 
Use the open_file function first.') + _check_current_file() - with open(CURRENT_FILE) as file: - total_lines = sum(1 for _ in file) + with open(str(CURRENT_FILE)) as file: + total_lines = max(1, sum(1 for _ in file)) if not isinstance(line_number, int) or line_number < 1 or line_number > total_lines: raise ValueError(f'Line number must be between 1 and {total_lines}') - CURRENT_LINE = line_number + CURRENT_LINE = _clamp(line_number, 1, total_lines) output = _cur_file_header(CURRENT_FILE, total_lines) output += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True) @@ -216,12 +287,11 @@ def scroll_down() -> None: None """ global CURRENT_FILE, CURRENT_LINE, WINDOW - if CURRENT_FILE is None: - raise FileNotFoundError('No file open. Use the open_file function first.') + _check_current_file() - with open(CURRENT_FILE) as file: - total_lines = sum(1 for _ in file) - CURRENT_LINE = min(CURRENT_LINE + WINDOW, total_lines) + with open(str(CURRENT_FILE)) as file: + total_lines = max(1, sum(1 for _ in file)) + CURRENT_LINE = _clamp(CURRENT_LINE + WINDOW, 1, total_lines) output = _cur_file_header(CURRENT_FILE, total_lines) output += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True) print(output) @@ -235,12 +305,11 @@ def scroll_up() -> None: None """ global CURRENT_FILE, CURRENT_LINE, WINDOW - if CURRENT_FILE is None: - raise FileNotFoundError('No file open. Use the open_file function first.') + _check_current_file() - CURRENT_LINE = max(CURRENT_LINE - WINDOW, 1) - with open(CURRENT_FILE) as file: - total_lines = sum(1 for _ in file) + with open(str(CURRENT_FILE)) as file: + total_lines = max(1, sum(1 for _ in file)) + CURRENT_LINE = _clamp(CURRENT_LINE - WINDOW, 1, total_lines) output = _cur_file_header(CURRENT_FILE, total_lines) output += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True) print(output) @@ -253,7 +322,6 @@ def create_file(filename: str) -> None: Args: filename: str: The name of the file to create. 
""" - global CURRENT_FILE, CURRENT_LINE if os.path.exists(filename): raise FileExistsError(f"File '{filename}' already exists.") @@ -264,25 +332,23 @@ def create_file(filename: str) -> None: print(f'[File {filename} created.]') -@update_pwd_decorator -def edit_file(file_name: str, start: int, end: int, content: str) -> None: - """Edit a file. - - Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. +def _edit_or_append_file( + file_name: str, + start: int | None = None, + end: int | None = None, + content: str = '', + is_append: bool = False, +) -> None: + """Internal method to handle common logic for edit_/append_file methods. Args: - file_name: str: The name of the file to edit. - start: int: The start line number. Must satisfy start >= 1. - end: int: The end line number. Must satisfy start <= end <= number of lines in the file. - content: str: The content to replace the lines with. + file_name: str: The name of the file to edit or append to. + start: int | None = None: The start line number for editing. Ignored if is_append is True. + end: int | None = None: The end line number for editing. Ignored if is_append is True. + content: str: The content to replace the lines with or to append. + is_append: bool = False: Whether to append content to the file instead of editing. """ global CURRENT_FILE, CURRENT_LINE, WINDOW - if not os.path.isfile(file_name): - raise FileNotFoundError(f'File {file_name} not found.') - - # Load the file - with open(file_name, 'r') as file: - lines = file.readlines() ERROR_MSG = f'[Error editing file {file_name}. Please confirm the file is correct.]' ERROR_MSG_SUFFIX = ( @@ -290,118 +356,23 @@ def edit_file(file_name: str, start: int, end: int, content: str) -> None: 'You either need to 1) Open the correct file and try again or 2) Specify the correct start/end line arguments.\n' 'DO NOT re-run the same failed edit command. Running it again will lead to the same error.' 
) - # Check arguments - if not (1 <= start <= len(lines)): - print( - f'{ERROR_MSG}\n' - f'Invalid start line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).\n' - f'{ERROR_MSG_SUFFIX}' - ) - return - if not (1 <= end <= len(lines)): - print( - f'{ERROR_MSG}\n' - f'Invalid end line number: {end}. Line numbers must be between 1 and {len(lines)} (inclusive).\n' - f'{ERROR_MSG_SUFFIX}' - ) - return - if start > end: - print( - f'{ERROR_MSG}\n' - f'Invalid line range: {start}-{end}. Start must be less than or equal to end.\n' - f'{ERROR_MSG_SUFFIX}' - ) - return + if not _is_valid_filename(file_name): + raise FileNotFoundError('Invalid file name.') - edited_content = content + '\n' - new_lines = lines[: start - 1] + [edited_content] + lines[end:] + if not _is_valid_path(file_name): + raise FileNotFoundError('Invalid path or file name.') - # directly write edited lines to the file - with open(file_name, 'w') as file: - file.writelines(new_lines) + if not _create_paths(file_name): + raise PermissionError('Could not access or create directories.') - # set current line to the center of the edited lines - CURRENT_LINE = (start + end) // 2 - first_error_line = None - - # Handle linting - if ENABLE_AUTO_LINT: - # BACKUP the original file - original_file_backup_path = os.path.join( - os.path.dirname(file_name), f'.backup.{os.path.basename(file_name)}' - ) - with open(original_file_backup_path, 'w') as f: - f.writelines(lines) - - lint_error, first_error_line = _lint_file(file_name) - if lint_error is not None: - if first_error_line is not None: - CURRENT_LINE = int(first_error_line) - # only change any literal strings here in combination with unit tests! - print( - '[Your proposed edit has introduced new syntax error(s). 
Please understand the errors and retry your edit command.]' - ) - print(lint_error) - - print('[This is how your edit would have looked if applied]') - print('-------------------------------------------------') - cur_line = first_error_line - _print_window(file_name, cur_line, 10) - print('-------------------------------------------------\n') - - print('[This is the original code before your edit]') - print('-------------------------------------------------') - _print_window(original_file_backup_path, cur_line, 10) - print('-------------------------------------------------') - - print( - 'Your changes have NOT been applied. Please fix your edit command and try again.\n' - 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' - 'DO NOT re-run the same failed edit command. Running it again will lead to the same error.' - ) - - # recover the original file - with open(original_file_backup_path, 'r') as fin, open( - file_name, 'w' - ) as fout: - fout.write(fin.read()) - os.remove(original_file_backup_path) - return - - os.remove(original_file_backup_path) - - # Update the file information and print the updated content - with open(file_name, 'r') as file: - n_total_lines = len(file.readlines()) - if first_error_line is not None and int(first_error_line) > 0: - CURRENT_LINE = first_error_line - else: - CURRENT_LINE = n_total_lines - print( - f'[File: {os.path.abspath(file_name)} ({n_total_lines} lines total after edit)]' - ) - CURRENT_FILE = file_name - _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW) - print(MSG_FILE_UPDATED) - - -@update_pwd_decorator -def append_file(file_name: str, content: str) -> None: - """Append content to the given file. - - It appends text `content` to the end of the specified file. - - Args: - file_name: str: The name of the file to append to. - content: str: The content to append to the file. 
- """ - global CURRENT_FILE, CURRENT_LINE, WINDOW if not os.path.isfile(file_name): raise FileNotFoundError(f'File {file_name} not found.') # Use a temporary file to write changes + content = str(content or '') temp_file_path = '' + src_abs_path = os.path.abspath(file_name) first_error_line = None try: # Create a temporary file @@ -409,24 +380,58 @@ def append_file(file_name: str, content: str) -> None: temp_file_path = temp_file.name # Read the original file and check if empty and for a trailing newline - with open(file_name, 'r') as original_file: + with open(file_name) as original_file: lines = original_file.readlines() - if lines and not (len(lines) == 1 and lines[0].strip() == ''): - if not lines[-1].endswith('\n'): - lines[-1] += '\n' - content = ''.join(lines) + content + if is_append: + if lines and not (len(lines) == 1 and lines[0].strip() == ''): + if not lines[-1].endswith('\n'): + lines[-1] += '\n' + content_lines = content.splitlines(keepends=True) + new_lines = lines + content_lines + content = ''.join(new_lines) else: - content = content + # Handle cases where start or end are None + if start is None: + start = 1 # Default to the beginning + if end is None: + end = len(lines) # Default to the end + # Check arguments + if not (1 <= start <= len(lines)): + print( + f'{ERROR_MSG}\n' + f'Invalid start line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).\n' + f'{ERROR_MSG_SUFFIX}' + ) + return + if not (1 <= end <= len(lines)): + print( + f'{ERROR_MSG}\n' + f'Invalid end line number: {end}. Line numbers must be between 1 and {len(lines)} (inclusive).\n' + f'{ERROR_MSG_SUFFIX}' + ) + return + if start > end: + print( + f'{ERROR_MSG}\n' + f'Invalid line range: {start}-{end}. 
Start must be less than or equal to end.\n' + f'{ERROR_MSG_SUFFIX}' + ) + return + if not content.endswith('\n'): + content += '\n' + content_lines = content.splitlines(True) + new_lines = lines[: start - 1] + content_lines + lines[end:] + content = ''.join(new_lines) if not content.endswith('\n'): content += '\n' - # Append the new content with a trailing newline + # Write the new content to the temporary file temp_file.write(content) # Replace the original file with the temporary file atomically - os.replace(temp_file_path, file_name) + shutil.move(temp_file_path, src_abs_path) # Handle linting if ENABLE_AUTO_LINT: @@ -459,41 +464,80 @@ def append_file(file_name: str, content: str) -> None: print( 'Your changes have NOT been applied. Please fix your edit command and try again.\n' - 'You need to correct your added code.\n' + 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' 'DO NOT re-run the same failed edit command. Running it again will lead to the same error.' 
) # recover the original file - with open(original_file_backup_path, 'r') as fin, open( + with open(original_file_backup_path) as fin, open( file_name, 'w' ) as fout: fout.write(fin.read()) os.remove(original_file_backup_path) return + except FileNotFoundError as e: + print(f'File not found: {e}') + except IOError as e: + print(f'An error occurred while handling the file: {e}') + except ValueError as e: + print(f'Invalid input: {e}') except Exception as e: # Clean up the temporary file if an error occurs if temp_file_path and os.path.exists(temp_file_path): os.remove(temp_file_path) + print(f'An unexpected error occurred: {e}') raise e # Update the file information and print the updated content with open(file_name, 'r', encoding='utf-8') as file: - n_total_lines = len(file.readlines()) + n_total_lines = max(1, len(file.readlines())) if first_error_line is not None and int(first_error_line) > 0: CURRENT_LINE = first_error_line else: - CURRENT_LINE = n_total_lines + if is_append: + CURRENT_LINE = max(1, len(lines)) # end of original file + else: + CURRENT_LINE = start or n_total_lines or 1 print( f'[File: {os.path.abspath(file_name)} ({n_total_lines} lines total after edit)]' ) CURRENT_FILE = file_name _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW) - print( - '[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]' + print(MSG_FILE_UPDATED) + + +@update_pwd_decorator +def edit_file(file_name: str, start: int, end: int, content: str) -> None: + """Edit a file. + + Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! + + Args: + file_name: str: The name of the file to edit. + start: int: The start line number. Must satisfy start >= 1. + end: int: The end line number. 
Must satisfy start <= end <= number of lines in the file. + content: str: The content to replace the lines with. + """ + _edit_or_append_file( + file_name, start=start, end=end, content=content, is_append=False ) +@update_pwd_decorator +def append_file(file_name: str, content: str) -> None: + """Append content to the given file. + + It appends text `content` to the end of the specified file. + + Args: + file_name: str: The name of the file to append to. + content: str: The content to append to the file. + """ + _edit_or_append_file(file_name, start=1, end=None, content=content, is_append=True) + + @update_pwd_decorator def search_dir(search_term: str, dir_path: str = './') -> None: """Searches for search_term in all files in dir. If dir is not provided, searches in the current directory. @@ -553,7 +597,7 @@ def search_file(search_term: str, file_path: Optional[str] = None) -> None: raise FileNotFoundError(f'File {file_path} not found') matches = [] - with open(file_path, 'r') as file: + with open(file_path) as file: for i, line in enumerate(file, 1): if search_term in line: matches.append((i, line.strip())) @@ -637,7 +681,7 @@ def parse_latex(file_path: str) -> None: file_path: str: The path to the file to open. """ print(f'[Reading LaTex file from {file_path}]') - with open(file_path, 'r') as f: + with open(file_path) as f: data = f.read() text = LatexNodes2Text().latex_to_text(data) print(text.strip()) diff --git a/tests/integration/README.md b/tests/integration/README.md index 7daa04e356..c83027c605 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -1,12 +1,16 @@ -## Introduction +# Introduction This folder contains backend integration tests that rely on a mock LLM. It serves two purposes: + 1. Ensure the quality of development, including OpenDevin framework and agents. 2. Help contributors learn the workflow of OpenDevin, and examples of real interactions with (powerful) LLM, without spending real money. 
-Why don't we launch an open-source model, e.g. LLAMA3? There are two reasons: +## Why don't we launch an open-source model, e.g. LLAMA3? + +There are two reasons: + 1. LLMs cannot guarantee determinism, meaning the test behavior might change. 2. CI machines are not powerful enough to run any LLM that is sophisticated enough to finish the tasks defined in tests. @@ -18,7 +22,8 @@ require real LLMs which are non-deterministic and costly. We run integration test suite for every single commit, which is not possible with benchmarks. -Known limitations: +## Known limitations + 1. To avoid the potential impact of non-determinism, we remove all special characters when doing the comparison. If two prompts for the same task only differ in non-alphanumeric characters, a wrong mock response might be picked up. @@ -46,17 +51,17 @@ of agents with real LLMs are stored under `mock/AgentName/TestName` folders. ## Run Integration Tests -Take a look at `run-integration-tests.yml` to learn how integration tests are -launched in CI environment. You can also simply run: +Take a look at `run-integration-tests.yml` (in the `.github` folder) to learn +how integration tests are launched in a CI environment. You can also simply run: ```bash TEST_ONLY=true ./tests/integration/regenerate.sh ``` -to run all integration tests until the first failure. +to run all integration tests until the first failure occurs. -If you only want to run a specific test, set environment variable -`ONLY_TEST_NAME` to the test name. If you only want to run a specific agent, +If you'd only plan to run a specific test, set environment variable +`ONLY_TEST_NAME` to the actual test name. If you only want to run a specific agent, set environment variable `ONLY_TEST_AGENT` to the agent. You could also use both, e.g. @@ -64,35 +69,38 @@ e.g. 
TEST_ONLY=true ONLY_TEST_NAME="test_simple_task_rejection" ONLY_TEST_AGENT="ManagerAgent" ./tests/integration/regenerate.sh ``` - ## Regenerate Integration Tests + When you make changes to an agent's prompt, the integration tests will fail. You'll need to regenerate them -by running the following command from project root directory: +by running the following command from OpenDevin's project root directory: + ```bash ./tests/integration/regenerate.sh ``` -Note that this will: + +Please note that this will: + 1. Run existing tests first. If a test passes, then no regeneration would happen. 2. Regenerate the prompts, but attempt to use existing responses from LLM (if any). For example, if you only fix a typo in the prompt, it shouldn't affect LLM's behaviour. -If we rerun OpenDevin against a real LLM, then due to LLM's non-deterministic nature, +If we rerun integration tests against a real LLM, then due to LLM's non-deterministic nature, a series of different prompts and responses will be generated, causing a lot of -unnecessary diffs and is hard to review. If you want to skip this step, see below +unnecessary diffs which are hard to review. If you want to skip this step, see below sections. 3. Rerun the failed test again. If it succeeds, continue to the next test or agent. If it fails again, goto next step. -4. Rerun OpenDevin against a real LLM, record all prompts and +4. Rerun integration tests against a real LLM, record all prompts and responses, and replace the existing test artifacts (if any). 5. Rerun the failed test again. If it succeeds, continue to the next test or agent. If it fails again, abort the script. -Note that step 4 calls real LLM_MODEL only for failed tests that cannot be fixed +Note that step 4 calls *real* LLM_MODEL only for failed tests that cannot be fixed by regenerating prompts alone, but it still costs money! If you don't want to cover the cost, ask one of the maintainers to regenerate for you. 
Before asking, -please try running the script first without setting `LLM_API_KEY`. Chance is the -test could be fixed after step 2. +please try running the script first *without* setting `LLM_API_KEY`. +Chances are, the test could be fixed after step 2. -### Regenerate a Specific Agent and/or Test +## Regenerate a Specific Agent and/or Test If you only want to run a specific test, set environment variable `ONLY_TEST_NAME` to the test name. If you only want to run a specific agent, @@ -103,24 +111,24 @@ e.g. ONLY_TEST_NAME="test_write_simple_script" ONLY_TEST_AGENT="CodeActAgent" ./tests/integration/regenerate.sh ``` -### Force Regenerate with real LLM +## Force Regenerate with real LLM Sometimes, step 2 would fix the broken test by simply reusing existing responses from LLM. This may not be what you want - for example, you might have greatly improved -the prompt that you believe LLM will do better jobs using fewer steps, or you might -have added a new action type and you think LLM would be able to use the new type. -In this case you can skip step 2 and run OpenDevin against a real LLM. Simply -set `FORCE_USE_LLM` environmental variable to true, or run the script like this: +the prompt so that you believe the LLM will do a better job using fewer steps, or you might +have added a new action type and you think the LLM should be able to use the new type. +In this case you can skip step 2 and run integration tests against a real LLM. +Simply set the `FORCE_USE_LLM` environment variable to true, or run the script like this: ```bash FORCE_USE_LLM=true ./tests/integration/regenerate.sh ``` -Note: FORCE_USE_LLM doesn't take effect if all tests are passing. If you want to -regenerate regardless, you could remove everything under `tests/integration/mock/[agent]/[test_name]` -folder. +Note: `FORCE_USE_LLM` doesn't take effect if all tests are passing. If you want to +regenerate regardless, you could remove everything under the +`tests/integration/mock/[agent]/[test_name]` folder. 
-### Known Issues +## Known Issues The test framework cannot handle non-determinism. If anything in the prompt (including observed result after executing an action) is non-deterministic (e.g. a PID), the @@ -132,11 +140,16 @@ numbers or any other known patterns when matching prompts for your test. To write an integration test, there are essentially two steps: 1. Decide your task prompt, and the result you want to verify. -2. Add your prompt to ./regenerate.sh +2. Add your prompt to the `regenerate.sh` script. -**NOTE**: If your agent decide to support user-agent interaction via natural language (e.g., you will prompted to enter user resposes when running the above `main.py` command), you should create a file named `tests/integration/mock/<AgentName>/<TestName>/user_responses.log` containing all the responses in order you provided to the agent, delimited by newline ('\n'). This will be used to mock the STDIN during testing. +**NOTE**: If your agent decides to support user-agent interaction via natural +language (e.g., you're prompted to enter user responses when running the above +`main.py` command), you should create a file named +`tests/integration/mock/<AgentName>/<TestName>/user_responses.log` +containing all the responses, in the order you provided them to the agent, +delimited by a single newline ('\n'). This will be used to mock the STDIN during testing. That's it, you are good to go! When you launch an integration test, mock -responses are loaded and used to replace a real LLM, so that we get +responses are loaded and used to replace a real LLM's responses, so that we get deterministic and consistent behavior, and most importantly, without spending real money. 
diff --git a/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_001.log b/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_001.log index 9359beb337..f8d00b8fbf 100644 --- a/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_001.log @@ -25,11 +25,14 @@ If $GITHUB_TOKEN is not set, ask the user to set it. Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -61,6 +64,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. Must satisfy start >= 1. 
@@ -108,9 +112,10 @@ parse_pptx(file_path: str) -> None: Please note that THE `edit_file` and `append_file` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Responses should be concise. -The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. -Include ONLY ONE , , or per response, unless the assistant is finished with the task or need more input or action from the user in order to proceed. +The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , , or per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. IMPORTANT: Execute code using , , or whenever possible. +When handling files, try to use full paths and pwd to avoid errors. ---------- diff --git a/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_005.log b/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_005.log index 9c713ac800..47a1f2497c 100644 --- a/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_005.log +++ b/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_005.log @@ -25,11 +25,14 @@ If $GITHUB_TOKEN is not set, ask the user to set it. Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! 
Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -61,6 +64,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. Must satisfy start >= 1. @@ -108,9 +112,10 @@ parse_pptx(file_path: str) -> None: Please note that THE `edit_file` and `append_file` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Responses should be concise. -The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. -Include ONLY ONE , , or per response, unless the assistant is finished with the task or need more input or action from the user in order to proceed. +The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. 
+Include ONLY ONE , , or per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. IMPORTANT: Execute code using , , or whenever possible. +When handling files, try to use full paths and pwd to avoid errors. ---------- diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log index 435842765a..51d04cf0cc 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log @@ -25,11 +25,14 @@ If $GITHUB_TOKEN is not set, ask the user to set it. Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -61,6 +64,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. 
Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. Must satisfy start >= 1. @@ -108,9 +112,10 @@ parse_pptx(file_path: str) -> None: Please note that THE `edit_file` and `append_file` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Responses should be concise. -The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. -Include ONLY ONE , , or per response, unless the assistant is finished with the task or need more input or action from the user in order to proceed. +The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , , or per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. IMPORTANT: Execute code using , , or whenever possible. +When handling files, try to use full paths and pwd to avoid errors. ---------- diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log index 17d476cada..2dbb296d96 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log @@ -25,11 +25,14 @@ If $GITHUB_TOKEN is not set, ask the user to set it. 
Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -61,6 +64,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. Must satisfy start >= 1. @@ -108,9 +112,10 @@ parse_pptx(file_path: str) -> None: Please note that THE `edit_file` and `append_file` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Responses should be concise. 
-The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. -Include ONLY ONE , , or per response, unless the assistant is finished with the task or need more input or action from the user in order to proceed. +The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , , or per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. IMPORTANT: Execute code using , , or whenever possible. +When handling files, try to use full paths and pwd to avoid errors. ---------- @@ -327,19 +332,17 @@ Fix typos in bad.txt. Do not ask me for confirmation at any point. ---------- -Sure! Let's start by opening the `bad.txt` file to see its content. +Sure! Let's start by opening the `bad.txt` file to identify and fix the typos. -open_file('bad.txt') +find_file('bad.txt') ---------- OBSERVATION: -[File: /workspace/bad.txt (4 lines total)] -1|This is a stupid typoo. -2|Really? -3|No mor typos! -4|Enjoy! +[Found 1 matches for "bad.txt" in ./] +./bad.txt +[End of matches for "bad.txt" in ./] ENVIRONMENT REMINDER: You have 8 turns left to complete the task. diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log index 9c7e73d7c7..fea465bedb 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log +++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log @@ -25,11 +25,14 @@ If $GITHUB_TOKEN is not set, ask the user to set it. Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. 
If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -61,6 +64,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. Must satisfy start >= 1. @@ -108,9 +112,10 @@ parse_pptx(file_path: str) -> None: Please note that THE `edit_file` and `append_file` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Responses should be concise. -The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. -Include ONLY ONE , , or per response, unless the assistant is finished with the task or need more input or action from the user in order to proceed. 
+The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , , or per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. IMPORTANT: Execute code using , , or whenever possible. +When handling files, try to use full paths and pwd to avoid errors. ---------- @@ -327,40 +332,34 @@ Fix typos in bad.txt. Do not ask me for confirmation at any point. ---------- -Sure! Let's start by opening the `bad.txt` file to see its content. +Sure! Let's start by opening the `bad.txt` file to identify and fix the typos. -open_file('bad.txt') +find_file('bad.txt') ---------- OBSERVATION: -[File: /workspace/bad.txt (4 lines total)] -1|This is a stupid typoo. -2|Really? -3|No mor typos! -4|Enjoy! +[Found 1 matches for "bad.txt" in ./] +./bad.txt +[End of matches for "bad.txt" in ./] ---------- -Let's correct the typos in the `bad.txt` file. +Let's open the `bad.txt` file to review its content and identify any typos. -edit_file('bad.txt', start=1, end=4, content="""This is a stupid typo. -Really? -No more typos! -Enjoy!""") +open_file('/workspace/bad.txt') ---------- OBSERVATION: -[File: /workspace/bad.txt (4 lines total after edit)] -1|This is a stupid typo. -2|Really? -3|No more typos! -4|Enjoy! -[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] +[File: /workspace/bad.txt (4 lines total)] +1|This is a stupid typoo. +2|Really? +3|No mor typos! +4|Enjoy! ENVIRONMENT REMINDER: You have 7 turns left to complete the task. 
diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_004.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_004.log new file mode 100644 index 0000000000..48bd5e2064 --- /dev/null +++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_004.log @@ -0,0 +1,391 @@ + + +---------- + +A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. +The assistant can use an interactive Python (Jupyter Notebook) environment, executing code with . + +print("Hello World!") + +The assistant can execute bash commands on behalf of the user by wrapping them with and . + +For example, you can list the files in the current directory by ls . +Important, however: do not run interactive commands. You do not have access to stdin. +Also, you need to handle commands that may run indefinitely and not return a result. For such cases, you should redirect the output to a file and run the command in the background to avoid blocking the execution. +For example, to run a Python script that might run indefinitely without returning immediately, you can use the following format: python3 app.py > server.log 2>&1 & +Also, if a command execution result saying like: Command: "npm start" timed out. Sending SIGINT to the process, you should also retry with running the command in the background. +The assistant can browse the Internet with and . +For example, Tell me the usa's president using google search . +Or Tell me what is in http://example.com . +The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them. +To interact with GitHub, use the $GITHUB_TOKEN environment variable. 
+For example, to push a branch `my_branch` to the GitHub repo `owner/repo`: + git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch +If $GITHUB_TOKEN is not set, ask the user to set it. + + +Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: + Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. + Args: + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. + +goto_line(line_number: int) -> None: + Moves the window to show the specified line number. + Args: + line_number: int: The line number to move to. + +scroll_down() -> None: + Moves the window down by 100 lines. + Args: + None + +scroll_up() -> None: + Moves the window up by 100 lines. + Args: + None + +create_file(filename: str) -> None: + Creates and opens a new file with the given name. + Args: + filename: str: The name of the file to create. + +append_file(file_name: str, content: str) -> None: + Append content to the given file. + It appends text `content` to the end of the specified file. + Args: + file_name: str: The name of the file to append to. + content: str: The content to append to the file. + +edit_file(file_name: str, start: int, end: int, content: str) -> None: + Edit a file. + Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. 
+ If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! + Args: + file_name: str: The name of the file to edit. + start: int: The start line number. Must satisfy start >= 1. + end: int: The end line number. Must satisfy start <= end <= number of lines in the file. + content: str: The content to replace the lines with. + +search_dir(search_term: str, dir_path: str = './') -> None: + Searches for search_term in all files in dir. If dir is not provided, searches in the current directory. + Args: + search_term: str: The term to search for. + dir_path: Optional[str]: The path to the directory to search. + +search_file(search_term: str, file_path: Optional[str] = None) -> None: + Searches for search_term in file. If file is not provided, searches in the current open file. + Args: + search_term: str: The term to search for. + file_path: Optional[str]: The path to the file to search. + +find_file(file_name: str, dir_path: str = './') -> None: + Finds all files with the given name in the specified directory. + Args: + file_name: str: The name of the file to find. + dir_path: Optional[str]: The path to the directory to search. + +parse_pdf(file_path: str) -> None: + Parses the content of a PDF file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_docx(file_path: str) -> None: + Parses the content of a DOCX file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_latex(file_path: str) -> None: + Parses the content of a LaTex file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_pptx(file_path: str) -> None: + Parses the content of a pptx file and prints it. + Args: + file_path: str: The path to the file to open. + +Please note that THE `edit_file` and `append_file` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! 
Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. + +Responses should be concise. +The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , , or per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. +IMPORTANT: Execute code using , , or whenever possible. +When handling files, try to use full paths and pwd to avoid errors. + + +---------- + +Here is an example of how you can interact with the environment for task solving: + +--- START OF EXAMPLE --- + +USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000. + +ASSISTANT: +Sure! Let me create a Python file `app.py`: + +create_file('app.py') + + +USER: +OBSERVATION: +[File: /workspace/app.py (1 lines total)] +1| +[File app.py created.] + +ASSISTANT: +Now I will write the Python code for starting a web server and save it to the file `app.py`: + +EDITED_CODE="""from flask import Flask +app = Flask(__name__) + +@app.route('/') +def index(): + numbers = list(range(1, 11)) + return str(numbers) + +if __name__ == '__main__': + app.run(port=5000)""" +edit_file('app.py', start=1, end=1, content=EDITED_CODE) + + +USER: +OBSERVATION: +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] + +ASSISTANT: +I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. 
Let me run the Python file for you: + +python3 app.py > server.log 2>&1 & + + +USER: +OBSERVATION: +[1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 + +ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log: + +cat server.log + + +USER: +OBSERVATION: +Traceback (most recent call last): + File "/workspace/app.py", line 2, in + from flask import Flask +ModuleNotFoundError: No module named 'flask' + +ASSISTANT: +It seems that Flask is not installed. Let me install Flask for you: + +pip install flask + + +USER: +OBSERVATION: +Defaulting to user installation because normal site-packages is not writeable +Collecting flask + Using cached flask-3.0.3-py3-none-any.whl (101 kB) +Collecting blinker>=1.6.2 + Using cached blinker-1.7.0-py3-none-any.whl (13 kB) +Collecting Werkzeug>=3.0.0 + Using cached werkzeug-3.0.2-py3-none-any.whl (226 kB) +Collecting click>=8.1.3 + Using cached click-8.1.7-py3-none-any.whl (97 kB) +Collecting itsdangerous>=2.1.2 + Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB) +Requirement already satisfied: Jinja2>=3.1.2 in /home/opendevin/.local/lib/python3.10/site-packages (from flask) (3.1.3) +Requirement already satisfied: MarkupSafe>=2.0 in /home/opendevin/.local/lib/python3.10/site-packages (from Jinja2>=3.1.2->flask) (2.1.5) +Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask +Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0 + +ASSISTANT: +Now that Flask is installed, let me run the Python file again: + +python3 app.py > server.log 2>&1 & + + +USER: +OBSERVATION: +[1] 124 + +ASSISTANT: +Let me check the server log again: + +cat server.log + + +USER: +OBSERVATION: +* Serving Flask app 'app' + * Debug mode: off +WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. 
+ * Running on http://127.0.0.1:5000 +Press CTRL+C to quit + +ASSISTANT: +The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask! + +USER: Now browse the newly started server's homepage and show me the content. + +ASSISTANT: +Sure! Let me browse the server's homepage at http://127.0.0.1:5000: + +Get the content on "http://127.0.0.1:5000" + + +USER: +OBSERVATION: +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + +ASSISTANT: +The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask! + +USER: Now kill the server, make it display the numbers in a table format. + +ASSISTANT: +Sure! Let me stop the server first: + +kill 124 + + +USER: +OBSERVATION: +[1]+ Terminated python3 app.py > server.log 2>&1 + +ASSISTANT: +The server has been stopped. Let me open the Python file and modify it. + +open_file('app.py') + + +USER: +[File: /workspace/app.py (10 lines total)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) + +ASSISTANT: +I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: + +edit_file('app.py', start=7, end=7, content=" return '' + ''.join([f'' for i in numbers]) + '
{i}
'") +
+ +USER: +Observation: +[File: /workspace/app.py (10 lines total after edit)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>' +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] + +ASSISTANT: +Running the updated file: + +python3 app.py > server.log 2>&1 & + + +USER: +Observation: +[1] 126 + +ASSISTANT: +The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Let me know if you have any further requests! + +--- END OF EXAMPLE --- + + +NOW, LET'S START! + +---------- + +Fix typos in bad.txt. Do not ask me for confirmation at any point. + +---------- + +Sure! Let's start by opening the `bad.txt` file to identify and fix the typos. + +find_file('bad.txt') + + +---------- + +OBSERVATION: +[Found 1 matches for "bad.txt" in ./] +./bad.txt +[End of matches for "bad.txt" in ./] + + +---------- + +Let's open the `bad.txt` file to review its content and identify any typos. + +open_file('/workspace/bad.txt') + + +---------- + +OBSERVATION: +[File: /workspace/bad.txt (4 lines total)] +1|This is a stupid typoo. +2|Really? +3|No mor typos! +4|Enjoy! + + +---------- + +I have identified the typos in the file. Let's correct them: + +1. "typoo" should be "typo". +2. "mor" should be "more". + +I will now edit the file to fix these typos. + +edit_file('/workspace/bad.txt', start=1, end=4, content="""This is a stupid typo. +Really? +No more typos! +Enjoy!""") + + +---------- + +OBSERVATION: +[File: /workspace/bad.txt (4 lines total after edit)] +1|This is a stupid typo. +2|Really? +3|No more typos! +4|Enjoy! +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] + + +ENVIRONMENT REMINDER: You have 6 turns left to complete the task.
diff --git a/tests/integration/mock/CodeActAgent/test_edits/response_001.log b/tests/integration/mock/CodeActAgent/test_edits/response_001.log index ad0e602b8f..4fddd397eb 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/response_001.log +++ b/tests/integration/mock/CodeActAgent/test_edits/response_001.log @@ -1,4 +1,4 @@ -Sure! Let's start by opening the `bad.txt` file to see its content. +Sure! Let's start by opening the `bad.txt` file to identify and fix the typos. -open_file('bad.txt') +find_file('bad.txt') diff --git a/tests/integration/mock/CodeActAgent/test_edits/response_002.log b/tests/integration/mock/CodeActAgent/test_edits/response_002.log index 8b045100ed..bded27d2d5 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/response_002.log +++ b/tests/integration/mock/CodeActAgent/test_edits/response_002.log @@ -1,7 +1,3 @@ -Let's correct the typos in the `bad.txt` file. - +Let's open the `bad.txt` file to review its content and identify any typos. -edit_file('bad.txt', start=1, end=4, content="""This is a stupid typo. -Really? -No more typos! -Enjoy!""") +open_file('/workspace/bad.txt') diff --git a/tests/integration/mock/CodeActAgent/test_edits/response_003.log b/tests/integration/mock/CodeActAgent/test_edits/response_003.log index 613b77e658..23d3850ead 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/response_003.log +++ b/tests/integration/mock/CodeActAgent/test_edits/response_003.log @@ -1 +1,11 @@ -The typos in `bad.txt` have been corrected. If you need any further assistance, feel free to ask! +I have identified the typos in the file. Let's correct them: + +1. "typoo" should be "typo". +2. "mor" should be "more". + +I will now edit the file to fix these typos. + +edit_file('/workspace/bad.txt', start=1, end=4, content="""This is a stupid typo. +Really? +No more typos! 
+Enjoy!""") diff --git a/tests/integration/mock/CodeActAgent/test_edits/response_004.log b/tests/integration/mock/CodeActAgent/test_edits/response_004.log new file mode 100644 index 0000000000..5ce5e9b189 --- /dev/null +++ b/tests/integration/mock/CodeActAgent/test_edits/response_004.log @@ -0,0 +1 @@ +The typos in `bad.txt` have been fixed successfully. If you need any further assistance, feel free to ask! diff --git a/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log b/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log index 3d443fc3c2..5736defcae 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log @@ -25,11 +25,12 @@ If $GITHUB_TOKEN is not set, ask the user to set it. Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: The total number of lines to display in the window, with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -108,10 +109,10 @@ parse_pptx(file_path: str) -> None: Please note that THE `edit_file` and `append_file` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! 
Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Responses should be concise. -The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. -Include ONLY ONE , , or per response, unless the assistant is finished with the task or need more input or action from the user in order to proceed. +The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , , or per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. IMPORTANT: Execute code using , , or whenever possible. - +When handling files, try to use full paths and pwd to avoid errors. ---------- diff --git a/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log b/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log index c697a03502..89c6f50c81 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log @@ -25,11 +25,12 @@ If $GITHUB_TOKEN is not set, ask the user to set it. Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. 
+ context_lines: int | None = 100: The total number of lines to display in the window, with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -108,9 +109,10 @@ parse_pptx(file_path: str) -> None: Please note that THE `edit_file` and `append_file` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Responses should be concise. -The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. -Include ONLY ONE , , or per response, unless the assistant is finished with the task or need more input or action from the user in order to proceed. +The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , , or per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. IMPORTANT: Execute code using , , or whenever possible. +When handling files, try to use full paths and pwd to avoid errors. ---------- diff --git a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_001.log b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_001.log index ef86e61d5b..5a9cc9e9e8 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_001.log @@ -25,11 +25,14 @@ If $GITHUB_TOKEN is not set, ask the user to set it. 
Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -61,6 +64,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. Must satisfy start >= 1. @@ -108,9 +112,10 @@ parse_pptx(file_path: str) -> None: Please note that THE `edit_file` and `append_file` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Responses should be concise. 
-The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. -Include ONLY ONE , , or per response, unless the assistant is finished with the task or need more input or action from the user in order to proceed. +The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , , or per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. IMPORTANT: Execute code using , , or whenever possible. +When handling files, try to use full paths and pwd to avoid errors. ---------- diff --git a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_002.log b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_002.log index 69dcae34be..56d27758b4 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_002.log @@ -25,11 +25,14 @@ If $GITHUB_TOKEN is not set, ask the user to set it. Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. 
+ context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -61,6 +64,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. Must satisfy start >= 1. @@ -108,9 +112,10 @@ parse_pptx(file_path: str) -> None: Please note that THE `edit_file` and `append_file` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Responses should be concise. -The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. -Include ONLY ONE , , or per response, unless the assistant is finished with the task or need more input or action from the user in order to proceed. +The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , , or per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. IMPORTANT: Execute code using , , or whenever possible. +When handling files, try to use full paths and pwd to avoid errors. 
---------- diff --git a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_003.log b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_003.log index 50b524f68a..285d8681d9 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_003.log +++ b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_003.log @@ -25,11 +25,14 @@ If $GITHUB_TOKEN is not set, ask the user to set it. Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -61,6 +64,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. Must satisfy start >= 1. 
@@ -108,9 +112,10 @@ parse_pptx(file_path: str) -> None: Please note that THE `edit_file` and `append_file` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Responses should be concise. -The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. -Include ONLY ONE , , or per response, unless the assistant is finished with the task or need more input or action from the user in order to proceed. +The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , , or per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. IMPORTANT: Execute code using , , or whenever possible. +When handling files, try to use full paths and pwd to avoid errors. ---------- diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log index 4f61b27afd..9fc396d348 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log @@ -25,11 +25,14 @@ If $GITHUB_TOKEN is not set, ask the user to set it. Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. 
+ It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -61,6 +64,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. Must satisfy start >= 1. @@ -108,9 +112,10 @@ parse_pptx(file_path: str) -> None: Please note that THE `edit_file` and `append_file` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Responses should be concise. -The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. -Include ONLY ONE , , or per response, unless the assistant is finished with the task or need more input or action from the user in order to proceed. +The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. 
+Include ONLY ONE , , or per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. IMPORTANT: Execute code using , , or whenever possible. +When handling files, try to use full paths and pwd to avoid errors. ---------- diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log index ec885986b7..74ffc95241 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log @@ -25,11 +25,14 @@ If $GITHUB_TOKEN is not set, ask the user to set it. Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -61,6 +64,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. 
Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. Must satisfy start >= 1. @@ -108,9 +112,10 @@ parse_pptx(file_path: str) -> None: Please note that THE `edit_file` and `append_file` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Responses should be concise. -The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. -Include ONLY ONE , , or per response, unless the assistant is finished with the task or need more input or action from the user in order to proceed. +The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , , or per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. IMPORTANT: Execute code using , , or whenever possible. +When handling files, try to use full paths and pwd to avoid errors. ---------- @@ -337,6 +342,6 @@ chmod +x hello.sh OBSERVATION: -[Command -1 finished with exit code 0]] +[Command -1 finished with exit code 0] ENVIRONMENT REMINDER: You have 8 turns left to complete the task. 
diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_003.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_003.log index 4b1516bc5c..357157ec59 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_003.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_003.log @@ -25,11 +25,14 @@ If $GITHUB_TOKEN is not set, ask the user to set it. Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -61,6 +64,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. 
Must satisfy start >= 1. @@ -108,9 +112,10 @@ parse_pptx(file_path: str) -> None: Please note that THE `edit_file` and `append_file` FUNCTIONS REQUIRE PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Responses should be concise. -The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. -Include ONLY ONE , , or per response, unless the assistant is finished with the task or need more input or action from the user in order to proceed. +The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , , or per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. IMPORTANT: Execute code using , , or whenever possible. +When handling files, try to use full paths and pwd to avoid errors. ---------- @@ -337,7 +342,7 @@ chmod +x hello.sh OBSERVATION: -[Command -1 finished with exit code 0]] +[Command -1 finished with exit code 0] ---------- @@ -350,6 +355,6 @@ The shell script `hello.sh` has been created and made executable. Let's run it t OBSERVATION: hello -[Command -1 finished with exit code 0]] +[Command -1 finished with exit code 0] ENVIRONMENT REMINDER: You have 7 turns left to complete the task. 
diff --git a/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_001.log b/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_001.log index 1cd3173f5b..57aebc9a55 100644 --- a/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_001.log +++ b/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_001.log @@ -13,11 +13,14 @@ For example, you can list the files in the current directory by l Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -49,6 +52,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. Must satisfy start >= 1. 
@@ -536,4 +540,4 @@ NOW, LET'S START! Fix typos in bad.txt. Do not ask me for confirmation at any point. -ENVIRONMENT REMINDER: You have 9 turns left to complete the task. \ No newline at end of file +ENVIRONMENT REMINDER: You have 9 turns left to complete the task. diff --git a/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_002.log b/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_002.log index 01459cd24f..4595e95863 100644 --- a/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_002.log +++ b/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_002.log @@ -13,11 +13,14 @@ For example, you can list the files in the current directory by l Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -49,6 +52,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. 
Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. Must satisfy start >= 1. @@ -553,4 +557,4 @@ OBSERVATION: 4|Enjoy! -ENVIRONMENT REMINDER: You have 8 turns left to complete the task. \ No newline at end of file +ENVIRONMENT REMINDER: You have 8 turns left to complete the task. diff --git a/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_003.log b/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_003.log index 5b37a872aa..04a22f9316 100644 --- a/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_003.log +++ b/tests/integration/mock/CodeActSWEAgent/test_edits/prompt_003.log @@ -13,11 +13,14 @@ For example, you can list the files in the current directory by l Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. 
goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -49,6 +52,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. Must satisfy start >= 1. @@ -560,9 +564,12 @@ Let's correct the typos in the `bad.txt` file. 1. "stupid typoo" should be "stupid typo". 2. "No mor typos!" should be "No more typos!". -I will make these changes now. +I will make these corrections now. -edit_file("bad.txt", start=1, end=4, content="This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!") +edit_file("bad.txt", start=1, end=4, content="""This is a stupid typo. +Really? +No more typos! +Enjoy!""") ---------- @@ -576,4 +583,4 @@ OBSERVATION: [File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] -ENVIRONMENT REMINDER: You have 7 turns left to complete the task. \ No newline at end of file +ENVIRONMENT REMINDER: You have 7 turns left to complete the task. diff --git a/tests/integration/mock/CodeActSWEAgent/test_ipython/prompt_001.log b/tests/integration/mock/CodeActSWEAgent/test_ipython/prompt_001.log index 25dc81a0aa..1a8131aa36 100644 --- a/tests/integration/mock/CodeActSWEAgent/test_ipython/prompt_001.log +++ b/tests/integration/mock/CodeActSWEAgent/test_ipython/prompt_001.log @@ -25,11 +25,12 @@ If $GITHUB_TOKEN is not set, ask the user to set it. 
Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: The total number of lines to display in the window, with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -100,10 +101,10 @@ parse_pptx(file_path: str) -> None: Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Responses should be concise. -The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. -Include ONLY ONE , , or per response, unless the assistant is finished with the task or need more input or action from the user in order to proceed. +The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , , or per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. IMPORTANT: Execute code using , , or whenever possible. - +When handling files, try to use full paths and pwd to avoid errors. 
---------- diff --git a/tests/integration/mock/CodeActSWEAgent/test_ipython/prompt_002.log b/tests/integration/mock/CodeActSWEAgent/test_ipython/prompt_002.log index 84f44f3a48..6bc910e0a5 100644 --- a/tests/integration/mock/CodeActSWEAgent/test_ipython/prompt_002.log +++ b/tests/integration/mock/CodeActSWEAgent/test_ipython/prompt_002.log @@ -25,11 +25,12 @@ If $GITHUB_TOKEN is not set, ask the user to set it. Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: The total number of lines to display in the window, with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -100,10 +101,10 @@ parse_pptx(file_path: str) -> None: Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. Responses should be concise. -The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. -Include ONLY ONE , , or per response, unless the assistant is finished with the task or need more input or action from the user in order to proceed. 
+The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , , or per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. IMPORTANT: Execute code using , , or whenever possible. - +When handling files, try to use full paths and pwd to avoid errors. ---------- diff --git a/tests/integration/mock/CodeActSWEAgent/test_ipython_module/prompt_001.log b/tests/integration/mock/CodeActSWEAgent/test_ipython_module/prompt_001.log index 7c7ab4d0ee..4b6d513ac0 100644 --- a/tests/integration/mock/CodeActSWEAgent/test_ipython_module/prompt_001.log +++ b/tests/integration/mock/CodeActSWEAgent/test_ipython_module/prompt_001.log @@ -13,11 +13,14 @@ For example, you can list the files in the current directory by l Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. 
@@ -49,6 +52,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. Must satisfy start >= 1. @@ -536,4 +540,4 @@ NOW, LET'S START! Install and import pymsgbox==1.0.9 and print it's version in /workspace/test.txt. Do not ask me for confirmation at any point. -ENVIRONMENT REMINDER: You have 9 turns left to complete the task. \ No newline at end of file +ENVIRONMENT REMINDER: You have 9 turns left to complete the task. diff --git a/tests/integration/mock/CodeActSWEAgent/test_ipython_module/prompt_002.log b/tests/integration/mock/CodeActSWEAgent/test_ipython_module/prompt_002.log index 75b95743ea..048bb07491 100644 --- a/tests/integration/mock/CodeActSWEAgent/test_ipython_module/prompt_002.log +++ b/tests/integration/mock/CodeActSWEAgent/test_ipython_module/prompt_002.log @@ -13,11 +13,14 @@ For example, you can list the files in the current directory by l Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. 
+ line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -49,6 +52,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. Must satisfy start >= 1. @@ -538,22 +542,15 @@ Install and import pymsgbox==1.0.9 and print it's version in /workspace/test.txt ---------- -Understood! Let's proceed by installing the `pymsgbox` package version 1.0.9: - - -``` -pythonCopy code`` -``` -Once the installation is complete, we'll import `pymsgbox` and print its version to a file named `test.txt` in the `/workspace` directory. Let's continue! - -# Install pymsgbox version 1.0.9 -%pip install pymsgbox==1.0.9 - +Let's start by installing the `pymsgbox` package. + +pip install pymsgbox==1.0.9 + ---------- OBSERVATION: -[Package installed successfully] -[Kernel restarted successfully to load the package] +Package installed successfully +[Command -1 finished with exit code 0] ENVIRONMENT REMINDER: You have 8 turns left to complete the task. 
diff --git a/tests/integration/mock/CodeActSWEAgent/test_ipython_module/prompt_003.log b/tests/integration/mock/CodeActSWEAgent/test_ipython_module/prompt_003.log index 4e4be479ef..5dd77fe0d9 100644 --- a/tests/integration/mock/CodeActSWEAgent/test_ipython_module/prompt_003.log +++ b/tests/integration/mock/CodeActSWEAgent/test_ipython_module/prompt_003.log @@ -13,11 +13,14 @@ For example, you can list the files in the current directory by l Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -49,6 +52,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. 
Must satisfy start >= 1. @@ -538,47 +542,34 @@ Install and import pymsgbox==1.0.9 and print it's version in /workspace/test.txt ---------- -Understood! Let's proceed by installing the `pymsgbox` package version 1.0.9: - - -``` -pythonCopy code`` -``` -Once the installation is complete, we'll import `pymsgbox` and print its version to a file named `test.txt` in the `/workspace` directory. Let's continue! - -# Install pymsgbox version 1.0.9 -%pip install pymsgbox==1.0.9 - +Let's start by installing the `pymsgbox` package. + +pip install pymsgbox==1.0.9 + ---------- OBSERVATION: -[Package installed successfully] -[Kernel restarted successfully to load the package] +Package installed successfully +[Command -1 finished with exit code 0] ---------- -Now that `pymsgbox` is installed, let's import it and print its version to a file named `test.txt`: - - -``` -pythonCopy code`` -``` -This will import `pymsgbox`, retrieve its version, and then write the version to the `test.txt` file in the `/workspace` directory. +Next, let's import `pymsgbox`, print its version, and save the output to `/workspace/test.txt`. import pymsgbox -# Get the version of pymsgbox -pymsgbox_version = pymsgbox.__version__ +version_info = pymsgbox.__version__ -# Write the version to a file -with open("/workspace/test.txt", "w") as f: - f.write(pymsgbox_version) +with open("/workspace/test.txt", "w") as file: + file.write(f"pymsgbox version: {version_info}\n") + +version_info ---------- OBSERVATION: -[Code executed successfully with no output] +'1.0.9' ENVIRONMENT REMINDER: You have 7 turns left to complete the task. 
diff --git a/tests/integration/mock/CodeActSWEAgent/test_ipython_module/prompt_004.log b/tests/integration/mock/CodeActSWEAgent/test_ipython_module/prompt_004.log new file mode 100644 index 0000000000..632952d384 --- /dev/null +++ b/tests/integration/mock/CodeActSWEAgent/test_ipython_module/prompt_004.log @@ -0,0 +1,588 @@ + + +---------- + +A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. +The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "" tag, for example: + +print("Hello World!") + +The assistant can execute bash commands on behalf of the user by wrapping them with and . +For example, you can list the files in the current directory by ls . + + + +Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: + Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. + Args: + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. + +goto_line(line_number: int) -> None: + Moves the window to show the specified line number. + Args: + line_number: int: The line number to move to. + +scroll_down() -> None: + Moves the window down by 100 lines. + Args: + None + +scroll_up() -> None: + Moves the window up by 100 lines. 
+ Args: + None + +create_file(filename: str) -> None: + Creates and opens a new file with the given name. + Args: + filename: str: The name of the file to create. + +append_file(file_name: str, content: str) -> None: + Append content to the given file. + It appends text `content` to the end of the specified file. + Args: + file_name: str: The name of the file to append to. + content: str: The content to append to the file. + +edit_file(file_name: str, start: int, end: int, content: str) -> None: + Edit a file. + Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! + Args: + file_name: str: The name of the file to edit. + start: int: The start line number. Must satisfy start >= 1. + end: int: The end line number. Must satisfy start <= end <= number of lines in the file. + content: str: The content to replace the lines with. + +search_dir(search_term: str, dir_path: str = './') -> None: + Searches for search_term in all files in dir. If dir is not provided, searches in the current directory. + Args: + search_term: str: The term to search for. + dir_path: Optional[str]: The path to the directory to search. + +search_file(search_term: str, file_path: Optional[str] = None) -> None: + Searches for search_term in file. If file is not provided, searches in the current open file. + Args: + search_term: str: The term to search for. + file_path: Optional[str]: The path to the file to search. + +find_file(file_name: str, dir_path: str = './') -> None: + Finds all files with the given name in the specified directory. + Args: + file_name: str: The name of the file to find. + dir_path: Optional[str]: The path to the directory to search. + +parse_pdf(file_path: str) -> None: + Parses the content of a PDF file and prints it. + Args: + file_path: str: The path to the file to open. 
+ +parse_docx(file_path: str) -> None: + Parses the content of a DOCX file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_latex(file_path: str) -> None: + Parses the content of a LaTex file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_pptx(file_path: str) -> None: + Parses the content of a pptx file and prints it. + Args: + file_path: str: The path to the file to open. + +Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. + +The assistant's response should be concise. +The assistant should include ONLY ONE or in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed. +IMPORTANT: Whenever possible, execute the code for the user using or instead of providing it. + + +---------- + +Here is an example of how you can interact with the environment for task solving: + +--- START OF EXAMPLE --- + +USER: We are currently solving the following issue within our repository. Here is the issue text: +--- BEGIN ISSUE --- +TimeDelta serialization precision +Hi there! + +I just found quite strange behaviour of `TimeDelta` field serialization + +```python3 +from marshmallow.fields import TimeDelta +from datetime import timedelta + +td_field = TimeDelta(precision="milliseconds") + +obj = dict() +obj["td_field"] = timedelta(milliseconds=345) + +print(td_field.serialize("td_field", obj)) +``` + +Output of this snippet is `344`, but it seems that `345` is correct. 
+ +Looks like a rounding issue here: https://github.com/marshmallow-code/marshmallow/blob/dev/src/marshmallow/fields.py#L1474 +--- END ISSUE --- + +[Current directory: /workspace/marshmallow-code__marshmallow] + +ASSISTANT: +Let's first start by reproducing the results of the issue. The issue includes some example code for reproduction, which we can use. We'll create a new file called `reproduce.py` and paste the example code into it. + +create_file('/workspace/marshmallow-code__marshmallow/reproduce.py') + + +USER: +OBSERVATION: +[File: /workspace/marshmallow-code__marshmallow/reproduce.py (1 lines total)] +1| +[File /workspace/marshmallow-code__marshmallow/reproduce.py created.] + +ASSISTANT: +Now let's paste in the example code from the issue. + +EDITED_CODE = """from marshmallow.fields import TimeDelta +from datetime import timedelta + +td_field = TimeDelta(precision="milliseconds") + +obj = dict() +obj["td_field"] = timedelta(milliseconds=345) + +print(td_field.serialize("td_field", obj)) +""" +edit_file(start=1, end=1, content=EDITED_CODE) + + +USER: +OBSERVATION: +[File: /workspace/marshmallow-code__marshmallow/reproduce.py (9 lines total)] +1|from marshmallow.fields import TimeDelta +2|from datetime import timedelta +3| +4|td_field = TimeDelta(precision="milliseconds") +5| +6|obj = dict() +7|obj["td_field"] = timedelta(milliseconds=345) +8| +9|print(td_field.serialize("td_field", obj)) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] + +ASSISTANT: +Now let's run the code to see if we see the same output as the issue. + +python reproduce.py + + +USER: +OBSERVATION: +344 + +ASSISTANT: +We are indeed seeing the same output as the issue. The issue suggests that we should look at line 1474 of the `fields.py` file to see if there is a rounding issue. Let's open the `fields.py` file and navigate to line 1474 to see the relevant code. 
First we should find the `fields.py` file in the repository. It is likely to be in the `src/marshmallow` directory, but we should check to be sure. + +ls -F + + +USER: +OBSERVATION: +AUTHORS.rst +CHANGELOG.rst +CODE_OF_CONDUCT.md +CONTRIBUTING.rst +LICENSE +MANIFEST.in +NOTICE +README.rst +RELEASING.md +azure-pipelines.yml +docs/ +examples/ +performance/ +pyproject.toml +reproduce.py +setup.cfg +setup.py +src/ +tests/ +tox.ini + +ASSISTANT: +It looks like the `src` directory is present, which suggests that the `fields.py` file is likely to be in the `src` directory. Let's use find_file to see where it is. + +find_file("fields.py", "/workspace/marshmallow-code__marshmallow/src") + + +USER: +OBSERVATION: +[Found 1 matches for "fields.py" in /workspace/marshmallow-code__marshmallow/src] +/workspace/marshmallow-code__marshmallow/src/marshmallow/fields.py + +ASSISTANT: +It looks like the `fields.py` file is present in the `./src/marshmallow/` directory. The issue also points to a specific URL with line number 1474. We should navigate to that line in fields.py to see the relevant code for the `TimeDelta` serialization. 
+ +open_file("/workspace/marshmallow-code__marshmallow/src/marshmallow/fields.py", 1474) + + +USER: +OBSERVATION: +[File: /workspace/marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)] +(1458 more lines above) +1459| self.WEEKS, +1460| ) +1461| +1462| if precision not in units: +1463| msg = 'The precision must be {} or "{}".'.format( +1464| ", ".join([f'"{each}"' for each in units[:-1]]), units[-1] +1465| ) +1466| raise ValueError(msg) +1467| +1468| self.precision = precision +1469| super().__init__(**kwargs) +1470| +1471| def _serialize(self, value, attr, obj, **kwargs): +1472| if value is None: +1473| return None +1474| base_unit = dt.timedelta(**{self.precision: 1}) +1475| return int(value.total_seconds() / base_unit.total_seconds()) +1476| +1477| def _deserialize(self, value, attr, data, **kwargs): +1478| try: +1479| value = int(value) +1480| except (TypeError, ValueError) as error: +1481| raise self.make_error("invalid") from error +1482| +1483| kwargs = {self.precision: value} +1484| +1485| try: +1486| return dt.timedelta(**kwargs) +1487| except OverflowError as error: +1488| raise self.make_error("invalid") from error +1489| +1490| +1491|class Mapping(Field): +1492| """An abstract class for objects with key-value pairs. +1493| +1494| :param keys: A field class or instance for dict keys. +1495| :param values: A field class or instance for dict values. +1496| :param kwargs: The same keyword arguments that :class:`Field` receives. +1497| +1498| .. note:: +1499| When the structure of nested data is not known, you may omit the +1500| `keys` and `values` arguments to prevent content validation. +1501| +1502| .. versionadded:: 3.0.0rc4 +1503| """ +1504| +1505| mapping_type = dict +1506| +1507| #: Default error messages. 
+1508| default_error_messages = {"invalid": "Not a valid mapping type."} +1509| +1510| def __init__( +1511| self, +1512| keys: typing.Optional[typing.Union[Field, type]] = None, +1513| values: typing.Optional[typing.Union[Field, type]] = None, +1514| **kwargs, +1515| ): +1516| super().__init__(**kwargs) +1517| if keys is None: +1518| self.key_field = None +1519| else: +1520| try: +1521| self.key_field = resolve_field_instance(keys) +1522| except FieldInstanceResolutionError as error: +1523| raise ValueError( +1524| '"keys" must be a subclass or instance of ' +1525| "marshmallow.base.FieldABC." +1526| ) from error +1527| +1528| if values is None: +1529| self.value_field = None +1530| else: +1531| try: +1532| self.value_field = resolve_field_instance(values) +1533| except FieldInstanceResolutionError as error: +1534| raise ValueError( +1535| '"values" must be a subclass or instance of ' +1536| "marshmallow.base.FieldABC." +1537| ) from error +1538| if isinstance(self.value_field, Nested): +1539| self.only = self.value_field.only +1540| self.exclude = self.value_field.exclude +1541| +1542| def _bind_to_schema(self, field_name, schema): +1543| super()._bind_to_schema(field_name, schema) +1544| if self.value_field: +1545| self.value_field = copy.deepcopy(self.value_field) +1546| self.value_field._bind_to_schema(field_name, self) +1547| if isinstance(self.value_field, Nested): +1548| self.value_field.only = self.only +1549| self.value_field.exclude = self.exclude +1550| if self.key_field: +1551| self.key_field = copy.deepcopy(self.key_field) +1552| self.key_field._bind_to_schema(field_name, self) +1553| +1554| def _serialize(self, value, attr, obj, **kwargs): +1555| if value is None: +1556| return None +1557| if not self.value_field and not self.key_field: +1558| return self.mapping_type(value) +(439 more lines below) + +ASSISTANT: +We are now looking at the relevant section of the `fields.py` file where the `TimeDelta` serialization occurs. 
The issue suggests that there is a rounding problem near line 1474. The current implementation uses integer division, which truncates the decimal part and can lead to incorrect rounding on line 1475. To fix this issue, we should ensure that the division results in a float, and then round the result to the nearest integer before converting it to an int. This should preserve the milliseconds precision as expected. Let's make the necessary edit to the code. + +edit_file(start=1475, end=1475, content="return int(round(value.total_seconds() / base_unit.total_seconds())) # round to nearest int") + + +USER: +OBSERVATION: +[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.] + +ERRORS: +- E999 IndentationError: unexpected indent + +[This is how your edit would have looked if applied] +------------------------------------------------- +[File: /marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)] +(1468 more lines above) +1469| super().__init__(**kwargs) +1470| +1471| def _serialize(self, value, attr, obj, **kwargs): +1472| if value is None: +1473| return None +1474| base_unit = dt.timedelta(**{self.precision: 1}) +1475|return int(round(value.total_seconds() / base_unit.total_seconds())) # round to nearest int +1476| +1477| def _deserialize(self, value, attr, data, **kwargs): +1478| try: +1479| value = int(value) +(518 more lines below) +------------------------------------------------- + +[This is the original code before your edit] +------------------------------------------------- +[File: /marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)] +(1469 more lines above) +1470| +1471| def _serialize(self, value, attr, obj, **kwargs): +1472| if value is None: +1473| return None +1474| base_unit = dt.timedelta(**{self.precision: 1}) +1475| return int(value.total_seconds() / base_unit.total_seconds()) +1476| +1477| def _deserialize(self, value, attr, data, **kwargs): +1478| 
try: +1479| value = int(value) +1480| except (TypeError, ValueError) as error: +(517 more lines below) +------------------------------------------------- +Your changes have NOT been applied. Please fix your edit command and try again. +You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code. +DO NOT re-run the same failed edit command. Running it again will lead to the same error. + +ASSISTANT: +My edit command did not use the proper indentation, I will fix my syntax in this follow up edit command. + +EDITED_CODE=" return int(round(value.total_seconds() / base_unit.total_seconds())) # round to nearest int" +edit_file(start=1475, end=1475, content=EDITED_CODE) + + +USER: +OBSERVATION: +[File: /marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)] +(1424 more lines above) +1425| +1426| :param precision: Influences how the integer is interpreted during +1427| (de)serialization. Must be 'days', 'seconds', 'microseconds', +1428| 'milliseconds', 'minutes', 'hours' or 'weeks'. +1429| :param kwargs: The same keyword arguments that :class:`Field` receives. +1430| +1431| .. versionchanged:: 2.0.0 +1432| Always serializes to an integer value to avoid rounding errors. +1433| Add `precision` parameter. +1434| """ +1435| +1436| DAYS = "days" +1437| SECONDS = "seconds" +1438| MICROSECONDS = "microseconds" +1439| MILLISECONDS = "milliseconds" +1440| MINUTES = "minutes" +1441| HOURS = "hours" +1442| WEEKS = "weeks" +1443| +1444| #: Default error messages. 
+1445| default_error_messages = { +1446| "invalid": "Not a valid period of time.", +1447| "format": "{input!r} cannot be formatted as a timedelta.", +1448| } +1449| +1450| def __init__(self, precision: str = SECONDS, **kwargs): +1451| precision = precision.lower() +1452| units = ( +1453| self.DAYS, +1454| self.SECONDS, +1455| self.MICROSECONDS, +1456| self.MILLISECONDS, +1457| self.MINUTES, +1458| self.HOURS, +1459| self.WEEKS, +1460| ) +1461| +1462| if precision not in units: +1463| msg = 'The precision must be {} or "{}".'.format( +1464| ", ".join([f'"{each}"' for each in units[:-1]]), units[-1] +1465| ) +1466| raise ValueError(msg) +1467| +1468| self.precision = precision +1469| super().__init__(**kwargs) +1470| +1471| def _serialize(self, value, attr, obj, **kwargs): +1472| if value is None: +1473| return None +1474| base_unit = dt.timedelta(**{self.precision: 1}) +1475| return int(round(value.total_seconds() / base_unit.total_seconds())) # round to nearest int +1476| +1477| def _deserialize(self, value, attr, data, **kwargs): +1478| try: +1479| value = int(value) +1480| except (TypeError, ValueError) as error: +1481| raise self.make_error("invalid") from error +1482| +1483| kwargs = {self.precision: value} +1484| +1485| try: +1486| return dt.timedelta(**kwargs) +1487| except OverflowError as error: +1488| raise self.make_error("invalid") from error +1489| +1490| +1491|class Mapping(Field): +1492| """An abstract class for objects with key-value pairs. +1493| +1494| :param keys: A field class or instance for dict keys. +1495| :param values: A field class or instance for dict values. +1496| :param kwargs: The same keyword arguments that :class:`Field` receives. +1497| +1498| .. note:: +1499| When the structure of nested data is not known, you may omit the +1500| `keys` and `values` arguments to prevent content validation. +1501| +1502| .. versionadded:: 3.0.0rc4 +1503| """ +1504| +1505| mapping_type = dict +1506| +1507| #: Default error messages. 
+1508| default_error_messages = {"invalid": "Not a valid mapping type."} +1509| +1510| def __init__( +1511| self, +1512| keys: typing.Optional[typing.Union[Field, type]] = None, +1513| values: typing.Optional[typing.Union[Field, type]] = None, +1514| **kwargs, +1515| ): +1516| super().__init__(**kwargs) +1517| if keys is None: +1518| self.key_field = None +1519| else: +1520| try: +1521| self.key_field = resolve_field_instance(keys) +1522| except FieldInstanceResolutionError as error: +1523| raise ValueError( +1524| '"keys" must be a subclass or instance of ' +(473 more lines below) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] + +ASSISTANT: +The code has been updated to use the `round` function, which should fix the rounding issue. Before submitting the changes, it would be prudent to run the reproduce.py code again to ensure that this change has the desired effect. I see that "Current directory" is still `/marshmallow-code__marshmallow`, so we can just run `reproduce.py` directly. + +python reproduce.py + + +USER: +OBSERVATION: +345 + +ASSISTANT: +The output has changed from 344 to 345, which suggests that the rounding issue has been fixed. Let's remove the reproduce.py file since it is no longer needed. + +rm reproduce.py + + +USER: +OBSERVATION: + +ASSISTANT: +rm doesn't have any output when it deletes successfully, so that must have worked. Now that we have fixed the issue, we can exit the current task. + +exit + + +--- END OF EXAMPLE --- + + +NOW, LET'S START! + +---------- + +Install and import pymsgbox==1.0.9 and print it's version in /workspace/test.txt. Do not ask me for confirmation at any point. + +---------- + +Let's start by installing the `pymsgbox` package. 
+ +pip install pymsgbox==1.0.9 + + +---------- + +OBSERVATION: +Package installed successfully +[Command -1 finished with exit code 0] + +---------- + +Next, let's import `pymsgbox`, print its version, and save the output to `/workspace/test.txt`. + +import pymsgbox + +version_info = pymsgbox.__version__ + +with open("/workspace/test.txt", "w") as file: + file.write(f"pymsgbox version: {version_info}\n") + +version_info + + +---------- + +OBSERVATION: +'1.0.9' + +---------- + +The version of `pymsgbox` has been successfully written to `/workspace/test.txt`. Let's verify the content of the file to ensure everything is correct. + +cat /workspace/test.txt + + +---------- + +OBSERVATION: +pymsgbox version: 1.0.9 +[Command -1 finished with exit code 0] + +ENVIRONMENT REMINDER: You have 6 turns left to complete the task. diff --git a/tests/integration/mock/CodeActSWEAgent/test_ipython_module/response_001.log b/tests/integration/mock/CodeActSWEAgent/test_ipython_module/response_001.log index aa550d4169..8c2b374354 100644 --- a/tests/integration/mock/CodeActSWEAgent/test_ipython_module/response_001.log +++ b/tests/integration/mock/CodeActSWEAgent/test_ipython_module/response_001.log @@ -1,10 +1,4 @@ -Understood! Let's proceed by installing the `pymsgbox` package version 1.0.9: +Let's start by installing the `pymsgbox` package. - -``` -pythonCopy code` -# Install pymsgbox version 1.0.9 -%pip install pymsgbox==1.0.9 -` -``` -Once the installation is complete, we'll import `pymsgbox` and print its version to a file named `test.txt` in the `/workspace` directory. Let's continue! 
+ +pip install pymsgbox==1.0.9 diff --git a/tests/integration/mock/CodeActSWEAgent/test_ipython_module/response_002.log b/tests/integration/mock/CodeActSWEAgent/test_ipython_module/response_002.log index f949b99833..514cda5b27 100644 --- a/tests/integration/mock/CodeActSWEAgent/test_ipython_module/response_002.log +++ b/tests/integration/mock/CodeActSWEAgent/test_ipython_module/response_002.log @@ -1,16 +1,9 @@ -Now that `pymsgbox` is installed, let's import it and print its version to a file named `test.txt`: +Next, let's import `pymsgbox` and print its version to `/workspace/test.txt`. - -``` -pythonCopy code` + import pymsgbox -# Get the version of pymsgbox -pymsgbox_version = pymsgbox.__version__ +version = pymsgbox.__version__ -# Write the version to a file -with open("/workspace/test.txt", "w") as f: - f.write(pymsgbox_version) -` -``` -This will import `pymsgbox`, retrieve its version, and then write the version to the `test.txt` file in the `/workspace` directory. +with open("/workspace/test.txt", "w") as file: + file.write(f"pymsgbox version: {version}\n") diff --git a/tests/integration/mock/CodeActSWEAgent/test_ipython_module/response_003.log b/tests/integration/mock/CodeActSWEAgent/test_ipython_module/response_003.log index a0a9e5cd2c..d2578640a5 100644 --- a/tests/integration/mock/CodeActSWEAgent/test_ipython_module/response_003.log +++ b/tests/integration/mock/CodeActSWEAgent/test_ipython_module/response_003.log @@ -1 +1,4 @@ -The `pymsgbox` package version 1.0.9 has been successfully installed and its version has been written to the `test.txt` file in the `/workspace` directory. If you need any further assistance, feel free to ask! +The version of `pymsgbox` has been written to `/workspace/test.txt`. Let's verify the content of the file to ensure it was written correctly. 
+ + +cat /workspace/test.txt diff --git a/tests/integration/mock/CodeActSWEAgent/test_ipython_module/response_004.log b/tests/integration/mock/CodeActSWEAgent/test_ipython_module/response_004.log new file mode 100644 index 0000000000..62ad1b8be0 --- /dev/null +++ b/tests/integration/mock/CodeActSWEAgent/test_ipython_module/response_004.log @@ -0,0 +1 @@ +The version of `pymsgbox` has been successfully written to `/workspace/test.txt`. The task is now complete. diff --git a/tests/integration/mock/CodeActSWEAgent/test_write_simple_script/prompt_001.log b/tests/integration/mock/CodeActSWEAgent/test_write_simple_script/prompt_001.log index 02a1ccbcf3..0637a2a5a0 100644 --- a/tests/integration/mock/CodeActSWEAgent/test_write_simple_script/prompt_001.log +++ b/tests/integration/mock/CodeActSWEAgent/test_write_simple_script/prompt_001.log @@ -13,11 +13,14 @@ For example, you can list the files in the current directory by l Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. 
goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -49,6 +52,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. Must satisfy start >= 1. @@ -536,4 +540,4 @@ NOW, LET'S START! Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. -ENVIRONMENT REMINDER: You have 9 turns left to complete the task. \ No newline at end of file +ENVIRONMENT REMINDER: You have 9 turns left to complete the task. diff --git a/tests/integration/mock/CodeActSWEAgent/test_write_simple_script/prompt_002.log b/tests/integration/mock/CodeActSWEAgent/test_write_simple_script/prompt_002.log index efcce4b92f..7cfa1da2ec 100644 --- a/tests/integration/mock/CodeActSWEAgent/test_write_simple_script/prompt_002.log +++ b/tests/integration/mock/CodeActSWEAgent/test_write_simple_script/prompt_002.log @@ -13,11 +13,14 @@ For example, you can list the files in the current directory by l Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. 
- line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -49,6 +52,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. start: int: The start line number. Must satisfy start >= 1. @@ -538,7 +542,7 @@ Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmat ---------- -Let's create the shell script `hello.sh` that prints "hello". +Let's create the shell script `hello.sh` that prints 'hello'. echo -e '#!/bin/bash\n\necho "hello"' > hello.sh chmod +x hello.sh @@ -548,6 +552,6 @@ chmod +x hello.sh OBSERVATION: -[Command -1 finished with exit code 0]] +[Command -1 finished with exit code 0] -ENVIRONMENT REMINDER: You have 8 turns left to complete the task. \ No newline at end of file +ENVIRONMENT REMINDER: You have 8 turns left to complete the task. 
diff --git a/tests/integration/mock/CodeActSWEAgent/test_write_simple_script/prompt_003.log b/tests/integration/mock/CodeActSWEAgent/test_write_simple_script/prompt_003.log index cf61bb23aa..d3b203cbd6 100644 --- a/tests/integration/mock/CodeActSWEAgent/test_write_simple_script/prompt_003.log +++ b/tests/integration/mock/CodeActSWEAgent/test_write_simple_script/prompt_003.log @@ -13,11 +13,14 @@ For example, you can list the files in the current directory by l Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: -open_file(path: str, line_number: Optional[int] = None) -> None: +open_file(path: str, line_number: int | None = 1, context_lines: int | None = 100) -> None: Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line. + It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down` + to view the file if you want to see more. Args: - path: str: The path to the file to open. - line_number: Optional[int]: The line number to move to. + path: str: The path to the file to open, preferredly absolute path. + line_number: int | None = 1: The line number to move to. Defaults to 1. + context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100. goto_line(line_number: int) -> None: Moves the window to show the specified line number. @@ -49,6 +52,7 @@ append_file(file_name: str, content: str) -> None: edit_file(file_name: str, start: int, end: int, content: str) -> None: Edit a file. Replaces in given file `file_name` the lines `start` through `end` (inclusive) with the given text `content`. + If a line must be inserted, an already existing line must be passed in `content` with new content accordingly! Args: file_name: str: The name of the file to edit. 
start: int: The start line number. Must satisfy start >= 1. @@ -538,7 +542,7 @@ Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmat ---------- -Let's create the shell script `hello.sh` that prints "hello". +Let's create the shell script `hello.sh` that prints 'hello'. echo -e '#!/bin/bash\n\necho "hello"' > hello.sh chmod +x hello.sh @@ -548,7 +552,7 @@ chmod +x hello.sh OBSERVATION: -[Command -1 finished with exit code 0]] +[Command -1 finished with exit code 0] ---------- @@ -561,6 +565,6 @@ The shell script `hello.sh` has been created and made executable. Let's run it t OBSERVATION: hello -[Command -1 finished with exit code 0]] +[Command -1 finished with exit code 0] -ENVIRONMENT REMINDER: You have 7 turns left to complete the task. \ No newline at end of file +ENVIRONMENT REMINDER: You have 7 turns left to complete the task. diff --git a/tests/integration/mock/CodeActSWEAgent/test_write_simple_script/response_001.log b/tests/integration/mock/CodeActSWEAgent/test_write_simple_script/response_001.log index 99d95e0ca7..f1533ff5fd 100644 --- a/tests/integration/mock/CodeActSWEAgent/test_write_simple_script/response_001.log +++ b/tests/integration/mock/CodeActSWEAgent/test_write_simple_script/response_001.log @@ -1,6 +1,5 @@ -Let's create the shell script `hello.sh` that prints "hello". +Let's create the shell script `hello.sh` that prints 'hello'. 
echo -e '#!/bin/bash\n\necho "hello"' > hello.sh chmod +x hello.sh - diff --git a/tests/integration/regenerate.sh b/tests/integration/regenerate.sh index 7ba8957265..434ccd2f53 100755 --- a/tests/integration/regenerate.sh +++ b/tests/integration/regenerate.sh @@ -18,6 +18,10 @@ WORKSPACE_MOUNT_PATH+="/_test_workspace" WORKSPACE_BASE+="/_test_workspace" WORKSPACE_MOUNT_PATH_IN_SANDBOX="/workspace" +echo "WORKSPACE_BASE: $WORKSPACE_BASE" +echo "WORKSPACE_MOUNT_PATH: $WORKSPACE_MOUNT_PATH" +echo "WORKSPACE_MOUNT_PATH_IN_SANDBOX: $WORKSPACE_MOUNT_PATH_IN_SANDBOX" + mkdir -p $WORKSPACE_BASE # use environmental variable if exists, otherwise use "ssh" diff --git a/tests/integration/test_agent.py b/tests/integration/test_agent.py index c535413d57..667d59da91 100644 --- a/tests/integration/test_agent.py +++ b/tests/integration/test_agent.py @@ -6,7 +6,6 @@ import subprocess import pytest from opendevin.controller.state.state import State -from opendevin.core.config import AppConfig, load_from_toml from opendevin.core.main import main from opendevin.core.schema import AgentState from opendevin.events.action import ( @@ -15,17 +14,13 @@ from opendevin.events.action import ( ) workspace_base = os.getenv('WORKSPACE_BASE') +workspace_mount_path = os.getenv('WORKSPACE_MOUNT_PATH') +workspace_mount_path_in_sandbox = os.getenv('WORKSPACE_MOUNT_PATH_IN_SANDBOX') -# make sure we're testing in the same folder of an existing config.toml -if os.path.exists('config.toml'): - config = AppConfig() - load_from_toml(config, 'config.toml') - if config and config.workspace_base and config.workspace_base != workspace_base: - if os.path.exists(config.workspace_base) and os.access( - config.workspace_base, os.W_OK - ): - print(f'Setting workspace_base to {config.workspace_base}') - workspace_base = config.workspace_base +print('\nPaths used:') +print(f'workspace_base: {workspace_base}') +print(f'workspace_mount_path: {workspace_mount_path}') +print(f'workspace_mount_path_in_sandbox: 
{workspace_mount_path_in_sandbox}') @pytest.mark.skipif( @@ -33,7 +28,8 @@ if os.path.exists('config.toml'): reason='BrowsingAgent is a specialized agent', ) @pytest.mark.skipif( - (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent') and os.getenv('SANDBOX_TYPE').lower() != 'ssh', + (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent') + and os.getenv('SANDBOX_TYPE').lower() != 'ssh', reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful', ) @pytest.mark.skipif( @@ -63,7 +59,8 @@ def test_write_simple_script(): reason='BrowsingAgent is a specialized agent', ) @pytest.mark.skipif( - (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent') and os.getenv('SANDBOX_TYPE').lower() != 'ssh', + (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent') + and os.getenv('SANDBOX_TYPE').lower() != 'ssh', reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful', ) @pytest.mark.skipif( @@ -175,7 +172,8 @@ def test_ipython_module(): reason='currently only BrowsingAgent and CodeActAgent are capable of searching the internet', ) @pytest.mark.skipif( - (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent') and os.getenv('SANDBOX_TYPE').lower() != 'ssh', + (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent') + and os.getenv('SANDBOX_TYPE').lower() != 'ssh', reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful', ) def test_browse_internet(http_server): diff --git a/tests/unit/test_agent_skill.py b/tests/unit/test_agent_skill.py index 4377114aac..1aed10af2b 100644 --- a/tests/unit/test_agent_skill.py +++ b/tests/unit/test_agent_skill.py @@ -34,6 +34,16 @@ def reset_current_file(): agentskills.CURRENT_FILE = None +def _numbered_test_lines(start, end) -> str: + return ('\n'.join(f'{i}|' for i in range(start, end + 1))) + '\n' + + +def 
_generate_test_file_with_lines(temp_path, num_lines) -> str: + file_path = temp_path / 'test_file.py' + file_path.write_text('\n' * num_lines) + return file_path + + def test_open_file_unexist_path(): with pytest.raises(FileNotFoundError): open_file('/unexist/path/a.txt') @@ -87,13 +97,13 @@ def test_open_file_long(tmp_path): with io.StringIO() as buf: with contextlib.redirect_stdout(buf): - open_file(str(temp_file_path)) + open_file(str(temp_file_path), 1, 50) result = buf.getvalue() assert result is not None expected = f'[File: {temp_file_path} (1000 lines total)]\n' - for i in range(1, 52): + for i in range(1, 51): expected += f'{i}|Line {i}\n' - expected += '(949 more lines below)\n' + expected += '(950 more lines below)\n' assert result.split('\n') == expected.split('\n') @@ -108,8 +118,8 @@ def test_open_file_long_with_lineno(tmp_path): result = buf.getvalue() assert result is not None expected = f'[File: {temp_file_path} (1000 lines total)]\n' - expected += '(50 more lines above)\n' - for i in range(51, 151): + expected += '(49 more lines above)\n' + for i in range(50, 151): expected += f'{i}|Line {i}\n' expected += '(850 more lines below)\n' assert result.split('\n') == expected.split('\n') @@ -147,9 +157,9 @@ def test_goto_line(tmp_path): assert result is not None expected = f'[File: {temp_file_path} (1000 lines total)]\n' - for i in range(1, 52): + for i in range(1, 101): expected += f'{i}|Line {i}\n' - expected += '(949 more lines below)\n' + expected += '(900 more lines below)\n' assert result.split('\n') == expected.split('\n') with io.StringIO() as buf: @@ -159,8 +169,8 @@ def test_goto_line(tmp_path): assert result is not None expected = f'[File: {temp_file_path} (1000 lines total)]\n' - expected += '(50 more lines above)\n' - for i in range(51, 151): + expected += '(49 more lines above)\n' + for i in range(50, 151): expected += f'{i}|Line {i}\n' expected += '(850 more lines below)\n' assert result.split('\n') == expected.split('\n') @@ -202,9 +212,9 
@@ def test_scroll_down(tmp_path): assert result is not None expected = f'[File: {temp_file_path} (1000 lines total)]\n' - for i in range(1, 52): + for i in range(1, 101): expected += f'{i}|Line {i}\n' - expected += '(949 more lines below)\n' + expected += '(900 more lines below)\n' assert result.split('\n') == expected.split('\n') with io.StringIO() as buf: @@ -214,8 +224,8 @@ def test_scroll_down(tmp_path): assert result is not None expected = f'[File: {temp_file_path} (1000 lines total)]\n' - expected += '(51 more lines above)\n' - for i in range(52, 152): + expected += '(50 more lines above)\n' + for i in range(51, 152): expected += f'{i}|Line {i}\n' expected += '(849 more lines below)\n' assert result.split('\n') == expected.split('\n') @@ -233,8 +243,8 @@ def test_scroll_up(tmp_path): assert result is not None expected = f'[File: {temp_file_path} (1000 lines total)]\n' - expected += '(250 more lines above)\n' - for i in range(251, 351): + expected += '(249 more lines above)\n' + for i in range(250, 351): expected += f'{i}|Line {i}\n' expected += '(650 more lines below)\n' assert result.split('\n') == expected.split('\n') @@ -246,8 +256,8 @@ def test_scroll_up(tmp_path): assert result is not None expected = f'[File: {temp_file_path} (1000 lines total)]\n' - expected += '(150 more lines above)\n' - for i in range(151, 251): + expected += '(149 more lines above)\n' + for i in range(150, 251): expected += f'{i}|Line {i}\n' expected += '(750 more lines below)\n' assert result.split('\n') == expected.split('\n') @@ -296,7 +306,8 @@ def test_print_window_internal(tmp_path): _print_window(str(test_file_path), current_line, window, return_str=False) result = buf.getvalue() expected = ( - '(49 more lines above)\n' + '(48 more lines above)\n' + '49|Line `49`\n' '50|Line `50`\n' '51|Line `51`\n' '(49 more lines below)\n' @@ -366,7 +377,8 @@ check(any_int)""" + 'E999 IndentationError: unexpected indent\n' '[This is how your edit would have looked if applied]\n' 
'-------------------------------------------------\n' - '(4 more lines above)\n' + '(3 more lines above)\n' + '4|def test_any_int():\n' '5| assert any_int(1, 2, 3) == True\n' '6| assert any_int(1.5, 2, 3) == False\n' '7| assert any_int(1, 2.5, 3) == False\n' @@ -382,7 +394,8 @@ check(any_int)""" '\n' '[This is the original code before your edit]\n' '-------------------------------------------------\n' - '(4 more lines above)\n' + '(3 more lines above)\n' + '4|def test_any_int():\n' '5| assert any_int(1, 2, 3) == True\n' '6| assert any_int(1.5, 2, 3) == False\n' '7| assert any_int(1, 2.5, 3) == False\n' @@ -500,7 +513,7 @@ def test_edit_file(tmp_path): with io.StringIO() as buf: with contextlib.redirect_stdout(buf): edit_file( - str(temp_file_path), + file_name=str(temp_file_path), start=1, end=3, content='REPLACE TEXT', @@ -818,50 +831,46 @@ def test_find_file_not_exist_file_specific_path(tmp_path): def test_edit_lint_file_pass(tmp_path, monkeypatch): - # Create a Python file with correct syntax - file_path = tmp_path / 'test_file.py' - file_path.write_text('\n') - - # patch ENABLE_AUTO_LINT + # Enable linting monkeypatch.setattr( 'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', True ) + file_path = _generate_test_file_with_lines(tmp_path, 1) + # Test linting functionality with io.StringIO() as buf: with contextlib.redirect_stdout(buf): open_file(str(file_path)) edit_file(str(file_path), 1, 1, "print('hello')\n") result = buf.getvalue() - assert result is not None expected = ( f'[File: {file_path} (1 lines total)]\n' '1|\n' - f'[File: {file_path} (2 lines total after edit)]\n' - "1|print('hello')\n" - '2|\n' + MSG_FILE_UPDATED + '\n' + f'[File: {file_path} (1 lines total after edit)]\n' + "1|print('hello')\n" + MSG_FILE_UPDATED + '\n' ) assert result.split('\n') == expected.split('\n') def test_lint_file_fail_undefined_name(tmp_path, monkeypatch, capsys): - # Create a Python file with a syntax error - file_path = tmp_path / 'test_file.py' - 
file_path.write_text('\n') - - # Set environment variable to enable linting + # Enable linting monkeypatch.setattr( 'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', True ) - open_file(str(file_path)) - edit_file(str(file_path), 1, 1, 'undefined_name()\n') + num_lines = 1 + current_line = 1 + + file_path = _generate_test_file_with_lines(tmp_path, 1) + + open_file(str(file_path), current_line) + edit_file(str(file_path), current_line, num_lines, 'undefined_name()\n') result = capsys.readouterr().out - print(result) - assert result is not None + expected = ( f'[File: {file_path} (1 lines total)]\n' '1|\n' @@ -871,7 +880,6 @@ def test_lint_file_fail_undefined_name(tmp_path, monkeypatch, capsys): '[This is how your edit would have looked if applied]\n' '-------------------------------------------------\n' '1|undefined_name()\n' - '2|\n' '-------------------------------------------------\n\n' '[This is the original code before your edit]\n' '-------------------------------------------------\n' @@ -885,61 +893,47 @@ def test_lint_file_fail_undefined_name(tmp_path, monkeypatch, capsys): def test_lint_file_fail_undefined_name_long(tmp_path, monkeypatch, capsys): - # Create a Python file with a syntax error - file_path = tmp_path / 'test_file.py' - file_path.write_text('\n' * 1000) - - # Set environment variable to enable linting + # Enable linting monkeypatch.setattr( 'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', True ) + num_lines = 1000 + current_line = 500 + error_line = 500 + window = 100 + + file_path = _generate_test_file_with_lines(tmp_path, num_lines) + + error_message = f"{file_path}:{error_line}:1: F821 undefined name 'undefined_name'" + open_file(str(file_path)) - edit_file(str(file_path), 500, 500, 'undefined_name()\n') + edit_file(str(file_path), current_line, error_line, 'undefined_name()\n') result = capsys.readouterr().out - print(result) - assert result is not None - open_lines = '\n'.join([f'{i+1}|' for i in 
range(51)]) + open_lines = '\n'.join([f'{i}|' for i in range(1, window + 1)]) expected = ( - f'[File: {file_path} (1000 lines total)]\n' + f'[File: {file_path} ({num_lines} lines total)]\n' f'{open_lines}\n' - '(949 more lines below)\n' + '(900 more lines below)\n' '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n' - 'ERRORS:\n' - f"{file_path}:500:1: F821 undefined name 'undefined_name'\n" + f'ERRORS:\n{error_message}\n' '[This is how your edit would have looked if applied]\n' '-------------------------------------------------\n' - '(495 more lines above)\n' - '496|\n' - '497|\n' - '498|\n' - '499|\n' - '500|undefined_name()\n' - '501|\n' - '502|\n' - '503|\n' - '504|\n' - '505|\n' - '(496 more lines below)\n' - '-------------------------------------------------\n\n' + '(494 more lines above)\n' + + _numbered_test_lines(error_line - 5, error_line - 1) + + '500|undefined_name()\n' + + _numbered_test_lines(error_line + 1, error_line + 5) + + '(495 more lines below)\n' + + '-------------------------------------------------\n\n' '[This is the original code before your edit]\n' '-------------------------------------------------\n' - '(495 more lines above)\n' - '496|\n' - '497|\n' - '498|\n' - '499|\n' - '500|\n' - '501|\n' - '502|\n' - '503|\n' - '504|\n' - '505|\n' - '(495 more lines below)\n' - '-------------------------------------------------\n' + '(494 more lines above)\n' + + _numbered_test_lines(error_line - 5, error_line + 5) + + '(495 more lines below)\n' + + '-------------------------------------------------\n' 'Your changes have NOT been applied. Please fix your edit command and try again.\n' 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' 'DO NOT re-run the same failed edit command. 
Running it again will lead to the same error.\n' @@ -948,15 +942,13 @@ def test_lint_file_fail_undefined_name_long(tmp_path, monkeypatch, capsys): def test_lint_file_disabled_undefined_name(tmp_path, monkeypatch, capsys): - # Create a Python file with a syntax error - file_path = tmp_path / 'test_file.py' - file_path.write_text('\n') - - # Set environment variable to disable linting + # Disable linting monkeypatch.setattr( 'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', False ) + file_path = _generate_test_file_with_lines(tmp_path, 1) + open_file(str(file_path)) edit_file(str(file_path), 1, 1, 'undefined_name()\n') @@ -965,9 +957,8 @@ def test_lint_file_disabled_undefined_name(tmp_path, monkeypatch, capsys): expected = ( f'[File: {file_path} (1 lines total)]\n' '1|\n' - f'[File: {file_path} (2 lines total after edit)]\n' - '1|undefined_name()\n' - '2|\n' + MSG_FILE_UPDATED + '\n' + f'[File: {file_path} (1 lines total after edit)]\n' + '1|undefined_name()\n' + MSG_FILE_UPDATED + '\n' ) assert result.split('\n') == expected.split('\n') diff --git a/tests/unit/test_micro_agents.py b/tests/unit/test_micro_agents.py index d006a180ae..5273c080e3 100644 --- a/tests/unit/test_micro_agents.py +++ b/tests/unit/test_micro_agents.py @@ -13,7 +13,11 @@ from opendevin.events.observation import NullObservation def test_all_agents_are_loaded(): - full_path = os.path.join('agenthub', 'micro') + assert all_microagents is not None + assert len(all_microagents) > 1 + + base = os.path.join('agenthub', 'micro') + full_path = os.path.dirname(__file__) + '/../../' + base agent_names = set() for root, _, files in os.walk(full_path): for file in files: