diff --git a/config.template.toml b/config.template.toml index 660cea0ad5..060ec11ab1 100644 --- a/config.template.toml +++ b/config.template.toml @@ -172,11 +172,9 @@ model = "gpt-4o" #disable_vision = true [llm.gpt4o-mini] -# API key to use api_key = "your-api-key" +model = "gpt-4o" -# Model to use -model = "gpt-4o-mini" #################################### Agent ################################### # Configuration for agents (group name starts with 'agent') diff --git a/evaluation/aider_bench/run_infer.py b/evaluation/aider_bench/run_infer.py index b4698a7c69..98e8f20888 100644 --- a/evaluation/aider_bench/run_infer.py +++ b/evaluation/aider_bench/run_infer.py @@ -48,13 +48,14 @@ def get_config( config = AppConfig( default_agent=metadata.agent_class, run_as_openhands=False, - runtime='eventstream', + runtime=os.environ.get('RUNTIME', 'eventstream'), max_iterations=metadata.max_iterations, sandbox=SandboxConfig( base_container_image='python:3.11-bookworm', enable_auto_lint=True, use_host_network=False, timeout=100, + api_key=os.environ.get('ALLHANDS_API_KEY', None), ), # do not mount workspace workspace_base=None, @@ -186,7 +187,9 @@ def process_instance( signature_file=f'{instance.instance_name}.py', ) if USE_UNIT_TESTS: - print(f'\nInstruction to run test_file: {instance.instance_name}_test.py\n') + logger.info( + f'\nInstruction to run test_file: {instance.instance_name}_test.py\n' + ) instruction += ( f'Use `python -m unittest {instance.instance_name}_test.py` to run the test_file ' 'and verify the correctness of your solution. DO NOT EDIT the test file.\n\n' diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py index d8bfbeab3e..e0539143de 100644 --- a/evaluation/swe_bench/run_infer.py +++ b/evaluation/swe_bench/run_infer.py @@ -81,8 +81,10 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata): instruction += f'# Hints\n{instance.hints_text}\n\n' instruction += ( 'IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.\n' - 'You should NOT modify any existing test case files. If needed, you can add new test cases in a NEW file to reproduce the issue.\n' - 'You SHOULD INCLUDE PROPER INDENTATION in your edit commands.\n' + 'You should NOT modify any existing test case files. You SHOULD add new test in a NEW file to reproduce the issue.\n' + 'You should verify that the issue is resolved and any new tests you create pass successfully.\n' + 'You should NEVER use web browsing or any other web-based tools.\n' + 'You should ALWAYS use the default Python interpreter available in the environment to run code related to the provided issue and/or repository.\n' ) # NOTE: You can actually set slightly different instruction for different agents @@ -123,7 +125,6 @@ def get_config( config = AppConfig( default_agent=metadata.agent_class, run_as_openhands=False, - max_budget_per_task=4, max_iterations=metadata.max_iterations, runtime=os.environ.get('RUNTIME', 'eventstream'), sandbox=SandboxConfig( @@ -169,7 +170,7 @@ def initialize_runtime( obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert_and_raise( - obs.exit_code == 0, f'Failed to export SWE_INSTANCE_ID: {obs.content}' + obs.exit_code == 0, f'Failed to export SWE_INSTANCE_ID: {str(obs)}' ) action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """) @@ -177,7 +178,7 @@ def initialize_runtime( logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert_and_raise(obs.exit_code == 0, f'Failed to export USER: {obs.content}') + assert_and_raise(obs.exit_code == 0, f'Failed to export USER: {str(obs)}') if USE_INSTANCE_IMAGE: # inject the init script @@ -191,7 +192,7 @@ def initialize_runtime( logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert_and_raise( obs.exit_code == 0, - f'Failed to create /swe_util/eval_data/instances: {obs.content}', + f'Failed to create /swe_util/eval_data/instances: {str(obs)}', ) swe_instance_json_name = 'swe-bench-instance.json' @@ -218,16 +219,16 @@ def initialize_runtime( logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert_and_raise(obs.exit_code == 0, f'Failed to cat ~/.bashrc: {obs.content}') + assert_and_raise(obs.exit_code == 0, f'Failed to cat ~/.bashrc: {str(obs)}') action = CmdRunAction(command='source ~/.bashrc') action.timeout = 600 logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert_and_raise( - obs.exit_code == 0, f'Failed to source ~/.bashrc: {obs.content}' - ) + if isinstance(obs, ErrorObservation): + logger.error(f'Failed to source ~/.bashrc: {str(obs)}') + assert_and_raise(obs.exit_code == 0, f'Failed to source ~/.bashrc: {str(obs)}') action = CmdRunAction(command='source /swe_util/instance_swe_entry.sh') action.timeout = 3600 @@ -236,7 +237,7 @@ def initialize_runtime( logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert_and_raise( obs.exit_code == 0, - f'Failed to source /swe_util/instance_swe_entry.sh: {obs.content}', + f'Failed to source /swe_util/instance_swe_entry.sh: {str(obs)}', ) else: action = CmdRunAction(command='source /swe_util/swe_entry.sh') @@ -246,7 +247,7 @@ def initialize_runtime( logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert_and_raise( obs.exit_code == 0, - f'Failed to source /swe_util/swe_entry.sh: {obs.content}', + f'Failed to source /swe_util/swe_entry.sh: {str(obs)}', ) action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}') @@ -256,7 +257,7 @@ def initialize_runtime( logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert_and_raise( obs.exit_code == 0, - f'Failed to cd to /workspace/{workspace_dir_name}: {obs.content}', + f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}', ) action = CmdRunAction(command='git reset --hard') @@ -264,7 +265,7 @@ def initialize_runtime( logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert_and_raise(obs.exit_code == 0, f'Failed to git reset --hard: {obs.content}') + assert_and_raise(obs.exit_code == 0, f'Failed to git reset --hard: {str(obs)}') action = CmdRunAction( command='for remote_name in $(git remote); do git remote remove "${remote_name}"; done' @@ -273,7 +274,7 @@ def initialize_runtime( logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {obs.content}') + assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}') logger.info('-' * 30) logger.info('END Runtime Initialization Fn') @@ -303,7 +304,7 @@ def complete_runtime( logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert_and_raise( obs.exit_code == 0, - f'Failed to cd to /workspace/{workspace_dir_name}: {obs.content}', + f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}', ) action = CmdRunAction(command='git config --global core.pager ""') @@ -313,7 +314,7 @@ def complete_runtime( logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert_and_raise( obs.exit_code == 0, - f'Failed to git config --global core.pager "": {obs.content}', + f'Failed to git config --global core.pager "": {str(obs)}', ) action = CmdRunAction(command='git add -A') @@ -321,7 +322,7 @@ def complete_runtime( logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert_and_raise(obs.exit_code == 0, f'Failed to git add -A: {obs.content}') + assert_and_raise(obs.exit_code == 0, f'Failed to git add -A: {str(obs)}') n_retries = 0 git_patch = None @@ -346,7 +347,9 @@ def complete_runtime( logger.error(f'Error occurred: {obs.content}. Retrying...') sleep_if_should_continue(10) else: - assert_and_raise(False, f'Unexpected observation type: {type(obs)}') + assert_and_raise(False, f'Unexpected observation type: {str(obs)}') + + assert_and_raise(git_patch is not None, 'Failed to get git diff (None)') logger.info('-' * 30) logger.info('END Runtime Completion Fn') @@ -482,10 +485,6 @@ if __name__ == '__main__': details = {} _agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls) - if hasattr(_agent_cls, 'system_message'): - details['system_message'] = _agent_cls.system_message - if hasattr(_agent_cls, 'in_context_example'): - details['in_context_example'] = _agent_cls.in_context_example dataset_descrption = ( args.dataset.replace('/', '__') + '-' + args.split.replace('/', '__') diff --git a/evaluation/swe_bench/scripts/eval/compare_outputs.py b/evaluation/swe_bench/scripts/eval/compare_outputs.py new file mode 100755 index 0000000000..2b4b8a40a8 --- /dev/null +++ b/evaluation/swe_bench/scripts/eval/compare_outputs.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +import argparse + +import pandas as pd + +parser = argparse.ArgumentParser( + description='Compare two swe_bench output JSONL files and print the resolved diff' +) +parser.add_argument('input_file_1', type=str) +parser.add_argument('input_file_2', type=str) +args = parser.parse_args() + +df1 = pd.read_json(args.input_file_1, orient='records', lines=True) +df2 = pd.read_json(args.input_file_2, orient='records', lines=True) + + +# Get the intersection of the instance_ids +df = pd.merge(df1, df2, on='instance_id', how='inner') + + +def _get_resolved(report): + if report is None: + return False + if isinstance(report, float): + return False + else: + return report.get('resolved', False) + + +df['resolved_x'] = df['report_x'].apply(_get_resolved) +df['resolved_y'] = df['report_y'].apply(_get_resolved) +df['diff'] = df.apply(lambda x: x['resolved_x'] != x['resolved_y'], axis=1) + +df_diff = df[df['diff']].sort_values( + by=['resolved_x', 'resolved_y'], ascending=[False, False] +) +# skip if any of the resolved is nan, which means one of the eval is not finished yet +df_diff = df_diff[df_diff['resolved_x'].notna() & df_diff['resolved_y'].notna()] + +print(f'X={args.input_file_1}') +print(f'Y={args.input_file_2}') +print(f'# diff={df_diff.shape[0]}') +df_diff = df_diff[['instance_id', 'resolved_x', 'resolved_y', 'report_x', 'report_y']] + +# x resolved but y not +print('-' * 100) +df_diff_x_only = df_diff[df_diff['resolved_x'] & ~df_diff['resolved_y']].sort_values( + by='instance_id' +) +print(f'# x resolved but y not={df_diff_x_only.shape[0]}') +print(df_diff_x_only[['instance_id', 'report_x', 'report_y']]) + +# y resolved but x not +print('-' * 100) +df_diff_y_only = df_diff[~df_diff['resolved_x'] & df_diff['resolved_y']].sort_values( + by='instance_id' +) +print(f'# y resolved but x not={df_diff_y_only.shape[0]}') +print(df_diff_y_only[['instance_id', 'report_x', 'report_y']]) +# get instance_id from df_diff_y_only +print('-' * 100) +print('Instances that x resolved but y not:') +print(df_diff_x_only['instance_id'].tolist()) + +print('-' * 100) +print('Instances that y resolved but x not:') +print(df_diff_y_only['instance_id'].tolist()) diff --git a/evaluation/swe_bench/scripts/eval/summarize_outputs.py b/evaluation/swe_bench/scripts/eval/summarize_outputs.py index ccdc93cbf0..e47cceb993 100755 --- a/evaluation/swe_bench/scripts/eval/summarize_outputs.py +++ b/evaluation/swe_bench/scripts/eval/summarize_outputs.py @@ -3,6 +3,9 @@ import argparse import json from collections import Counter +from openhands.events.serialization import event_from_dict +from openhands.events.utils import get_pairs_from_events + ERROR_KEYWORDS = [ 'Agent encountered an error while processing the last action', 'APIError', @@ -26,8 +29,37 @@ if __name__ == '__main__': error_counter = Counter() + main_agent_cost = [] + editor_cost = [] + num_turns = [] + for line in lines: _d = json.loads(line) + + # Cost + costs = _d['metrics'].get('costs', []) + _cur_main_agent_cost = 0 + _cur_editor_cost = 0 + for cost in costs: + if isinstance(cost, float): + # backward compatible + _cur_main_agent_cost += cost + else: + if 'draft_editor' in cost['model']: + _cur_editor_cost += cost['cost'] + else: + _cur_main_agent_cost += cost['cost'] + + main_agent_cost.append(_cur_main_agent_cost) + editor_cost.append(_cur_editor_cost) + + # Turn status + history = _d.get('history', []) + events = [event_from_dict(event) for event in history] + pairs = get_pairs_from_events(events) + num_turns.append(len(pairs)) + + # Patch & resolve status patch = _d.get('test_result', {}).get('git_patch', '') if patch == '': num_empty_patch += 1 @@ -38,6 +70,7 @@ if __name__ == '__main__': if resolved: num_resolved += 1 + # Error error = _d.get('error', None) if error is not None and isinstance(error, str): @@ -70,7 +103,17 @@ if __name__ == '__main__': print( f'# of loop: {num_agent_stuck_in_loop} / {num_lines} ({num_agent_stuck_in_loop / num_lines * 100:.2f}%)' ) + assert len(num_turns) == num_lines + assert len(main_agent_cost) == num_lines + assert len(editor_cost) == num_lines + print(f'Avg. num of turns per instance: {sum(num_turns) / num_lines:.2f}') + print(f'Avg. agent cost per instance: {sum(main_agent_cost) / num_lines:.2f} USD') + print(f'Avg. editor cost per instance: {sum(editor_cost) / num_lines:.2f} USD') + print( + f'Avg. total cost per instance: {(sum(main_agent_cost) + sum(editor_cost)) / num_lines:.2f} USD' + ) print('-' * 100) print('Detailed error breakdown:') for error, count in error_counter.items(): print(f'{error}: {count} ({count / num_lines * 100:.2f}%)') + print('-' * 100) diff --git a/evaluation/swe_bench/scripts/run_infer.sh b/evaluation/swe_bench/scripts/run_infer.sh index 6bc60f2b02..54bcbbbc33 100755 --- a/evaluation/swe_bench/scripts/run_infer.sh +++ b/evaluation/swe_bench/scripts/run_infer.sh @@ -25,8 +25,8 @@ if [ -z "$AGENT" ]; then fi if [ -z "$MAX_ITER" ]; then - echo "MAX_ITER not specified, use default 30" - MAX_ITER=30 + echo "MAX_ITER not specified, use default 100" + MAX_ITER=100 fi if [ -z "$USE_INSTANCE_IMAGE" ]; then diff --git a/openhands/agenthub/codeact_agent/action_parser.py b/openhands/agenthub/codeact_agent/action_parser.py index 5da42214d5..bf926a6739 100644 --- a/openhands/agenthub/codeact_agent/action_parser.py +++ b/openhands/agenthub/codeact_agent/action_parser.py @@ -1,11 +1,17 @@ import re -from openhands.controller.action_parser import ActionParser, ResponseParser +from openhands.controller.action_parser import ( + ActionParser, + ResponseParser, +) +from openhands.core.exceptions import LLMMalformedActionError +from openhands.core.logger import openhands_logger as logger from openhands.events.action import ( Action, AgentDelegateAction, AgentFinishAction, CmdRunAction, + FileEditAction, IPythonRunCellAction, MessageAction, ) @@ -14,6 +20,7 @@ from openhands.events.action import ( class CodeActResponseParser(ResponseParser): """Parser action: - CmdRunAction(command) - bash command to run + - FileEditAction(path, content) - edit a file - IPythonRunCellAction(code) - IPython code to run - AgentDelegateAction(agent, inputs) - delegate action for (sub)task - MessageAction(content) - Message action to run (e.g. ask for clarification) @@ -25,6 +32,7 @@ class CodeActResponseParser(ResponseParser): super().__init__() self.action_parsers = [ CodeActActionParserFinish(), + CodeActActionParserFileEdit(), CodeActActionParserCmdRun(), CodeActActionParserIPythonRunCell(), CodeActActionParserAgentDelegate(), @@ -46,6 +54,8 @@ class CodeActResponseParser(ResponseParser): if f'' in action and f'' not in action: action += f'' + if '' not in action: + action += '' return action def parse_action(self, action_str: str) -> Action: @@ -186,3 +196,87 @@ class CodeActActionParserMessage(ActionParser): def parse(self, action_str: str) -> Action: return MessageAction(content=action_str, wait_for_response=True) + + +class CodeActActionParserFileEdit(ActionParser): + """Parser action: + - FileEditAction(path, content) - edit a file + """ + + def __init__(self): + self.file_edit_match: re.Match | None = None + + def check_condition(self, action_str: str) -> bool: + if '(.*?)', + action_str, + re.DOTALL, + ) + + if self.file_edit_match is None: + logger.error( + f'FileEditAction detected but the format is incorrect. Unable to match for in:\n{"-" * 80}\n{action_str}\n{"-" * 80}' + ) + raise LLMMalformedActionError( + 'FileEditAction detected but the format is incorrect. Usage:\n' + '\n' + '[content_to_edit]\n' + '\n' + ) + + path = self.file_edit_match.group(2) + start = self.file_edit_match.group(4) + end = self.file_edit_match.group(6) + + if not path: + raise LLMMalformedActionError( + 'FileEditAction detected but no `path` specified. You should specify the path of the file to edit.' + ) + + if start: + try: + int(start) + except ValueError: + raise LLMMalformedActionError( + f'FileEditAction detected but `start` is not a valid integer: {start}' + ) + + if end: + try: + int(end) + except ValueError: + raise LLMMalformedActionError( + f'FileEditAction detected but `end` is not a valid integer: {end}' + ) + + return True + + def parse(self, action_str: str) -> Action: + assert ( + self.file_edit_match is not None + ), 'self.file_edit_match should not be None when parse is called' + + file_path = self.file_edit_match.group(2).strip() + start_line = ( + int(self.file_edit_match.group(4)) + if self.file_edit_match.group(4) + else None + ) + end_line = ( + int(self.file_edit_match.group(6)) + if self.file_edit_match.group(6) + else None + ) + content = self.file_edit_match.group(7) + thought = action_str.replace(self.file_edit_match.group(0), '').strip() + + action = FileEditAction(path=file_path, content=content, thought=thought) + if start_line is not None: + action.start = start_line + if end_line is not None: + action.end = end_line + return action diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py index 4db072395a..cacd683537 100644 --- a/openhands/agenthub/codeact_agent/codeact_agent.py +++ b/openhands/agenthub/codeact_agent/codeact_agent.py @@ -11,12 +11,14 @@ from openhands.events.action import ( AgentDelegateAction, AgentFinishAction, CmdRunAction, + FileEditAction, IPythonRunCellAction, MessageAction, ) from openhands.events.observation import ( AgentDelegateObservation, CmdOutputObservation, + FileEditObservation, IPythonRunCellObservation, UserRejectObservation, ) @@ -34,7 +36,7 @@ from openhands.utils.prompt import PromptManager class CodeActAgent(Agent): - VERSION = '1.9' + VERSION = '2.0' """ The Code Act Agent is a minimalist agent. The agent works by passing the model a list of action-observation pairs and prompting the model to take the next step. @@ -102,6 +104,8 @@ class CodeActAgent(Agent): return f'{action.thought}\n\n{action.code}\n' elif isinstance(action, AgentDelegateAction): return f'{action.thought}\n\n{action.inputs["task"]}\n' + elif isinstance(action, FileEditAction): + return f'{action.thought}\n\n{action.content}\n' elif isinstance(action, MessageAction): return action.content elif isinstance(action, AgentFinishAction) and action.source == 'agent': @@ -114,6 +118,7 @@ class CodeActAgent(Agent): or isinstance(action, CmdRunAction) or isinstance(action, IPythonRunCellAction) or isinstance(action, MessageAction) + or isinstance(action, FileEditAction) or (isinstance(action, AgentFinishAction) and action.source == 'agent') ): content = [TextContent(text=self.action_to_str(action))] @@ -151,6 +156,9 @@ class CodeActAgent(Agent): text = '\n'.join(splitted) text = truncate_content(text, max_message_chars) return Message(role='user', content=[TextContent(text=text)]) + elif isinstance(obs, FileEditObservation): + text = obs_prefix + truncate_content(str(obs), max_message_chars) + return Message(role='user', content=[TextContent(text=text)]) elif isinstance(obs, AgentDelegateObservation): text = obs_prefix + truncate_content( obs.outputs['content'] if 'content' in obs.outputs else '', @@ -201,6 +209,7 @@ class CodeActAgent(Agent): '', '', '', + '', ], } diff --git a/openhands/agenthub/codeact_agent/system_prompt.j2 b/openhands/agenthub/codeact_agent/system_prompt.j2 index 6ea468ffb8..a1498aacd6 100644 --- a/openhands/agenthub/codeact_agent/system_prompt.j2 +++ b/openhands/agenthub/codeact_agent/system_prompt.j2 @@ -1,10 +1,12 @@ {% set MINIMAL_SYSTEM_PREFIX %} A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed answers to the user's questions. -The assistant can use a Python environment with , e.g.: + +[1] The assistant can use a Python environment with , e.g.: print("Hello World!") -The assistant can execute bash commands wrapped with , e.g. ls . + +[2] The assistant can execute bash commands wrapped with , e.g. ls . If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second . The second can be empty (which will retrieve any additional logs), or it can contain text to be sent to STDIN of the running process, @@ -14,6 +16,126 @@ For commands that may run indefinitely, the output should be redirected to a fil in the background, e.g. python3 app.py > server.log 2>&1 & If a command execution result says "Command timed out. Sending SIGINT to the process", the assistant should retry running the command in the background. + +[3] The assistant can edit files using by setting the file path and providing a draft of the new file content. The draft file content does not need to be exactly the same as the existing file content; the assistant may skip some lines and only include the parts that need to be changed. + +IMPORTANT: When editing large file (e.g., > 300 lines), the assistant MUST SPECIFY the range of lines to be edited by setting `start` and `end` (1-indexed, both inclusive). For example, `` means the assistant will edit the whole file (from line 1 to the end of the file). `start=1` and `end=-1` are the default values, so the assistant can omit them if they are the same as the default values. +BEFORE you start editing, you MUST view the ENTIRE body of the part you want to edit and get the correct begin and end line numbers. + +When editing files, the assistant should include comments indicating where the code will not change. For example, use comments like `# no changes before` or `# no changes here` to clearly mark sections of the code that remain unchanged. This helps to provide context and ensure clarity in the edits being made. + +Possible cases: +- File too long: When the file to be edited is too long, the assistant should set `start` and `end` (1-indexed, both inclusive) to specify the range of lines to be edited. For example, `` means the assistant will only edit lines 100 to 200 of `/path/to/file.txt`. +- Append to file: If the assistant wants to append to a file, it should set both `start` and `end` to `-1`. +- File does not exist: If `` is pointing to a file that does not exist, a new file with the exact content will be created. + +Important: because line numbers are useful, the assistant should always use the provided functions to search (e.g., `search_dir`) or view the file content (e.g., `open_file`) along with the line numbers. DO NOT use other methods (e.g., `cat`) to view the file content. + +**Example 1 (general edit for short files)** +For example, given an existing file `/path/to/file.py` that looks like this: + +(this is the end of the file) +1|class MyClass: +2| def __init__(self): +3| self.x = 1 +4| self.y = 2 +5| self.z = 3 +6| +7|print(MyClass().z) +8|print(MyClass().x) +(this is the end of the file) + + +The assistant wants to edit the file to look like this: + +(this is the end of the file) +1|class MyClass: +2| def __init__(self): +3| self.x = 1 +4| self.y = 2 +5| +6|print(MyClass().y) +(this is the end of the file) + + +The assistant may produce an edit action like this: + +class MyClass: + def __init__(self): + # no changes before + self.y = 2 + # self.z is removed + +# MyClass().z is removed +print(MyClass().y) + + +**Example 2 (append to file for short files)** + +For example, given an existing file `/path/to/file.py` that looks like this: + +(this is the end of the file) +1|class MyClass: +2| def __init__(self): +3| self.x = 1 +4| self.y = 2 +5| self.z = 3 +6| +7|print(MyClass().z) +8|print(MyClass().x) +(this is the end of the file) + +To append the following lines to the file: +```python +print(MyClass().y) +``` + +The assistant may produce an edit action like this: + +print(MyClass().y) + + +**Example 3 (edit for long files)** + +Given an existing file `/path/to/file.py` that looks like this: + +(1000 more lines above) +1001|class MyClass: +1002| def __init__(self): +1003| self.x = 1 +1004| self.y = 2 +1005| self.z = 3 +1006| +1007|print(MyClass().z) +1008|print(MyClass().x) +(2000 more lines below) + + +The assistant wants to edit the file to look like this: + +(1000 more lines above) +1001|class MyClass: +1002| def __init__(self): +1003| self.x = 1 +1004| self.y = 2 +1005| +1006|print(MyClass().y) +(2000 more lines below) + +The assistant may produce an edit action like this: + + +class MyClass: + def __init__(self): + # no changes before + self.y = 2 + # self.z is removed + +# MyClass().z is removed +print(MyClass().y) + + + {% endset %} {% set BROWSING_PREFIX %} The assistant can browse the Internet with and . @@ -29,12 +151,8 @@ Apart from the standard Python library, the assistant can also use the following {{ agent_skills_docs }} IMPORTANT: - `open_file` only returns the first 100 lines of the file by default! The assistant MUST use `scroll_down` repeatedly to read the full file BEFORE making edits! -- The assistant shall adhere to THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRING PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write the line out, with all leading spaces before the code! - Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. - Any code issued should be less than 50 lines to avoid context being cut off! -- After EVERY `create_file` the method `append_file` shall be used to write the FIRST content! -- For `edit_file_by_replace` NEVER provide empty parameters! -- For `edit_file_by_replace` the file must be read fully before any replacements! {% endset %} {% set SYSTEM_SUFFIX %} Responses should be concise. diff --git a/openhands/agenthub/codeact_agent/user_prompt.j2 b/openhands/agenthub/codeact_agent/user_prompt.j2 index bae6bfc18c..a94c02e39d 100644 --- a/openhands/agenthub/codeact_agent/user_prompt.j2 +++ b/openhands/agenthub/codeact_agent/user_prompt.j2 @@ -4,23 +4,19 @@ USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000. ASSISTANT: -Sure! Let me create a Python file `app.py`: - -create_file('app.py') - +Sure! Let me first check the current directory: + +ls + USER: OBSERVATION: -[File: /workspace/app.py (1 lines total)] -(this is the beginning of the file) -1| -(this is the end of the file) -[File app.py created.] +openhands@runtime:~/workspace$ ASSISTANT: -Now I will write the Python code for starting a web server and save it to the file `app.py`: - -EDITED_CODE="""from flask import Flask +There is no `app.py` file in the current directory. Let me create a Python file `app.py`: + +from flask import Flask app = Flask(__name__) @app.route('/') @@ -29,30 +25,27 @@ def index(): return str(numbers) if __name__ == '__main__': - app.run(port=5000)""" - -insert_content_at_line( - 'app.py', - 1, - EDITED_CODE, -) - + app.run(port=5000) + USER: OBSERVATION: -(this is the beginning of the file) -1|from flask import Flask -2|app = Flask(__name__) -3| -4|@app.route('/') -5|def index(): -6| numbers = list(range(1, 11)) -7| return str(numbers) -8| -9|if __name__ == '__main__': -10| app.run(port=5000) -(this is the end of the file) -[File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] +[New file /workspace/app.py is created.] +(begin of changes) +--- /workspace/app.py ++++ /workspace/app.py +@@ -1 +1,10 @@ ++from flask import Flask ++app = Flask(__name__) + ++@app.route('/') ++def index(): ++ numbers = list(range(1, 11)) ++ return str(numbers) ++ ++if __name__ == '__main__': ++ app.run(port=5000) +(end of changes) ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: @@ -177,16 +170,17 @@ USER: ASSISTANT: I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: - -edit_file_by_replace( - 'app.py', - to_replace=" return str(numbers)", - new_content=" return '' + ''.join([f'' for i in numbers]) + '
{i}
'", -) -
+ +@app.route('/') +def index(): + numbers = list(range(1, 11)) + ret = '' + ''.join([f'' for i in numbers]) + '
{i}
' + return ret +
USER: Observation: +[Edited existing file /workspace/app.py] [File: /workspace/app.py (10 lines total after edit)] (this is the beginning of the file) 1|from flask import Flask @@ -195,10 +189,11 @@ Observation: 4|@app.route('/') 5|def index(): 6| numbers = list(range(1, 11)) -7| return '' + ''.join([f'' for i in numbers]) + '
{i}
' -8| -9|if __name__ == '__main__': -10| app.run(port=5000) +7| ret = '' + ''.join([f'' for i in numbers]) + '
{i}
' +8| return ret +9| +10|if __name__ == '__main__': +11| app.run(port=5000) (this is the end of the file) [File updated (edited at line 7). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] diff --git a/openhands/controller/action_parser.py b/openhands/controller/action_parser.py index 46ccb33dcc..fdd4f864b4 100644 --- a/openhands/controller/action_parser.py +++ b/openhands/controller/action_parser.py @@ -3,6 +3,16 @@ from abc import ABC, abstractmethod from openhands.events.action import Action +class ActionParseError(Exception): + """Exception raised when the response from the LLM cannot be parsed into an action.""" + + def __init__(self, error: str): + self.error = error + + def __str__(self): + return self.error + + class ResponseParser(ABC): """This abstract base class is a general interface for an response parser dedicated to parsing the action from the response from the LLM. diff --git a/openhands/controller/state/state.py b/openhands/controller/state/state.py index 274df03879..e14d44517a 100644 --- a/openhands/controller/state/state.py +++ b/openhands/controller/state/state.py @@ -6,12 +6,12 @@ from typing import Any from openhands.controller.state.task import RootTask from openhands.core.logger import openhands_logger as logger -from openhands.core.metrics import Metrics from openhands.core.schema import AgentState from openhands.events.action import ( MessageAction, ) from openhands.events.action.agent import AgentFinishAction +from openhands.llm.metrics import Metrics from openhands.memory.history import ShortTermHistory from openhands.storage.files import FileStore diff --git a/openhands/core/cli.py b/openhands/core/cli.py index 5bdd976858..b87674e751 100644 --- a/openhands/core/cli.py +++ b/openhands/core/cli.py @@ -19,12 +19,14 @@ from openhands.events.action import ( Action, ChangeAgentStateAction, CmdRunAction, + FileEditAction, MessageAction, ) from openhands.events.event import Event from openhands.events.observation import ( AgentStateChangedObservation, CmdOutputObservation, + FileEditObservation, ) from openhands.llm.llm import LLM from openhands.runtime import get_runtime_cls @@ -50,6 +52,10 @@ def display_command_output(output: str): print('\n') +def display_file_edit(event: FileEditAction | FileEditObservation): + print(colored(str(event), 'green')) + + def display_event(event: Event): if isinstance(event, Action): if hasattr(event, 'thought'): @@ -61,6 +67,10 @@ def display_event(event: Event): display_command(event.command) if isinstance(event, CmdOutputObservation): display_command_output(event.content) + if isinstance(event, FileEditAction): + display_file_edit(event) + if isinstance(event, FileEditObservation): + display_file_edit(event) async def main(): diff --git a/openhands/core/config/llm_config.py b/openhands/core/config/llm_config.py index 98596b2a9c..12e2f4afa3 100644 --- a/openhands/core/config/llm_config.py +++ b/openhands/core/config/llm_config.py @@ -1,5 +1,6 @@ import os from dataclasses import dataclass, fields +from typing import Optional from openhands.core.config.config_utils import get_field_info @@ -39,6 +40,7 @@ class LLMConfig: disable_vision: If model is vision capable, this option allows to disable image processing (useful for cost reduction). caching_prompt: Use the prompt caching feature if provided by the LLM and supported by the provider. log_completions: Whether to log LLM completions to the state. + draft_editor: A more efficient LLM to use for file editing. Introduced in [PR 3985](https://github.com/All-Hands-AI/OpenHands/pull/3985). """ model: str = 'gpt-4o' @@ -71,6 +73,7 @@ class LLMConfig: disable_vision: bool | None = None caching_prompt: bool = True log_completions: bool = False + draft_editor: Optional['LLMConfig'] = None def defaults_to_dict(self) -> dict: """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional.""" @@ -113,4 +116,19 @@ class LLMConfig: for k, v in ret.items(): if k in LLM_SENSITIVE_FIELDS: ret[k] = '******' if v else None + elif isinstance(v, LLMConfig): + ret[k] = v.to_safe_dict() return ret + + @classmethod + def from_dict(cls, llm_config_dict: dict) -> 'LLMConfig': + """Create an LLMConfig object from a dictionary. + + This function is used to create an LLMConfig object from a dictionary, + with the exception of the 'draft_editor' key, which is a nested LLMConfig object. + """ + args = {k: v for k, v in llm_config_dict.items() if not isinstance(v, dict)} + if 'draft_editor' in llm_config_dict: + draft_editor_config = LLMConfig(**llm_config_dict['draft_editor']) + args['draft_editor'] = draft_editor_config + return cls(**args) diff --git a/openhands/core/config/utils.py b/openhands/core/config/utils.py index 15b64eb6d6..ddd8fcbd66 100644 --- a/openhands/core/config/utils.py +++ b/openhands/core/config/utils.py @@ -136,17 +136,14 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'): logger.openhands_logger.debug( 'Attempt to load default LLM config from config toml' ) - non_dict_fields = { - k: v for k, v in value.items() if not isinstance(v, dict) - } - llm_config = LLMConfig(**non_dict_fields) + llm_config = LLMConfig.from_dict(value) cfg.set_llm_config(llm_config, 'llm') for nested_key, nested_value in value.items(): if isinstance(nested_value, dict): logger.openhands_logger.debug( f'Attempt to load group {nested_key} from config toml as llm config' ) - llm_config = LLMConfig(**nested_value) + llm_config = LLMConfig.from_dict(nested_value) cfg.set_llm_config(llm_config, nested_key) elif not key.startswith('sandbox') and key.lower() != 'core': logger.openhands_logger.warning( @@ -272,7 +269,7 @@ def get_llm_config_arg( # update the llm config with the specified section if 'llm' in toml_config and llm_config_arg in toml_config['llm']: - return LLMConfig(**toml_config['llm'][llm_config_arg]) + return LLMConfig.from_dict(toml_config['llm'][llm_config_arg]) logger.openhands_logger.debug(f'Loading from toml failed for {llm_config_arg}') return None diff --git a/openhands/core/exceptions.py b/openhands/core/exceptions.py index 5722bb9570..c33297a0d2 100644 --- a/openhands/core/exceptions.py +++ b/openhands/core/exceptions.py @@ -52,8 +52,12 @@ class BrowserUnavailableException(Exception): # It might be malformed JSON class LLMMalformedActionError(Exception): def __init__(self, message='Malformed response'): + self.message = message super().__init__(message) + def __str__(self): + return self.message + # This exception gets sent back to the LLM # For some reason, the agent did not return an action diff --git a/openhands/core/logger.py b/openhands/core/logger.py index 3c4b8aa927..13b91e451e 100644 --- a/openhands/core/logger.py +++ b/openhands/core/logger.py @@ -119,11 +119,14 @@ class SensitiveDataFilter(logging.Filter): return True -def get_console_handler(log_level=logging.INFO): +def get_console_handler(log_level=logging.INFO, extra_info: str | None = None): """Returns a console handler for logging.""" console_handler = logging.StreamHandler() console_handler.setLevel(log_level) - console_handler.setFormatter(console_formatter) + formatter_str = '%(asctime)s - %(levelname)s - %(message)s' + if extra_info: + formatter_str = f'{extra_info} - ' + formatter_str + console_handler.setFormatter(logging.Formatter(formatter_str)) return console_handler diff --git a/openhands/core/schema/action.py b/openhands/core/schema/action.py index b2cd267e21..dc4cfe542e 100644 --- a/openhands/core/schema/action.py +++ b/openhands/core/schema/action.py @@ -24,6 +24,10 @@ class ActionTypeSchema(BaseModel): """Writes the content to a file. """ + EDIT: str = Field(default='edit') + """Edits a file by providing a draft. + """ + RUN: str = Field(default='run') """Runs a command. """ diff --git a/openhands/core/schema/observation.py b/openhands/core/schema/observation.py index 62f9503e82..622f2680f7 100644 --- a/openhands/core/schema/observation.py +++ b/openhands/core/schema/observation.py @@ -10,6 +10,8 @@ class ObservationTypeSchema(BaseModel): WRITE: str = Field(default='write') + EDIT: str = Field(default='edit') + BROWSE: str = Field(default='browse') """The HTML content of a URL """ diff --git a/openhands/core/utils/json.py b/openhands/core/utils/json.py index cec1fe8163..859cab1450 100644 --- a/openhands/core/utils/json.py +++ b/openhands/core/utils/json.py @@ -6,6 +6,7 @@ from json_repair import repair_json from openhands.core.exceptions import LLMResponseError from openhands.events.event import Event from openhands.events.serialization import event_to_dict +from openhands.llm.metrics import Metrics def my_default_encoder(obj): @@ -14,6 +15,8 @@ def my_default_encoder(obj): return obj.isoformat() if isinstance(obj, Event): return event_to_dict(obj) + if isinstance(obj, Metrics): + return obj.get() return json.JSONEncoder().default(obj) diff --git a/openhands/events/action/__init__.py b/openhands/events/action/__init__.py index 386d614c18..129cb30739 100644 --- a/openhands/events/action/__init__.py +++ b/openhands/events/action/__init__.py @@ -9,7 +9,11 @@ from openhands.events.action.agent import ( from openhands.events.action.browse import BrowseInteractiveAction, BrowseURLAction from openhands.events.action.commands import CmdRunAction, IPythonRunCellAction from openhands.events.action.empty import NullAction -from openhands.events.action.files import FileReadAction, FileWriteAction +from openhands.events.action.files import ( + FileEditAction, + FileReadAction, + FileWriteAction, +) from openhands.events.action.message import MessageAction from openhands.events.action.tasks import AddTaskAction, ModifyTaskAction @@ -21,6 +25,7 @@ __all__ = [ 'BrowseInteractiveAction', 'FileReadAction', 'FileWriteAction', + 'FileEditAction', 'AgentFinishAction', 'AgentRejectAction', 'AgentDelegateAction', diff --git a/openhands/events/action/files.py b/openhands/events/action/files.py index f323fc9181..3e2131228b 100644 --- a/openhands/events/action/files.py +++ b/openhands/events/action/files.py @@ -27,6 +27,11 @@ class FileReadAction(Action): @dataclass class FileWriteAction(Action): + """Writes a file to a given path. + Can be set to write specific lines using start and end + Default lines 0:-1 (whole file) + """ + path: str content: str start: int = 0 @@ -39,3 +44,31 @@ class FileWriteAction(Action): @property def message(self) -> str: return f'Writing file: {self.path}' + + +@dataclass +class FileEditAction(Action): + """Edits a file by provided a draft at a given path. + + Can be set to edit specific lines using start and end (1-index, inclusive) if the file is too long. + Default lines 1:-1 (whole file). + + If start is set to -1, the FileEditAction will simply append the content to the file. + """ + + path: str + content: str + start: int = 1 + end: int = -1 + thought: str = '' + action: str = ActionType.EDIT + runnable: ClassVar[bool] = True + security_risk: ActionSecurityRisk | None = None + + def __repr__(self) -> str: + ret = '**FileEditAction**\n' + ret += f'Thought: {self.thought}\n' + ret += f'Range: [L{self.start}:L{self.end}]\n' + ret += f'Path: [{self.path}]\n' + ret += f'Content:\n```\n{self.content}\n```\n' + return ret diff --git a/openhands/events/event.py b/openhands/events/event.py index 2464be7713..43a8840671 100644 --- a/openhands/events/event.py +++ b/openhands/events/event.py @@ -2,7 +2,7 @@ from dataclasses import dataclass from datetime import datetime from enum import Enum -from openhands.core.metrics import Metrics +from openhands.llm.metrics import Metrics class EventSource(str, Enum): diff --git a/openhands/events/observation/__init__.py b/openhands/events/observation/__init__.py index 4283af05be..a0fad86dfb 100644 --- a/openhands/events/observation/__init__.py +++ b/openhands/events/observation/__init__.py @@ -8,6 +8,7 @@ from openhands.events.observation.delegate import AgentDelegateObservation from openhands.events.observation.empty import NullObservation from openhands.events.observation.error import ErrorObservation, FatalErrorObservation from openhands.events.observation.files import ( + FileEditObservation, FileReadObservation, FileWriteObservation, ) @@ -23,6 +24,7 @@ __all__ = [ 'BrowserOutputObservation', 'FileReadObservation', 'FileWriteObservation', + 'FileEditObservation', 'ErrorObservation', 'FatalErrorObservation', 'AgentStateChangedObservation', diff --git a/openhands/events/observation/files.py b/openhands/events/observation/files.py index 8432393ff9..bfc45264cc 100644 --- a/openhands/events/observation/files.py +++ b/openhands/events/observation/files.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from difflib import SequenceMatcher from openhands.core.schema import ObservationType from openhands.events.observation.observation import Observation @@ -26,3 +27,107 @@ class FileWriteObservation(Observation): @property def message(self) -> str: return f'I wrote to the file {self.path}.' + + +@dataclass +class FileEditObservation(Observation): + """This data class represents a file edit operation""" + + # content: str will be a unified diff patch string include NO context lines + path: str + prev_exist: bool + old_content: str + new_content: str + observation: str = ObservationType.EDIT + + @property + def message(self) -> str: + return f'I edited the file {self.path}.' + + def get_edit_groups(self, n_context_lines: int = 2) -> list[dict[str, list[str]]]: + """Get the edit groups of the file edit.""" + old_lines = self.old_content.split('\n') + new_lines = self.new_content.split('\n') + # Borrowed from difflib.unified_diff to directly parse into structured format. + edit_groups: list[dict] = [] + for group in SequenceMatcher(None, old_lines, new_lines).get_grouped_opcodes( + n_context_lines + ): + # take the max line number in the group + _indent_pad_size = len(str(group[-1][3])) + 1 # +1 for the "*" prefix + cur_group: dict[str, list[str]] = { + 'before_edits': [], + 'after_edits': [], + } + for tag, i1, i2, j1, j2 in group: + if tag == 'equal': + for idx, line in enumerate(old_lines[i1:i2]): + cur_group['before_edits'].append( + f'{i1+idx+1:>{_indent_pad_size}}|{line}' + ) + for idx, line in enumerate(new_lines[j1:j2]): + cur_group['after_edits'].append( + f'{j1+idx+1:>{_indent_pad_size}}|{line}' + ) + continue + if tag in {'replace', 'delete'}: + for idx, line in enumerate(old_lines[i1:i2]): + cur_group['before_edits'].append( + f'-{i1+idx+1:>{_indent_pad_size-1}}|{line}' + ) + if tag in {'replace', 'insert'}: + for idx, line in enumerate(new_lines[j1:j2]): + cur_group['after_edits'].append( + f'+{j1+idx+1:>{_indent_pad_size-1}}|{line}' + ) + edit_groups.append(cur_group) + return edit_groups + + def visualize_diff( + self, + n_context_lines: int = 2, + change_applied: bool = True, + ) -> str: + """Visualize the diff of the file edit. + + Instead of showing the diff line by line, this function + shows each hunk of changes as a separate entity. + + Args: + n_context_lines: The number of lines of context to show before and after the changes. + change_applied: Whether the changes are applied to the file. If true, the file have been modified. If not, the file is not modified (due to linting errors). + """ + if change_applied and self.content.strip() == '': + # diff patch is empty + return '(no changes detected. Please make sure your edits changes the content of the existing file.)\n' + + edit_groups = self.get_edit_groups(n_context_lines=n_context_lines) + + result = [ + f'[Existing file {self.path} is edited with {len(edit_groups)} changes.]' + if change_applied + else f"[Changes are NOT applied to {self.path} - Here's how the file looks like if changes are applied.]" + ] + + op_type = 'edit' if change_applied else 'ATTEMPTED edit' + for i, cur_edit_group in enumerate(edit_groups): + if i != 0: + result.append('-------------------------') + result.append(f'[begin of {op_type} {i+1} / {len(edit_groups)}]') + result.append(f'(content before {op_type})') + result.extend(cur_edit_group['before_edits']) + result.append(f'(content after {op_type})') + result.extend(cur_edit_group['after_edits']) + result.append(f'[end of {op_type} {i+1} / {len(edit_groups)}]') + return '\n'.join(result) + + def __str__(self) -> str: + ret = '' + if not self.prev_exist: + assert ( + self.old_content == '' + ), 'old_content should be empty if the file is new (prev_exist=False).' + ret += f'[New file {self.path} is created with the provided content.]\n' + return ret.rstrip() + '\n' + ret += self.visualize_diff() + return ret.rstrip() + '\n' diff --git a/openhands/events/serialization/action.py b/openhands/events/serialization/action.py index f40e0ef0c6..4f6050172c 100644 --- a/openhands/events/serialization/action.py +++ b/openhands/events/serialization/action.py @@ -12,7 +12,11 @@ from openhands.events.action.commands import ( IPythonRunCellAction, ) from openhands.events.action.empty import NullAction -from openhands.events.action.files import FileReadAction, FileWriteAction +from openhands.events.action.files import ( + FileEditAction, + FileReadAction, + FileWriteAction, +) from openhands.events.action.message import MessageAction from openhands.events.action.tasks import AddTaskAction, ModifyTaskAction @@ -24,6 +28,7 @@ actions = ( BrowseInteractiveAction, FileReadAction, FileWriteAction, + FileEditAction, AgentFinishAction, AgentRejectAction, AgentDelegateAction, diff --git a/openhands/events/serialization/observation.py b/openhands/events/serialization/observation.py index 1ffbc645a2..9030ccb1e1 100644 --- a/openhands/events/serialization/observation.py +++ b/openhands/events/serialization/observation.py @@ -7,7 +7,11 @@ from openhands.events.observation.commands import ( from openhands.events.observation.delegate import AgentDelegateObservation from openhands.events.observation.empty import NullObservation from openhands.events.observation.error import ErrorObservation -from openhands.events.observation.files import FileReadObservation, FileWriteObservation +from openhands.events.observation.files import ( + FileEditObservation, + FileReadObservation, + FileWriteObservation, +) from openhands.events.observation.observation import Observation from openhands.events.observation.reject import UserRejectObservation from openhands.events.observation.success import SuccessObservation @@ -19,6 +23,7 @@ observations = ( BrowserOutputObservation, FileReadObservation, FileWriteObservation, + FileEditObservation, AgentDelegateObservation, SuccessObservation, ErrorObservation, diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py index d0a11faf22..698f4c4a86 100644 --- a/openhands/llm/llm.py +++ b/openhands/llm/llm.py @@ -24,8 +24,8 @@ from litellm.types.utils import CostPerToken, ModelResponse, Usage from openhands.core.exceptions import CloudFlareBlockageError from openhands.core.logger import openhands_logger as logger from openhands.core.message import Message -from openhands.core.metrics import Metrics from openhands.llm.debug_mixin import DebugMixin +from openhands.llm.metrics import Metrics from openhands.llm.retry_mixin import RetryMixin __all__ = ['LLM'] @@ -73,7 +73,9 @@ class LLM(RetryMixin, DebugMixin): config: The LLM configuration. metrics: The metrics to use. """ - self.metrics: Metrics = metrics if metrics is not None else Metrics() + self.metrics: Metrics = ( + metrics if metrics is not None else Metrics(model_name=config.model) + ) self.cost_metric_supported: bool = True self.config: LLMConfig = copy.deepcopy(config) @@ -396,7 +398,7 @@ class LLM(RetryMixin, DebugMixin): return str(self) def reset(self): - self.metrics = Metrics() + self.metrics.reset() self.llm_completions = [] def format_messages_for_llm(self, messages: Message | list[Message]) -> list[dict]: diff --git a/openhands/core/metrics.py b/openhands/llm/metrics.py similarity index 65% rename from openhands/core/metrics.py rename to openhands/llm/metrics.py index 9217d273ad..182d48d93b 100644 --- a/openhands/core/metrics.py +++ b/openhands/llm/metrics.py @@ -1,12 +1,24 @@ +import time + +from pydantic import BaseModel, Field + + +class Cost(BaseModel): + model: str + cost: float + timestamp: float = Field(default_factory=time.time) + + class Metrics: """Metrics class can record various metrics during running and evaluation. Currently, we define the following metrics: accumulated_cost: the total cost (USD $) of the current LLM. """ - def __init__(self) -> None: + def __init__(self, model_name: str = 'default') -> None: self._accumulated_cost: float = 0.0 - self._costs: list[float] = [] + self._costs: list[Cost] = [] + self.model_name = model_name @property def accumulated_cost(self) -> float: @@ -19,22 +31,29 @@ class Metrics: self._accumulated_cost = value @property - def costs(self) -> list: + def costs(self) -> list[Cost]: return self._costs def add_cost(self, value: float) -> None: if value < 0: raise ValueError('Added cost cannot be negative.') self._accumulated_cost += value - self._costs.append(value) + self._costs.append(Cost(cost=value, model=self.model_name)) def merge(self, other: 'Metrics') -> None: self._accumulated_cost += other.accumulated_cost self._costs += other._costs - def get(self): + def get(self) -> dict: """Return the metrics in a dictionary.""" - return {'accumulated_cost': self._accumulated_cost, 'costs': self._costs} + return { + 'accumulated_cost': self._accumulated_cost, + 'costs': [cost.model_dump() for cost in self._costs], + } + + def reset(self): + self._accumulated_cost = 0.0 + self._costs = [] def log(self): """Log the metrics.""" diff --git a/openhands/runtime/builder/remote.py b/openhands/runtime/builder/remote.py index 648cafead6..7f057d0597 100644 --- a/openhands/runtime/builder/remote.py +++ b/openhands/runtime/builder/remote.py @@ -98,7 +98,7 @@ class RemoteRuntimeBuilder(RuntimeBuilder): 'EXPIRED', ]: error_message = status_data.get( - 'error', f'Build failed with status: {status}' + 'error', f'Build failed with status: {status}. Build ID: {build_id}' ) logger.error(error_message) raise RuntimeError(error_message) diff --git a/openhands/runtime/client/client.py b/openhands/runtime/client/client.py index 0afbca1830..89fd230652 100644 --- a/openhands/runtime/client/client.py +++ b/openhands/runtime/client/client.py @@ -297,7 +297,8 @@ class RuntimeClient: self.pwd = os.path.expanduser(working_dir) # re-assemble the prompt - prompt = f'{other_info.strip()}\n{username}@{hostname}:{working_dir} ' + # ignore the hostname AND use 'openhands-workspace' + prompt = f'{other_info.strip()}\n{username}@openhands-workspace:{working_dir} ' if username == 'root': prompt += '#' else: @@ -504,7 +505,9 @@ class RuntimeClient: # NOTE: this is part of initialization, so we hard code the timeout result, exit_code = self._execute_bash('pwd', timeout=60, keep_prompt=False) if exit_code != 0: - raise RuntimeError('Failed to get working directory') + raise RuntimeError( + f'Failed to get working directory (exit code: {exit_code}): {result}' + ) return result.strip() def _resolve_path(self, path: str, working_dir: str) -> str: diff --git a/openhands/runtime/client/runtime.py b/openhands/runtime/client/runtime.py index 485b995fc5..61d9a7a490 100644 --- a/openhands/runtime/client/runtime.py +++ b/openhands/runtime/client/runtime.py @@ -17,6 +17,7 @@ from openhands.events.action import ( BrowseInteractiveAction, BrowseURLAction, CmdRunAction, + FileEditAction, FileReadAction, FileWriteAction, IPythonRunCellAction, @@ -429,6 +430,9 @@ class EventStreamRuntime(Runtime): self.docker_client.close() def run_action(self, action: Action) -> Observation: + if isinstance(action, FileEditAction): + return self.edit(action) + # set timeout to default if not set if action.timeout is None: action.timeout = self.config.sandbox.timeout diff --git a/openhands/runtime/plugins/agent_skills/file_ops/file_ops.py b/openhands/runtime/plugins/agent_skills/file_ops/file_ops.py index e30cdd78fd..b2e1b4c8aa 100644 --- a/openhands/runtime/plugins/agent_skills/file_ops/file_ops.py +++ b/openhands/runtime/plugins/agent_skills/file_ops/file_ops.py @@ -7,20 +7,12 @@ Functions: - goto_line(line_number: int): Moves the window to show the specified line number. - scroll_down(): Moves the window down by the number of lines specified in WINDOW. - scroll_up(): Moves the window up by the number of lines specified in WINDOW. -- create_file(filename: str): Creates and opens a new file with the given name. - search_dir(search_term: str, dir_path: str = './'): Searches for a term in all files in the specified directory. - search_file(search_term: str, file_path: str | None = None): Searches for a term in the specified file or the currently open file. - find_file(file_name: str, dir_path: str = './'): Finds all files with the given name in the specified directory. -- edit_file_by_replace(file_name: str, to_replace: str, new_content: str): Replaces specific content in a file with new content. -- insert_content_at_line(file_name: str, line_number: int, content: str): Inserts given content at the specified line number in a file. -- append_file(file_name: str, content: str): Appends the given content to the end of the specified file. """ import os -import re -import shutil -import tempfile -import uuid from openhands.linter import DefaultLinter, LintResult @@ -205,7 +197,7 @@ def open_file( output += _print_window( CURRENT_FILE, CURRENT_LINE, - _clamp(context_lines, 1, 300), + _clamp(context_lines, 1, 100), return_str=True, ignore_window=False, ) @@ -276,531 +268,10 @@ def scroll_up() -> None: print(output) -def create_file(filename: str) -> None: - """Creates and opens a new file with the given name. - - Args: - filename: str: The name of the file to create. - """ - if os.path.exists(filename): - _output_error(f"File '{filename}' already exists.") - return - - with open(filename, 'w') as file: - file.write('\n') - - open_file(filename) - print(f'[File {filename} created.]') - - class LineNumberError(Exception): pass -def _append_impl(lines, content): - """Internal method to handle appending to a file. - - Args: - lines: list[str]: The lines in the original file. - content: str: The content to append to the file. - - Returns: - content: str: The new content of the file. - n_added_lines: int: The number of lines added to the file. - """ - content_lines = content.splitlines(keepends=True) - n_added_lines = len(content_lines) - if lines and not (len(lines) == 1 and lines[0].strip() == ''): - # file is not empty - if not lines[-1].endswith('\n'): - lines[-1] += '\n' - new_lines = lines + content_lines - content = ''.join(new_lines) - else: - # file is empty - content = ''.join(content_lines) - - return content, n_added_lines - - -def _insert_impl(lines, start, content): - """Internal method to handle inserting to a file. - - Args: - lines: list[str]: The lines in the original file. - start: int: The start line number for inserting. - content: str: The content to insert to the file. - - Returns: - content: str: The new content of the file. - n_added_lines: int: The number of lines added to the file. - - Raises: - LineNumberError: If the start line number is invalid. - """ - inserted_lines = [content + '\n' if not content.endswith('\n') else content] - if len(lines) == 0: - new_lines = inserted_lines - elif start is not None: - if len(lines) == 1 and lines[0].strip() == '': - # if the file with only 1 line and that line is empty - lines = [] - - if len(lines) == 0: - new_lines = inserted_lines - else: - new_lines = lines[: start - 1] + inserted_lines + lines[start - 1 :] - else: - raise LineNumberError( - f'Invalid line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).' - ) - - content = ''.join(new_lines) - n_added_lines = len(inserted_lines) - return content, n_added_lines - - -def _edit_impl(lines, start, end, content): - """Internal method to handle editing a file. - - REQUIRES (should be checked by caller): - start <= end - start and end are between 1 and len(lines) (inclusive) - content ends with a newline - - Args: - lines: list[str]: The lines in the original file. - start: int: The start line number for editing. - end: int: The end line number for editing. - content: str: The content to replace the lines with. - - Returns: - content: str: The new content of the file. - n_added_lines: int: The number of lines added to the file. - """ - # Handle cases where start or end are None - if start is None: - start = 1 # Default to the beginning - if end is None: - end = len(lines) # Default to the end - # Check arguments - if not (1 <= start <= len(lines)): - raise LineNumberError( - f'Invalid start line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).' - ) - if not (1 <= end <= len(lines)): - raise LineNumberError( - f'Invalid end line number: {end}. Line numbers must be between 1 and {len(lines)} (inclusive).' - ) - if start > end: - raise LineNumberError( - f'Invalid line range: {start}-{end}. Start must be less than or equal to end.' - ) - - if not content.endswith('\n'): - content += '\n' - content_lines = content.splitlines(True) - n_added_lines = len(content_lines) - new_lines = lines[: start - 1] + content_lines + lines[end:] - content = ''.join(new_lines) - return content, n_added_lines - - -def _edit_file_impl( - file_name: str, - start: int | None = None, - end: int | None = None, - content: str = '', - is_insert: bool = False, - is_append: bool = False, -) -> str | None: - """Internal method to handle common logic for edit_/append_file methods. - - Args: - file_name: str: The name of the file to edit or append to. - start: int | None = None: The start line number for editing. Ignored if is_append is True. - end: int | None = None: The end line number for editing. Ignored if is_append is True. - content: str: The content to replace the lines with or to append. - is_insert: bool = False: Whether to insert content at the given line number instead of editing. - is_append: bool = False: Whether to append content to the file instead of editing. - """ - ret_str = '' - global CURRENT_FILE, CURRENT_LINE, WINDOW - - ERROR_MSG = f'[Error editing file {file_name}. Please confirm the file is correct.]' - ERROR_MSG_SUFFIX = ( - 'Your changes have NOT been applied. Please fix your edit command and try again.\n' - 'You either need to 1) Open the correct file and try again or 2) Specify the correct line number arguments.\n' - 'DO NOT re-run the same failed edit command. Running it again will lead to the same error.' - ) - - if not _is_valid_filename(file_name): - _output_error('Invalid file name.') - return None - - if not _is_valid_path(file_name): - _output_error('Invalid path or file name.') - return None - - if not _create_paths(file_name): - _output_error('Could not access or create directories.') - return None - - if not os.path.isfile(file_name): - _output_error(f'File {file_name} not found.') - return None - - if is_insert and is_append: - _output_error('Cannot insert and append at the same time.') - return None - - # Use a temporary file to write changes - content = str(content or '') - temp_file_path = '' - first_error_line = None - - try: - n_added_lines = None - - # lint the original file - enable_auto_lint = os.getenv('ENABLE_AUTO_LINT', 'false').lower() == 'true' - if enable_auto_lint: - # Copy the original file to a temporary file (with the same ext) and lint it - suffix = os.path.splitext(file_name)[1] - with tempfile.NamedTemporaryFile(suffix=suffix) as orig_file_clone: - shutil.copy2(file_name, orig_file_clone.name) - original_lint_error, _ = _lint_file(orig_file_clone.name) - - # Create a temporary file in the same directory as the original file - original_dir = os.path.dirname(file_name) - original_ext = os.path.splitext(file_name)[1] - temp_file_name = f'.temp_{uuid.uuid4().hex}{original_ext}' - temp_file_path = os.path.join(original_dir, temp_file_name) - - with open(temp_file_path, 'w') as temp_file: - # Read the original file and check if empty and for a trailing newline - with open(file_name) as original_file: - lines = original_file.readlines() - - if is_append: - content, n_added_lines = _append_impl(lines, content) - elif is_insert: - try: - content, n_added_lines = _insert_impl(lines, start, content) - except LineNumberError as e: - ret_str += (f'{ERROR_MSG}\n' f'{e}\n' f'{ERROR_MSG_SUFFIX}') + '\n' - return ret_str - else: - try: - content, n_added_lines = _edit_impl(lines, start, end, content) - except LineNumberError as e: - ret_str += (f'{ERROR_MSG}\n' f'{e}\n' f'{ERROR_MSG_SUFFIX}') + '\n' - return ret_str - - if not content.endswith('\n'): - content += '\n' - - # Write the new content to the temporary file - temp_file.write(content) - - # Replace the original file with the temporary file - os.replace(temp_file_path, file_name) - - # Handle linting - # NOTE: we need to get env var inside this function - # because the env var will be set AFTER the agentskills is imported - if enable_auto_lint: - # Generate a random temporary file path - suffix = os.path.splitext(file_name)[1] - with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tfile: - original_file_backup_path = tfile.name - - with open(original_file_backup_path, 'w') as f: - f.writelines(lines) - - file_name_abs = os.path.abspath(file_name) - lint_error, first_error_line = _lint_file(file_name_abs) - - # Select the errors caused by the modification - def extract_last_part(line): - parts = line.split(':') - if len(parts) > 1: - return parts[-1].strip() - return line.strip() - - def subtract_strings(str1, str2) -> str: - lines1 = str1.splitlines() - lines2 = str2.splitlines() - - last_parts1 = [extract_last_part(line) for line in lines1] - - remaining_lines = [ - line - for line in lines2 - if extract_last_part(line) not in last_parts1 - ] - - result = '\n'.join(remaining_lines) - return result - - if original_lint_error and lint_error: - lint_error = subtract_strings(original_lint_error, lint_error) - if lint_error == '': - lint_error = None - first_error_line = None - - if lint_error is not None: - if first_error_line is not None: - show_line = int(first_error_line) - elif is_append: - # original end-of-file - show_line = len(lines) - # insert OR edit WILL provide meaningful line numbers - elif start is not None and end is not None: - show_line = int((start + end) / 2) - else: - raise ValueError('Invalid state. This should never happen.') - - ret_str += LINTER_ERROR_MSG - ret_str += lint_error + '\n' - - editor_lines = n_added_lines + 20 - sep = '-' * 49 + '\n' - ret_str += ( - f'[This is how your edit would have looked if applied]\n{sep}' - ) - ret_str += ( - _print_window(file_name, show_line, editor_lines, return_str=True) - + '\n' - ) - ret_str += f'{sep}\n' - - ret_str += '[This is the original code before your edit]\n' - ret_str += sep - ret_str += ( - _print_window( - original_file_backup_path, - show_line, - editor_lines, - return_str=True, - ) - + '\n' - ) - ret_str += sep - ret_str += ( - 'Your changes have NOT been applied. Please fix your edit command and try again.\n' - 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' - 'DO NOT re-run the same failed edit command. Running it again will lead to the same error.' - ) - - # recover the original file - with open(original_file_backup_path) as fin, open( - file_name, 'w' - ) as fout: - fout.write(fin.read()) - - # Don't forget to remove the temporary file after you're done - os.unlink(original_file_backup_path) - return ret_str - - except FileNotFoundError as e: - ret_str += f'File not found: {e}\n' - except PermissionError as e: - ret_str += f'Permission error during file operation: {str(e)}\n' - except IOError as e: - ret_str += f'An error occurred while handling the file: {e}\n' - except ValueError as e: - ret_str += f'Invalid input: {e}\n' - except Exception as e: - # Clean up the temporary file if an error occurs - if temp_file_path and os.path.exists(temp_file_path): - os.remove(temp_file_path) - print(f'An unexpected error occurred: {e}') - raise e - - # Update the file information and print the updated content - with open(file_name, 'r', encoding='utf-8') as file: - n_total_lines = max(1, len(file.readlines())) - if first_error_line is not None and int(first_error_line) > 0: - CURRENT_LINE = first_error_line - else: - if is_append: - CURRENT_LINE = max(1, len(lines)) # end of original file - else: - CURRENT_LINE = start or n_total_lines or 1 - ret_str += f'[File: {os.path.abspath(file_name)} ({n_total_lines} lines total after edit)]\n' - CURRENT_FILE = file_name - ret_str += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True) + '\n' - ret_str += MSG_FILE_UPDATED.format(line_number=CURRENT_LINE) - return ret_str - - -def edit_file_by_replace(file_name: str, to_replace: str, new_content: str) -> None: - """Edit an existing file. This will search for non-empty `to_replace` in the given file and replace it with non-empty `new_content`. - `to_replace` and `new_content` must be different! Split large edits into multiple smaller edits if necessary! - Use `append_file` method for writing after `create_file`! - - Every *to_replace* must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc. - - Include enough lines to make code in `to_replace` unique. `to_replace` should NOT be empty. - - For example, given a file "/workspace/example.txt" with the following content: - ``` - line 1 - line 2 - line 2 - line 3 - ``` - - EDITING: If you want to replace the second occurrence of "line 2", you can make `to_replace` unique: - - edit_file_by_replace( - '/workspace/example.txt', - to_replace='line 2\nline 3', - new_content='new line\nline 3', - ) - - This will replace only the second "line 2" with "new line". The first "line 2" will remain unchanged. - - The resulting file will be: - ``` - line 1 - line 2 - new line - line 3 - ``` - - REMOVAL: If you want to remove "line 2" and "line 3", you can set `new_content` to an empty string: - - edit_file_by_replace( - '/workspace/example.txt', - to_replace='line 2\nline 3', - new_content='', - ) - - Args: - file_name: str: The name of the file to edit. - to_replace: str: The content to search for and replace. - new_content: str: The new content to replace the old content with. - """ - # FIXME: support replacing *all* occurrences - if to_replace is None or to_replace.strip() == '': - _output_error('`to_replace` must not be empty.') - return - - if to_replace == new_content: - _output_error('`to_replace` and `new_content` must be different.') - return - - if not os.path.isfile(file_name): - _output_error(f'File {file_name} not found.') - return None - - # search for `to_replace` in the file - # if found, replace it with `new_content` - # if not found, perform a fuzzy search to find the closest match and replace it with `new_content` - with open(file_name, 'r') as file: - file_content = file.read() - - if file_content.count(to_replace) > 1: - _output_error( - '`to_replace` appears more than once, please include enough lines to make code in `to_replace` unique.' - ) - return - - start = file_content.find(to_replace) - if start != -1: - # Convert start from index to line number - start_line_number = file_content[:start].count('\n') + 1 - end_line_number = start_line_number + len(to_replace.splitlines()) - 1 - else: - - def _fuzzy_transform(s: str) -> str: - # remove all space except newline - return re.sub(r'[^\S\n]+', '', s) - - # perform a fuzzy search (remove all spaces except newlines) - to_replace_fuzzy = _fuzzy_transform(to_replace) - file_content_fuzzy = _fuzzy_transform(file_content) - # find the closest match - start = file_content_fuzzy.find(to_replace_fuzzy) - if start == -1: - print( - f'[No exact match found in {file_name} for\n```\n{to_replace}\n```\n]' - ) - return - # Convert start from index to line number for fuzzy match - start_line_number = file_content_fuzzy[:start].count('\n') + 1 - end_line_number = start_line_number + len(to_replace.splitlines()) - 1 - - ret_str = _edit_file_impl( - file_name, - start=start_line_number, - end=end_line_number, - content=new_content, - is_insert=False, - ) - # lint_error = bool(LINTER_ERROR_MSG in ret_str) - # TODO: automatically tries to fix linter error (maybe involve some static analysis tools on the location near the edit to figure out indentation) - if ret_str is not None: - print(ret_str) - - -def insert_content_at_line(file_name: str, line_number: int, content: str) -> None: - """Insert content at the given line number in a file. - This will NOT modify the content of the lines before OR after the given line number. - - For example, if the file has the following content: - ``` - line 1 - line 2 - line 3 - ``` - and you call `insert_content_at_line('file.txt', 2, 'new line')`, the file will be updated to: - ``` - line 1 - new line - line 2 - line 3 - ``` - - Args: - file_name: str: The name of the file to edit. - line_number: int: The line number (starting from 1) to insert the content after. - content: str: The content to insert. - """ - ret_str = _edit_file_impl( - file_name, - start=line_number, - end=line_number, - content=content, - is_insert=True, - is_append=False, - ) - if ret_str is not None: - print(ret_str) - - -def append_file(file_name: str, content: str) -> None: - """Append content to the given file. - It appends text `content` to the end of the specified file, ideal after a `create_file`! - - Args: - file_name: str: The name of the file to edit. - content: str: The content to insert. - """ - ret_str = _edit_file_impl( - file_name, - start=None, - end=None, - content=content, - is_insert=False, - is_append=True, - ) - if ret_str is not None: - print(ret_str) - - def search_dir(search_term: str, dir_path: str = './') -> None: """Searches for search_term in all files in dir. If dir is not provided, searches in the current directory. @@ -904,10 +375,6 @@ __all__ = [ 'goto_line', 'scroll_down', 'scroll_up', - 'create_file', - 'edit_file_by_replace', - 'insert_content_at_line', - 'append_file', 'search_dir', 'search_file', 'find_file', diff --git a/openhands/runtime/remote/runtime.py b/openhands/runtime/remote/runtime.py index 8d1ee5e893..39134f5094 100644 --- a/openhands/runtime/remote/runtime.py +++ b/openhands/runtime/remote/runtime.py @@ -15,6 +15,7 @@ from openhands.events.action import ( BrowseInteractiveAction, BrowseURLAction, CmdRunAction, + FileEditAction, FileReadAction, FileWriteAction, IPythonRunCellAction, @@ -350,6 +351,8 @@ class RemoteRuntime(Runtime): def run_action(self, action: Action) -> Observation: if action.timeout is None: action.timeout = self.config.sandbox.timeout + if isinstance(action, FileEditAction): + return self.edit(action) with self.action_semaphore: if not action.runnable: return NullObservation('') diff --git a/openhands/runtime/runtime.py b/openhands/runtime/runtime.py index 44614ee0a3..2646c06768 100644 --- a/openhands/runtime/runtime.py +++ b/openhands/runtime/runtime.py @@ -28,6 +28,7 @@ from openhands.events.observation import ( ) from openhands.events.serialization.action import ACTION_TYPE_TO_CLASS from openhands.runtime.plugins import JupyterRequirement, PluginRequirement +from openhands.runtime.utils.edit import FileEditRuntimeMixin from openhands.utils.async_utils import call_sync_from_async @@ -42,7 +43,7 @@ def _default_env_vars(sandbox_config: SandboxConfig) -> dict[str, str]: return ret -class Runtime: +class Runtime(FileEditRuntimeMixin): """The runtime is how the agent interacts with the external environment. This includes a bash sandbox, a browser, and filesystem interactions. @@ -78,6 +79,9 @@ class Runtime: if env_vars is not None: self.initial_env_vars.update(env_vars) + # Load mixins + FileEditRuntimeMixin.__init__(self) + def setup_initial_env(self) -> None: if self.attach_to_existing: return diff --git a/openhands/runtime/utils/edit.py b/openhands/runtime/utils/edit.py new file mode 100644 index 0000000000..4ed5c0edaf --- /dev/null +++ b/openhands/runtime/utils/edit.py @@ -0,0 +1,350 @@ +import copy +import os +import re +import tempfile +from abc import ABC, abstractmethod + +from openhands.core.config import AppConfig +from openhands.core.logger import openhands_logger as logger +from openhands.events.action import ( + FileEditAction, + FileReadAction, + FileWriteAction, +) +from openhands.events.observation import ( + ErrorObservation, + FatalErrorObservation, + FileEditObservation, + FileReadObservation, + FileWriteObservation, + Observation, +) +from openhands.linter import DefaultLinter +from openhands.llm.llm import LLM +from openhands.llm.metrics import Metrics +from openhands.utils.chunk_localizer import Chunk, get_top_k_chunk_matches +from openhands.utils.diff import get_diff + +SYS_MSG = """Your job is to produce a new version of the file based on the old version and the +provided draft of the new version. The provided draft may be incomplete (it may skip lines) and/or incorrectly indented. You should try to apply the changes present in the draft to the old version, and output a new version of the file. +NOTE: +- The output file should be COMPLETE and CORRECTLY INDENTED. Do not omit any lines, and do not change any lines that are not part of the changes. +- You should output the new version of the file by wrapping the new version of the file content in a ``` block. +- If there's no explicit comment to remove the existing code, we should keep them and append the new code to the end of the file. +- If there's placeholder comments like `# no changes before` or `# no changes here`, we should replace these comments with the original code near the placeholder comments. +""" + +USER_MSG = """ +HERE IS THE OLD VERSION OF THE FILE: +``` +{old_contents} +``` + +HERE IS THE DRAFT OF THE NEW VERSION OF THE FILE: +``` +{draft_changes} +``` + +GIVE ME THE NEW VERSION OF THE FILE. +IMPORTANT: +- There should be NO placeholder comments like `# no changes before` or `# no changes here`. They should be replaced with the original code near the placeholder comments. +- The output file should be COMPLETE and CORRECTLY INDENTED. Do not omit any lines, and do not change any lines that are not part of the changes. +""".strip() + + +def _extract_code(string): + pattern = r'```(?:\w*\n)?(.*?)```' + matches = re.findall(pattern, string, re.DOTALL) + if not matches: + return None + return matches[0] + + +def get_new_file_contents( + llm: LLM, old_contents: str, draft_changes: str, num_retries: int = 3 +) -> str | None: + while num_retries > 0: + messages = [ + {'role': 'system', 'content': SYS_MSG}, + { + 'role': 'user', + 'content': USER_MSG.format( + old_contents=old_contents, draft_changes=draft_changes + ), + }, + ] + resp = llm.completion(messages=messages) + new_contents = _extract_code(resp['choices'][0]['message']['content']) + if new_contents is not None: + return new_contents + num_retries -= 1 + return None + + +class FileEditRuntimeInterface(ABC): + config: AppConfig + + @abstractmethod + def read(self, action: FileReadAction) -> Observation: + pass + + @abstractmethod + def write(self, action: FileWriteAction) -> Observation: + pass + + +class FileEditRuntimeMixin(FileEditRuntimeInterface): + # Most LLMs have output token limit of 4k tokens. + # This restricts the number of lines we can edit to avoid exceeding the token limit. + MAX_LINES_TO_EDIT = 300 + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + llm_config = self.config.get_llm_config() + + if llm_config.draft_editor is None: + llm_config.draft_editor = copy.deepcopy(llm_config) + + # manually set the model name for the draft editor LLM to distinguish token costs + llm_metrics = Metrics( + model_name='draft_editor:' + llm_config.draft_editor.model + ) + if llm_config.draft_editor.caching_prompt: + logger.info( + 'It is not recommended to cache draft editor LLM prompts as it may incur high costs for the same prompt. ' + 'Automatically setting caching_prompt=false.' + ) + llm_config.draft_editor.caching_prompt = False + + self.draft_editor_llm = LLM(llm_config.draft_editor, metrics=llm_metrics) + logger.info( + f'[Draft edit functionality] enabled with LLM: {self.draft_editor_llm}' + ) + + def _validate_range( + self, start: int, end: int, total_lines: int + ) -> Observation | None: + # start and end are 1-indexed and inclusive + if ( + (start < 1 and start != -1) + or start > total_lines + or (start > end and end != -1 and start != -1) + ): + return ErrorObservation( + f'Invalid range for editing: start={start}, end={end}, total lines={total_lines}. start must be >= 1 and <={total_lines} (total lines of the edited file), start <= end, or start == -1 (append to the end of the file).' + ) + if ( + (end < 1 and end != -1) + or end > total_lines + or (end < start and start != -1 and end != -1) + ): + return ErrorObservation( + f'Invalid range for editing: start={start}, end={end}, total lines={total_lines}. end must be >= 1 and <= {total_lines} (total lines of the edited file), end >= start, or end == -1 (to edit till the end of the file).' + ) + return None + + def _get_lint_error( + self, + suffix: str, + old_content: str, + new_content: str, + filepath: str, + diff: str, + ) -> ErrorObservation | None: + linter = DefaultLinter() + # Copy the original file to a temporary file (with the same ext) and lint it + with tempfile.NamedTemporaryFile( + suffix=suffix, mode='w+', encoding='utf-8' + ) as original_file_copy, tempfile.NamedTemporaryFile( + suffix=suffix, mode='w+', encoding='utf-8' + ) as updated_file_copy: + # Lint the original file + original_file_copy.write(old_content) + original_file_copy.flush() + + # Lint the updated file + updated_file_copy.write(new_content) + updated_file_copy.flush() + + updated_lint_error = linter.lint_file_diff( + original_file_copy.name, updated_file_copy.name + ) + + if len(updated_lint_error) > 0: + _obs = FileEditObservation( + content=diff, + path=filepath, + prev_exist=True, + old_content=old_content, + new_content=new_content, + ) + error_message = ( + ( + f'\n[Linting failed for edited file {filepath}. {len(updated_lint_error)} lint errors found.]\n' + '[begin attempted changes]\n' + f'{_obs.visualize_diff(change_applied=False)}\n' + '[end attempted changes]\n' + ) + + '-' * 40 + + '\n' + ) + error_message += '-' * 20 + 'First 5 lint errors' + '-' * 20 + '\n' + for i, lint_error in enumerate(updated_lint_error[:5]): + error_message += f'[begin lint error {i}]\n' + error_message += lint_error.visualize().strip() + '\n' + error_message += f'[end lint error {i}]\n' + error_message += '-' * 40 + '\n' + return ErrorObservation(error_message) + return None + + def edit(self, action: FileEditAction) -> Observation: + obs = self.read(FileReadAction(path=action.path)) + if ( + isinstance(obs, ErrorObservation) + and 'File not found'.lower() in obs.content.lower() + ): + logger.debug( + f'Agent attempted to edit a file that does not exist. Creating the file. Error msg: {obs.content}' + ) + # directly write the new content + obs = self.write( + FileWriteAction(path=action.path, content=action.content.strip()) + ) + if isinstance(obs, ErrorObservation): + return obs + if not isinstance(obs, FileWriteObservation): + return FatalErrorObservation( + f'Fatal Runtime in editing: Expected FileWriteObservation, got {type(obs)}: {str(obs)}' + ) + return FileEditObservation( + content=get_diff('', action.content, action.path), + path=action.path, + prev_exist=False, + old_content='', + new_content=action.content, + ) + if not isinstance(obs, FileReadObservation): + return FatalErrorObservation( + f'Fatal Runtime in editing: Expected FileReadObservation, got {type(obs)}: {str(obs)}' + ) + + original_file_content = obs.content + old_file_lines = original_file_content.split('\n') + # NOTE: start and end are 1-indexed + start = action.start + end = action.end + # validate the range + error = self._validate_range(start, end, len(old_file_lines)) + if error is not None: + return error + + # append to the end of the file + if start == -1: + updated_content = '\n'.join(old_file_lines + action.content.split('\n')) + diff = get_diff(original_file_content, updated_content, action.path) + # Lint the updated content + if self.config.sandbox.enable_auto_lint: + suffix = os.path.splitext(action.path)[1] + + error_obs = self._get_lint_error( + suffix, + original_file_content, + updated_content, + action.path, + diff, + ) + if error_obs is not None: + return error_obs + + obs = self.write(FileWriteAction(path=action.path, content=updated_content)) + return FileEditObservation( + content=diff, + path=action.path, + prev_exist=True, + old_content=original_file_content, + new_content=updated_content, + ) + + # Get the 0-indexed start and end + start_idx = start - 1 + if end != -1: + # remove 1 to make it 0-indexed + # then add 1 since the `end` is inclusive + end_idx = end - 1 + 1 + else: + # end == -1 means the user wants to edit till the end of the file + end_idx = len(old_file_lines) + + # Get the range of lines to edit - reject if too long + length_of_range = end_idx - start_idx + if length_of_range > self.MAX_LINES_TO_EDIT + 1: + error_msg = ( + f'[Edit error: The range of lines to edit is too long.]\n' + f'[The maximum number of lines allowed to edit at once is {self.MAX_LINES_TO_EDIT}. ' + f'Got (L{start_idx + 1}-L{end_idx}) {length_of_range} lines.]\n' # [start_idx, end_idx), so no need to + 1 + ) + # search for relevant ranges to hint the agent + topk_chunks: list[Chunk] = get_top_k_chunk_matches( + text=original_file_content, + query=action.content, # edit draft as query + k=3, + max_chunk_size=20, # lines + ) + error_msg += ( + 'Here are some snippets that maybe relevant to the provided edit.\n' + ) + for i, chunk in enumerate(topk_chunks): + error_msg += f'[begin relevant snippet {i+1}. Line range: L{chunk.line_range[0]}-L{chunk.line_range[1]}. Similarity: {chunk.normalized_lcs}]\n' + error_msg += f'[Browse around it via `open_file("{action.path}", {(chunk.line_range[0] + chunk.line_range[1]) // 2})`]\n' + error_msg += chunk.visualize() + '\n' + error_msg += f'[end relevant snippet {i+1}]\n' + error_msg += '-' * 40 + '\n' + + error_msg += 'Consider using `open_file` to explore around the relevant snippets if needed.\n' + error_msg += f'**IMPORTANT**: Please REDUCE the range of edits to less than {self.MAX_LINES_TO_EDIT} lines by setting `start` and `end` in the edit action (e.g. ``). ' + + return ErrorObservation(error_msg) + + content_to_edit = '\n'.join(old_file_lines[start_idx:end_idx]) + self.draft_editor_llm.reset() + _edited_content = get_new_file_contents( + self.draft_editor_llm, content_to_edit, action.content + ) + if _edited_content is None: + ret_err = ErrorObservation( + 'Failed to get new file contents. ' + 'Please try to reduce the number of edits and try again.' + ) + ret_err.llm_metrics = self.draft_editor_llm.metrics + return ret_err + + # piece the updated content with the unchanged content + updated_lines = ( + old_file_lines[:start_idx] + + _edited_content.split('\n') + + old_file_lines[end_idx:] + ) + updated_content = '\n'.join(updated_lines) + diff = get_diff(original_file_content, updated_content, action.path) + + # Lint the updated content + if self.config.sandbox.enable_auto_lint: + suffix = os.path.splitext(action.path)[1] + error_obs = self._get_lint_error( + suffix, original_file_content, updated_content, action.path, diff + ) + if error_obs is not None: + error_obs.llm_metrics = self.draft_editor_llm.metrics + return error_obs + + obs = self.write(FileWriteAction(path=action.path, content=updated_content)) + ret_obs = FileEditObservation( + content=diff, + path=action.path, + prev_exist=True, + old_content=original_file_content, + new_content=updated_content, + ) + ret_obs.llm_metrics = self.draft_editor_llm.metrics + return ret_obs diff --git a/openhands/runtime/utils/tenacity_stop.py b/openhands/runtime/utils/tenacity_stop.py new file mode 100644 index 0000000000..e4b6345477 --- /dev/null +++ b/openhands/runtime/utils/tenacity_stop.py @@ -0,0 +1,12 @@ + + +from tenacity import RetryCallState +from tenacity.stop import stop_base +from openhands.runtime.utils.shutdown_listener import should_exit + + +class stop_if_should_exit(stop_base): + """Stop if the should_exit flag is set.""" + + def __call__(self, retry_state: "RetryCallState") -> bool: + return should_exit() diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py index 8b9373b4b6..bbb568ee48 100644 --- a/openhands/server/session/agent_session.py +++ b/openhands/server/session/agent_session.py @@ -224,13 +224,23 @@ class AgentSession: 'Runtime must be initialized before the agent controller' ) - logger.info( + msg = ( '\n--------------------------------- OpenHands Configuration ---------------------------------\n' f'LLM: {agent.llm.config.model}\n' f'Base URL: {agent.llm.config.base_url}\n' + ) + if agent.llm.config.draft_editor: + msg += ( + f'Draft editor LLM (for file editing): {agent.llm.config.draft_editor.model}\n' + f'Draft editor LLM (for file editing) Base URL: {agent.llm.config.draft_editor.base_url}\n' + ) + msg += ( f'Agent: {agent.name}\n' + f'Runtime: {self.runtime.__class__.__name__}\n' + f'Plugins: {agent.sandbox_plugins}\n' '-------------------------------------------------------------------------------------------' ) + logger.info(msg) self.controller = AgentController( sid=self.sid, diff --git a/openhands/utils/chunk_localizer.py b/openhands/utils/chunk_localizer.py new file mode 100644 index 0000000000..ffceaee3d9 --- /dev/null +++ b/openhands/utils/chunk_localizer.py @@ -0,0 +1,97 @@ +"""Chunk localizer to help localize the most relevant chunks in a file. + +This is primarily used to localize the most relevant chunks in a file +for a given query (e.g. edit draft produced by the agent). +""" + +import pylcs +from pydantic import BaseModel +from tree_sitter_languages import get_parser + + +class Chunk(BaseModel): + text: str + line_range: tuple[int, int] # (start_line, end_line), 1-index, inclusive + normalized_lcs: float | None = None + + def visualize(self) -> str: + lines = self.text.split('\n') + assert len(lines) == self.line_range[1] - self.line_range[0] + 1 + ret = '' + for i, line in enumerate(lines): + ret += f'{self.line_range[0] + i}|{line}\n' + return ret + + +def _create_chunks_from_raw_string(content: str, size: int): + lines = content.split('\n') + ret = [] + for i in range(0, len(lines), size): + _cur_lines = lines[i : i + size] + ret.append( + Chunk( + text='\n'.join(_cur_lines), + line_range=(i + 1, i + len(_cur_lines)), + ) + ) + return ret + + +def create_chunks( + text: str, size: int = 100, language: str | None = None +) -> list[Chunk]: + try: + parser = get_parser(language) if language is not None else None + except AttributeError: + # print(f"Language {language} not supported. Falling back to raw string.") + parser = None + + if parser is None: + # fallback to raw string + return _create_chunks_from_raw_string(text, size) + + # TODO: implement tree-sitter chunking + # return _create_chunks_from_tree_sitter(parser.parse(bytes(text, 'utf-8')), max_chunk_lines=size) + raise NotImplementedError('Tree-sitter chunking not implemented yet.') + + +def normalized_lcs(chunk: str, query: str) -> float: + """Calculate the normalized Longest Common Subsequence (LCS) to compare file chunk with the query (e.g. edit draft). + + We normalize Longest Common Subsequence (LCS) by the length of the chunk + to check how **much** of the chunk is covered by the query. + """ + if len(chunk) == 0: + return 0.0 + _score = pylcs.lcs_sequence_length(chunk, query) + return _score / len(chunk) + + +def get_top_k_chunk_matches( + text: str, query: str, k: int = 3, max_chunk_size: int = 100 +) -> list[Chunk]: + """Get the top k chunks in the text that match the query. + + The query could be a string of draft code edits. + + Args: + text: The text to search for the query. + query: The query to search for in the text. + k: The number of top chunks to return. + max_chunk_size: The maximum number of lines in a chunk. + """ + raw_chunks = create_chunks(text, max_chunk_size) + chunks_with_lcs: list[Chunk] = [ + Chunk( + text=chunk.text, + line_range=chunk.line_range, + normalized_lcs=normalized_lcs(chunk.text, query), + ) + for chunk in raw_chunks + ] + sorted_chunks = sorted( + chunks_with_lcs, + key=lambda x: x.normalized_lcs, # type: ignore + reverse=True, + ) + return sorted_chunks[:k] diff --git a/poetry.lock b/poetry.lock index d3a199ad92..199584e8c1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -6497,6 +6497,20 @@ files = [ [package.dependencies] pyasn1 = ">=0.4.6,<0.7.0" +[[package]] +name = "pybind11" +version = "2.13.6" +description = "Seamless operability between C++11 and Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pybind11-2.13.6-py3-none-any.whl", hash = "sha256:237c41e29157b962835d356b370ededd57594a26d5894a795960f0047cb5caf5"}, + {file = "pybind11-2.13.6.tar.gz", hash = "sha256:ba6af10348c12b24e92fa086b39cfba0eff619b61ac77c406167d813b096d39a"}, +] + +[package.extras] +global = ["pybind11-global (==2.13.6)"] + [[package]] name = "pycodestyle" version = "2.12.1" @@ -6775,6 +6789,26 @@ files = [ {file = "pylatexenc-2.10.tar.gz", hash = "sha256:3dd8fd84eb46dc30bee1e23eaab8d8fb5a7f507347b23e5f38ad9675c84f40d3"}, ] +[[package]] +name = "pylcs" +version = "0.1.1" +description = "super fast cpp implementation of longest common subsequence" +optional = false +python-versions = "*" +files = [ + {file = "pylcs-0.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:7b8adea6b41dff27332c967533ec3c42a5e94171be778d6f01f0c5cee82e7604"}, + {file = "pylcs-0.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:9ff06e037c54056cb67d6ef5ad946c0360afeff7d43be67ce09e55201ecc15cc"}, + {file = "pylcs-0.1.1-cp35-cp35m-win_amd64.whl", hash = "sha256:d2ebf340aa180d841939d9ec1168dfd072992dda1d48148ceb07b65b1ab62ffa"}, + {file = "pylcs-0.1.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b6c43b63e20048f8fec7e122fbc08c238940a0ee5302bf84a70db22c7f8cc836"}, + {file = "pylcs-0.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:db52d55cfdf813af974bcc164aedbd29274da83086877bf05778aa7fbf777f7f"}, + {file = "pylcs-0.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:954495f1c164ccb722b835e7028783f8a38d85ed5f6ff7b9d50143896c6cff9b"}, + {file = "pylcs-0.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:0f4c82fad8c0429abef9e98fb98904459c4f5f9fb9b6ce20e0df0841a6a48a54"}, + {file = "pylcs-0.1.1.tar.gz", hash = "sha256:632c69235d77cda0ba524d82796878801d2f46131fc59e730c98767fc4ce1307"}, +] + +[package.dependencies] +pybind11 = ">=2.2" + [[package]] name = "pyparsing" version = "3.2.0" @@ -10031,4 +10065,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "7fc51225767e3a98147f7b0dacdce4486a1afd83dc3273f06fd9f6cdc35d1860" +content-hash = "6198aa7d5c9d6e172d9f6cb3aff58006ab0e38fd1ce34be8a2d7696bf2a28fb9" diff --git a/pyproject.toml b/pyproject.toml index de7640f308..8b30b440b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ python-pptx = "*" pylatexenc = "*" tornado = "*" python-dotenv = "*" +pylcs = "^0.1.1" whatthepatch = "^1.0.6" protobuf = "^4.21.6,<5.0.0" # chromadb currently fails on 5.0+ opentelemetry-api = "1.25.0" @@ -88,6 +89,7 @@ reportlab = "*" [tool.coverage.run] concurrency = ["gevent"] + [tool.poetry.group.runtime.dependencies] jupyterlab = "*" notebook = "*" @@ -118,6 +120,7 @@ ignore = ["D1"] [tool.ruff.lint.pydocstyle] convention = "google" + [tool.poetry.group.evaluation.dependencies] streamlit = "*" whatthepatch = "*" diff --git a/tests/runtime/test_edit.py b/tests/runtime/test_edit.py new file mode 100644 index 0000000000..27c3a265ca --- /dev/null +++ b/tests/runtime/test_edit.py @@ -0,0 +1,417 @@ +"""Edit-related tests for the EventStreamRuntime.""" + +import os + +import pytest +from conftest import ( + TEST_IN_CI, + _close_test_runtime, + _load_runtime, +) + +from openhands.core.logger import openhands_logger as logger +from openhands.events.action import FileEditAction, FileReadAction +from openhands.events.observation import FileEditObservation +from openhands.utils.diff import get_diff + +ORGINAL = """from flask import Flask +app = Flask(__name__) + +@app.route('/') +def index(): + numbers = list(range(1, 11)) + return str(numbers) + +if __name__ == '__main__': + app.run(port=5000) +""" + + +@pytest.mark.skipif( + TEST_IN_CI != 'True', + reason='This test requires LLM to run.', +) +def test_edit_from_scratch(temp_dir, box_class, run_as_openhands): + runtime = _load_runtime(temp_dir, box_class, run_as_openhands) + try: + action = FileEditAction( + content=ORGINAL, + start=-1, + path=os.path.join('/workspace', 'app.py'), + ) + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + + assert isinstance( + obs, FileEditObservation + ), 'The observation should be a FileEditObservation.' + + action = FileReadAction( + path=os.path.join('/workspace', 'app.py'), + ) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.content.strip() == ORGINAL.strip() + + finally: + _close_test_runtime(runtime) + + +EDIT = """# above stays the same +@app.route('/') +def index(): + numbers = list(range(1, 11)) + return '' + ''.join([f'' for i in numbers]) + '
{i}
' +# below stays the same +""" + + +@pytest.mark.skipif( + TEST_IN_CI != 'True', + reason='This test requires LLM to run.', +) +def test_edit(temp_dir, box_class, run_as_openhands): + runtime = _load_runtime(temp_dir, box_class, run_as_openhands) + try: + action = FileEditAction( + content=ORGINAL, + path=os.path.join('/workspace', 'app.py'), + ) + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + + assert isinstance( + obs, FileEditObservation + ), 'The observation should be a FileEditObservation.' + + action = FileReadAction( + path=os.path.join('/workspace', 'app.py'), + ) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.content.strip() == ORGINAL.strip() + + action = FileEditAction( + content=EDIT, + path=os.path.join('/workspace', 'app.py'), + ) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert ( + obs.content.strip() + == ( + '--- /workspace/app.py\n' + '+++ /workspace/app.py\n' + '@@ -4,7 +4,7 @@\n' + " @app.route('/')\n" + ' def index():\n' + ' numbers = list(range(1, 11))\n' + '- return str(numbers)\n' + "+ return '' + ''.join([f'' for i in numbers]) + '
{i}
'\n" + '\n' + " if __name__ == '__main__':\n" + ' app.run(port=5000)\n' + ).strip() + ) + finally: + _close_test_runtime(runtime) + + +ORIGINAL_LONG = '\n'.join([f'This is line {i}' for i in range(1, 1000)]) +EDIT_LONG = """ +This is line 100 + 10 +This is line 101 + 10 +""" + + +@pytest.mark.skipif( + TEST_IN_CI != 'True', + reason='This test requires LLM to run.', +) +def test_edit_long_file(temp_dir, box_class, run_as_openhands): + runtime = _load_runtime(temp_dir, box_class, run_as_openhands) + try: + action = FileEditAction( + content=ORIGINAL_LONG, + path=os.path.join('/workspace', 'app.py'), + start=-1, + ) + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + + assert isinstance( + obs, FileEditObservation + ), 'The observation should be a FileEditObservation.' + + action = FileReadAction( + path=os.path.join('/workspace', 'app.py'), + ) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.content.strip() == ORIGINAL_LONG.strip() + + action = FileEditAction( + content=EDIT_LONG, + path=os.path.join('/workspace', 'app.py'), + start=100, + end=200, + ) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert ( + obs.content.strip() + == ( + '--- /workspace/app.py\n' + '+++ /workspace/app.py\n' + '@@ -97,8 +97,8 @@\n' + ' This is line 97\n' + ' This is line 98\n' + ' This is line 99\n' + '-This is line 100\n' + '-This is line 101\n' + '+This is line 100 + 10\n' + '+This is line 101 + 10\n' + ' This is line 102\n' + ' This is line 103\n' + ' This is line 104\n' + ).strip() + ) + finally: + _close_test_runtime(runtime) + + +# ====================================================================================== +# Test FileEditObservation (things that are displayed to the agent) +# ====================================================================================== + + +def test_edit_obs_insert_only(): + EDIT_LONG_INSERT_ONLY = ( + '\n'.join([f'This is line {i}' for i in range(1, 100)]) + + EDIT_LONG + + '\n'.join([f'This is line {i}' for i in range(100, 1000)]) + ) + + diff = get_diff(ORIGINAL_LONG, EDIT_LONG_INSERT_ONLY, '/workspace/app.py') + obs = FileEditObservation( + content=diff, + path='/workspace/app.py', + prev_exist=True, + old_content=ORIGINAL_LONG, + new_content=EDIT_LONG_INSERT_ONLY, + ) + assert ( + str(obs).strip() + == """ +[Existing file /workspace/app.py is edited with 1 changes.] +[begin of edit 1 / 1] +(content before edit) + 98|This is line 98 + 99|This is line 99 + 100|This is line 100 + 101|This is line 101 +(content after edit) + 98|This is line 98 + 99|This is line 99 ++100|This is line 100 + 10 ++101|This is line 101 + 10 + 102|This is line 100 + 103|This is line 101 +[end of edit 1 / 1] +""".strip() + ) + + +def test_edit_obs_replace(): + _new_content = ( + '\n'.join([f'This is line {i}' for i in range(1, 100)]) + + EDIT_LONG + + '\n'.join([f'This is line {i}' for i in range(102, 1000)]) + ) + + diff = get_diff(ORIGINAL_LONG, _new_content, '/workspace/app.py') + obs = FileEditObservation( + content=diff, + path='/workspace/app.py', + prev_exist=True, + old_content=ORIGINAL_LONG, + new_content=_new_content, + ) + print(str(obs)) + assert ( + str(obs).strip() + == """ +[Existing file /workspace/app.py is edited with 1 changes.] +[begin of edit 1 / 1] +(content before edit) + 98|This is line 98 + 99|This is line 99 +-100|This is line 100 +-101|This is line 101 + 102|This is line 102 + 103|This is line 103 +(content after edit) + 98|This is line 98 + 99|This is line 99 ++100|This is line 100 + 10 ++101|This is line 101 + 10 + 102|This is line 102 + 103|This is line 103 +[end of edit 1 / 1] +""".strip() + ) + + +def test_edit_obs_replace_with_empty_line(): + _new_content = ( + '\n'.join([f'This is line {i}' for i in range(1, 100)]) + + '\n' + + EDIT_LONG + + '\n'.join([f'This is line {i}' for i in range(102, 1000)]) + ) + + diff = get_diff(ORIGINAL_LONG, _new_content, '/workspace/app.py') + obs = FileEditObservation( + content=diff, + path='/workspace/app.py', + prev_exist=True, + old_content=ORIGINAL_LONG, + new_content=_new_content, + ) + print(str(obs)) + assert ( + str(obs).strip() + == """ +[Existing file /workspace/app.py is edited with 1 changes.] +[begin of edit 1 / 1] +(content before edit) + 98|This is line 98 + 99|This is line 99 +-100|This is line 100 +-101|This is line 101 + 102|This is line 102 + 103|This is line 103 +(content after edit) + 98|This is line 98 + 99|This is line 99 ++100| ++101|This is line 100 + 10 ++102|This is line 101 + 10 + 103|This is line 102 + 104|This is line 103 +[end of edit 1 / 1] +""".strip() + ) + + +def test_edit_obs_multiple_edits(): + _new_content = ( + '\n'.join([f'This is line {i}' for i in range(1, 50)]) + + '\nbalabala\n' + + '\n'.join([f'This is line {i}' for i in range(50, 100)]) + + EDIT_LONG + + '\n'.join([f'This is line {i}' for i in range(102, 1000)]) + ) + + diff = get_diff(ORIGINAL_LONG, _new_content, '/workspace/app.py') + obs = FileEditObservation( + content=diff, + path='/workspace/app.py', + prev_exist=True, + old_content=ORIGINAL_LONG, + new_content=_new_content, + ) + assert ( + str(obs).strip() + == """ +[Existing file /workspace/app.py is edited with 2 changes.] +[begin of edit 1 / 2] +(content before edit) + 48|This is line 48 + 49|This is line 49 + 50|This is line 50 + 51|This is line 51 +(content after edit) + 48|This is line 48 + 49|This is line 49 ++50|balabala + 51|This is line 50 + 52|This is line 51 +[end of edit 1 / 2] +------------------------- +[begin of edit 2 / 2] +(content before edit) + 98|This is line 98 + 99|This is line 99 +-100|This is line 100 +-101|This is line 101 + 102|This is line 102 + 103|This is line 103 +(content after edit) + 99|This is line 98 + 100|This is line 99 ++101|This is line 100 + 10 ++102|This is line 101 + 10 + 103|This is line 102 + 104|This is line 103 +[end of edit 2 / 2] +""".strip() + ) + + +def test_edit_visualize_failed_edit(): + _new_content = ( + '\n'.join([f'This is line {i}' for i in range(1, 50)]) + + '\nbalabala\n' + + '\n'.join([f'This is line {i}' for i in range(50, 100)]) + + EDIT_LONG + + '\n'.join([f'This is line {i}' for i in range(102, 1000)]) + ) + + diff = get_diff(ORIGINAL_LONG, _new_content, '/workspace/app.py') + obs = FileEditObservation( + content=diff, + path='/workspace/app.py', + prev_exist=True, + old_content=ORIGINAL_LONG, + new_content=_new_content, + ) + assert ( + obs.visualize_diff(change_applied=False).strip() + == """ +[Changes are NOT applied to /workspace/app.py - Here's how the file looks like if changes are applied.] +[begin of ATTEMPTED edit 1 / 2] +(content before ATTEMPTED edit) + 48|This is line 48 + 49|This is line 49 + 50|This is line 50 + 51|This is line 51 +(content after ATTEMPTED edit) + 48|This is line 48 + 49|This is line 49 ++50|balabala + 51|This is line 50 + 52|This is line 51 +[end of ATTEMPTED edit 1 / 2] +------------------------- +[begin of ATTEMPTED edit 2 / 2] +(content before ATTEMPTED edit) + 98|This is line 98 + 99|This is line 99 +-100|This is line 100 +-101|This is line 101 + 102|This is line 102 + 103|This is line 103 +(content after ATTEMPTED edit) + 99|This is line 98 + 100|This is line 99 ++101|This is line 100 + 10 ++102|This is line 101 + 10 + 103|This is line 102 + 104|This is line 103 +[end of ATTEMPTED edit 2 / 2] +""".strip() + ) diff --git a/tests/runtime/test_ipython.py b/tests/runtime/test_ipython.py index b62a56f371..0d13fda2c3 100644 --- a/tests/runtime/test_ipython.py +++ b/tests/runtime/test_ipython.py @@ -22,7 +22,6 @@ from openhands.events.observation import ( FileWriteObservation, IPythonRunCellObservation, ) -from openhands.runtime.client.runtime import EventStreamRuntime # ============================================================================================================================ # ipython-specific tests @@ -199,215 +198,6 @@ def test_ipython_simple(temp_dir, box_class): _close_test_runtime(runtime) -def _test_ipython_agentskills_fileop_pwd_impl( - runtime: EventStreamRuntime, enable_auto_lint: bool -): - sandbox_dir = _get_sandbox_folder(runtime) - # remove everything in /workspace - action = CmdRunAction(command=f'rm -rf {sandbox_dir}/*') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert obs.exit_code == 0 - - action = CmdRunAction(command='mkdir test') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - - action = IPythonRunCellAction(code="create_file('hello.py')") - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, IPythonRunCellObservation) - assert obs.content.replace('\r\n', '\n').strip().split('\n') == ( - f'[File: {sandbox_dir}/hello.py (1 lines total)]\n' - '(this is the beginning of the file)\n' - '1|\n' - '(this is the end of the file)\n' - '[File hello.py created.]\n' - f'[Jupyter current working directory: {sandbox_dir}]\n' - '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]' - ).strip().split('\n') - - action = CmdRunAction(command='cd test') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - - # This should create a file in the current working directory - # i.e., /workspace/test/hello.py instead of /workspace/hello.py - action = IPythonRunCellAction(code="create_file('hello.py')") - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, IPythonRunCellObservation) - assert obs.content.replace('\r\n', '\n').strip().split('\n') == ( - f'[File: {sandbox_dir}/test/hello.py (1 lines total)]\n' - '(this is the beginning of the file)\n' - '1|\n' - '(this is the end of the file)\n' - '[File hello.py created.]\n' - f'[Jupyter current working directory: {sandbox_dir}/test]\n' - '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]' - ).strip().split('\n') - - if enable_auto_lint: - # edit file, but make a mistake in indentation - action = IPythonRunCellAction( - code="insert_content_at_line('hello.py', 1, ' print(\"hello world\")')" - ) - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, IPythonRunCellObservation) - assert obs.content.replace('\r\n', '\n').strip().split('\n') == ( - f""" -[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.] -ERRORS: -{sandbox_dir}/test/hello.py:1:3: E999 IndentationError: unexpected indent -[This is how your edit would have looked if applied] -------------------------------------------------- -(this is the beginning of the file) -1| print("hello world") -(this is the end of the file) -------------------------------------------------- - -[This is the original code before your edit] -------------------------------------------------- -(this is the beginning of the file) -1| -(this is the end of the file) -------------------------------------------------- -Your changes have NOT been applied. Please fix your edit command and try again. -You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code. -DO NOT re-run the same failed edit command. Running it again will lead to the same error. -[Jupyter current working directory: {sandbox_dir}/test] -[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python] -""" - ).strip().split('\n') - - # edit file with correct indentation - action = IPythonRunCellAction( - code="insert_content_at_line('hello.py', 1, 'print(\"hello world\")')" - ) - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, IPythonRunCellObservation) - assert obs.content.replace('\r\n', '\n').strip().split('\n') == ( - f""" -[File: {sandbox_dir}/test/hello.py (1 lines total after edit)] -(this is the beginning of the file) -1|print("hello world") -(this is the end of the file) -[File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] -[Jupyter current working directory: {sandbox_dir}/test] -[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python] -""" - ).strip().split('\n') - - action = CmdRunAction(command=f'rm -rf {sandbox_dir}/*') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert obs.exit_code == 0 - - -def test_ipython_agentskills_fileop_pwd_with_lint( - temp_dir, box_class, run_as_openhands -): - runtime = _load_runtime( - temp_dir, box_class, run_as_openhands, enable_auto_lint=True - ) - _test_ipython_agentskills_fileop_pwd_impl(runtime, True) - - _close_test_runtime(runtime) - - -def test_ipython_agentskills_fileop_pwd_without_lint( - temp_dir, box_class, run_as_openhands -): - runtime = _load_runtime( - temp_dir, box_class, run_as_openhands, enable_auto_lint=False - ) - _test_ipython_agentskills_fileop_pwd_impl(runtime, False) - - _close_test_runtime(runtime) - - -def test_ipython_agentskills_fileop_pwd_with_userdir(temp_dir, box_class): - """Make sure that cd in bash also update the current working directory in ipython. - - Handle special case where the pwd is provided as "~", which should be expanded using os.path.expanduser - on the client side. - """ - - runtime = _load_runtime( - temp_dir, - box_class, - run_as_openhands=False, - ) - - action = CmdRunAction(command='cd ~') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert obs.exit_code == 0 - - action = CmdRunAction(command='mkdir test && ls -la') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - - action = IPythonRunCellAction(code="create_file('hello.py')") - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, IPythonRunCellObservation) - assert obs.content.replace('\r\n', '\n').strip().split('\n') == ( - '[File: /root/hello.py (1 lines total)]\n' - '(this is the beginning of the file)\n' - '1|\n' - '(this is the end of the file)\n' - '[File hello.py created.]\n' - '[Jupyter current working directory: /root]\n' - '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]' - ).strip().split('\n') - - action = CmdRunAction(command='cd test') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - - # This should create a file in the current working directory - # i.e., /workspace/test/hello.py instead of /workspace/hello.py - action = IPythonRunCellAction(code="create_file('hello.py')") - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, IPythonRunCellObservation) - assert obs.content.replace('\r\n', '\n').strip().split('\n') == ( - '[File: /root/test/hello.py (1 lines total)]\n' - '(this is the beginning of the file)\n' - '1|\n' - '(this is the end of the file)\n' - '[File hello.py created.]\n' - '[Jupyter current working directory: /root/test]\n' - '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]' - ).strip().split('\n') - - _close_test_runtime(runtime) - - def test_ipython_package_install(temp_dir, box_class, run_as_openhands): """Make sure that cd in bash also update the current working directory in ipython.""" runtime = _load_runtime(temp_dir, box_class, run_as_openhands) diff --git a/tests/unit/test_agent_skill.py b/tests/unit/test_agent_skill.py index ae295b0ddf..63745f4dd2 100644 --- a/tests/unit/test_agent_skill.py +++ b/tests/unit/test_agent_skill.py @@ -1,22 +1,15 @@ import contextlib import io -import os import sys -from unittest.mock import patch import docx import pytest from openhands.runtime.plugins.agent_skills.file_ops.file_ops import ( - MSG_FILE_UPDATED, WINDOW, _print_window, - append_file, - create_file, - edit_file_by_replace, find_file, goto_line, - insert_content_at_line, open_file, scroll_down, scroll_up, @@ -182,28 +175,6 @@ def test_open_file_long_with_lineno(tmp_path): assert result.split('\n') == expected.split('\n') -def test_create_file_unexist_path(): - with pytest.raises(FileNotFoundError): - create_file('/unexist/path/a.txt') - - -def test_create_file(tmp_path): - temp_file_path = tmp_path / 'a.txt' - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - create_file(str(temp_file_path)) - result = buf.getvalue() - - expected = ( - f'[File: {temp_file_path} (1 lines total)]\n' - '(this is the beginning of the file)\n' - '1|\n' - '(this is the end of the file)\n' - f'[File {temp_file_path} created.]\n' - ) - assert result.split('\n') == expected.split('\n') - - def test_goto_line(tmp_path): temp_file_path = tmp_path / 'a.txt' total_lines = 1000 @@ -405,7 +376,7 @@ def test_scroll_down_edge(tmp_path): def test_print_window_internal(tmp_path): test_file_path = tmp_path / 'a.txt' - create_file(str(test_file_path)) + test_file_path.write_text('') open_file(str(test_file_path)) with open(test_file_path, 'w') as file: for i in range(1, 101): @@ -432,7 +403,7 @@ def test_print_window_internal(tmp_path): def test_open_file_large_line_number(tmp_path): test_file_path = tmp_path / 'a.txt' - create_file(str(test_file_path)) + test_file_path.write_text('') open_file(str(test_file_path)) with open(test_file_path, 'w') as file: for i in range(1, 1000): @@ -457,648 +428,6 @@ def test_open_file_large_line_number(tmp_path): assert result == expected -def test_edit_file_by_replace_window(tmp_path): - with patch.dict(os.environ, {'ENABLE_AUTO_LINT': 'True'}): - content = """def any_int(a, b, c): - return isinstance(a, int) and isinstance(b, int) and isinstance(c, int) - -def test_any_int(): - assert any_int(1, 2, 3) == True - assert any_int(1.5, 2, 3) == False - assert any_int(1, 2.5, 3) == False - assert any_int(1, 2, 3.5) == False - assert any_int(1.0, 2, 3) == False - assert any_int(1, 2.0, 3) == False - assert any_int(1, 2, 3.0) == False - assert any_int(0, 0, 0) == True - assert any_int(-1, -2, -3) == True - assert any_int(1, -2, 3) == True - assert any_int(1.5, -2, 3) == False - assert any_int(1, -2.5, 3) == False - -def check(any_int): - # Check some simple cases - assert any_int(2, 3, 1)==True, "This prints if this assert fails 1 (good for debugging!)" - assert any_int(2.5, 2, 3)==False, "This prints if this assert fails 2 (good for debugging!)" - assert any_int(1.5, 5, 3.5)==False, "This prints if this assert fails 3 (good for debugging!)" - assert any_int(2, 6, 2)==False, "This prints if this assert fails 4 (good for debugging!)" - assert any_int(4, 2, 2)==True, "This prints if this assert fails 5 (good for debugging!)" - assert any_int(2.2, 2.2, 2.2)==False, "This prints if this assert fails 6 (good for debugging!)" - assert any_int(-4, 6, 2)==True, "This prints if this assert fails 7 (good for debugging!)" - - # Check some edge cases that are easy to work out by hand. - assert any_int(2,1,1)==True, "This prints if this assert fails 8 (also good for debugging!)" - assert any_int(3,4,7)==True, "This prints if this assert fails 9 (also good for debugging!)" - assert any_int(3.0,4,7)==False, "This prints if this assert fails 10 (also good for debugging!)" - -check(any_int)""" - - temp_file_path = tmp_path / 'error-test.py' - temp_file_path.write_text(content) - - open_file(str(temp_file_path)) - - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - edit_file_by_replace( - str(temp_file_path), - to_replace=' assert any_int(1.0, 2, 3) == False', - new_content=' assert any_int(1.0, 2, 3) == False', - ) - result = buf.getvalue() - expected = ( - '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n' - 'ERRORS:\n' - + str(temp_file_path) - + ':9:9: ' - + 'E999 IndentationError: unexpected indent\n' - '[This is how your edit would have looked if applied]\n' - + SEP - + '(this is the beginning of the file)\n' - '1|def any_int(a, b, c):\n' - '2| return isinstance(a, int) and isinstance(b, int) and isinstance(c, int)\n' - '3|\n' - '4|def test_any_int():\n' - '5| assert any_int(1, 2, 3) == True\n' - '6| assert any_int(1.5, 2, 3) == False\n' - '7| assert any_int(1, 2.5, 3) == False\n' - '8| assert any_int(1, 2, 3.5) == False\n' - '9| assert any_int(1.0, 2, 3) == False\n' - '10| assert any_int(1, 2.0, 3) == False\n' - '11| assert any_int(1, 2, 3.0) == False\n' - '12| assert any_int(0, 0, 0) == True\n' - '13| assert any_int(-1, -2, -3) == True\n' - '14| assert any_int(1, -2, 3) == True\n' - '15| assert any_int(1.5, -2, 3) == False\n' - '16| assert any_int(1, -2.5, 3) == False\n' - '17|\n' - '18|def check(any_int):\n' - '19| # Check some simple cases\n' - '20| assert any_int(2, 3, 1)==True, "This prints if this assert fails 1 (good for debugging!)"\n' - '21| assert any_int(2.5, 2, 3)==False, "This prints if this assert fails 2 (good for debugging!)"\n' - '(12 more lines below)\n' + SEP + '\n' - '[This is the original code before your edit]\n' - + SEP - + '(this is the beginning of the file)\n' - '1|def any_int(a, b, c):\n' - '2| return isinstance(a, int) and isinstance(b, int) and isinstance(c, int)\n' - '3|\n' - '4|def test_any_int():\n' - '5| assert any_int(1, 2, 3) == True\n' - '6| assert any_int(1.5, 2, 3) == False\n' - '7| assert any_int(1, 2.5, 3) == False\n' - '8| assert any_int(1, 2, 3.5) == False\n' - '9| assert any_int(1.0, 2, 3) == False\n' - '10| assert any_int(1, 2.0, 3) == False\n' - '11| assert any_int(1, 2, 3.0) == False\n' - '12| assert any_int(0, 0, 0) == True\n' - '13| assert any_int(-1, -2, -3) == True\n' - '14| assert any_int(1, -2, 3) == True\n' - '15| assert any_int(1.5, -2, 3) == False\n' - '16| assert any_int(1, -2.5, 3) == False\n' - '17|\n' - '18|def check(any_int):\n' - '19| # Check some simple cases\n' - '20| assert any_int(2, 3, 1)==True, "This prints if this assert fails 1 (good for debugging!)"\n' - '21| assert any_int(2.5, 2, 3)==False, "This prints if this assert fails 2 (good for debugging!)"\n' - '(12 more lines below)\n' - + SEP - + 'Your changes have NOT been applied. Please fix your edit command and try again.\n' - 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' - 'DO NOT re-run the same failed edit command. Running it again will lead to the same error.\n' - ) - assert result == expected - - -def test_edit_file_by_replace_with_multiple_errors(tmp_path): - # If the file has multiple errors, but the suggested modification can only fix one error, make sure it is applied. - with patch.dict(os.environ, {'ENABLE_AUTO_LINT': 'True'}): - content = """def Sum(a,b): - try: - answer = a + b - return answer - except Exception: - answer = ANOTHER_CONSTANT - return answer -Sum(1,1) -""" - - temp_file_path = tmp_path / 'problematic-file-test.py' - temp_file_path.write_text(content) - - open_file(str(temp_file_path)) - - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - edit_file_by_replace( - str(temp_file_path), - to_replace=' answer = a + b', - new_content=' answer = a+b', - ) - result = buf.getvalue() - expected = ( - f'[File: {temp_file_path} (8 lines total after edit)]\n' - '(this is the beginning of the file)\n' - '1|def Sum(a,b):\n' - '2| try:\n' - '3| answer = a+b\n' - '4| return answer\n' - '5| except Exception:\n' - '6| answer = ANOTHER_CONSTANT\n' - '7| return answer\n' - '8|Sum(1,1)\n' - '(this is the end of the file)\n' - + MSG_FILE_UPDATED.format(line_number=3) - + '\n' - ) - assert result.split('\n') == expected.split('\n') - - -# ================================ - - -def test_edit_file_by_replace(tmp_path): - temp_file_path = tmp_path / 'a.txt' - content = 'Line 1\nLine 2\nLine 3\nLine 4\nLine 5' - temp_file_path.write_text(content) - - open_file(str(temp_file_path)) - - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - edit_file_by_replace( - file_name=str(temp_file_path), - to_replace='Line 1\nLine 2\nLine 3', - new_content='REPLACE TEXT', - ) - result = buf.getvalue() - expected = ( - f'[File: {temp_file_path} (3 lines total after edit)]\n' - '(this is the beginning of the file)\n' - '1|REPLACE TEXT\n' - '2|Line 4\n' - '3|Line 5\n' - '(this is the end of the file)\n' - + MSG_FILE_UPDATED.format(line_number=1) - + '\n' - ) - assert result.split('\n') == expected.split('\n') - - with open(temp_file_path, 'r') as file: - lines = file.readlines() - assert len(lines) == 3 - assert lines[0].rstrip() == 'REPLACE TEXT' - assert lines[1].rstrip() == 'Line 4' - assert lines[2].rstrip() == 'Line 5' - - -def test_edit_file_by_replace_sameline(tmp_path): - temp_file_path = tmp_path / 'a.txt' - content = 'Line 1\nLine 2\nLine 2\nLine 4\nLine 5' - temp_file_path.write_text(content) - - open_file(str(temp_file_path)) - - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - edit_file_by_replace( - file_name=str(temp_file_path), - to_replace='Line 2\nLine 2', - new_content='Line 2\nREPLACE TEXT', - ) - result = buf.getvalue() - expected = ( - f'[File: {temp_file_path} (5 lines total after edit)]\n' - '(this is the beginning of the file)\n' - '1|Line 1\n' - '2|Line 2\n' - '3|REPLACE TEXT\n' - '4|Line 4\n' - '5|Line 5\n' - '(this is the end of the file)\n' - + MSG_FILE_UPDATED.format(line_number=2) - + '\n' - ) - assert result.split('\n') == expected.split('\n') - - with open(temp_file_path, 'r') as file: - lines = file.readlines() - assert len(lines) == 5 - assert lines[0].rstrip() == 'Line 1' - assert lines[1].rstrip() == 'Line 2' - assert lines[2].rstrip() == 'REPLACE TEXT' - assert lines[3].rstrip() == 'Line 4' - assert lines[4].rstrip() == 'Line 5' - - -def test_edit_file_by_replace_multiline(tmp_path): - temp_file_path = tmp_path / 'a.txt' - content = 'Line 1\nLine 2\nLine 2\nLine 4\nLine 5' - temp_file_path.write_text(content) - - open_file(str(temp_file_path)) - - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - edit_file_by_replace( - file_name=str(temp_file_path), - to_replace='Line 2', - new_content='REPLACE TEXT', - ) - result = buf.getvalue() - assert result.strip().startswith( - 'ERROR: `to_replace` appears more than once, please include enough lines to make code in `to_replace` unique' - ) - - -def test_edit_file_by_replace_no_diff(tmp_path): - temp_file_path = tmp_path / 'a.txt' - content = 'Line 1\nLine 2\nLine 2\nLine 4\nLine 5' - temp_file_path.write_text(content) - - open_file(str(temp_file_path)) - - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - edit_file_by_replace( - file_name=str(temp_file_path), - to_replace='Line 1', - new_content='Line 1', - ) - result = buf.getvalue() - assert result.strip().startswith( - 'ERROR: `to_replace` and `new_content` must be different' - ) - - -def test_edit_file_by_replace_toreplace_empty(tmp_path): - temp_file_path = tmp_path / 'a.txt' - content = 'Line 1\nLine 2\nLine 2\nLine 4\nLine 5' - temp_file_path.write_text(content) - - open_file(str(temp_file_path)) - - _capture_file_operation_error( - lambda: edit_file_by_replace( - file_name=str(temp_file_path), - to_replace='', - new_content='Line 1', - ), - 'ERROR: `to_replace` must not be empty.', - ) - - -def test_edit_file_by_replace_unknown_file(): - _capture_file_operation_error( - lambda: edit_file_by_replace( - str('unknown file'), - 'ORIGINAL TEXT', - 'REPLACE TEXT', - ), - 'ERROR: File unknown file not found.', - ) - - -def test_insert_content_at_line(tmp_path): - temp_file_path = tmp_path / 'b.txt' - content = 'Line 1\nLine 2\nLine 3' - temp_file_path.write_text(content) - open_file(str(temp_file_path)) - - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - insert_content_at_line( - file_name=str(temp_file_path), - line_number=2, - content='Inserted Line', - ) - result = buf.getvalue() - expected = ( - f'[File: {temp_file_path} (4 lines total after edit)]\n' - '(this is the beginning of the file)\n' - '1|Line 1\n' - '2|Inserted Line\n' - '3|Line 2\n' - '4|Line 3\n' - '(this is the end of the file)\n' - + MSG_FILE_UPDATED.format(line_number=2) - + '\n' - ) - assert result.split('\n') == expected.split('\n') - - with open(temp_file_path, 'r') as file: - lines = file.readlines() - assert len(lines) == 4 - assert lines[0].rstrip() == 'Line 1' - assert lines[1].rstrip() == 'Inserted Line' - assert lines[2].rstrip() == 'Line 2' - assert lines[3].rstrip() == 'Line 3' - - -def test_insert_content_at_line_from_scratch(tmp_path): - temp_file_path = tmp_path / 'a.txt' - create_file(str(temp_file_path)) - open_file(str(temp_file_path)) - - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - insert_content_at_line( - file_name=str(temp_file_path), - line_number=1, - content='REPLACE TEXT', - ) - result = buf.getvalue() - expected = ( - f'[File: {temp_file_path} (1 lines total after edit)]\n' - '(this is the beginning of the file)\n' - '1|REPLACE TEXT\n' - '(this is the end of the file)\n' - + MSG_FILE_UPDATED.format(line_number=1) - + '\n' - ) - assert result.split('\n') == expected.split('\n') - - with open(temp_file_path, 'r') as file: - lines = file.readlines() - assert len(lines) == 1 - assert lines[0].rstrip() == 'REPLACE TEXT' - - -def test_insert_content_at_line_from_scratch_emptyfile(tmp_path): - temp_file_path = tmp_path / 'a.txt' - with open(temp_file_path, 'w') as file: - file.write('') - open_file(str(temp_file_path)) - - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - insert_content_at_line( - file_name=str(temp_file_path), - line_number=1, - content='REPLACE TEXT', - ) - result = buf.getvalue() - expected = ( - f'[File: {temp_file_path} (1 lines total after edit)]\n' - '(this is the beginning of the file)\n' - '1|REPLACE TEXT\n' - '(this is the end of the file)\n' - + MSG_FILE_UPDATED.format(line_number=1) - + '\n' - ) - assert result.split('\n') == expected.split('\n') - - with open(temp_file_path, 'r') as file: - lines = file.readlines() - assert len(lines) == 1 - assert lines[0].rstrip() == 'REPLACE TEXT' - - -def test_insert_content_at_line_emptyline(tmp_path): - temp_file_path = tmp_path / 'b.txt' - content = 'Line 1\n\n' - temp_file_path.write_text(content) - open_file(str(temp_file_path)) - - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - insert_content_at_line( - file_name=str(temp_file_path), - line_number=2, - content='Inserted Line', - ) - result = buf.getvalue() - expected = ( - f'[File: {temp_file_path} (3 lines total after edit)]\n' - '(this is the beginning of the file)\n' - '1|Line 1\n' - '2|Inserted Line\n' - '3|\n' - '(this is the end of the file)\n' - + MSG_FILE_UPDATED.format(line_number=2) - + '\n' - ) - assert result.split('\n') == expected.split('\n') - - with open(temp_file_path, 'r') as file: - lines = file.readlines() - assert len(lines) == 3 - assert lines[0].rstrip() == 'Line 1' - assert lines[1].rstrip() == 'Inserted Line' - - -def test_insert_content_at_line_from_scratch_multiline_with_backticks_and_second_edit( - tmp_path, -): - temp_file_path = tmp_path / 'a.txt' - create_file(str(temp_file_path)) - open_file(str(temp_file_path)) - - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - insert_content_at_line( - str(temp_file_path), - 1, - '`REPLACE TEXT1`\n`REPLACE TEXT2`\n`REPLACE TEXT3`', - ) - result = buf.getvalue() - expected = ( - f'[File: {temp_file_path} (3 lines total after edit)]\n' - '(this is the beginning of the file)\n' - '1|`REPLACE TEXT1`\n' - '2|`REPLACE TEXT2`\n' - '3|`REPLACE TEXT3`\n' - '(this is the end of the file)\n' - + MSG_FILE_UPDATED.format(line_number=1) - + '\n' - ) - assert result.split('\n') == expected.split('\n') - - with open(temp_file_path, 'r') as file: - lines = file.readlines() - assert len(lines) == 3 - assert lines[0].rstrip() == '`REPLACE TEXT1`' - assert lines[1].rstrip() == '`REPLACE TEXT2`' - assert lines[2].rstrip() == '`REPLACE TEXT3`' - - # Check that no backticks are escaped in the edit_file_by_replace call - assert '\\`' not in result - - # Perform a second edit - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - edit_file_by_replace( - str(temp_file_path), - '`REPLACE TEXT1`\n`REPLACE TEXT2`\n`REPLACE TEXT3`', - '`REPLACED TEXT1`\n`REPLACED TEXT2`\n`REPLACED TEXT3`', - ) - second_result = buf.getvalue() - second_expected = ( - f'[File: {temp_file_path} (3 lines total after edit)]\n' - '(this is the beginning of the file)\n' - '1|`REPLACED TEXT1`\n' - '2|`REPLACED TEXT2`\n' - '3|`REPLACED TEXT3`\n' - '(this is the end of the file)\n' - + MSG_FILE_UPDATED.format(line_number=1) - + '\n' - ) - assert second_result.split('\n') == second_expected.split('\n') - - with open(temp_file_path, 'r') as file: - lines = file.readlines() - assert len(lines) == 3 - assert lines[0].rstrip() == '`REPLACED TEXT1`' - assert lines[1].rstrip() == '`REPLACED TEXT2`' - assert lines[2].rstrip() == '`REPLACED TEXT3`' - - # Check that no backticks are escaped in the second edit_file_by_replace call - assert '\\`' not in second_result - - -def test_insert_content_at_line_from_scratch_multiline(tmp_path): - temp_file_path = tmp_path / 'a.txt' - create_file(str(temp_file_path)) - open_file(temp_file_path) - - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - insert_content_at_line( - str(temp_file_path), - 1, - content='REPLACE TEXT1\nREPLACE TEXT2\nREPLACE TEXT3', - ) - result = buf.getvalue() - expected = ( - f'[File: {temp_file_path} (3 lines total after edit)]\n' - '(this is the beginning of the file)\n' - '1|REPLACE TEXT1\n' - '2|REPLACE TEXT2\n' - '3|REPLACE TEXT3\n' - '(this is the end of the file)\n' - + MSG_FILE_UPDATED.format(line_number=1) - + '\n' - ) - assert result.split('\n') == expected.split('\n') - - with open(temp_file_path, 'r') as file: - lines = file.readlines() - assert len(lines) == 3 - assert lines[0].rstrip() == 'REPLACE TEXT1' - assert lines[1].rstrip() == 'REPLACE TEXT2' - assert lines[2].rstrip() == 'REPLACE TEXT3' - - -def test_insert_content_at_line_not_opened(): - _capture_file_operation_error( - lambda: insert_content_at_line( - str('unknown file'), - 1, - 'REPLACE TEXT', - ), - 'ERROR: Invalid path or file name.', - ) - - -def test_append_file(tmp_path): - temp_file_path = tmp_path / 'a.txt' - content = 'Line 1\nLine 2' - temp_file_path.write_text(content) - - open_file(str(temp_file_path)) - - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - append_file(str(temp_file_path), content='APPENDED TEXT') - result = buf.getvalue() - expected = ( - f'[File: {temp_file_path} (3 lines total after edit)]\n' - '(this is the beginning of the file)\n' - '1|Line 1\n' - '2|Line 2\n' - '3|APPENDED TEXT\n' - '(this is the end of the file)\n' - + MSG_FILE_UPDATED.format(line_number=2) - + '\n' - ) - assert result.split('\n') == expected.split('\n') - - with open(temp_file_path, 'r') as file: - lines = file.readlines() - assert len(lines) == 3 - assert lines[0].rstrip() == 'Line 1' - assert lines[1].rstrip() == 'Line 2' - assert lines[2].rstrip() == 'APPENDED TEXT' - - -def test_append_file_from_scratch(tmp_path): - temp_file_path = tmp_path / 'a.txt' - create_file(str(temp_file_path)) - try: - open_file(str(temp_file_path)) - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - append_file(str(temp_file_path), content='APPENDED TEXT') - result = buf.getvalue() - expected = ( - f'[File: {temp_file_path} (1 lines total after edit)]\n' - '(this is the beginning of the file)\n' - '1|APPENDED TEXT\n' - '(this is the end of the file)\n' - + MSG_FILE_UPDATED.format(line_number=1) - + '\n' - ) - assert result.split('\n') == expected.split('\n') - - with open(temp_file_path, 'r') as file: - lines = file.readlines() - assert len(lines) == 1 - assert lines[0].rstrip() == 'APPENDED TEXT' - finally: - os.remove(temp_file_path) - - -def test_append_file_from_scratch_multiline(tmp_path): - temp_file_path = tmp_path / 'a3.txt' - create_file(str(temp_file_path)) - try: - open_file(temp_file_path) - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - append_file( - str(temp_file_path), - content='APPENDED TEXT1\nAPPENDED TEXT2\nAPPENDED TEXT3', - ) - result = buf.getvalue() - expected = ( - f'[File: {temp_file_path} (3 lines total after edit)]\n' - '(this is the beginning of the file)\n' - '1|APPENDED TEXT1\n' - '2|APPENDED TEXT2\n' - '3|APPENDED TEXT3\n' - '(this is the end of the file)\n' - + MSG_FILE_UPDATED.format(line_number=1) - + '\n' - ) - assert result.split('\n') == expected.split('\n') - - with open(temp_file_path, 'r') as file: - lines = file.readlines() - assert len(lines) == 3 - assert lines[0].rstrip() == 'APPENDED TEXT1' - assert lines[1].rstrip() == 'APPENDED TEXT2' - assert lines[2].rstrip() == 'APPENDED TEXT3' - finally: - os.remove(temp_file_path) - - -def test_append_file_not_opened(): - _capture_file_operation_error( - lambda: append_file('unknown file', content='APPENDED TEXT'), - 'ERROR: Invalid path or file name.', - ) - - def test_search_dir(tmp_path): # create files with the search term "bingo" for i in range(1, 101): @@ -1269,143 +598,6 @@ def test_find_file_not_exist_file_specific_path(tmp_path): assert result.split('\n') == expected.split('\n') -def test_edit_lint_file_pass(tmp_path): - # Enable linting - with patch.dict(os.environ, {'ENABLE_AUTO_LINT': 'True'}): - file_path = _generate_test_file_with_lines(tmp_path, 1) - - # Test linting functionality - with io.StringIO() as buf: - with contextlib.redirect_stdout(buf): - open_file(str(file_path)) - insert_content_at_line(str(file_path), 1, "print('hello')\n") - result = buf.getvalue() - assert result is not None - expected = ( - f'[File: {file_path} (1 lines total)]\n' - '(this is the beginning of the file)\n' - '1|\n' - '(this is the end of the file)\n' - f'[File: {file_path} (1 lines total after edit)]\n' - '(this is the beginning of the file)\n' - "1|print('hello')\n" - '(this is the end of the file)\n' - + MSG_FILE_UPDATED.format(line_number=1) - + '\n' - ) - assert result.split('\n') == expected.split('\n') - - -def test_lint_file_fail_undefined_name(tmp_path, capsys): - with patch.dict(os.environ, {'ENABLE_AUTO_LINT': 'True'}): - current_line = 1 - - file_path = _generate_test_file_with_lines(tmp_path, 1) - - open_file(str(file_path), current_line) - insert_content_at_line(str(file_path), 1, 'undefined_name()\n') - - result = capsys.readouterr().out - assert result is not None - - expected = ( - f'[File: {file_path} (1 lines total)]\n' - '(this is the beginning of the file)\n' - '1|\n' - '(this is the end of the file)\n' - '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n' - 'ERRORS:\n' - f"{file_path}:1:1: F821 undefined name 'undefined_name'\n" - '[This is how your edit would have looked if applied]\n' - + SEP - + '(this is the beginning of the file)\n' - '1|undefined_name()\n' - '(this is the end of the file)\n' - + SEP - + '\n[This is the original code before your edit]\n' - + SEP - + '(this is the beginning of the file)\n' - '1|\n' - '(this is the end of the file)\n' - + SEP - + 'Your changes have NOT been applied. Please fix your edit command and try again.\n' - 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' - 'DO NOT re-run the same failed edit command. Running it again will lead to the same error.\n' - ) - assert result.split('\n') == expected.split('\n') - - -def test_lint_file_fail_undefined_name_long(tmp_path, capsys): - with patch.dict(os.environ, {'ENABLE_AUTO_LINT': 'True'}): - num_lines = 1000 - error_line = 500 - - file_path = _generate_test_file_with_lines(tmp_path, num_lines) - - error_message = ( - f"{file_path}:{error_line}:1: F821 undefined name 'undefined_name'" - ) - - open_file(str(file_path)) - insert_content_at_line(str(file_path), error_line, 'undefined_name()\n') - - result = capsys.readouterr().out - assert result is not None - - open_lines = '\n'.join([f'{i}|' for i in range(1, WINDOW + 1)]) - expected = ( - f'[File: {file_path} ({num_lines} lines total)]\n' - '(this is the beginning of the file)\n' - f'{open_lines}\n' - f'({num_lines - WINDOW} more lines below)\n' - f'[Use `scroll_down` to view the next 100 lines of the file!]\n' - '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n' - f'ERRORS:\n{error_message}\n' - '[This is how your edit would have looked if applied]\n' - + SEP - + '(489 more lines above)\n' - + _numbered_test_lines(error_line - 10, error_line - 1) - + '500|undefined_name()\n' - + _numbered_test_lines(error_line + 1, error_line + 10) - + '(491 more lines below)\n' - + SEP - + '\n[This is the original code before your edit]\n' - + SEP - + '(489 more lines above)\n' - + _numbered_test_lines(error_line - 10, error_line + 10) - + '(490 more lines below)\n' - + SEP - + 'Your changes have NOT been applied. Please fix your edit command and try again.\n' - 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' - 'DO NOT re-run the same failed edit command. Running it again will lead to the same error.\n' - ) - assert result.split('\n') == expected.split('\n') - - -def test_lint_file_disabled_undefined_name(tmp_path, capsys): - with patch.dict(os.environ, {'ENABLE_AUTO_LINT': 'False'}): - file_path = _generate_test_file_with_lines(tmp_path, 1) - - open_file(str(file_path)) - insert_content_at_line(str(file_path), 1, 'undefined_name()\n') - - result = capsys.readouterr().out - assert result is not None - expected = ( - f'[File: {file_path} (1 lines total)]\n' - '(this is the beginning of the file)\n' - '1|\n' - '(this is the end of the file)\n' - f'[File: {file_path} (1 lines total after edit)]\n' - '(this is the beginning of the file)\n' - '1|undefined_name()\n' - '(this is the end of the file)\n' - + MSG_FILE_UPDATED.format(line_number=1) - + '\n' - ) - assert result.split('\n') == expected.split('\n') - - def test_parse_docx(tmp_path): # Create a DOCX file with some content test_docx_path = tmp_path / 'test.docx' @@ -1523,42 +715,3 @@ def test_parse_pptx(tmp_path): 'Hello, this is the second test PPTX slide.\n\n' ) assert output == expected_output, f'Expected output does not match. Got: {output}' - - -def test_lint_file_fail_non_python(tmp_path, capsys): - with patch.dict(os.environ, {'ENABLE_AUTO_LINT': 'True'}): - current_line = 1 - file_path = _generate_ruby_test_file_with_lines(tmp_path, 1) - - open_file(str(file_path), current_line) - insert_content_at_line( - str(file_path), 1, "def print_hello_world()\n puts 'Hello World'" - ) - result = capsys.readouterr().out - assert result is not None - expected = ( - f'[File: {file_path} (1 lines total)]\n' - '(this is the beginning of the file)\n' - '1|\n' - '(this is the end of the file)\n' - '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n' - 'ERRORS:\n' - f'{file_path}:1:1: Syntax error\n' - '[This is how your edit would have looked if applied]\n' - + SEP - + '(this is the beginning of the file)\n' - '1|def print_hello_world()\n' - "2| puts 'Hello World'\n" - '(this is the end of the file)\n' - '-------------------------------------------------\n\n' - '[This is the original code before your edit]\n' - + SEP - + '(this is the beginning of the file)\n' - '1|\n' - '(this is the end of the file)\n' - + SEP - + 'Your changes have NOT been applied. Please fix your edit command and try again.\n' - 'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n' - 'DO NOT re-run the same failed edit command. Running it again will lead to the same error.\n' - ) - assert result.split('\n') == expected.split('\n') diff --git a/tests/unit/test_chunk_localizer.py b/tests/unit/test_chunk_localizer.py new file mode 100644 index 0000000000..d63da22488 --- /dev/null +++ b/tests/unit/test_chunk_localizer.py @@ -0,0 +1,136 @@ +import pytest + +from openhands.utils.chunk_localizer import ( + Chunk, + create_chunks, + get_top_k_chunk_matches, + normalized_lcs, +) + + +def test_chunk_creation(): + chunk = Chunk(text='test chunk', line_range=(1, 1)) + assert chunk.text == 'test chunk' + assert chunk.line_range == (1, 1) + assert chunk.normalized_lcs is None + + +def test_chunk_visualization(capsys): + chunk = Chunk(text='line1\nline2', line_range=(1, 2)) + assert chunk.visualize() == '1|line1\n2|line2\n' + + +def test_create_chunks_raw_string(): + text = 'line1\nline2\nline3\nline4\nline5' + chunks = create_chunks(text, size=2) + assert len(chunks) == 3 + assert chunks[0].text == 'line1\nline2' + assert chunks[0].line_range == (1, 2) + assert chunks[1].text == 'line3\nline4' + assert chunks[1].line_range == (3, 4) + assert chunks[2].text == 'line5' + assert chunks[2].line_range == (5, 5) + + +def test_normalized_lcs(): + chunk = 'abcdef' + edit_draft = 'abcxyz' + assert normalized_lcs(chunk, edit_draft) == 0.5 + + +def test_get_top_k_chunk_matches(): + text = 'chunk1\nchunk2\nchunk3\nchunk4' + query = 'chunk2' + matches = get_top_k_chunk_matches(text, query, k=2, max_chunk_size=1) + assert len(matches) == 2 + assert matches[0].text == 'chunk2' + assert matches[0].line_range == (2, 2) + assert matches[0].normalized_lcs == 1.0 + assert matches[1].text == 'chunk1' + assert matches[1].line_range == (1, 1) + assert matches[1].normalized_lcs == 5 / 6 + assert matches[0].normalized_lcs > matches[1].normalized_lcs + + +def test_create_chunks_with_empty_lines(): + text = 'line1\n\nline3\n\n\nline6' + chunks = create_chunks(text, size=2) + assert len(chunks) == 3 + assert chunks[0].text == 'line1\n' + assert chunks[0].line_range == (1, 2) + assert chunks[1].text == 'line3\n' + assert chunks[1].line_range == (3, 4) + assert chunks[2].text == '\nline6' + assert chunks[2].line_range == (5, 6) + + +def test_create_chunks_with_large_size(): + text = 'line1\nline2\nline3' + chunks = create_chunks(text, size=10) + assert len(chunks) == 1 + assert chunks[0].text == text + assert chunks[0].line_range == (1, 3) + + +def test_create_chunks_with_last_chunk_smaller(): + text = 'line1\nline2\nline3' + chunks = create_chunks(text, size=2) + assert len(chunks) == 2 + assert chunks[0].text == 'line1\nline2' + assert chunks[0].line_range == (1, 2) + assert chunks[1].text == 'line3' + assert chunks[1].line_range == (3, 3) + + +def test_normalized_lcs_edge_cases(): + assert normalized_lcs('', '') == 0.0 + assert normalized_lcs('a', '') == 0.0 + assert normalized_lcs('', 'a') == 0.0 + assert normalized_lcs('abcde', 'ace') == 0.6 + + +def test_get_top_k_chunk_matches_with_ties(): + text = 'chunk1\nchunk2\nchunk3\nchunk1' + query = 'chunk' + matches = get_top_k_chunk_matches(text, query, k=3, max_chunk_size=1) + assert len(matches) == 3 + assert all(match.normalized_lcs == 5 / 6 for match in matches) + assert {match.text for match in matches} == {'chunk1', 'chunk2', 'chunk3'} + + +def test_get_top_k_chunk_matches_with_large_k(): + text = 'chunk1\nchunk2\nchunk3' + query = 'chunk' + matches = get_top_k_chunk_matches(text, query, k=10, max_chunk_size=1) + assert len(matches) == 3 # Should return all chunks even if k is larger + + +@pytest.mark.parametrize('chunk_size', [1, 2, 3, 4]) +def test_create_chunks_different_sizes(chunk_size): + text = 'line1\nline2\nline3\nline4' + chunks = create_chunks(text, size=chunk_size) + assert len(chunks) == (4 + chunk_size - 1) // chunk_size + assert sum(len(chunk.text.split('\n')) for chunk in chunks) == 4 + + +def test_chunk_visualization_with_special_characters(): + chunk = Chunk(text='line1\nline2\t\nline3\r', line_range=(1, 3)) + assert chunk.visualize() == '1|line1\n2|line2\t\n3|line3\r\n' + + +def test_normalized_lcs_with_unicode(): + chunk = 'Hello, 世界!' + edit_draft = 'Hello, world!' + assert 0 < normalized_lcs(chunk, edit_draft) < 1 + + +def test_get_top_k_chunk_matches_with_overlapping_chunks(): + text = 'chunk1\nchunk2\nchunk3\nchunk4' + query = 'chunk2\nchunk3' + matches = get_top_k_chunk_matches(text, query, k=2, max_chunk_size=2) + assert len(matches) == 2 + assert matches[0].text == 'chunk1\nchunk2' + assert matches[0].line_range == (1, 2) + assert matches[1].text == 'chunk3\nchunk4' + assert matches[1].line_range == (3, 4) + assert matches[0].normalized_lcs == matches[1].normalized_lcs diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py index 2060883896..347d383076 100644 --- a/tests/unit/test_llm.py +++ b/tests/unit/test_llm.py @@ -1,3 +1,4 @@ +import copy from unittest.mock import MagicMock, patch import pytest @@ -10,8 +11,8 @@ from litellm.exceptions import ( from openhands.core.config import LLMConfig from openhands.core.exceptions import OperationCancelled -from openhands.core.metrics import Metrics from openhands.llm.llm import LLM +from openhands.llm.metrics import Metrics @pytest.fixture(autouse=True) @@ -39,6 +40,7 @@ def test_llm_init_with_default_config(default_config): assert llm.config.model == 'gpt-4o' assert llm.config.api_key == 'test_key' assert isinstance(llm.metrics, Metrics) + assert llm.metrics.model_name == 'gpt-4o' @patch('openhands.llm.llm.litellm.get_model_info') @@ -83,13 +85,18 @@ def test_llm_init_with_metrics(): metrics = Metrics() llm = LLM(config, metrics=metrics) assert llm.metrics is metrics + assert ( + llm.metrics.model_name == 'default' + ) # because we didn't specify model_name in Metrics init def test_llm_reset(): llm = LLM(LLMConfig(model='gpt-4o-mini', api_key='test_key')) - initial_metrics = llm.metrics + initial_metrics = copy.deepcopy(llm.metrics) + initial_metrics.add_cost(1.0) llm.reset() - assert llm.metrics is not initial_metrics + assert llm.metrics._accumulated_cost != initial_metrics._accumulated_cost + assert llm.metrics._costs != initial_metrics._costs assert isinstance(llm.metrics, Metrics)