From d84befe28fa294a9d10415dac7fc61fe3d63ead2 Mon Sep 17 00:00:00 2001 From: Leander Maben Date: Mon, 9 Jun 2025 09:57:20 -0400 Subject: [PATCH] Adding LLM Based Editing capability (#8677) Co-authored-by: Xingyao Wang Co-authored-by: Engel Nyst Co-authored-by: Engel Nyst --- evaluation/README.md | 18 ++++++ evaluation/benchmarks/swe_bench/run_infer.py | 9 ++- .../codeact_agent/function_calling.py | 3 + .../codeact_agent/tools/llm_based_edit.py | 20 +++++-- openhands/llm/fn_call_converter.py | 59 +++++++++++++++++++ openhands/llm/tool_names.py | 1 + openhands/runtime/utils/edit.py | 43 ++++++-------- 7 files changed, 119 insertions(+), 34 deletions(-) diff --git a/evaluation/README.md b/evaluation/README.md index 55bef746d6..b617cd9766 100644 --- a/evaluation/README.md +++ b/evaluation/README.md @@ -74,6 +74,24 @@ If no condenser configuration is specified, the 'noop' condenser will be used by For other configurations specific to evaluation, such as `save_trajectory_path`, these are typically set in the `get_config` function of the respective `run_infer.py` file for each benchmark. +### Enabling LLM-Based Editor Tools + +The LLM-Based Editor tool (currently supported only for SWE-Bench) can be enabled by setting: +```bash +export ENABLE_LLM_EDITOR=true +``` + +You can set the config for the Editor LLM as: +```toml +[llm.draft_editor] +base_url = "http://localhost:9002/v1" +model = "hosted_vllm/lite_coder_qwen_editor_3B" +api_key = "" +temperature = 0.7 +max_input_tokens = 10500 +max_output_tokens = 10500 +``` + ## Supported Benchmarks The OpenHands evaluation harness supports a wide variety of benchmarks across [software engineering](#software-engineering), [web browsing](#web-browsing), [miscellaneous assistance](#misc-assistance), and [real-world](#real-world) tasks. diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py index ea242736c3..40157c929d 100644 --- a/evaluation/benchmarks/swe_bench/run_infer.py +++ b/evaluation/benchmarks/swe_bench/run_infer.py @@ -42,7 +42,7 @@ from openhands.core.config import ( AgentConfig, OpenHandsConfig, get_llm_config_arg, - get_parser, + get_parser ) from openhands.core.config.condenser_config import NoOpCondenserConfig from openhands.core.config.utils import get_condenser_config_arg @@ -62,6 +62,7 @@ from openhands.utils.shutdown_listener import sleep_if_should_continue USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true' RUN_WITH_BROWSING = os.environ.get('RUN_WITH_BROWSING', 'false').lower() == 'true' +ENABLE_LLM_EDITOR = os.environ.get('ENABLE_LLM_EDITOR', 'false').lower() == 'true' BenchMode = Literal['swe', 'swt', 'swt-ci'] @@ -254,15 +255,19 @@ def get_config( workspace_base=None, workspace_mount_path=None, ) + config.set_llm_config( update_llm_config_for_completions_logging( metadata.llm_config, metadata.eval_output_dir, instance['instance_id'] ) ) + # get 'draft_editor' config if exists + config.set_llm_config(get_llm_config_arg('draft_editor'), 'draft_editor') + agent_config = AgentConfig( enable_jupyter=False, enable_browsing=RUN_WITH_BROWSING, - enable_llm_editor=False, + enable_llm_editor=ENABLE_LLM_EDITOR, enable_mcp=False, condenser=metadata.condenser_config, enable_prompt_extensions=False, diff --git a/openhands/agenthub/codeact_agent/function_calling.py b/openhands/agenthub/codeact_agent/function_calling.py index da42432bee..02da90e74e 100644 --- a/openhands/agenthub/codeact_agent/function_calling.py +++ b/openhands/agenthub/codeact_agent/function_calling.py @@ -141,6 +141,9 @@ def response_to_actions( content=arguments['content'], start=arguments.get('start', 1), end=arguments.get('end', -1), + impl_source=arguments.get( + 'impl_source', FileEditSource.LLM_BASED_EDIT + ), ) elif ( tool_call.function.name diff --git a/openhands/agenthub/codeact_agent/tools/llm_based_edit.py b/openhands/agenthub/codeact_agent/tools/llm_based_edit.py index e007846d4c..392890d371 100644 --- a/openhands/agenthub/codeact_agent/tools/llm_based_edit.py +++ b/openhands/agenthub/codeact_agent/tools/llm_based_edit.py @@ -2,10 +2,18 @@ from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChun _FILE_EDIT_DESCRIPTION = """Edit a file in plain-text format. * The assistant can edit files by specifying the file path and providing a draft of the new file content. -* The draft content doesn't need to be exactly the same as the existing file; the assistant may skip unchanged lines using comments like `# unchanged` to indicate unchanged sections. +* The draft content doesn't need to be exactly the same as the existing file; the assistant may skip unchanged lines using comments like `# ... existing code ...` to indicate unchanged sections. * IMPORTANT: For large files (e.g., > 300 lines), specify the range of lines to edit using `start` and `end` (1-indexed, inclusive). The range should be smaller than 300 lines. +* -1 indicates the last line of the file when used as the `start` or `end` value. +* Keep at least one unchanged line before the changed section and after the changed section wherever possible. +* Make sure to set the `start` and `end` to include all the lines in the original file referred to in the draft of the new file content. Failure to do so will result in bad edits. * To append to a file, set both `start` and `end` to `-1`. * If the file doesn't exist, a new file will be created with the provided content. +* IMPORTANT: Make sure you include all the required indentations for each line of code in the draft, otherwise the edited code will be incorrectly indented. +* IMPORTANT: Make sure that the first line of the draft is also properly indented and has the required whitespaces. +* IMPORTANT: NEVER include or make references to lines from outside the `start` and `end` range in the draft. +* IMPORTANT: Start the content with a comment in the format: #EDIT: Reason for edit +* IMPORTANT: If you are not appending to the file, avoid setting `start` and `end` to the same value. **Example 1: general edit for short files** For example, given an existing file `/path/to/file.py` that looks like this: @@ -33,13 +41,12 @@ The assistant wants to edit the file to look like this: The assistant may produce an edit action like this: path="/path/to/file.txt" start=1 end=-1 content=``` +#EDIT: I want to change the value of y to 2 class MyClass: def __init__(self): - # no changes before + # ... existing code ... self.y = 2 - # self.z is removed -# MyClass().z is removed print(MyClass().y) ``` @@ -58,6 +65,7 @@ For example, given an existing file `/path/to/file.py` that looks like this: To append the following lines to the file: ```python +#EDIT: I want to print the value of y print(MyClass().y) ``` @@ -93,9 +101,9 @@ The assistant wants to edit the file to look like this: (2000 more lines below) The assistant may produce an edit action like this: -path="/path/to/file.txt" start=1001 end=1008 +path="/path/to/file.txt" start=1002 end=1008 content=``` -class MyClass: +#EDIT: I want to change the value of y to 2 def __init__(self): # no changes before self.y = 2 diff --git a/openhands/llm/fn_call_converter.py b/openhands/llm/fn_call_converter.py index 0218107ca6..90827d10fb 100644 --- a/openhands/llm/fn_call_converter.py +++ b/openhands/llm/fn_call_converter.py @@ -22,6 +22,7 @@ from openhands.llm.tool_names import ( BROWSER_TOOL_NAME, EXECUTE_BASH_TOOL_NAME, FINISH_TOOL_NAME, + LLM_BASED_EDIT_TOOL_NAME, STR_REPLACE_EDITOR_TOOL_NAME, ) @@ -251,6 +252,58 @@ noop(1000) # Wait for page to load USER: EXECUTION RESULT of [browser]: [Browser shows the numbers in a table format] """ + }, + 'edit_file': { + 'create_file': """ +ASSISTANT: There is no `app.py` file in the current directory. Let me create a Python file `app.py`: + +/workspace/app.py +1 +-1 + +from flask import Flask +app = Flask(__name__) + +@app.route('/') +def index(): + numbers = list(range(1, 11)) + return str(numbers) + +if __name__ == '__main__': + app.run(port=5000) + + + +USER: EXECUTION RESULT of [edit_file]: +File created successfully at: /workspace/app.py +""", + 'edit_file': """ +ASSISTANT: +Now let me display the numbers in a table format: + +/workspace/app.py +6 +9 + + numbers = list(range(1, 11)) + return '' + ''.join([f'' for i in numbers]) + '
{i}
' + # ... existing code ... +if __name__ == '__main__': + + + +USER: EXECUTION RESULT of [edit_file]: +The file /workspace/app.py has been edited. Here's the result of running `cat -n` on a snippet of /workspace/app.py: + 3 + 4 @app.route('/') + 5 def index(): + 6 numbers = list(range(1, 11)) + 7 return '' + ''.join([f'' for i in numbers]) + '
{i}
' + 8 + 9 if __name__ == '__main__': + 10 app.run(port=5000) +Review the changes and make sure they are as expected. Edit the file again if necessary. +""", }, 'finish': { 'task_completed': """ @@ -279,6 +332,8 @@ def get_example_for_tools(tools: list[dict]) -> str: available_tools.add('browser') elif name == FINISH_TOOL_NAME: available_tools.add('finish') + elif name == LLM_BASED_EDIT_TOOL_NAME: + available_tools.add('edit_file') if not available_tools: return '' @@ -297,6 +352,8 @@ USER: Create a list of numbers from 1 to 10, and display them in a web page at p if 'str_replace_editor' in available_tools: example += TOOL_EXAMPLES['str_replace_editor']['create_file'] + elif 'edit_file' in available_tools: + example += TOOL_EXAMPLES['edit_file']['create_file'] if 'execute_bash' in available_tools: example += TOOL_EXAMPLES['execute_bash']['run_server'] @@ -309,6 +366,8 @@ USER: Create a list of numbers from 1 to 10, and display them in a web page at p if 'str_replace_editor' in available_tools: example += TOOL_EXAMPLES['str_replace_editor']['edit_file'] + elif 'edit_file' in available_tools: + example += TOOL_EXAMPLES['edit_file']['edit_file'] if 'execute_bash' in available_tools: example += TOOL_EXAMPLES['execute_bash']['run_server_again'] diff --git a/openhands/llm/tool_names.py b/openhands/llm/tool_names.py index b61c8198ed..753ba7afb3 100644 --- a/openhands/llm/tool_names.py +++ b/openhands/llm/tool_names.py @@ -4,3 +4,4 @@ EXECUTE_BASH_TOOL_NAME = 'execute_bash' STR_REPLACE_EDITOR_TOOL_NAME = 'str_replace_editor' BROWSER_TOOL_NAME = 'browser' FINISH_TOOL_NAME = 'finish' +LLM_BASED_EDIT_TOOL_NAME = 'edit_file' diff --git a/openhands/runtime/utils/edit.py b/openhands/runtime/utils/edit.py index 47d5a6f5ef..b71b6ba0de 100644 --- a/openhands/runtime/utils/edit.py +++ b/openhands/runtime/utils/edit.py @@ -4,7 +4,7 @@ import tempfile from abc import ABC, abstractmethod from typing import Any -from openhands_aci.utils.diff import get_diff +from openhands_aci.utils.diff import get_diff # type: ignore from openhands.core.config import OpenHandsConfig from openhands.core.logger import openhands_logger as logger @@ -26,39 +26,31 @@ from openhands.llm.llm import LLM from openhands.llm.metrics import Metrics from openhands.utils.chunk_localizer import Chunk, get_top_k_chunk_matches -SYS_MSG = """Your job is to produce a new version of the file based on the old version and the -provided draft of the new version. The provided draft may be incomplete (it may skip lines) and/or incorrectly indented. You should try to apply the changes present in the draft to the old version, and output a new version of the file. -NOTE: -- The output file should be COMPLETE and CORRECTLY INDENTED. Do not omit any lines, and do not change any lines that are not part of the changes. -- You should output the new version of the file by wrapping the new version of the file content in a ``` block. -- If there's no explicit comment to remove the existing code, we should keep them and append the new code to the end of the file. -- If there's placeholder comments like `# no changes before` or `# no changes here`, we should replace these comments with the original code near the placeholder comments. -""" - USER_MSG = """ -HERE IS THE OLD VERSION OF THE FILE: -``` -{old_contents} -``` +Code changes will be provided in the form of a draft. You will need to apply the draft to the original code. +The original code will be enclosed within `` tags. +The draft will be enclosed within `` tags. +You need to output the update code within `` tags. -HERE IS THE DRAFT OF THE NEW VERSION OF THE FILE: -``` -{draft_changes} -``` +Within the `` tag, include only the final code after updation. Do not include any explanations or other content within these tags. -GIVE ME THE NEW VERSION OF THE FILE. -IMPORTANT: -- There should be NO placeholder comments like `# no changes before` or `# no changes here`. They should be replaced with the original code near the placeholder comments. -- The output file should be COMPLETE and CORRECTLY INDENTED. Do not omit any lines, and do not change any lines that are not part of the changes. -""".strip() +{old_contents} + +{draft_changes} + """ def _extract_code(string: str) -> str | None: - pattern = r'```(?:\w*\n)?(.*?)```' + pattern = r'(.*?)' matches = re.findall(pattern, string, re.DOTALL) if not matches: return None - return str(matches[0]) + + content = str(matches[0]) + if content.startswith('#EDIT:'): + #Remove first line + content = content[content.find('\n') + 1:] + return content def get_new_file_contents( @@ -66,7 +58,6 @@ def get_new_file_contents( ) -> str | None: while num_retries > 0: messages = [ - {'role': 'system', 'content': SYS_MSG}, { 'role': 'user', 'content': USER_MSG.format(