From d84befe28fa294a9d10415dac7fc61fe3d63ead2 Mon Sep 17 00:00:00 2001
From: Leander Maben <leander.maben@gmail.com>
Date: Mon, 9 Jun 2025 09:57:20 -0400
Subject: [PATCH] Adding LLM Based Editing capability (#8677)

Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: Engel Nyst <engel.nyst@gmail.com>
---
 evaluation/README.md                          | 18 ++++++
 evaluation/benchmarks/swe_bench/run_infer.py  |  9 ++-
 .../codeact_agent/function_calling.py         |  3 +
 .../codeact_agent/tools/llm_based_edit.py     | 20 +++++--
 openhands/llm/fn_call_converter.py            | 59 +++++++++++++++++++
 openhands/llm/tool_names.py                   |  1 +
 openhands/runtime/utils/edit.py               | 43 ++++++--------
 7 files changed, 119 insertions(+), 34 deletions(-)

diff --git a/evaluation/README.md b/evaluation/README.md
index 55bef746d6..b617cd9766 100644
--- a/evaluation/README.md
+++ b/evaluation/README.md
@@ -74,6 +74,24 @@ If no condenser configuration is specified, the 'noop' condenser will be used by
 
 For other configurations specific to evaluation, such as `save_trajectory_path`, these are typically set in the `get_config` function of the respective `run_infer.py` file for each benchmark.
 
+### Enabling LLM-Based Editor Tools
+
+The LLM-Based Editor tool (currently supported only for SWE-Bench) can be enabled by setting the following environment variable:
+```bash
+export ENABLE_LLM_EDITOR=true
+```
+
+Configure the editor LLM under the `[llm.draft_editor]` section of your TOML config file, for example:
+```toml
+[llm.draft_editor]
+base_url = "http://localhost:9002/v1"
+model = "hosted_vllm/lite_coder_qwen_editor_3B"
+api_key = ""
+temperature = 0.7
+max_input_tokens = 10500
+max_output_tokens = 10500
+```
+
 ## Supported Benchmarks
 
 The OpenHands evaluation harness supports a wide variety of benchmarks across [software engineering](#software-engineering), [web browsing](#web-browsing), [miscellaneous assistance](#misc-assistance), and [real-world](#real-world) tasks.
diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py
index ea242736c3..40157c929d 100644
--- a/evaluation/benchmarks/swe_bench/run_infer.py
+++ b/evaluation/benchmarks/swe_bench/run_infer.py
@@ -42,7 +42,7 @@ from openhands.core.config import (
     AgentConfig,
     OpenHandsConfig,
     get_llm_config_arg,
-    get_parser,
+    get_parser
 )
 from openhands.core.config.condenser_config import NoOpCondenserConfig
 from openhands.core.config.utils import get_condenser_config_arg
@@ -62,6 +62,7 @@ from openhands.utils.shutdown_listener import sleep_if_should_continue
 
 USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
 RUN_WITH_BROWSING = os.environ.get('RUN_WITH_BROWSING', 'false').lower() == 'true'
+ENABLE_LLM_EDITOR = os.environ.get('ENABLE_LLM_EDITOR', 'false').lower() == 'true'
 BenchMode = Literal['swe', 'swt', 'swt-ci']
 
 
@@ -254,15 +255,19 @@ def get_config(
         workspace_base=None,
         workspace_mount_path=None,
     )
+
     config.set_llm_config(
         update_llm_config_for_completions_logging(
             metadata.llm_config, metadata.eval_output_dir, instance['instance_id']
         )
     )
+    # Look up the 'draft_editor' LLM config, if one exists, and register it under that name
+    config.set_llm_config(get_llm_config_arg('draft_editor'), 'draft_editor')
+
     agent_config = AgentConfig(
         enable_jupyter=False,
         enable_browsing=RUN_WITH_BROWSING,
-        enable_llm_editor=False,
+        enable_llm_editor=ENABLE_LLM_EDITOR,
         enable_mcp=False,
         condenser=metadata.condenser_config,
         enable_prompt_extensions=False,
diff --git a/openhands/agenthub/codeact_agent/function_calling.py b/openhands/agenthub/codeact_agent/function_calling.py
index da42432bee..02da90e74e 100644
--- a/openhands/agenthub/codeact_agent/function_calling.py
+++ b/openhands/agenthub/codeact_agent/function_calling.py
@@ -141,6 +141,9 @@ def response_to_actions(
                     content=arguments['content'],
                     start=arguments.get('start', 1),
                     end=arguments.get('end', -1),
+                    impl_source=arguments.get(
+                        'impl_source', FileEditSource.LLM_BASED_EDIT
+                    ),
                 )
             elif (
                 tool_call.function.name
diff --git a/openhands/agenthub/codeact_agent/tools/llm_based_edit.py b/openhands/agenthub/codeact_agent/tools/llm_based_edit.py
index e007846d4c..392890d371 100644
--- a/openhands/agenthub/codeact_agent/tools/llm_based_edit.py
+++ b/openhands/agenthub/codeact_agent/tools/llm_based_edit.py
@@ -2,10 +2,18 @@ from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChun
 
 _FILE_EDIT_DESCRIPTION = """Edit a file in plain-text format.
 * The assistant can edit files by specifying the file path and providing a draft of the new file content.
-* The draft content doesn't need to be exactly the same as the existing file; the assistant may skip unchanged lines using comments like `# unchanged` to indicate unchanged sections.
+* The draft content doesn't need to be exactly the same as the existing file; the assistant may skip unchanged lines using comments like `# ... existing code ...` to indicate unchanged sections.
 * IMPORTANT: For large files (e.g., > 300 lines), specify the range of lines to edit using `start` and `end` (1-indexed, inclusive). The range should be smaller than 300 lines.
+* -1 indicates the last line of the file when used as the `start` or `end` value.
+* Keep at least one unchanged line before the changed section and after the changed section wherever possible.
+* Make sure to set the `start` and `end` to include all the lines in the original file referred to in the draft of the new file content. Failure to do so will result in bad edits.
 * To append to a file, set both `start` and `end` to `-1`.
 * If the file doesn't exist, a new file will be created with the provided content.
+* IMPORTANT: Make sure you include all the required indentations for each line of code in the draft, otherwise the edited code will be incorrectly indented.
+* IMPORTANT: Make sure that the first line of the draft is also properly indented and has the required whitespaces.
+* IMPORTANT: NEVER include or make references to lines from outside the `start` and `end` range in the draft.
+* IMPORTANT: Start the content with a comment in the format: #EDIT: Reason for edit
+* IMPORTANT: If you are not appending to the file, avoid setting `start` and `end` to the same value.
 
 **Example 1: general edit for short files**
 For example, given an existing file `/path/to/file.py` that looks like this:
@@ -33,13 +41,12 @@ The assistant wants to edit the file to look like this:
 The assistant may produce an edit action like this:
 path="/path/to/file.txt" start=1 end=-1
 content=```
+#EDIT: I want to change the value of y to 2
 class MyClass:
     def __init__(self):
-        # no changes before
+        # ... existing code ...
         self.y = 2
-        # self.z is removed
 
-# MyClass().z is removed
 print(MyClass().y)
 ```
 
@@ -58,6 +65,7 @@ For example, given an existing file `/path/to/file.py` that looks like this:
 
 To append the following lines to the file:
 ```python
+#EDIT: I want to print the value of y
 print(MyClass().y)
 ```
 
@@ -93,9 +101,9 @@ The assistant wants to edit the file to look like this:
 (2000 more lines below)
 
 The assistant may produce an edit action like this:
-path="/path/to/file.txt" start=1001 end=1008
+path="/path/to/file.txt" start=1002 end=1008
 content=```
-class MyClass:
+#EDIT: I want to change the value of y to 2
     def __init__(self):
         # no changes before
         self.y = 2
diff --git a/openhands/llm/fn_call_converter.py b/openhands/llm/fn_call_converter.py
index 0218107ca6..90827d10fb 100644
--- a/openhands/llm/fn_call_converter.py
+++ b/openhands/llm/fn_call_converter.py
@@ -22,6 +22,7 @@ from openhands.llm.tool_names import (
     BROWSER_TOOL_NAME,
     EXECUTE_BASH_TOOL_NAME,
     FINISH_TOOL_NAME,
+    LLM_BASED_EDIT_TOOL_NAME,
     STR_REPLACE_EDITOR_TOOL_NAME,
 )
 
@@ -251,6 +252,58 @@ noop(1000)  # Wait for page to load
 USER: EXECUTION RESULT of [browser]:
 [Browser shows the numbers in a table format]
 """
+    },
+    'edit_file': {
+        'create_file': """
+ASSISTANT: There is no `app.py` file in the current directory. Let me create a Python file `app.py`:
+<function=edit_file>
+<parameter=path>/workspace/app.py</parameter>
+<parameter=start>1</parameter>
+<parameter=end>-1</parameter>
+<parameter=content>
+from flask import Flask
+app = Flask(__name__)
+
+@app.route('/')
+def index():
+    numbers = list(range(1, 11))
+    return str(numbers)
+
+if __name__ == '__main__':
+    app.run(port=5000)
+</parameter>
+</function>
+
+USER: EXECUTION RESULT of [edit_file]:
+File created successfully at: /workspace/app.py
+""",
+        'edit_file': """
+ASSISTANT:
+Now let me display the numbers in a table format:
+<function=edit_file>
+<parameter=path>/workspace/app.py</parameter>
+<parameter=start>6</parameter>
+<parameter=end>9</parameter>
+<parameter=content>
+    numbers = list(range(1, 11))
+    return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
+    # ... existing code ...
+if __name__ == '__main__':
+</parameter>
+</function>
+
+USER: EXECUTION RESULT of [edit_file]:
+The file /workspace/app.py has been edited. Here's the result of running `cat -n` on a snippet of /workspace/app.py:
+     3
+     4  @app.route('/')
+     5  def index():
+     6      numbers = list(range(1, 11))
+     7      return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
+     8
+     9  if __name__ == '__main__':
+    10      app.run(port=5000)
+Review the changes and make sure they are as expected. Edit the file again if necessary.
+""",
     },
     'finish': {
         'task_completed': """
@@ -279,6 +332,8 @@ def get_example_for_tools(tools: list[dict]) -> str:
                 available_tools.add('browser')
             elif name == FINISH_TOOL_NAME:
                 available_tools.add('finish')
+            elif name == LLM_BASED_EDIT_TOOL_NAME:
+                available_tools.add('edit_file')
 
     if not available_tools:
         return ''
@@ -297,6 +352,8 @@ USER: Create a list of numbers from 1 to 10, and display them in a web page at p
 
     if 'str_replace_editor' in available_tools:
         example += TOOL_EXAMPLES['str_replace_editor']['create_file']
+    elif 'edit_file' in available_tools:
+        example += TOOL_EXAMPLES['edit_file']['create_file']
 
     if 'execute_bash' in available_tools:
         example += TOOL_EXAMPLES['execute_bash']['run_server']
@@ -309,6 +366,8 @@ USER: Create a list of numbers from 1 to 10, and display them in a web page at p
 
     if 'str_replace_editor' in available_tools:
         example += TOOL_EXAMPLES['str_replace_editor']['edit_file']
+    elif 'edit_file' in available_tools:
+        example += TOOL_EXAMPLES['edit_file']['edit_file']
 
     if 'execute_bash' in available_tools:
         example += TOOL_EXAMPLES['execute_bash']['run_server_again']
diff --git a/openhands/llm/tool_names.py b/openhands/llm/tool_names.py
index b61c8198ed..753ba7afb3 100644
--- a/openhands/llm/tool_names.py
+++ b/openhands/llm/tool_names.py
@@ -4,3 +4,4 @@ EXECUTE_BASH_TOOL_NAME = 'execute_bash'
 STR_REPLACE_EDITOR_TOOL_NAME = 'str_replace_editor'
 BROWSER_TOOL_NAME = 'browser'
 FINISH_TOOL_NAME = 'finish'
+LLM_BASED_EDIT_TOOL_NAME = 'edit_file'
diff --git a/openhands/runtime/utils/edit.py b/openhands/runtime/utils/edit.py
index 47d5a6f5ef..b71b6ba0de 100644
--- a/openhands/runtime/utils/edit.py
+++ b/openhands/runtime/utils/edit.py
@@ -4,7 +4,7 @@ import tempfile
 from abc import ABC, abstractmethod
 from typing import Any
 
-from openhands_aci.utils.diff import get_diff
+from openhands_aci.utils.diff import get_diff  # type: ignore
 
 from openhands.core.config import OpenHandsConfig
 from openhands.core.logger import openhands_logger as logger
@@ -26,39 +26,31 @@ from openhands.llm.llm import LLM
 from openhands.llm.metrics import Metrics
 from openhands.utils.chunk_localizer import Chunk, get_top_k_chunk_matches
 
-SYS_MSG = """Your job is to produce a new version of the file based on the old version and the
-provided draft of the new version. The provided draft may be incomplete (it may skip lines) and/or incorrectly indented. You should try to apply the changes present in the draft to the old version, and output a new version of the file.
-NOTE:
-- The output file should be COMPLETE and CORRECTLY INDENTED. Do not omit any lines, and do not change any lines that are not part of the changes.
-- You should output the new version of the file by wrapping the new version of the file content in a ``` block.
-- If there's no explicit comment to remove the existing code, we should keep them and append the new code to the end of the file.
-- If there's placeholder comments like `# no changes before` or `# no changes here`, we should replace these comments with the original code near the placeholder comments.
-"""
-
 USER_MSG = """
-HERE IS THE OLD VERSION OF THE FILE:
-```
-{old_contents}
-```
+Code changes will be provided in the form of a draft. You will need to apply the draft to the original code. 
+The original code will be enclosed within `<original_code>` tags.
+The draft will be enclosed within `<update_snippet>` tags.
+You need to output the update code within `<updated_code>` tags.
 
-HERE IS THE DRAFT OF THE NEW VERSION OF THE FILE:
-```
-{draft_changes}
-```
+Within the `<updated_code>` tag, include only the final code after updation. Do not include any explanations or other content within these tags.
 
-GIVE ME THE NEW VERSION OF THE FILE.
-IMPORTANT:
-- There should be NO placeholder comments like `# no changes before` or `# no changes here`. They should be replaced with the original code near the placeholder comments.
-- The output file should be COMPLETE and CORRECTLY INDENTED. Do not omit any lines, and do not change any lines that are not part of the changes.
-""".strip()
+<original_code>{old_contents}</original_code>
+
+<update_snippet>{draft_changes}</update_snippet>
+    """
 
 
 def _extract_code(string: str) -> str | None:
-    pattern = r'```(?:\w*\n)?(.*?)```'
+    pattern = r'<updated_code>(.*?)</updated_code>'
     matches = re.findall(pattern, string, re.DOTALL)
     if not matches:
         return None
-    return str(matches[0])
+
+    content = str(matches[0])
+    if content.startswith('#EDIT:'):
+        # Drop the leading '#EDIT: ...' reason line (everything up to the first newline)
+        content = content[content.find('\n') + 1:]
+    return content
 
 
 def get_new_file_contents(
@@ -66,7 +58,6 @@ def get_new_file_contents(
 ) -> str | None:
     while num_retries > 0:
         messages = [
-            {'role': 'system', 'content': SYS_MSG},
             {
                 'role': 'user',
                 'content': USER_MSG.format(