mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Adding LLM Based Editing capability (#8677)
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com> Co-authored-by: Engel Nyst <engel.nyst@gmail.com>
This commit is contained in:
parent
4eef22e04e
commit
d84befe28f
@ -74,6 +74,24 @@ If no condenser configuration is specified, the 'noop' condenser will be used by
|
|||||||
|
|
||||||
For other configurations specific to evaluation, such as `save_trajectory_path`, these are typically set in the `get_config` function of the respective `run_infer.py` file for each benchmark.
|
For other configurations specific to evaluation, such as `save_trajectory_path`, these are typically set in the `get_config` function of the respective `run_infer.py` file for each benchmark.
|
||||||
|
|
||||||
|
### Enabling LLM-Based Editor Tools
|
||||||
|
|
||||||
|
The LLM-Based Editor tool (currently supported only for SWE-Bench) can be enabled by setting the `ENABLE_LLM_EDITOR` environment variable:
|
||||||
|
```bash
|
||||||
|
export ENABLE_LLM_EDITOR=true
|
||||||
|
```
|
||||||
|
|
||||||
|
You can configure the Editor LLM by adding an `[llm.draft_editor]` section to your TOML config, for example:
|
||||||
|
```toml
|
||||||
|
[llm.draft_editor]
|
||||||
|
base_url = "http://localhost:9002/v1"
|
||||||
|
model = "hosted_vllm/lite_coder_qwen_editor_3B"
|
||||||
|
api_key = ""
|
||||||
|
temperature = 0.7
|
||||||
|
max_input_tokens = 10500
|
||||||
|
max_output_tokens = 10500
|
||||||
|
```
|
||||||
|
|
||||||
## Supported Benchmarks
|
## Supported Benchmarks
|
||||||
|
|
||||||
The OpenHands evaluation harness supports a wide variety of benchmarks across [software engineering](#software-engineering), [web browsing](#web-browsing), [miscellaneous assistance](#misc-assistance), and [real-world](#real-world) tasks.
|
The OpenHands evaluation harness supports a wide variety of benchmarks across [software engineering](#software-engineering), [web browsing](#web-browsing), [miscellaneous assistance](#misc-assistance), and [real-world](#real-world) tasks.
|
||||||
|
|||||||
@ -42,7 +42,7 @@ from openhands.core.config import (
|
|||||||
AgentConfig,
|
AgentConfig,
|
||||||
OpenHandsConfig,
|
OpenHandsConfig,
|
||||||
get_llm_config_arg,
|
get_llm_config_arg,
|
||||||
get_parser,
|
get_parser
|
||||||
)
|
)
|
||||||
from openhands.core.config.condenser_config import NoOpCondenserConfig
|
from openhands.core.config.condenser_config import NoOpCondenserConfig
|
||||||
from openhands.core.config.utils import get_condenser_config_arg
|
from openhands.core.config.utils import get_condenser_config_arg
|
||||||
@ -62,6 +62,7 @@ from openhands.utils.shutdown_listener import sleep_if_should_continue
|
|||||||
|
|
||||||
USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
|
USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
|
||||||
RUN_WITH_BROWSING = os.environ.get('RUN_WITH_BROWSING', 'false').lower() == 'true'
|
RUN_WITH_BROWSING = os.environ.get('RUN_WITH_BROWSING', 'false').lower() == 'true'
|
||||||
|
ENABLE_LLM_EDITOR = os.environ.get('ENABLE_LLM_EDITOR', 'false').lower() == 'true'
|
||||||
BenchMode = Literal['swe', 'swt', 'swt-ci']
|
BenchMode = Literal['swe', 'swt', 'swt-ci']
|
||||||
|
|
||||||
|
|
||||||
@ -254,15 +255,19 @@ def get_config(
|
|||||||
workspace_base=None,
|
workspace_base=None,
|
||||||
workspace_mount_path=None,
|
workspace_mount_path=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
config.set_llm_config(
|
config.set_llm_config(
|
||||||
update_llm_config_for_completions_logging(
|
update_llm_config_for_completions_logging(
|
||||||
metadata.llm_config, metadata.eval_output_dir, instance['instance_id']
|
metadata.llm_config, metadata.eval_output_dir, instance['instance_id']
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
# get 'draft_editor' config if exists
|
||||||
|
config.set_llm_config(get_llm_config_arg('draft_editor'), 'draft_editor')
|
||||||
|
|
||||||
agent_config = AgentConfig(
|
agent_config = AgentConfig(
|
||||||
enable_jupyter=False,
|
enable_jupyter=False,
|
||||||
enable_browsing=RUN_WITH_BROWSING,
|
enable_browsing=RUN_WITH_BROWSING,
|
||||||
enable_llm_editor=False,
|
enable_llm_editor=ENABLE_LLM_EDITOR,
|
||||||
enable_mcp=False,
|
enable_mcp=False,
|
||||||
condenser=metadata.condenser_config,
|
condenser=metadata.condenser_config,
|
||||||
enable_prompt_extensions=False,
|
enable_prompt_extensions=False,
|
||||||
|
|||||||
@ -141,6 +141,9 @@ def response_to_actions(
|
|||||||
content=arguments['content'],
|
content=arguments['content'],
|
||||||
start=arguments.get('start', 1),
|
start=arguments.get('start', 1),
|
||||||
end=arguments.get('end', -1),
|
end=arguments.get('end', -1),
|
||||||
|
impl_source=arguments.get(
|
||||||
|
'impl_source', FileEditSource.LLM_BASED_EDIT
|
||||||
|
),
|
||||||
)
|
)
|
||||||
elif (
|
elif (
|
||||||
tool_call.function.name
|
tool_call.function.name
|
||||||
|
|||||||
@ -2,10 +2,18 @@ from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChun
|
|||||||
|
|
||||||
_FILE_EDIT_DESCRIPTION = """Edit a file in plain-text format.
|
_FILE_EDIT_DESCRIPTION = """Edit a file in plain-text format.
|
||||||
* The assistant can edit files by specifying the file path and providing a draft of the new file content.
|
* The assistant can edit files by specifying the file path and providing a draft of the new file content.
|
||||||
* The draft content doesn't need to be exactly the same as the existing file; the assistant may skip unchanged lines using comments like `# unchanged` to indicate unchanged sections.
|
* The draft content doesn't need to be exactly the same as the existing file; the assistant may skip unchanged lines using comments like `# ... existing code ...` to indicate unchanged sections.
|
||||||
* IMPORTANT: For large files (e.g., > 300 lines), specify the range of lines to edit using `start` and `end` (1-indexed, inclusive). The range should be smaller than 300 lines.
|
* IMPORTANT: For large files (e.g., > 300 lines), specify the range of lines to edit using `start` and `end` (1-indexed, inclusive). The range should be smaller than 300 lines.
|
||||||
|
* -1 indicates the last line of the file when used as the `start` or `end` value.
|
||||||
|
* Keep at least one unchanged line before the changed section and after the changed section wherever possible.
|
||||||
|
* Make sure to set the `start` and `end` to include all the lines in the original file referred to in the draft of the new file content. Failure to do so will result in bad edits.
|
||||||
* To append to a file, set both `start` and `end` to `-1`.
|
* To append to a file, set both `start` and `end` to `-1`.
|
||||||
* If the file doesn't exist, a new file will be created with the provided content.
|
* If the file doesn't exist, a new file will be created with the provided content.
|
||||||
|
* IMPORTANT: Make sure you include all the required indentations for each line of code in the draft, otherwise the edited code will be incorrectly indented.
|
||||||
|
* IMPORTANT: Make sure that the first line of the draft is also properly indented and has the required whitespaces.
|
||||||
|
* IMPORTANT: NEVER include or make references to lines from outside the `start` and `end` range in the draft.
|
||||||
|
* IMPORTANT: Start the content with a comment in the format: #EDIT: Reason for edit
|
||||||
|
* IMPORTANT: If you are not appending to the file, avoid setting `start` and `end` to the same value.
|
||||||
|
|
||||||
**Example 1: general edit for short files**
|
**Example 1: general edit for short files**
|
||||||
For example, given an existing file `/path/to/file.py` that looks like this:
|
For example, given an existing file `/path/to/file.py` that looks like this:
|
||||||
@ -33,13 +41,12 @@ The assistant wants to edit the file to look like this:
|
|||||||
The assistant may produce an edit action like this:
|
The assistant may produce an edit action like this:
|
||||||
path="/path/to/file.txt" start=1 end=-1
|
path="/path/to/file.txt" start=1 end=-1
|
||||||
content=```
|
content=```
|
||||||
|
#EDIT: I want to change the value of y to 2
|
||||||
class MyClass:
|
class MyClass:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
# no changes before
|
# ... existing code ...
|
||||||
self.y = 2
|
self.y = 2
|
||||||
# self.z is removed
|
|
||||||
|
|
||||||
# MyClass().z is removed
|
|
||||||
print(MyClass().y)
|
print(MyClass().y)
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -58,6 +65,7 @@ For example, given an existing file `/path/to/file.py` that looks like this:
|
|||||||
|
|
||||||
To append the following lines to the file:
|
To append the following lines to the file:
|
||||||
```python
|
```python
|
||||||
|
#EDIT: I want to print the value of y
|
||||||
print(MyClass().y)
|
print(MyClass().y)
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -93,9 +101,9 @@ The assistant wants to edit the file to look like this:
|
|||||||
(2000 more lines below)
|
(2000 more lines below)
|
||||||
|
|
||||||
The assistant may produce an edit action like this:
|
The assistant may produce an edit action like this:
|
||||||
path="/path/to/file.txt" start=1001 end=1008
|
path="/path/to/file.txt" start=1002 end=1008
|
||||||
content=```
|
content=```
|
||||||
class MyClass:
|
#EDIT: I want to change the value of y to 2
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
# no changes before
|
# no changes before
|
||||||
self.y = 2
|
self.y = 2
|
||||||
|
|||||||
@ -22,6 +22,7 @@ from openhands.llm.tool_names import (
|
|||||||
BROWSER_TOOL_NAME,
|
BROWSER_TOOL_NAME,
|
||||||
EXECUTE_BASH_TOOL_NAME,
|
EXECUTE_BASH_TOOL_NAME,
|
||||||
FINISH_TOOL_NAME,
|
FINISH_TOOL_NAME,
|
||||||
|
LLM_BASED_EDIT_TOOL_NAME,
|
||||||
STR_REPLACE_EDITOR_TOOL_NAME,
|
STR_REPLACE_EDITOR_TOOL_NAME,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -251,6 +252,58 @@ noop(1000) # Wait for page to load
|
|||||||
USER: EXECUTION RESULT of [browser]:
|
USER: EXECUTION RESULT of [browser]:
|
||||||
[Browser shows the numbers in a table format]
|
[Browser shows the numbers in a table format]
|
||||||
"""
|
"""
|
||||||
|
},
|
||||||
|
'edit_file': {
|
||||||
|
'create_file': """
|
||||||
|
ASSISTANT: There is no `app.py` file in the current directory. Let me create a Python file `app.py`:
|
||||||
|
<function=edit_file>
|
||||||
|
<parameter=path>/workspace/app.py</parameter>
|
||||||
|
<parameter=start>1</parameter>
|
||||||
|
<parameter=end>-1</parameter>
|
||||||
|
<parameter=content>
|
||||||
|
from flask import Flask
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
@app.route('/')
|
||||||
|
def index():
|
||||||
|
numbers = list(range(1, 11))
|
||||||
|
return str(numbers)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
app.run(port=5000)
|
||||||
|
</parameter>
|
||||||
|
</function>
|
||||||
|
|
||||||
|
USER: EXECUTION RESULT of [edit_file]:
|
||||||
|
File created successfully at: /workspace/app.py
|
||||||
|
""",
|
||||||
|
'edit_file': """
|
||||||
|
ASSISTANT:
|
||||||
|
Now let me display the numbers in a table format:
|
||||||
|
<function=edit_file>
|
||||||
|
<parameter=path>/workspace/app.py</parameter>
|
||||||
|
<parameter=start>6</parameter>
|
||||||
|
<parameter=end>9</parameter>
|
||||||
|
<parameter=content>
|
||||||
|
numbers = list(range(1, 11))
|
||||||
|
return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
|
||||||
|
# ... existing code ...
|
||||||
|
if __name__ == '__main__':
|
||||||
|
</parameter>
|
||||||
|
</function>
|
||||||
|
|
||||||
|
USER: EXECUTION RESULT of [edit_file]:
|
||||||
|
The file /workspace/app.py has been edited. Here's the result of running `cat -n` on a snippet of /workspace/app.py:
|
||||||
|
3
|
||||||
|
4 @app.route('/')
|
||||||
|
5 def index():
|
||||||
|
6 numbers = list(range(1, 11))
|
||||||
|
7 return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
|
||||||
|
8
|
||||||
|
9 if __name__ == '__main__':
|
||||||
|
10 app.run(port=5000)
|
||||||
|
Review the changes and make sure they are as expected. Edit the file again if necessary.
|
||||||
|
""",
|
||||||
},
|
},
|
||||||
'finish': {
|
'finish': {
|
||||||
'task_completed': """
|
'task_completed': """
|
||||||
@ -279,6 +332,8 @@ def get_example_for_tools(tools: list[dict]) -> str:
|
|||||||
available_tools.add('browser')
|
available_tools.add('browser')
|
||||||
elif name == FINISH_TOOL_NAME:
|
elif name == FINISH_TOOL_NAME:
|
||||||
available_tools.add('finish')
|
available_tools.add('finish')
|
||||||
|
elif name == LLM_BASED_EDIT_TOOL_NAME:
|
||||||
|
available_tools.add('edit_file')
|
||||||
|
|
||||||
if not available_tools:
|
if not available_tools:
|
||||||
return ''
|
return ''
|
||||||
@ -297,6 +352,8 @@ USER: Create a list of numbers from 1 to 10, and display them in a web page at p
|
|||||||
|
|
||||||
if 'str_replace_editor' in available_tools:
|
if 'str_replace_editor' in available_tools:
|
||||||
example += TOOL_EXAMPLES['str_replace_editor']['create_file']
|
example += TOOL_EXAMPLES['str_replace_editor']['create_file']
|
||||||
|
elif 'edit_file' in available_tools:
|
||||||
|
example += TOOL_EXAMPLES['edit_file']['create_file']
|
||||||
|
|
||||||
if 'execute_bash' in available_tools:
|
if 'execute_bash' in available_tools:
|
||||||
example += TOOL_EXAMPLES['execute_bash']['run_server']
|
example += TOOL_EXAMPLES['execute_bash']['run_server']
|
||||||
@ -309,6 +366,8 @@ USER: Create a list of numbers from 1 to 10, and display them in a web page at p
|
|||||||
|
|
||||||
if 'str_replace_editor' in available_tools:
|
if 'str_replace_editor' in available_tools:
|
||||||
example += TOOL_EXAMPLES['str_replace_editor']['edit_file']
|
example += TOOL_EXAMPLES['str_replace_editor']['edit_file']
|
||||||
|
elif 'edit_file' in available_tools:
|
||||||
|
example += TOOL_EXAMPLES['edit_file']['edit_file']
|
||||||
|
|
||||||
if 'execute_bash' in available_tools:
|
if 'execute_bash' in available_tools:
|
||||||
example += TOOL_EXAMPLES['execute_bash']['run_server_again']
|
example += TOOL_EXAMPLES['execute_bash']['run_server_again']
|
||||||
|
|||||||
@ -4,3 +4,4 @@ EXECUTE_BASH_TOOL_NAME = 'execute_bash'
|
|||||||
STR_REPLACE_EDITOR_TOOL_NAME = 'str_replace_editor'
|
STR_REPLACE_EDITOR_TOOL_NAME = 'str_replace_editor'
|
||||||
BROWSER_TOOL_NAME = 'browser'
|
BROWSER_TOOL_NAME = 'browser'
|
||||||
FINISH_TOOL_NAME = 'finish'
|
FINISH_TOOL_NAME = 'finish'
|
||||||
|
LLM_BASED_EDIT_TOOL_NAME = 'edit_file'
|
||||||
|
|||||||
@ -4,7 +4,7 @@ import tempfile
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from openhands_aci.utils.diff import get_diff
|
from openhands_aci.utils.diff import get_diff # type: ignore
|
||||||
|
|
||||||
from openhands.core.config import OpenHandsConfig
|
from openhands.core.config import OpenHandsConfig
|
||||||
from openhands.core.logger import openhands_logger as logger
|
from openhands.core.logger import openhands_logger as logger
|
||||||
@ -26,39 +26,31 @@ from openhands.llm.llm import LLM
|
|||||||
from openhands.llm.metrics import Metrics
|
from openhands.llm.metrics import Metrics
|
||||||
from openhands.utils.chunk_localizer import Chunk, get_top_k_chunk_matches
|
from openhands.utils.chunk_localizer import Chunk, get_top_k_chunk_matches
|
||||||
|
|
||||||
SYS_MSG = """Your job is to produce a new version of the file based on the old version and the
|
|
||||||
provided draft of the new version. The provided draft may be incomplete (it may skip lines) and/or incorrectly indented. You should try to apply the changes present in the draft to the old version, and output a new version of the file.
|
|
||||||
NOTE:
|
|
||||||
- The output file should be COMPLETE and CORRECTLY INDENTED. Do not omit any lines, and do not change any lines that are not part of the changes.
|
|
||||||
- You should output the new version of the file by wrapping the new version of the file content in a ``` block.
|
|
||||||
- If there's no explicit comment to remove the existing code, we should keep them and append the new code to the end of the file.
|
|
||||||
- If there's placeholder comments like `# no changes before` or `# no changes here`, we should replace these comments with the original code near the placeholder comments.
|
|
||||||
"""
|
|
||||||
|
|
||||||
USER_MSG = """
|
USER_MSG = """
|
||||||
HERE IS THE OLD VERSION OF THE FILE:
|
Code changes will be provided in the form of a draft. You will need to apply the draft to the original code.
|
||||||
```
|
The original code will be enclosed within `<original_code>` tags.
|
||||||
{old_contents}
|
The draft will be enclosed within `<update_snippet>` tags.
|
||||||
```
|
You need to output the update code within `<updated_code>` tags.
|
||||||
|
|
||||||
HERE IS THE DRAFT OF THE NEW VERSION OF THE FILE:
|
Within the `<updated_code>` tag, include only the final code after updation. Do not include any explanations or other content within these tags.
|
||||||
```
|
|
||||||
{draft_changes}
|
|
||||||
```
|
|
||||||
|
|
||||||
GIVE ME THE NEW VERSION OF THE FILE.
|
<original_code>{old_contents}</original_code>
|
||||||
IMPORTANT:
|
|
||||||
- There should be NO placeholder comments like `# no changes before` or `# no changes here`. They should be replaced with the original code near the placeholder comments.
|
<update_snippet>{draft_changes}</update_snippet>
|
||||||
- The output file should be COMPLETE and CORRECTLY INDENTED. Do not omit any lines, and do not change any lines that are not part of the changes.
|
"""
|
||||||
""".strip()
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_code(string: str) -> str | None:
|
def _extract_code(string: str) -> str | None:
|
||||||
pattern = r'```(?:\w*\n)?(.*?)```'
|
pattern = r'<updated_code>(.*?)</updated_code>'
|
||||||
matches = re.findall(pattern, string, re.DOTALL)
|
matches = re.findall(pattern, string, re.DOTALL)
|
||||||
if not matches:
|
if not matches:
|
||||||
return None
|
return None
|
||||||
return str(matches[0])
|
|
||||||
|
content = str(matches[0])
|
||||||
|
if content.startswith('#EDIT:'):
|
||||||
|
# Strip the leading "#EDIT: ..." reason line from the extracted code
|
||||||
|
content = content[content.find('\n') + 1:]
|
||||||
|
return content
|
||||||
|
|
||||||
|
|
||||||
def get_new_file_contents(
|
def get_new_file_contents(
|
||||||
@ -66,7 +58,6 @@ def get_new_file_contents(
|
|||||||
) -> str | None:
|
) -> str | None:
|
||||||
while num_retries > 0:
|
while num_retries > 0:
|
||||||
messages = [
|
messages = [
|
||||||
{'role': 'system', 'content': SYS_MSG},
|
|
||||||
{
|
{
|
||||||
'role': 'user',
|
'role': 'user',
|
||||||
'content': USER_MSG.format(
|
'content': USER_MSG.format(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user