Add git patch info to guess_success prompt (#5950)

Co-authored-by: openhands <openhands@all-hands.dev>
Author: Graham Neubig, 2025-01-04 10:56:50 +09:00 (committed by GitHub)
parent 510c1644dd
commit 5bdebac741
6 changed files with 217 additions and 15 deletions


@@ -37,9 +37,9 @@ class IssueHandlerInterface(ABC):
@abstractmethod
def guess_success(
- self, issue: GithubIssue, history: list[Event]
+ self, issue: GithubIssue, history: list[Event], git_patch: str | None = None
) -> tuple[bool, list[bool] | None, str]:
- """Guess if the issue has been resolved based on the agent's output."""
+ """Guess if the issue has been resolved based on the agent's output and git patch."""
pass
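
Since the abstract method now takes an optional git_patch, concrete handlers and their call sites have to accept and forward it. A minimal caller-side sketch, not part of this commit; handler, issue, history, and patch_text are placeholder names:

    patch_text: str | None = None  # e.g. the diff produced by the agent run, or None if nothing changed
    success, comment_success, explanation = handler.guess_success(
        issue, history, git_patch=patch_text
    )
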
@@ -249,13 +249,14 @@ class IssueHandler(IssueHandlerInterface):
)
def guess_success(
- self, issue: GithubIssue, history: list[Event]
+ self, issue: GithubIssue, history: list[Event], git_patch: str | None = None
) -> tuple[bool, None | list[bool], str]:
"""Guess if the issue is fixed based on the history and the issue description.
Args:
issue: The issue to check
history: The agent's history
+ git_patch: Optional git patch showing the changes made
"""
last_message = history[-1].message
@@ -665,6 +666,7 @@ class PRHandler(IssueHandler):
review_thread: ReviewThread,
issues_context: str,
last_message: str,
+ git_patch: str | None = None,
) -> tuple[bool, str]:
"""Check if a review thread's feedback has been addressed."""
files_context = json.dumps(review_thread.files, indent=4)
@@ -683,6 +685,7 @@ class PRHandler(IssueHandler):
feedback=review_thread.comment,
files_context=files_context,
last_message=last_message,
+ git_patch=git_patch or 'No changes made yet',
)
return self._check_feedback_with_llm(prompt)
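
This helper and the two that follow all use the same `git_patch or 'No changes made yet'` fallback, so the prompt never renders an empty patch section; the fallback also covers an empty-string patch, not just None. A trivial illustration, not taken from the commit:

    for patch in (None, '', 'diff --git a/f.py b/f.py'):
        print(patch or 'No changes made yet')  # falls back for both None and ''
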
@@ -692,6 +695,7 @@ class PRHandler(IssueHandler):
thread_comments: list[str],
issues_context: str,
last_message: str,
+ git_patch: str | None = None,
) -> tuple[bool, str]:
"""Check if thread comments feedback has been addressed."""
thread_context = '\n---\n'.join(thread_comments)
@@ -708,6 +712,7 @@ class PRHandler(IssueHandler):
issue_context=issues_context,
thread_context=thread_context,
last_message=last_message,
+ git_patch=git_patch or 'No changes made yet',
)
return self._check_feedback_with_llm(prompt)
@@ -717,6 +722,7 @@ class PRHandler(IssueHandler):
review_comments: list[str],
issues_context: str,
last_message: str,
+ git_patch: str | None = None,
) -> tuple[bool, str]:
"""Check if review comments feedback has been addressed."""
review_context = '\n---\n'.join(review_comments)
@@ -733,15 +739,17 @@ class PRHandler(IssueHandler):
issue_context=issues_context,
review_context=review_context,
last_message=last_message,
+ git_patch=git_patch or 'No changes made yet',
)
return self._check_feedback_with_llm(prompt)
def guess_success(
- self, issue: GithubIssue, history: list[Event]
+ self, issue: GithubIssue, history: list[Event], git_patch: str | None = None
) -> tuple[bool, None | list[bool], str]:
- """Guess if the issue is fixed based on the history and the issue description."""
+ """Guess if the issue is fixed based on the history, issue description and git patch."""
last_message = history[-1].message
issues_context = json.dumps(issue.closing_issues, indent=4)
success_list = []
explanation_list = []
@@ -751,7 +759,7 @@ class PRHandler(IssueHandler):
for review_thread in issue.review_threads:
if issues_context and last_message:
success, explanation = self._check_review_thread(
- review_thread, issues_context, last_message
+ review_thread, issues_context, last_message, git_patch
)
else:
success, explanation = False, 'Missing context or message'
@@ -761,7 +769,7 @@ class PRHandler(IssueHandler):
elif issue.thread_comments:
if issue.thread_comments and issues_context and last_message:
success, explanation = self._check_thread_comments(
- issue.thread_comments, issues_context, last_message
+ issue.thread_comments, issues_context, last_message, git_patch
)
else:
success, explanation = (
@@ -774,7 +782,7 @@ class PRHandler(IssueHandler):
# Handle PRs with only review comments (no file-specific review comments or thread comments)
if issue.review_comments and issues_context and last_message:
success, explanation = self._check_review_comments(
- issue.review_comments, issues_context, last_message
+ issue.review_comments, issues_context, last_message, git_patch
)
else:
success, explanation = (


@@ -9,6 +9,9 @@ Feedback:
Files locations:
{{ files_context }}
+ Changes made (git patch):
+ {{ git_patch }}
Last message from AI agent:
{{ last_message }}
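
The {{ ... }} placeholders indicate these prompt files are Jinja-style templates. A rough sketch of how the new git_patch field would be filled in at render time; the file name and local variables are assumptions, not taken from the diff:

    from jinja2 import Template

    # Hypothetical rendering of the review-thread feedback template above.
    with open('pr-feedback-check.jinja') as f:  # file name is a guess
        template = Template(f.read())
    prompt = template.render(
        feedback=review_thread.comment,
        files_context=files_context,
        last_message=last_message,
        git_patch=git_patch or 'No changes made yet',
    )
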


@@ -6,6 +6,9 @@ Issue descriptions:
PR Review Comments:
{{ review_context }}
+ Changes made (git patch):
+ {{ git_patch }}
Last message from AI agent:
{{ last_message }}


@@ -6,6 +6,9 @@ Issue descriptions:
PR Thread Comments:
{{ thread_context }}
+ Changes made (git patch):
+ {{ git_patch }}
Last message from AI agent:
{{ last_message }}


@@ -244,9 +244,9 @@ async def process_issue(
else:
histories = [dataclasses.asdict(event) for event in state.history]
metrics = state.metrics.get() if state.metrics else None
- # determine success based on the history and the issue description
+ # determine success based on the history, issue description and git patch
success, comment_success, result_explanation = issue_handler.guess_success(
- issue, state.history
+ issue, state.history, git_patch
)
if issue_handler.issue_type == 'pr' and comment_success:
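
The call above assumes a git_patch string is already in scope inside process_issue; the new tests describe it as the patch coming from complete_runtime. For illustration only, one hypothetical way such a patch string could be produced (this helper is not part of the commit):

    import subprocess

    def get_git_patch(repo_dir: str, base_commit: str) -> str | None:
        """Return the diff of the checkout against base_commit, or None if there are no changes."""
        result = subprocess.run(
            ['git', 'diff', base_commit],
            cwd=repo_dir,
            capture_output=True,
            text=True,
            check=True,
        )
        return result.stdout or None
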


@@ -1,6 +1,8 @@
import json
from unittest.mock import MagicMock, patch
import pytest
from openhands.core.config import LLMConfig
from openhands.events.action.message import MessageAction
from openhands.llm.llm import LLM
@@ -8,11 +10,27 @@ from openhands.resolver.github_issue import GithubIssue, ReviewThread
from openhands.resolver.issue_definitions import PRHandler
def mock_llm_response(content):
"""Helper function to create a mock LLM response."""
mock_response = MagicMock()
mock_response.choices = [MagicMock(message=MagicMock(content=content))]
return mock_response
@pytest.fixture
def pr_handler():
llm_config = LLMConfig(model='test-model')
return PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
@pytest.fixture
def mock_llm_success_response():
return MagicMock(
choices=[
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
The changes look good"""
)
)
]
)
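
The pr_handler and mock_llm_success_response fixtures are added here but not exercised in the hunks shown below; a hypothetical test consuming them could look like this (not part of the commit):

    def test_check_feedback_with_llm_fixture_example(pr_handler, mock_llm_success_response):
        # _check_feedback_with_llm parses the '--- success' / '--- explanation'
        # blocks out of the mocked LLM response.
        with patch.object(LLM, 'completion', return_value=mock_llm_success_response):
            success, explanation = pr_handler._check_feedback_with_llm('example prompt')
        assert success is True
        assert explanation == 'The changes look good'
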
def test_guess_success_review_threads_litellm_call():
@@ -233,6 +251,63 @@ def test_check_feedback_with_llm():
assert (success, explanation) == case['expected']
def test_check_review_thread_with_git_patch():
"""Test that git patch from complete_runtime is included in the prompt."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create test data
review_thread = ReviewThread(
comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
files=['/src/file1.py', '/src/file2.py'],
)
issues_context = json.dumps(
['Issue 1 description', 'Issue 2 description'], indent=4
)
last_message = 'I have fixed the formatting and added docstrings'
git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+"""Added docstring."""\n'
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
Changes look good"""
)
)
]
# Test the function
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, explanation = handler._check_review_thread(
review_thread, issues_context, last_message, git_patch
)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert 'Issue descriptions:\n' + issues_context in prompt
assert 'Feedback:\n' + review_thread.comment in prompt
assert (
'Files locations:\n' + json.dumps(review_thread.files, indent=4) in prompt
)
assert 'Last message from AI agent:\n' + last_message in prompt
assert 'Changes made (git patch):\n' + git_patch in prompt
# Check result
assert success is True
assert explanation == 'Changes look good'
def test_check_review_thread():
"""Test the _check_review_thread helper function."""
# Create a PR handler instance
@@ -288,6 +363,61 @@ Changes look good"""
assert explanation == 'Changes look good'
def test_check_thread_comments_with_git_patch():
"""Test that git patch from complete_runtime is included in the prompt."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create test data
thread_comments = [
'Please improve error handling',
'Add input validation',
'latest feedback:\nHandle edge cases',
]
issues_context = json.dumps(
['Issue 1 description', 'Issue 2 description'], indent=4
)
last_message = 'I have added error handling and input validation'
git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+try:\n+ validate_input()\n+except ValueError:\n+ handle_error()\n'
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
Changes look good"""
)
)
]
# Test the function
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, explanation = handler._check_thread_comments(
thread_comments, issues_context, last_message, git_patch
)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert 'Issue descriptions:\n' + issues_context in prompt
assert 'PR Thread Comments:\n' + '\n---\n'.join(thread_comments) in prompt
assert 'Last message from AI agent:\n' + last_message in prompt
assert 'Changes made (git patch):\n' + git_patch in prompt
# Check result
assert success is True
assert explanation == 'Changes look good'
def test_check_thread_comments():
"""Test the _check_thread_comments helper function."""
# Create a PR handler instance
@@ -341,6 +471,61 @@ Changes look good"""
assert explanation == 'Changes look good'
def test_check_review_comments_with_git_patch():
"""Test that git patch from complete_runtime is included in the prompt."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create test data
review_comments = [
'Please fix the code style',
'Add more test cases',
'latest feedback:\nImprove documentation',
]
issues_context = json.dumps(
['Issue 1 description', 'Issue 2 description'], indent=4
)
last_message = 'I have fixed the code style and added tests'
git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+"""This module does X."""\n+def func():\n+ """Do Y."""\n'
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
Changes look good"""
)
)
]
# Test the function
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, explanation = handler._check_review_comments(
review_comments, issues_context, last_message, git_patch
)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert 'Issue descriptions:\n' + issues_context in prompt
assert 'PR Review Comments:\n' + '\n---\n'.join(review_comments) in prompt
assert 'Last message from AI agent:\n' + last_message in prompt
assert 'Changes made (git patch):\n' + git_patch in prompt
# Check result
assert success is True
assert explanation == 'Changes look good'
def test_check_review_comments():
"""Test the _check_review_comments helper function."""
# Create a PR handler instance