Add git patch info to guess_success prompt (#5950)

Co-authored-by: openhands <openhands@all-hands.dev>
Author: Graham Neubig, 2025-01-04 10:56:50 +09:00 (committed by GitHub)
parent 510c1644dd
commit 5bdebac741
6 changed files with 217 additions and 15 deletions


@@ -37,9 +37,9 @@ class IssueHandlerInterface(ABC):
@abstractmethod
def guess_success(
- self, issue: GithubIssue, history: list[Event]
+ self, issue: GithubIssue, history: list[Event], git_patch: str | None = None
) -> tuple[bool, list[bool] | None, str]:
- """Guess if the issue has been resolved based on the agent's output."""
+ """Guess if the issue has been resolved based on the agent's output and git patch."""
pass
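
Since the abstract method now takes an optional git_patch, concrete handlers and their call sites have to accept and forward it. A minimal caller-side sketch, not part of this commit; handler, issue, history, and patch_text are placeholder names:

    patch_text: str | None = None  # e.g. the diff produced by the agent run, or None if nothing changed
    success, comment_success, explanation = handler.guess_success(
        issue, history, git_patch=patch_text
    )
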
@@ -249,13 +249,14 @@ class IssueHandler(IssueHandlerInterface):
)
def guess_success(
- self, issue: GithubIssue, history: list[Event]
+ self, issue: GithubIssue, history: list[Event], git_patch: str | None = None
) -> tuple[bool, None | list[bool], str]:
"""Guess if the issue is fixed based on the history and the issue description.
Args:
issue: The issue to check
history: The agent's history
+ git_patch: Optional git patch showing the changes made
"""
last_message = history[-1].message
@@ -665,6 +666,7 @@ class PRHandler(IssueHandler):
review_thread: ReviewThread,
issues_context: str,
last_message: str,
+ git_patch: str | None = None,
) -> tuple[bool, str]:
"""Check if a review thread's feedback has been addressed."""
files_context = json.dumps(review_thread.files, indent=4)
@@ -683,6 +685,7 @@ class PRHandler(IssueHandler):
feedback=review_thread.comment,
files_context=files_context,
last_message=last_message,
+ git_patch=git_patch or 'No changes made yet',
)
return self._check_feedback_with_llm(prompt)
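
This helper and the two that follow all use the same `git_patch or 'No changes made yet'` fallback, so the prompt never renders an empty patch section; the fallback also covers an empty-string patch, not just None. A trivial illustration, not taken from the commit:

    for patch in (None, '', 'diff --git a/f.py b/f.py'):
        print(patch or 'No changes made yet')  # falls back for both None and ''
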
@@ -692,6 +695,7 @@ class PRHandler(IssueHandler):
thread_comments: list[str],
issues_context: str,
last_message: str,
+ git_patch: str | None = None,
) -> tuple[bool, str]:
"""Check if thread comments feedback has been addressed."""
thread_context = '\n---\n'.join(thread_comments)
@@ -708,6 +712,7 @@ class PRHandler(IssueHandler):
issue_context=issues_context,
thread_context=thread_context,
last_message=last_message,
+ git_patch=git_patch or 'No changes made yet',
)
return self._check_feedback_with_llm(prompt)
@@ -717,6 +722,7 @@ class PRHandler(IssueHandler):
review_comments: list[str],
issues_context: str,
last_message: str,
+ git_patch: str | None = None,
) -> tuple[bool, str]:
"""Check if review comments feedback has been addressed."""
review_context = '\n---\n'.join(review_comments)
@@ -733,15 +739,17 @@ class PRHandler(IssueHandler):
issue_context=issues_context,
review_context=review_context,
last_message=last_message,
+ git_patch=git_patch or 'No changes made yet',
)
return self._check_feedback_with_llm(prompt)
def guess_success(
- self, issue: GithubIssue, history: list[Event]
+ self, issue: GithubIssue, history: list[Event], git_patch: str | None = None
) -> tuple[bool, None | list[bool], str]:
- """Guess if the issue is fixed based on the history and the issue description."""
+ """Guess if the issue is fixed based on the history, issue description and git patch."""
last_message = history[-1].message
issues_context = json.dumps(issue.closing_issues, indent=4)
success_list = []
explanation_list = []
@@ -751,7 +759,7 @@ class PRHandler(IssueHandler):
for review_thread in issue.review_threads:
if issues_context and last_message:
success, explanation = self._check_review_thread(
- review_thread, issues_context, last_message
+ review_thread, issues_context, last_message, git_patch
)
else:
success, explanation = False, 'Missing context or message'
@@ -761,7 +769,7 @@ class PRHandler(IssueHandler):
elif issue.thread_comments:
if issue.thread_comments and issues_context and last_message:
success, explanation = self._check_thread_comments(
- issue.thread_comments, issues_context, last_message
+ issue.thread_comments, issues_context, last_message, git_patch
)
else:
success, explanation = (
@@ -774,7 +782,7 @@ class PRHandler(IssueHandler):
# Handle PRs with only review comments (no file-specific review comments or thread comments)
if issue.review_comments and issues_context and last_message:
success, explanation = self._check_review_comments(
- issue.review_comments, issues_context, last_message
+ issue.review_comments, issues_context, last_message, git_patch
)
else:
success, explanation = (


@@ -9,6 +9,9 @@ Feedback:
Files locations:
{{ files_context }}
+ Changes made (git patch):
+ {{ git_patch }}
Last message from AI agent:
{{ last_message }}
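
The {{ ... }} placeholders indicate these prompt files are Jinja-style templates. A rough sketch of how the new git_patch field would be filled in at render time; the file name and local variables are assumptions, not taken from the diff:

    from jinja2 import Template

    # Hypothetical rendering of the review-thread feedback template above.
    with open('pr-feedback-check.jinja') as f:  # file name is a guess
        template = Template(f.read())
    prompt = template.render(
        feedback=review_thread.comment,
        files_context=files_context,
        last_message=last_message,
        git_patch=git_patch or 'No changes made yet',
    )
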


@@ -6,6 +6,9 @@ Issue descriptions:
PR Review Comments:
{{ review_context }}
+ Changes made (git patch):
+ {{ git_patch }}
Last message from AI agent:
{{ last_message }}


@@ -6,6 +6,9 @@ Issue descriptions:
PR Thread Comments:
{{ thread_context }}
+ Changes made (git patch):
+ {{ git_patch }}
Last message from AI agent:
{{ last_message }}


@@ -244,9 +244,9 @@ async def process_issue(
else:
histories = [dataclasses.asdict(event) for event in state.history]
metrics = state.metrics.get() if state.metrics else None
- # determine success based on the history and the issue description
+ # determine success based on the history, issue description and git patch
success, comment_success, result_explanation = issue_handler.guess_success(
- issue, state.history
+ issue, state.history, git_patch
)
if issue_handler.issue_type == 'pr' and comment_success:
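
The call above assumes a git_patch string is already in scope inside process_issue; the new tests describe it as the patch coming from complete_runtime. For illustration only, one hypothetical way such a patch string could be produced (this helper is not part of the commit):

    import subprocess

    def get_git_patch(repo_dir: str, base_commit: str) -> str | None:
        """Return the diff of the checkout against base_commit, or None if there are no changes."""
        result = subprocess.run(
            ['git', 'diff', base_commit],
            cwd=repo_dir,
            capture_output=True,
            text=True,
            check=True,
        )
        return result.stdout or None
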


@@ -1,6 +1,8 @@
import json
from unittest.mock import MagicMock, patch
import pytest
from openhands.core.config import LLMConfig
from openhands.events.action.message import MessageAction
from openhands.llm.llm import LLM
@@ -8,11 +10,27 @@ from openhands.resolver.github_issue import GithubIssue, ReviewThread
from openhands.resolver.issue_definitions import PRHandler
def mock_llm_response(content):
"""Helper function to create a mock LLM response."""
mock_response = MagicMock()
mock_response.choices = [MagicMock(message=MagicMock(content=content))]
return mock_response
@pytest.fixture
def pr_handler():
llm_config = LLMConfig(model='test-model')
return PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
@pytest.fixture
def mock_llm_success_response():
return MagicMock(
choices=[
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
The changes look good"""
)
)
]
)
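
The pr_handler and mock_llm_success_response fixtures are added here but not exercised in the hunks shown below; a hypothetical test consuming them could look like this (not part of the commit):

    def test_check_feedback_with_llm_fixture_example(pr_handler, mock_llm_success_response):
        # _check_feedback_with_llm parses the '--- success' / '--- explanation'
        # blocks out of the mocked LLM response.
        with patch.object(LLM, 'completion', return_value=mock_llm_success_response):
            success, explanation = pr_handler._check_feedback_with_llm('example prompt')
        assert success is True
        assert explanation == 'The changes look good'
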
def test_guess_success_review_threads_litellm_call():
@@ -233,6 +251,63 @@ def test_check_feedback_with_llm():
assert (success, explanation) == case['expected']
def test_check_review_thread_with_git_patch():
"""Test that git patch from complete_runtime is included in the prompt."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create test data
review_thread = ReviewThread(
comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
files=['/src/file1.py', '/src/file2.py'],
)
issues_context = json.dumps(
['Issue 1 description', 'Issue 2 description'], indent=4
)
last_message = 'I have fixed the formatting and added docstrings'
git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+"""Added docstring."""\n'
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
Changes look good"""
)
)
]
# Test the function
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, explanation = handler._check_review_thread(
review_thread, issues_context, last_message, git_patch
)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert 'Issue descriptions:\n' + issues_context in prompt
assert 'Feedback:\n' + review_thread.comment in prompt
assert (
'Files locations:\n' + json.dumps(review_thread.files, indent=4) in prompt
)
assert 'Last message from AI agent:\n' + last_message in prompt
assert 'Changes made (git patch):\n' + git_patch in prompt
# Check result
assert success is True
assert explanation == 'Changes look good'
def test_check_review_thread():
"""Test the _check_review_thread helper function."""
# Create a PR handler instance
@@ -288,6 +363,61 @@ Changes look good"""
assert explanation == 'Changes look good'
def test_check_thread_comments_with_git_patch():
"""Test that git patch from complete_runtime is included in the prompt."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create test data
thread_comments = [
'Please improve error handling',
'Add input validation',
'latest feedback:\nHandle edge cases',
]
issues_context = json.dumps(
['Issue 1 description', 'Issue 2 description'], indent=4
)
last_message = 'I have added error handling and input validation'
git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+try:\n+ validate_input()\n+except ValueError:\n+ handle_error()\n'
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
Changes look good"""
)
)
]
# Test the function
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, explanation = handler._check_thread_comments(
thread_comments, issues_context, last_message, git_patch
)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert 'Issue descriptions:\n' + issues_context in prompt
assert 'PR Thread Comments:\n' + '\n---\n'.join(thread_comments) in prompt
assert 'Last message from AI agent:\n' + last_message in prompt
assert 'Changes made (git patch):\n' + git_patch in prompt
# Check result
assert success is True
assert explanation == 'Changes look good'
def test_check_thread_comments():
"""Test the _check_thread_comments helper function."""
# Create a PR handler instance
@@ -341,6 +471,61 @@ Changes look good"""
assert explanation == 'Changes look good'
def test_check_review_comments_with_git_patch():
"""Test that git patch from complete_runtime is included in the prompt."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create test data
review_comments = [
'Please fix the code style',
'Add more test cases',
'latest feedback:\nImprove documentation',
]
issues_context = json.dumps(
['Issue 1 description', 'Issue 2 description'], indent=4
)
last_message = 'I have fixed the code style and added tests'
git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+"""This module does X."""\n+def func():\n+ """Do Y."""\n'
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
Changes look good"""
)
)
]
# Test the function
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, explanation = handler._check_review_comments(
review_comments, issues_context, last_message, git_patch
)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert 'Issue descriptions:\n' + issues_context in prompt
assert 'PR Review Comments:\n' + '\n---\n'.join(review_comments) in prompt
assert 'Last message from AI agent:\n' + last_message in prompt
assert 'Changes made (git patch):\n' + git_patch in prompt
# Check result
assert success is True
assert explanation == 'Changes look good'
def test_check_review_comments():
"""Test the _check_review_comments helper function."""
# Create a PR handler instance