fix: Handle empty lines in patch parser (#6208)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Graham Neubig 2025-01-12 06:43:08 +09:00 committed by GitHub
parent f31ccad48b
commit 40c52feb5b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 74 additions and 26 deletions

View File

@ -24,7 +24,7 @@ unified_header_index = re.compile('^Index: (.+)$')
unified_header_old_line = re.compile(r'^--- ' + file_timestamp_str + '$')
unified_header_new_line = re.compile(r'^\+\+\+ ' + file_timestamp_str + '$')
unified_hunk_start = re.compile(r'^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@(.*)$')
unified_change = re.compile('^([-+ ])(.*)$')
unified_change = re.compile('^([-+ ])(.*)$', re.MULTILINE)
context_header_old_line = re.compile(r'^\*\*\* ' + file_timestamp_str + '$')
context_header_new_line = re.compile('^--- ' + file_timestamp_str + '$')
@ -606,38 +606,39 @@ def parse_unified_diff(text):
h = unified_hunk_start.match(hunk[0])
del hunk[0]
if h:
old = int(h.group(1))
if len(h.group(2)) > 0:
old_len = int(h.group(2))
else:
old_len = 0
# The hunk header @@ -1,6 +1,6 @@ means:
# - Start at line 1 in the old file and show 6 lines
# - Start at line 1 in the new file and show 6 lines
old = int(h.group(1)) # Starting line in old file
old_len = int(h.group(2)) if len(h.group(2)) > 0 else 1 # Number of lines in old file
new = int(h.group(3))
if len(h.group(4)) > 0:
new_len = int(h.group(4))
else:
new_len = 0
new = int(h.group(3)) # Starting line in new file
new_len = int(h.group(4)) if len(h.group(4)) > 0 else 1 # Number of lines in new file
h = None
break
# Process each line in the hunk
for n in hunk:
c = unified_change.match(n)
if c:
kind = c.group(1)
line = c.group(2)
# Each line in a unified diff starts with a space (context), + (addition), or - (deletion)
# The first character is the kind, the rest is the line content
kind = n[0] if len(n) > 0 else ' ' # Empty lines in the hunk are treated as context lines
line = n[1:] if len(n) > 1 else ''
if kind == '-' and (r != old_len or r == 0):
changes.append(Change(old + r, None, line, hunk_n))
r += 1
elif kind == '+' and (i != new_len or i == 0):
changes.append(Change(None, new + i, line, hunk_n))
i += 1
elif kind == ' ':
if r != old_len and i != new_len:
changes.append(Change(old + r, new + i, line, hunk_n))
r += 1
i += 1
# Process the line based on its kind
if kind == '-' and (r != old_len or r == 0):
# Line was removed from the old file
changes.append(Change(old + r, None, line, hunk_n))
r += 1
elif kind == '+' and (i != new_len or i == 0):
# Line was added in the new file
changes.append(Change(None, new + i, line, hunk_n))
i += 1
elif kind == ' ':
# Context line - exists in both old and new file
changes.append(Change(old + r, new + i, line, hunk_n))
r += 1
i += 1
if len(changes) > 0:
return changes

View File

@ -0,0 +1,47 @@
import pytest
from openhands.resolver.patching.apply import apply_diff
from openhands.resolver.patching.exceptions import HunkApplyException
from openhands.resolver.patching.patch import parse_diff, diffobj
def test_patch_apply_with_empty_lines() -> None:
    """Check that a hunk containing a completely empty context line applies.

    The hunk keeps ``# PR Viewer`` and the blank line after it as context and
    rewrites the paragraph on line 3. The blank context line is stored with no
    leading space marker at all, so the patch only applies if the parser
    treats an empty hunk line as a context line.
    """
    # The original file has no indentation and uses \n line endings
    original_content = "# PR Viewer\n\nThis React application allows you to view open pull requests from GitHub repositories in a GitHub organization. By default, it uses the All-Hands-AI organization.\n\n## Setup"

    # The hunk header declares 3 lines on each side: the context line
    # "# PR Viewer" (prefixed with the unified-diff space marker), one blank
    # context line, and the paragraph that is replaced. The blank line is left
    # entirely empty on purpose - that is the case this test exercises.
    patch = """diff --git a/README.md b/README.md
index b760a53..5071727 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,3 @@
 # PR Viewer

-This React application allows you to view open pull requests from GitHub repositories in a GitHub organization. By default, it uses the All-Hands-AI organization.
+This React application was created by Graham Neubig and OpenHands. It allows you to view open pull requests from GitHub repositories in a GitHub organization. By default, it uses the All-Hands-AI organization."""

    # Diagnostic output only: pytest captures stdout and shows it when the
    # test fails, which makes line-number mismatches easy to spot.
    print("Original content lines:")
    for i, line in enumerate(original_content.splitlines(), 1):
        print(f"{i}: {repr(line)}")

    print("\nPatch lines:")
    for i, line in enumerate(patch.splitlines(), 1):
        print(f"{i}: {repr(line)}")

    changes = parse_diff(patch)
    print("\nParsed changes:")
    for change in changes:
        print(f"Change(old={change.old}, new={change.new}, line={repr(change.line)}, hunk={change.hunk})")

    diff = diffobj(header=None, changes=changes, text=patch)

    # Apply the patch
    result = apply_diff(diff, original_content)

    # The patch should be applied successfully: only line 3 changes, and the
    # lines outside the hunk ("", "## Setup") are carried over untouched.
    expected_result = [
        "# PR Viewer",
        "",
        "This React application was created by Graham Neubig and OpenHands. It allows you to view open pull requests from GitHub repositories in a GitHub organization. By default, it uses the All-Hands-AI organization.",
        "",
        "## Setup",
    ]
    assert result == expected_result