fix: sanitize file_path in git diff shell commands to prevent command injection (#13051)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Ray Myers
2026-03-09 12:29:25 -05:00
committed by GitHub
parent 8356170193
commit 698cfc2520
3 changed files with 93 additions and 13 deletions

View File

@@ -12,6 +12,7 @@ NOTE: Since this is run as a script, there should be no imports from project fil
import json
import os
import shlex
import subprocess
import sys
from pathlib import Path
@@ -81,6 +82,11 @@ def get_valid_ref(repo_dir: str) -> str | None:
return None
def _make_git_show_cmd(ref: str, repo_relative_path: str) -> str:
"""Return a git-show shell command with the ref:path argument safely quoted."""
return f'git show {shlex.quote(f"{ref}:{repo_relative_path}")}'
def get_git_diff(relative_file_path: str) -> dict[str, str]:
path = Path(os.getcwd(), relative_file_path).resolve()
if os.path.getsize(path) > MAX_FILE_SIZE_FOR_GIT_DIFF:
@@ -89,13 +95,17 @@ def get_git_diff(relative_file_path: str) -> dict[str, str]:
if not closest_git_repo:
raise ValueError('no_repository')
current_rev = get_valid_ref(str(closest_git_repo))
try:
original = run(
f'git show "{current_rev}:{path.relative_to(closest_git_repo)}"',
str(closest_git_repo),
)
except RuntimeError:
original = ''
original = ''
if current_rev is not None:
try:
original = run(
_make_git_show_cmd(
current_rev, str(path.relative_to(closest_git_repo))
),
str(closest_git_repo),
)
except RuntimeError:
pass
try:
with open(path, 'r') as f:
modified = '\n'.join(f.read().splitlines())

View File

@@ -6,6 +6,7 @@
# Unless you are working on deprecation, please avoid extending this legacy file and consult the V1 codepaths above.
# Tag: Legacy-V0
import json
import shlex
from dataclasses import dataclass
from pathlib import Path
from typing import Callable
@@ -14,9 +15,7 @@ from openhands.core.logger import openhands_logger as logger
from openhands.runtime.utils import git_changes, git_diff
GIT_CHANGES_CMD = 'python3 /openhands/code/openhands/runtime/utils/git_changes.py'
GIT_DIFF_CMD = (
'python3 /openhands/code/openhands/runtime/utils/git_diff.py "{file_path}"'
)
GIT_DIFF_CMD = 'python3 /openhands/code/openhands/runtime/utils/git_diff.py {file_path}'
GIT_BRANCH_CMD = 'git branch --show-current'
@@ -138,7 +137,9 @@ class GitHandler:
if not self.cwd:
raise ValueError('no_dir_in_git_diff')
result = self.execute(self.git_diff_cmd.format(file_path=file_path), self.cwd)
result = self.execute(
self.git_diff_cmd.format(file_path=shlex.quote(file_path)), self.cwd
)
if result.exit_code == 0:
diff = json.loads(result.content, strict=False)
return diff
@@ -150,7 +151,7 @@ class GitHandler:
# We try to add a script for getting git changes to the runtime - legacy runtimes may be missing the script
logger.info('GitHandler:get_git_diff: adding git_diff script to runtime...')
script_file = self._create_python_script_file(git_diff.__file__)
self.git_diff_cmd = f'python3 {script_file} "{{file_path}}"'
self.git_diff_cmd = f'python3 {script_file} {{file_path}}'
# Try again with the new changes cmd
return self.get_git_diff(file_path)

View File

@@ -1,4 +1,5 @@
import os
import shlex
import shutil
import subprocess
import sys
@@ -36,7 +37,7 @@ class TestGitHandler(unittest.TestCase):
self.git_handler.set_cwd(self.local_dir)
self.git_handler.git_changes_cmd = f'python3 {git_changes.__file__}'
self.git_handler.git_diff_cmd = f'python3 {git_diff.__file__} "{{file_path}}"'
self.git_handler.git_diff_cmd = f'python3 {git_diff.__file__} {{file_path}}'
# Set up the git repositories
self._setup_git_repos()
@@ -292,3 +293,71 @@ class TestGitHandler(unittest.TestCase):
'modified': 'unchanged.txt\nLine 1\nLine 2\nLine 3',
}
assert diff == expected_diff
def test_get_git_diff_command_injection_is_sanitized(self):
"""Verify that a malicious path does not execute injected shell commands."""
sentinel = os.path.join(self.test_dir, 'injected')
# A payload that would create the sentinel file if injection were possible
malicious_path = f'"; touch {sentinel}; echo "'
# get_git_diff should raise (no such file) rather than executing the payload
with self.assertRaises(ValueError):
self.git_handler.get_git_diff(malicious_path)
assert not os.path.exists(sentinel), (
'Shell injection succeeded: sentinel file was created'
)
def test_get_git_diff_no_cwd(self):
"""Raises ValueError('no_dir_in_git_diff') when cwd has not been set."""
handler = GitHandler(
execute_shell_fn=self._execute_command,
create_file_fn=self._create_file,
)
with self.assertRaises(ValueError) as ctx:
handler.get_git_diff('some_file.txt')
assert str(ctx.exception) == 'no_dir_in_git_diff'
def test_get_git_diff_double_fallback_raises(self):
"""Raises ValueError('error_in_git_diff') when cmd differs from GIT_DIFF_CMD and still fails."""
# Simulate being in fallback mode: cmd != GIT_DIFF_CMD but it still fails
self.git_handler.git_diff_cmd = 'non-existent-command {file_path}'
with self.assertRaises(ValueError) as ctx:
self.git_handler.get_git_diff('unchanged.txt')
assert str(ctx.exception) == 'error_in_git_diff'
class TestGitShowCmdBuilder:
"""Pure unit tests for _make_git_show_cmd — no filesystem or subprocess access."""
def test_plain_path(self):
cmd = git_diff._make_git_show_cmd('abc123', 'file.txt')
assert shlex.split(cmd) == ['git', 'show', 'abc123:file.txt']
def test_path_with_spaces(self):
cmd = git_diff._make_git_show_cmd('abc123', 'file with spaces.txt')
assert shlex.split(cmd) == ['git', 'show', 'abc123:file with spaces.txt']
def test_path_with_single_quote(self):
cmd = git_diff._make_git_show_cmd('abc123', "it's a test.txt")
assert shlex.split(cmd) == ['git', 'show', "abc123:it's a test.txt"]
def test_injection_attempt(self):
cmd = git_diff._make_git_show_cmd('abc123', '"; rm -rf /; echo "')
assert shlex.split(cmd) == ['git', 'show', 'abc123:"; rm -rf /; echo "']
def test_get_git_diff_file_too_large():
"""Raises ValueError('file_to_large') when the file exceeds the size limit."""
with patch('os.path.getsize', return_value=git_diff.MAX_FILE_SIZE_FOR_GIT_DIFF + 1):
with pytest.raises(ValueError, match='file_to_large'):
git_diff.get_git_diff('/nonexistent/path.txt')
def test_get_git_diff_no_repository():
"""Raises ValueError('no_repository') when the file is outside any git repository."""
with tempfile.TemporaryDirectory() as tmp_dir:
file_path = os.path.join(tmp_dir, 'file.txt')
Path(file_path).write_text('content')
with pytest.raises(ValueError, match='no_repository'):
git_diff.get_git_diff(file_path)