mirror of
https://github.com/OpenHands/OpenHands.git
synced 2026-03-22 05:37:20 +08:00
fix: sanitize file_path in git diff shell commands to prevent command injection (#13051)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
@@ -12,6 +12,7 @@ NOTE: Since this is run as a script, there should be no imports from project fil
|
||||
|
||||
import json
|
||||
import os
|
||||
import shlex
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
@@ -81,6 +82,11 @@ def get_valid_ref(repo_dir: str) -> str | None:
|
||||
return None
|
||||
|
||||
|
||||
def _make_git_show_cmd(ref: str, repo_relative_path: str) -> str:
|
||||
"""Return a git-show shell command with the ref:path argument safely quoted."""
|
||||
return f'git show {shlex.quote(f"{ref}:{repo_relative_path}")}'
|
||||
|
||||
|
||||
def get_git_diff(relative_file_path: str) -> dict[str, str]:
|
||||
path = Path(os.getcwd(), relative_file_path).resolve()
|
||||
if os.path.getsize(path) > MAX_FILE_SIZE_FOR_GIT_DIFF:
|
||||
@@ -89,13 +95,17 @@ def get_git_diff(relative_file_path: str) -> dict[str, str]:
|
||||
if not closest_git_repo:
|
||||
raise ValueError('no_repository')
|
||||
current_rev = get_valid_ref(str(closest_git_repo))
|
||||
try:
|
||||
original = run(
|
||||
f'git show "{current_rev}:{path.relative_to(closest_git_repo)}"',
|
||||
str(closest_git_repo),
|
||||
)
|
||||
except RuntimeError:
|
||||
original = ''
|
||||
original = ''
|
||||
if current_rev is not None:
|
||||
try:
|
||||
original = run(
|
||||
_make_git_show_cmd(
|
||||
current_rev, str(path.relative_to(closest_git_repo))
|
||||
),
|
||||
str(closest_git_repo),
|
||||
)
|
||||
except RuntimeError:
|
||||
pass
|
||||
try:
|
||||
with open(path, 'r') as f:
|
||||
modified = '\n'.join(f.read().splitlines())
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
# Unless you are working on deprecation, please avoid extending this legacy file and consult the V1 codepaths above.
|
||||
# Tag: Legacy-V0
|
||||
import json
|
||||
import shlex
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
@@ -14,9 +15,7 @@ from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.runtime.utils import git_changes, git_diff
|
||||
|
||||
GIT_CHANGES_CMD = 'python3 /openhands/code/openhands/runtime/utils/git_changes.py'
|
||||
GIT_DIFF_CMD = (
|
||||
'python3 /openhands/code/openhands/runtime/utils/git_diff.py "{file_path}"'
|
||||
)
|
||||
GIT_DIFF_CMD = 'python3 /openhands/code/openhands/runtime/utils/git_diff.py {file_path}'
|
||||
GIT_BRANCH_CMD = 'git branch --show-current'
|
||||
|
||||
|
||||
@@ -138,7 +137,9 @@ class GitHandler:
|
||||
if not self.cwd:
|
||||
raise ValueError('no_dir_in_git_diff')
|
||||
|
||||
result = self.execute(self.git_diff_cmd.format(file_path=file_path), self.cwd)
|
||||
result = self.execute(
|
||||
self.git_diff_cmd.format(file_path=shlex.quote(file_path)), self.cwd
|
||||
)
|
||||
if result.exit_code == 0:
|
||||
diff = json.loads(result.content, strict=False)
|
||||
return diff
|
||||
@@ -150,7 +151,7 @@ class GitHandler:
|
||||
# We try to add a script for getting git changes to the runtime - legacy runtimes may be missing the script
|
||||
logger.info('GitHandler:get_git_diff: adding git_diff script to runtime...')
|
||||
script_file = self._create_python_script_file(git_diff.__file__)
|
||||
self.git_diff_cmd = f'python3 {script_file} "{{file_path}}"'
|
||||
self.git_diff_cmd = f'python3 {script_file} {{file_path}}'
|
||||
|
||||
# Try again with the new changes cmd
|
||||
return self.get_git_diff(file_path)
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
import shlex
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
@@ -36,7 +37,7 @@ class TestGitHandler(unittest.TestCase):
|
||||
self.git_handler.set_cwd(self.local_dir)
|
||||
|
||||
self.git_handler.git_changes_cmd = f'python3 {git_changes.__file__}'
|
||||
self.git_handler.git_diff_cmd = f'python3 {git_diff.__file__} "{{file_path}}"'
|
||||
self.git_handler.git_diff_cmd = f'python3 {git_diff.__file__} {{file_path}}'
|
||||
|
||||
# Set up the git repositories
|
||||
self._setup_git_repos()
|
||||
@@ -292,3 +293,71 @@ class TestGitHandler(unittest.TestCase):
|
||||
'modified': 'unchanged.txt\nLine 1\nLine 2\nLine 3',
|
||||
}
|
||||
assert diff == expected_diff
|
||||
|
||||
def test_get_git_diff_command_injection_is_sanitized(self):
|
||||
"""Verify that a malicious path does not execute injected shell commands."""
|
||||
sentinel = os.path.join(self.test_dir, 'injected')
|
||||
# A payload that would create the sentinel file if injection were possible
|
||||
malicious_path = f'"; touch {sentinel}; echo "'
|
||||
|
||||
# get_git_diff should raise (no such file) rather than executing the payload
|
||||
with self.assertRaises(ValueError):
|
||||
self.git_handler.get_git_diff(malicious_path)
|
||||
|
||||
assert not os.path.exists(sentinel), (
|
||||
'Shell injection succeeded: sentinel file was created'
|
||||
)
|
||||
|
||||
def test_get_git_diff_no_cwd(self):
|
||||
"""Raises ValueError('no_dir_in_git_diff') when cwd has not been set."""
|
||||
handler = GitHandler(
|
||||
execute_shell_fn=self._execute_command,
|
||||
create_file_fn=self._create_file,
|
||||
)
|
||||
with self.assertRaises(ValueError) as ctx:
|
||||
handler.get_git_diff('some_file.txt')
|
||||
assert str(ctx.exception) == 'no_dir_in_git_diff'
|
||||
|
||||
def test_get_git_diff_double_fallback_raises(self):
|
||||
"""Raises ValueError('error_in_git_diff') when cmd differs from GIT_DIFF_CMD and still fails."""
|
||||
# Simulate being in fallback mode: cmd != GIT_DIFF_CMD but it still fails
|
||||
self.git_handler.git_diff_cmd = 'non-existent-command {file_path}'
|
||||
with self.assertRaises(ValueError) as ctx:
|
||||
self.git_handler.get_git_diff('unchanged.txt')
|
||||
assert str(ctx.exception) == 'error_in_git_diff'
|
||||
|
||||
|
||||
class TestGitShowCmdBuilder:
|
||||
"""Pure unit tests for _make_git_show_cmd — no filesystem or subprocess access."""
|
||||
|
||||
def test_plain_path(self):
|
||||
cmd = git_diff._make_git_show_cmd('abc123', 'file.txt')
|
||||
assert shlex.split(cmd) == ['git', 'show', 'abc123:file.txt']
|
||||
|
||||
def test_path_with_spaces(self):
|
||||
cmd = git_diff._make_git_show_cmd('abc123', 'file with spaces.txt')
|
||||
assert shlex.split(cmd) == ['git', 'show', 'abc123:file with spaces.txt']
|
||||
|
||||
def test_path_with_single_quote(self):
|
||||
cmd = git_diff._make_git_show_cmd('abc123', "it's a test.txt")
|
||||
assert shlex.split(cmd) == ['git', 'show', "abc123:it's a test.txt"]
|
||||
|
||||
def test_injection_attempt(self):
|
||||
cmd = git_diff._make_git_show_cmd('abc123', '"; rm -rf /; echo "')
|
||||
assert shlex.split(cmd) == ['git', 'show', 'abc123:"; rm -rf /; echo "']
|
||||
|
||||
|
||||
def test_get_git_diff_file_too_large():
|
||||
"""Raises ValueError('file_to_large') when the file exceeds the size limit."""
|
||||
with patch('os.path.getsize', return_value=git_diff.MAX_FILE_SIZE_FOR_GIT_DIFF + 1):
|
||||
with pytest.raises(ValueError, match='file_to_large'):
|
||||
git_diff.get_git_diff('/nonexistent/path.txt')
|
||||
|
||||
|
||||
def test_get_git_diff_no_repository():
|
||||
"""Raises ValueError('no_repository') when the file is outside any git repository."""
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
file_path = os.path.join(tmp_dir, 'file.txt')
|
||||
Path(file_path).write_text('content')
|
||||
with pytest.raises(ValueError, match='no_repository'):
|
||||
git_diff.get_git_diff(file_path)
|
||||
|
||||
Reference in New Issue
Block a user