From 698cfc2520cdc50eac5fa869c6c1ba5c8fee9e13 Mon Sep 17 00:00:00 2001 From: Ray Myers Date: Mon, 9 Mar 2026 12:29:25 -0500 Subject: [PATCH] fix: sanitize file_path in git diff shell commands to prevent command injection (#13051) Co-authored-by: openhands --- openhands/runtime/utils/git_diff.py | 24 +++++-- openhands/runtime/utils/git_handler.py | 11 +-- tests/unit/runtime/utils/test_git_handler.py | 71 +++++++++++++++++++- 3 files changed, 93 insertions(+), 13 deletions(-) diff --git a/openhands/runtime/utils/git_diff.py b/openhands/runtime/utils/git_diff.py index b42dc861e4..065e351812 100644 --- a/openhands/runtime/utils/git_diff.py +++ b/openhands/runtime/utils/git_diff.py @@ -12,6 +12,7 @@ NOTE: Since this is run as a script, there should be no imports from project fil import json import os +import shlex import subprocess import sys from pathlib import Path @@ -81,6 +82,11 @@ def get_valid_ref(repo_dir: str) -> str | None: return None +def _make_git_show_cmd(ref: str, repo_relative_path: str) -> str: + """Return a git-show shell command with the ref:path argument safely quoted.""" + return f'git show {shlex.quote(f"{ref}:{repo_relative_path}")}' + + def get_git_diff(relative_file_path: str) -> dict[str, str]: path = Path(os.getcwd(), relative_file_path).resolve() if os.path.getsize(path) > MAX_FILE_SIZE_FOR_GIT_DIFF: @@ -89,13 +95,17 @@ def get_git_diff(relative_file_path: str) -> dict[str, str]: if not closest_git_repo: raise ValueError('no_repository') current_rev = get_valid_ref(str(closest_git_repo)) - try: - original = run( - f'git show "{current_rev}:{path.relative_to(closest_git_repo)}"', - str(closest_git_repo), - ) - except RuntimeError: - original = '' + original = '' + if current_rev is not None: + try: + original = run( + _make_git_show_cmd( + current_rev, str(path.relative_to(closest_git_repo)) + ), + str(closest_git_repo), + ) + except RuntimeError: + pass try: with open(path, 'r') as f: modified = '\n'.join(f.read().splitlines()) diff --git a/openhands/runtime/utils/git_handler.py b/openhands/runtime/utils/git_handler.py index ba85aa34d1..af68a3211e 100644 --- a/openhands/runtime/utils/git_handler.py +++ b/openhands/runtime/utils/git_handler.py @@ -6,6 +6,7 @@ # Unless you are working on deprecation, please avoid extending this legacy file and consult the V1 codepaths above. # Tag: Legacy-V0 import json +import shlex from dataclasses import dataclass from pathlib import Path from typing import Callable @@ -14,9 +15,7 @@ from openhands.core.logger import openhands_logger as logger from openhands.runtime.utils import git_changes, git_diff GIT_CHANGES_CMD = 'python3 /openhands/code/openhands/runtime/utils/git_changes.py' -GIT_DIFF_CMD = ( - 'python3 /openhands/code/openhands/runtime/utils/git_diff.py "{file_path}"' -) +GIT_DIFF_CMD = 'python3 /openhands/code/openhands/runtime/utils/git_diff.py {file_path}' GIT_BRANCH_CMD = 'git branch --show-current' @@ -138,7 +137,9 @@ class GitHandler: if not self.cwd: raise ValueError('no_dir_in_git_diff') - result = self.execute(self.git_diff_cmd.format(file_path=file_path), self.cwd) + result = self.execute( + self.git_diff_cmd.format(file_path=shlex.quote(file_path)), self.cwd + ) if result.exit_code == 0: diff = json.loads(result.content, strict=False) return diff @@ -150,7 +151,7 @@ class GitHandler: # We try to add a script for getting git changes to the runtime - legacy runtimes may be missing the script logger.info('GitHandler:get_git_diff: adding git_diff script to runtime...') script_file = self._create_python_script_file(git_diff.__file__) - self.git_diff_cmd = f'python3 {script_file} "{{file_path}}"' + self.git_diff_cmd = f'python3 {script_file} {{file_path}}' # Try again with the new changes cmd return self.get_git_diff(file_path) diff --git a/tests/unit/runtime/utils/test_git_handler.py b/tests/unit/runtime/utils/test_git_handler.py index b26be80348..f83cc9e8e4 100644 --- a/tests/unit/runtime/utils/test_git_handler.py +++ b/tests/unit/runtime/utils/test_git_handler.py @@ -1,4 +1,5 @@ import os +import shlex import shutil import subprocess import sys @@ -36,7 +37,7 @@ class TestGitHandler(unittest.TestCase): self.git_handler.set_cwd(self.local_dir) self.git_handler.git_changes_cmd = f'python3 {git_changes.__file__}' - self.git_handler.git_diff_cmd = f'python3 {git_diff.__file__} "{{file_path}}"' + self.git_handler.git_diff_cmd = f'python3 {git_diff.__file__} {{file_path}}' # Set up the git repositories self._setup_git_repos() @@ -292,3 +293,71 @@ class TestGitHandler(unittest.TestCase): 'modified': 'unchanged.txt\nLine 1\nLine 2\nLine 3', } assert diff == expected_diff + + def test_get_git_diff_command_injection_is_sanitized(self): + """Verify that a malicious path does not execute injected shell commands.""" + sentinel = os.path.join(self.test_dir, 'injected') + # A payload that would create the sentinel file if injection were possible + malicious_path = f'"; touch {sentinel}; echo "' + + # get_git_diff should raise (no such file) rather than executing the payload + with self.assertRaises(ValueError): + self.git_handler.get_git_diff(malicious_path) + + assert not os.path.exists(sentinel), ( + 'Shell injection succeeded: sentinel file was created' + ) + + def test_get_git_diff_no_cwd(self): + """Raises ValueError('no_dir_in_git_diff') when cwd has not been set.""" + handler = GitHandler( + execute_shell_fn=self._execute_command, + create_file_fn=self._create_file, + ) + with self.assertRaises(ValueError) as ctx: + handler.get_git_diff('some_file.txt') + assert str(ctx.exception) == 'no_dir_in_git_diff' + + def test_get_git_diff_double_fallback_raises(self): + """Raises ValueError('error_in_git_diff') when cmd differs from GIT_DIFF_CMD and still fails.""" + # Simulate being in fallback mode: cmd != GIT_DIFF_CMD but it still fails + self.git_handler.git_diff_cmd = 'non-existent-command {file_path}' + with self.assertRaises(ValueError) as ctx: + self.git_handler.get_git_diff('unchanged.txt') + assert str(ctx.exception) == 'error_in_git_diff' + + +class TestGitShowCmdBuilder: + """Pure unit tests for _make_git_show_cmd — no filesystem or subprocess access.""" + + def test_plain_path(self): + cmd = git_diff._make_git_show_cmd('abc123', 'file.txt') + assert shlex.split(cmd) == ['git', 'show', 'abc123:file.txt'] + + def test_path_with_spaces(self): + cmd = git_diff._make_git_show_cmd('abc123', 'file with spaces.txt') + assert shlex.split(cmd) == ['git', 'show', 'abc123:file with spaces.txt'] + + def test_path_with_single_quote(self): + cmd = git_diff._make_git_show_cmd('abc123', "it's a test.txt") + assert shlex.split(cmd) == ['git', 'show', "abc123:it's a test.txt"] + + def test_injection_attempt(self): + cmd = git_diff._make_git_show_cmd('abc123', '"; rm -rf /; echo "') + assert shlex.split(cmd) == ['git', 'show', 'abc123:"; rm -rf /; echo "'] + + +def test_get_git_diff_file_too_large(): + """Raises ValueError('file_to_large') when the file exceeds the size limit.""" + with patch('os.path.getsize', return_value=git_diff.MAX_FILE_SIZE_FOR_GIT_DIFF + 1): + with pytest.raises(ValueError, match='file_to_large'): + git_diff.get_git_diff('/nonexistent/path.txt') + + +def test_get_git_diff_no_repository(): + """Raises ValueError('no_repository') when the file is outside any git repository.""" + with tempfile.TemporaryDirectory() as tmp_dir: + file_path = os.path.join(tmp_dir, 'file.txt') + Path(file_path).write_text('content') + with pytest.raises(ValueError, match='no_repository'): + git_diff.get_git_diff(file_path)