Add sanitization against malformed paths used in fileop actions. Added tests for resolve_path. (#1338)

This commit is contained in:
Christian Balcom 2024-04-24 11:46:16 -04:00 committed by GitHub
parent a0e8fcb19a
commit b48f440476
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 37 additions and 4 deletions

View File

@ -1,6 +1,7 @@
import os
from dataclasses import dataclass
from pathlib import Path
from opendevin.observation import (
FileReadObservation,
@ -17,10 +18,21 @@ SANDBOX_PATH_PREFIX = '/workspace/'
def resolve_path(file_path):
if file_path.startswith(SANDBOX_PATH_PREFIX):
# Sometimes LLMs include the absolute path of the file inside the sandbox
file_path = file_path[len(SANDBOX_PATH_PREFIX):]
return os.path.join(config.get('WORKSPACE_BASE'), file_path)
# Sanitize the path with respect to the root of the full sandbox
# (deny any .. path traversal to parent directories of this)
abs_path_in_sandbox = (Path(SANDBOX_PATH_PREFIX) / Path(file_path)).resolve()
# If the path is outside the workspace, deny it
if not abs_path_in_sandbox.is_relative_to(SANDBOX_PATH_PREFIX):
raise PermissionError(f'File access not permitted: {file_path}')
# Get path relative to the root of the workspace inside the sandbox
path_in_workspace = abs_path_in_sandbox.relative_to(Path(SANDBOX_PATH_PREFIX))
# Get path relative to host
path_in_host_workspace = Path(config.get('WORKSPACE_BASE')) / path_in_workspace
return path_in_host_workspace
@dataclass

21
tests/test_fileops.py Normal file
View File

@ -0,0 +1,21 @@
from opendevin import config
from opendevin.action import fileop
from pathlib import Path
import pytest
def test_resolve_path():
assert fileop.resolve_path('test.txt') == Path(config.get('WORKSPACE_BASE')) / 'test.txt'
assert fileop.resolve_path('subdir/test.txt') == Path(config.get('WORKSPACE_BASE')) / 'subdir' / 'test.txt'
assert fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / 'test.txt') == \
Path(config.get('WORKSPACE_BASE')) / 'test.txt'
assert fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / 'subdir' / 'test.txt') == \
Path(config.get('WORKSPACE_BASE')) / 'subdir' / 'test.txt'
assert fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / 'subdir' / '..' / 'test.txt') == \
Path(config.get('WORKSPACE_BASE')) / 'test.txt'
with pytest.raises(PermissionError):
fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / '..' / 'test.txt')
with pytest.raises(PermissionError):
fileop.resolve_path(Path('..') / 'test.txt')
with pytest.raises(PermissionError):
fileop.resolve_path(Path('/') / 'test.txt')