Add HTTP FileStore implementation (#8751)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
tofarr 2025-05-28 08:17:26 -06:00 committed by GitHub
parent 82657b7ba1
commit 90c440d709
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 252 additions and 0 deletions

View File

@ -1,5 +1,6 @@
from openhands.storage.files import FileStore
from openhands.storage.google_cloud import GoogleCloudFileStore
from openhands.storage.http import HTTPFileStore
from openhands.storage.local import LocalFileStore
from openhands.storage.memory import InMemoryFileStore
from openhands.storage.s3 import S3FileStore
@ -14,4 +15,8 @@ def get_file_store(file_store: str, file_store_path: str | None = None) -> FileS
return S3FileStore(file_store_path)
elif file_store == 'google_cloud':
return GoogleCloudFileStore(file_store_path)
elif file_store == 'http':
if file_store_path is None:
raise ValueError('file_store_path is required for HTTP file store')
return HTTPFileStore(file_store_path)
return InMemoryFileStore()

206
openhands/storage/http.py Normal file
View File

@ -0,0 +1,206 @@
import json
import os
import urllib.parse
from typing import Union
import httpx
from requests.exceptions import RequestException
from openhands.core.logger import openhands_logger as logger
from openhands.storage.files import FileStore
class HTTPFileStore(FileStore):
    """
    A FileStore implementation that uses HTTP requests to store and retrieve files.

    This implementation allows storing files on a remote HTTP server that implements
    a simple REST API for file operations.

    The server should implement the following endpoints:
    - POST /files/{path} - Write a file
    - GET /files/{path} - Read a file
    - OPTIONS /files/{path} - List files in a directory
    - DELETE /files/{path} - Delete a file or directory

    Authentication can be provided by customizing the provided httpx client.
    A (mock) server implementation is available in the MockHttpxClient class
    located at /tests/unit/test_storage.py
    """

    base_url: str
    client: httpx.Client

    def __init__(
        self,
        base_url: str,
        client: httpx.Client | None = None,
    ) -> None:
        """
        Initialize the HTTP file store.

        Args:
            base_url: The base URL of the HTTP file server. Trailing slashes
                are stripped so paths can be appended uniformly.
            client: Optional pre-configured httpx client (use this to supply
                custom authentication, timeouts, or TLS settings). When
                omitted, a default client is created; if the SESSION_API_KEY
                environment variable is set, its value is sent with every
                request in the X-Session-API-Key header.
        """
        self.base_url = base_url.rstrip('/')
        if client is None:
            headers = {}
            session_api_key = os.getenv('SESSION_API_KEY')
            if session_api_key:
                headers['X-Session-API-Key'] = session_api_key
            client = httpx.Client(headers=headers)
        self.client = client

    def _get_file_url(self, path: str) -> str:
        """
        Get the full URL for a file path.

        Args:
            path: The file path

        Returns:
            The full URL: the base URL followed by the percent-encoded path
        """
        # Ensure path starts with a slash so it joins cleanly onto base_url
        if not path.startswith('/'):
            path = '/' + path
        # URL encode the path ('/' is left intact by quote's default safe set)
        encoded_path = urllib.parse.quote(path)
        return f'{self.base_url}{encoded_path}'

    def write(self, path: str, contents: Union[str, bytes]) -> None:
        """
        Write contents to a file.

        Args:
            path: The file path
            contents: The file contents (string or bytes); strings are sent
                UTF-8 encoded

        Raises:
            FileNotFoundError: If the request fails or the server responds
                with a status other than 200, 201 or 204
        """
        url = self._get_file_url(path)
        # Convert string to bytes if needed
        if isinstance(contents, str):
            contents = contents.encode('utf-8')
        try:
            response = self.client.post(url, content=contents)
        # httpx raises httpx.HTTPError subclasses for transport failures;
        # RequestException is still caught for backward compatibility.
        except (httpx.HTTPError, RequestException) as e:
            raise FileNotFoundError(
                f'Error: Failed to write to path {path}: {str(e)}'
            ) from e
        if response.status_code not in (200, 201, 204):
            raise FileNotFoundError(
                f'Error: Failed to write to path {path}. '
                f'Status code: {response.status_code}, Response: {response.text}'
            )
        logger.debug(f'Successfully wrote to {path}')

    def read(self, path: str) -> str:
        """
        Read contents from a file.

        Args:
            path: The file path

        Returns:
            The file contents as a string

        Raises:
            FileNotFoundError: If the request fails or the server responds
                with a status other than 200
        """
        url = self._get_file_url(path)
        try:
            response = self.client.get(url)
        # httpx raises httpx.HTTPError subclasses for transport failures;
        # RequestException is still caught for backward compatibility.
        except (httpx.HTTPError, RequestException) as e:
            raise FileNotFoundError(
                f'Error: Failed to read from path {path}: {str(e)}'
            ) from e
        if response.status_code != 200:
            raise FileNotFoundError(
                f'Error: Failed to read from path {path}. '
                f'Status code: {response.status_code}, Response: {response.text}'
            )
        return response.text

    def list(self, path: str) -> list[str]:
        """
        List files in a directory.

        Args:
            path: The directory path

        Returns:
            A list of file paths as returned by the server; an empty list
            when the server responds with 404

        Raises:
            FileNotFoundError: If the request fails, the server responds with
                an unexpected status, or the response body is not a JSON list
        """
        url = self._get_file_url(path)
        try:
            response = self.client.options(url)
        # httpx raises httpx.HTTPError subclasses for transport failures;
        # RequestException is still caught for backward compatibility.
        except (httpx.HTTPError, RequestException) as e:
            raise FileNotFoundError(
                f'Error: Failed to list path {path}: {str(e)}'
            ) from e
        # A missing directory is treated as an empty listing, not an error
        if response.status_code == 404:
            return []
        if response.status_code != 200:
            raise FileNotFoundError(
                f'Error: Failed to list path {path}. '
                f'Status code: {response.status_code}, Response: {response.text}'
            )
        try:
            files = response.json()
        except json.JSONDecodeError as e:
            raise FileNotFoundError(
                f'Error: Invalid JSON response when listing path {path}. '
                f'Response: {response.text}'
            ) from e
        if not isinstance(files, list):
            raise FileNotFoundError(
                f'Error: Invalid response format when listing path {path}. '
                f'Expected a list, got: {type(files)}'
            )
        return files

    def delete(self, path: str) -> None:
        """
        Delete a file or directory.

        Args:
            path: The file or directory path

        Raises:
            FileNotFoundError: If the request fails or the server responds
                with a status other than 200, 202, 204 or 404
        """
        url = self._get_file_url(path)
        try:
            response = self.client.delete(url)
        # httpx raises httpx.HTTPError subclasses for transport failures;
        # RequestException is still caught for backward compatibility.
        except (httpx.HTTPError, RequestException) as e:
            raise FileNotFoundError(
                f'Error: Failed to delete path {path}: {str(e)}'
            ) from e
        # 404 is acceptable for delete operations (the target is already gone)
        if response.status_code not in (200, 202, 204, 404):
            raise FileNotFoundError(
                f'Error: Failed to delete path {path}. '
                f'Status code: {response.status_code}, Response: {response.text}'
            )
        logger.debug(f'Successfully deleted {path}')

View File

@ -11,9 +11,11 @@ from unittest.mock import patch
import botocore.exceptions
from google.api_core.exceptions import NotFound
from httpx import Response
from openhands.storage.files import FileStore
from openhands.storage.google_cloud import GoogleCloudFileStore
from openhands.storage.http import HTTPFileStore
from openhands.storage.local import LocalFileStore
from openhands.storage.memory import InMemoryFileStore
from openhands.storage.s3 import S3FileStore
@ -140,6 +142,11 @@ class TestS3FileStore(TestCase, _StorageTest):
self.store = S3FileStore('dear-liza')
class TestHTTPFileStore(TestCase, _StorageTest):
    """Exercise HTTPFileStore against an in-process mock HTTP client."""

    def setUp(self):
        # The mock is given the base URL *with* a trailing slash, so the
        # paths it extracts from request URLs carry no leading slash.
        mock_client = MockHttpxClient('http://foo.com/')
        self.store = HTTPFileStore('http://foo.com', mock_client)
# I would have liked to use cloud-storage-mocker here but the python versions were incompatible :(
# If we write tests for the S3 storage class I would definitely recommend we use moto.
class _MockGoogleCloudClient:
@ -274,3 +281,37 @@ class _MockS3Client:
class _MockS3Object:
key: str
content: str | bytes
@dataclass
class MockHttpxClient:
    """
    Minimal stand-in for an httpx client used by the HTTP file store tests.

    Each HTTP verb is translated into the matching operation on a backing
    FileStore (an InMemoryFileStore by default) and answered with an httpx
    Response.
    """

    base_url: str
    file_store: FileStore = field(default_factory=InMemoryFileStore)

    def options(self, url: str):
        """Return a 200 response whose JSON body is the listing for the URL's path."""
        listing = self.file_store.list(self._get_path(url))
        return Response(200, json=listing)

    def delete(self, url: str):
        """Delete the URL's path from the backing store and return 200."""
        self.file_store.delete(self._get_path(url))
        return Response(200)

    def post(self, url: str, content: str | bytes):
        """Write the given content to the URL's path and return 200."""
        self.file_store.write(self._get_path(url), content)
        return Response(200)

    def get(self, url: str):
        """Return the stored content with status 200, or 404 if it is missing."""
        relative_path = self._get_path(url)
        try:
            body = self.file_store.read(relative_path)
        except FileNotFoundError:
            return Response(404)
        return Response(200, content=body)

    def _get_path(self, url: str):
        """Strip the base URL prefix from a request URL, leaving the store path."""
        assert url.startswith(self.base_url)
        prefix_length = len(self.base_url)
        return url[prefix_length:]