feat: file explorer: better sorting; .gitignore support; file upload config (#2621)

* feat: file explorer: better sorting; .gitignore support; file upload config

* resolved poetry

* move config settings (no extra file); updated uploading of files; fix exception on refresh of removed folder

* removed console cmds; fix in a toast

* attempt fix of upload toasts

* fix new options' assignments in listen.py
This commit is contained in:
tobitege 2024-06-28 18:36:25 +02:00 committed by GitHub
parent b88898235e
commit 7d31057904
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 436 additions and 86 deletions

View File

@ -107,15 +107,60 @@ function FileExplorer() {
return;
}
dispatch(setRefreshID(Math.random()));
setFiles(await listFiles("/"));
try {
const fileList = await listFiles("/");
setFiles(fileList);
if (fileList.length === 0) {
toast.info(t(I18nKey.EXPLORER$EMPTY_WORKSPACE_MESSAGE));
}
} catch (error) {
toast.error("refresh-error", t(I18nKey.EXPLORER$REFRESH_ERROR_MESSAGE));
}
};
const uploadFileData = async (toAdd: FileList) => {
try {
await uploadFiles(toAdd);
const result = await uploadFiles(toAdd);
if (result.error) {
// Handle error response
toast.error(
`upload-error-${new Date().getTime()}`,
result.error || t(I18nKey.EXPLORER$UPLOAD_ERROR_MESSAGE),
);
return;
}
const uploadedCount = result.uploadedFiles.length;
const skippedCount = result.skippedFiles.length;
if (uploadedCount > 0) {
toast.success(
`upload-success-${new Date().getTime()}`,
t(I18nKey.EXPLORER$UPLOAD_SUCCESS_MESSAGE, {
count: uploadedCount,
}),
);
}
if (skippedCount > 0) {
const message = t(I18nKey.EXPLORER$UPLOAD_PARTIAL_SUCCESS_MESSAGE, {
count: skippedCount,
});
toast.info(message);
}
if (uploadedCount === 0 && skippedCount === 0) {
toast.info(t(I18nKey.EXPLORER$NO_FILES_UPLOADED_MESSAGE));
}
await refreshWorkspace();
} catch (error) {
toast.error("ws", t(I18nKey.EXPLORER$UPLOAD_ERROR_MESSAGE));
// Handle unexpected errors (network issues, etc.)
toast.error(
`upload-error-${new Date().getTime()}`,
t(I18nKey.EXPLORER$UPLOAD_ERROR_MESSAGE),
);
}
};
@ -148,13 +193,16 @@ function FileExplorer() {
}
return (
<div className="relative">
<div className="relative h-full">
{isDragging && (
<div
data-testid="dropzone"
onDrop={(event) => {
event.preventDefault();
uploadFileData(event.dataTransfer.files);
const { files: droppedFiles } = event.dataTransfer;
if (droppedFiles.length > 0) {
uploadFileData(droppedFiles);
}
}}
onDragOver={(event) => event.preventDefault()}
className="z-10 absolute flex flex-col justify-center items-center bg-black top-0 bottom-0 left-0 right-0 opacity-65"
@ -167,32 +215,37 @@ function FileExplorer() {
)}
<div
className={twMerge(
"bg-neutral-800 h-full border-r-1 border-r-neutral-600 flex flex-col transition-all ease-soft-spring overflow-auto",
"bg-neutral-800 h-full border-r-1 border-r-neutral-600 flex flex-col transition-all ease-soft-spring",
isHidden ? "min-w-[48px]" : "min-w-[228px]",
)}
>
<div className="flex flex-col p-2 relative">
<div
className={twMerge(
"flex items-center mt-2 mb-1",
isHidden ? "justify-center" : "justify-between",
)}
>
{!isHidden && (
<div className="ml-1 text-neutral-300 font-bold text-sm">
{t(I18nKey.EXPLORER$LABEL_WORKSPACE)}
</div>
)}
<ExplorerActions
isHidden={isHidden}
toggleHidden={() => setIsHidden((prev) => !prev)}
onRefresh={refreshWorkspace}
onUpload={selectFileInput}
/>
<div className="flex flex-col relative h-full">
<div className="sticky top-0 bg-neutral-800 z-10">
<div
className={twMerge(
"flex items-center mt-2 mb-1 p-2",
isHidden ? "justify-center" : "justify-between",
)}
>
{!isHidden && (
<div className="ml-1 text-neutral-300 font-bold text-sm">
<div className="ml-1 text-neutral-300 font-bold text-sm">
{t(I18nKey.EXPLORER$LABEL_WORKSPACE)}
</div>
</div>
)}
<ExplorerActions
isHidden={isHidden}
toggleHidden={() => setIsHidden((prev) => !prev)}
onRefresh={refreshWorkspace}
onUpload={selectFileInput}
/>
</div>
</div>
<div style={{ display: isHidden ? "none" : "block" }}>
<ExplorerTree files={files} defaultOpen />
<div className="overflow-auto flex-grow">
<div style={{ display: isHidden ? "none" : "block" }}>
<ExplorerTree files={files} defaultOpen />
</div>
</div>
</div>
<input
@ -202,8 +255,9 @@ function FileExplorer() {
ref={fileInputRef}
style={{ display: "none" }}
onChange={(event) => {
if (event.target.files) {
uploadFileData(event.target.files);
const { files: selectedFiles } = event.target;
if (selectedFiles && selectedFiles.length > 0) {
uploadFileData(selectedFiles);
}
}}
/>

View File

@ -308,6 +308,36 @@
"zh-CN": "工作区",
"de": "Arbeitsbereich"
},
"EXPLORER$EMPTY_WORKSPACE_MESSAGE": {
"en": "No files in workspace",
"zh-CN": "工作区没有文件",
"de": "Keine Dateien im Arbeitsbereich"
},
"EXPLORER$REFRESH_ERROR_MESSAGE": {
"en": "Error refreshing workspace",
"zh-CN": "工作区刷新错误",
"de": "Fehler beim Aktualisieren des Arbeitsbereichs"
},
"EXPLORER$UPLOAD_SUCCESS_MESSAGE": {
"en": "Successfully uploaded {{count}} file(s)",
"zh-CN": "成功上传 {{count}} 个文件",
"de": "Erfolgreich {{count}} Datei(en) hochgeladen"
},
"EXPLORER$NO_FILES_UPLOADED_MESSAGE": {
"en": "No files were uploaded",
"zh-CN": "没有文件上传",
"de": "Keine Dateien wurden hochgeladen"
},
"EXPLORER$UPLOAD_PARTIAL_SUCCESS_MESSAGE": {
"en": "{{count}} file(s) were skipped during upload",
"zh-CN": "{{count}} 个文件在上传过程中被跳过",
"de": "{{count}} Datei(en) wurden während des Hochladens übersprungen"
},
"EXPLORER$UPLOAD_UNEXPECTED_RESPONSE_MESSAGE": {
"en": "Unexpected response structure from server",
"zh-CN": "服务器响应结构不符合预期",
"de": "Unerwartetes Antwortformat vom Server"
},
"LOAD_SESSION$MODAL_TITLE": {
"en": "Return to existing session?",
"de": "Zurück zu vorhandener Sitzung?",

View File

@ -1,23 +1,74 @@
import { request } from "./api";
export async function selectFile(file: string): Promise<string> {
const data = await request(`/api/select-file?file=${file}`);
const encodedFile = encodeURIComponent(file);
const data = await request(`/api/select-file?file=${encodedFile}`);
return data.code as string;
}
export async function uploadFiles(files: FileList) {
interface UploadResult {
message: string;
uploadedFiles: string[];
skippedFiles: Array<{ name: string; reason: string }>;
error?: string;
}
export async function uploadFiles(files: FileList): Promise<UploadResult> {
const formData = new FormData();
const skippedFiles: Array<{ name: string; reason: string }> = [];
let uploadedCount = 0;
for (let i = 0; i < files.length; i += 1) {
formData.append("files", files[i]);
const file = files[i];
if (
file.name.includes("..") ||
file.name.includes("/") ||
file.name.includes("\\")
) {
skippedFiles.push({
name: file.name,
reason: "Invalid file name",
});
} else {
formData.append("files", file);
uploadedCount += 1;
}
}
await request("/api/upload-files", {
// Add skippedFilesCount to formData
formData.append("skippedFilesCount", skippedFiles.length.toString());
// Add uploadedFilesCount to formData
formData.append("uploadedFilesCount", uploadedCount.toString());
const response = await request("/api/upload-files", {
method: "POST",
body: formData,
});
if (
typeof response.message !== "string" ||
!Array.isArray(response.uploaded_files) ||
!Array.isArray(response.skipped_files)
) {
throw new Error("Unexpected response structure from server");
}
return {
message: response.message,
uploadedFiles: response.uploaded_files,
skippedFiles: [...skippedFiles, ...response.skipped_files],
};
}
export async function listFiles(path: string = "/"): Promise<string[]> {
const data = await request(`/api/list-files?path=${path}`);
return data as string[];
try {
const encodedPath = encodeURIComponent(path);
const data = await request(`/api/list-files?path=${encodedPath}`);
return data as string[];
} catch (error) {
return [];
}
}

View File

@ -156,6 +156,9 @@ class AppConfig(metaclass=Singleton):
sandbox_timeout: The timeout for the sandbox.
debug: Whether to enable debugging.
enable_auto_lint: Whether to enable auto linting. This is False by default, for regular runs of the app. For evaluation, please set this to True.
file_uploads_max_file_size_mb: Maximum file size for uploads in megabytes. 0 means no limit.
file_uploads_restrict_file_types: Whether to restrict file types for file uploads. Defaults to False.
file_uploads_allowed_extensions: List of allowed file extensions for uploads. ['.*'] means all extensions are allowed.
"""
llm: LLMConfig = field(default_factory=LLMConfig)
@ -194,6 +197,9 @@ class AppConfig(metaclass=Singleton):
enable_auto_lint: bool = (
False # once enabled, OpenDevin would lint files after editing
)
file_uploads_max_file_size_mb: int = 0
file_uploads_restrict_file_types: bool = False
file_uploads_allowed_extensions: list[str] = field(default_factory=lambda: ['.*'])
defaults_dict: ClassVar[dict] = {}

View File

@ -42,3 +42,6 @@ class ConfigType(str, Enum):
SSH_HOSTNAME = 'SSH_HOSTNAME'
DISABLE_COLOR = 'DISABLE_COLOR'
DEBUG = 'DEBUG'
FILE_UPLOADS_MAX_FILE_SIZE_MB = 'FILE_UPLOADS_MAX_FILE_SIZE_MB'
FILE_UPLOADS_RESTRICT_FILE_TYPES = 'FILE_UPLOADS_RESTRICT_FILE_TYPES'
FILE_UPLOADS_ALLOWED_EXTENSIONS = 'FILE_UPLOADS_ALLOWED_EXTENSIONS'

View File

@ -1,12 +1,16 @@
import os
import re
import uuid
import warnings
from pathspec import PathSpec
from pathspec.patterns import GitWildMatchPattern
from opendevin.server.data_models.feedback import FeedbackDataModel, store_feedback
with warnings.catch_warnings():
warnings.simplefilter('ignore')
import litellm
from pathlib import Path
from fastapi import FastAPI, Request, Response, UploadFile, WebSocket, status
from fastapi.middleware.cors import CORSMiddleware
@ -19,7 +23,10 @@ from opendevin.controller.agent import Agent
from opendevin.core.config import config
from opendevin.core.logger import opendevin_logger as logger
from opendevin.events.action import ChangeAgentStateAction, NullAction
from opendevin.events.observation import AgentStateChangedObservation, NullObservation
from opendevin.events.observation import (
AgentStateChangedObservation,
NullObservation,
)
from opendevin.events.serialization import event_to_dict
from opendevin.llm import bedrock
from opendevin.server.auth import get_sid_from_token, sign_token
@ -37,6 +44,96 @@ app.add_middleware(
security_scheme = HTTPBearer()
def load_file_upload_config() -> tuple[int, bool, list[str]]:
    """
    Load and validate the file upload settings from the global config object.

    Three settings are read: the maximum upload size, whether file types are
    restricted, and the list of allowed extensions. Invalid values are
    replaced by permissive defaults (with a warning) instead of raising,
    because this function is called while the module is being imported.

    Returns:
        tuple: A tuple containing:
            - max_file_size_mb (int): Maximum file size in MB. 0 means no limit.
            - restrict_file_types (bool): Whether file type restrictions are enabled.
            - allowed_extensions (list[str]): Lowercase extensions, each starting
              with a dot. ['.*'] means every extension is allowed.
    """
    # Retrieve values from config
    max_file_size_mb = config.file_uploads_max_file_size_mb
    restrict_file_types = config.file_uploads_restrict_file_types
    allowed_extensions = config.file_uploads_allowed_extensions

    # Sanity check for max_file_size_mb
    MAX_ALLOWED_SIZE = 1024  # Maximum allowed file size 1 GB
    if (
        # bool is a subclass of int; True must not be read as "1 MB"
        isinstance(max_file_size_mb, bool)
        or not isinstance(max_file_size_mb, int)
        or max_file_size_mb < 0
    ):
        logger.warning(
            f'Invalid max_file_size_mb: {max_file_size_mb}. Setting to 0 (no limit).'
        )
        max_file_size_mb = 0
    elif max_file_size_mb > MAX_ALLOWED_SIZE:
        logger.warning(
            f'max_file_size_mb exceeds maximum allowed size. Capping at {MAX_ALLOWED_SIZE}MB.'
        )
        max_file_size_mb = MAX_ALLOWED_SIZE

    # Sanity check for allowed_extensions: keep only string entries and make
    # each one lowercase with a leading dot. Non-string entries are dropped
    # rather than letting .lower()/.startswith() raise during import.
    normalized_extensions: list[str] = []
    if isinstance(allowed_extensions, (list, set)):
        for ext in allowed_extensions:
            if not isinstance(ext, str):
                continue
            ext = ext.strip().lower()
            normalized_extensions.append(ext if ext.startswith('.') else f'.{ext}')

    if not normalized_extensions:
        logger.warning(
            f'Invalid allowed_extensions: {allowed_extensions}. Setting to [".*"].'
        )
        normalized_extensions = ['.*']
    allowed_extensions = normalized_extensions

    # If restrictions are disabled, allow all
    if not restrict_file_types:
        allowed_extensions = ['.*']

    logger.info(
        f'File upload config: max_size={max_file_size_mb}MB, '
        f'restrict_types={restrict_file_types}, '
        f'allowed_extensions={allowed_extensions}'
    )

    return max_file_size_mb, restrict_file_types, allowed_extensions
# Load configuration
MAX_FILE_SIZE_MB, RESTRICT_FILE_TYPES, ALLOWED_EXTENSIONS = load_file_upload_config()
def is_extension_allowed(filename):
    """
    Decide whether a file may be uploaded, judging by its extension.

    The comparison is case-insensitive. A '.*' entry in ALLOWED_EXTENSIONS
    accepts everything, and a bare '.' entry accepts files that have no
    extension at all.

    Args:
        filename (str): The name of the file to check.

    Returns:
        bool: True if the file extension is allowed, False otherwise.
    """
    # With restrictions switched off, every file is accepted.
    if not RESTRICT_FILE_TYPES:
        return True

    _, suffix = os.path.splitext(filename)
    suffix = suffix.lower()

    # Wildcard entry: anything goes.
    if '.*' in ALLOWED_EXTENSIONS:
        return True

    # Explicit, case-insensitive match against the configured extensions.
    if any(suffix == allowed.lower() for allowed in ALLOWED_EXTENSIONS):
        return True

    # Extension-less files are accepted only when '.' is configured.
    return suffix == '' and '.' in ALLOWED_EXTENSIONS
@app.middleware('http')
async def attach_session(request: Request, call_next):
if request.url.path.startswith('/api/options/') or not request.url.path.startswith(
@ -225,48 +322,85 @@ def list_files(request: Request, path: str = '/'):
content={'error': 'Runtime not yet initialized'},
)
exclude_list = (
'.git',
'.DS_Store',
'.svn',
'.hg',
'.idea',
'.vscode',
'.settings',
'.pytest_cache',
'__pycache__',
'node_modules',
'vendor',
'build',
'dist',
'bin',
'logs',
'log',
'tmp',
'temp',
'coverage',
'venv',
'env',
)
try:
# Get the full path of the requested directory
full_path = (
request.state.session.agent_session.runtime.file_store.get_full_path(path)
)
# Check if the directory exists
if not os.path.exists(full_path) or not os.path.isdir(full_path):
return []
# Check if .gitignore exists
gitignore_path = os.path.join(full_path, '.gitignore')
if os.path.exists(gitignore_path):
# Use PathSpec to parse .gitignore
with open(gitignore_path, 'r') as f:
spec = PathSpec.from_lines(GitWildMatchPattern, f.readlines())
else:
# Fallback to default exclude list if .gitignore doesn't exist
default_exclude = [
'.git',
'.DS_Store',
'.svn',
'.hg',
'.idea',
'.vscode',
'.settings',
'.pytest_cache',
'__pycache__',
'node_modules',
'vendor',
'build',
'dist',
'bin',
'logs',
'log',
'tmp',
'temp',
'coverage',
'venv',
'env',
]
spec = PathSpec.from_lines(GitWildMatchPattern, default_exclude)
entries = request.state.session.agent_session.runtime.file_store.list(path)
# Filter entries, excluding special folders
if entries:
return [
entry
for entry in entries
if Path(entry).parts and Path(entry).parts[-1] not in exclude_list
]
return []
# Filter entries using PathSpec
filtered_entries = [
entry
for entry in entries
if not spec.match_file(os.path.relpath(entry, full_path))
]
# Separate directories and files
directories = []
files = []
for entry in filtered_entries:
# Remove leading slash and any parent directory components
entry_relative = entry.lstrip('/').split('/')[-1]
# Construct the full path by joining the base path with the relative entry path
full_entry_path = os.path.join(full_path, entry_relative)
if os.path.exists(full_entry_path):
is_dir = os.path.isdir(full_entry_path)
if is_dir:
directories.append(entry)
else:
files.append(entry)
# Sort directories and files separately
directories.sort(key=str.lower)
files.sort(key=str.lower)
# Combine sorted directories and files
sorted_entries = directories + files
return sorted_entries
except Exception as e:
logger.error(f'Error refreshing files: {e}', exc_info=False)
error_msg = f'Error refreshing files: {e}'
return JSONResponse(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
content={'error': error_msg},
)
logger.error(f'Error listing files: {e}', exc_info=True)
return []
@app.get('/api/select-file')
@ -291,6 +425,22 @@ def select_file(file: str, request: Request):
return {'code': content}
def sanitize_filename(filename):
    """
    Sanitize an uploaded file name so it is safe to use as a single path
    component (prevents directory traversal).

    The name is reduced to its last path component, stripped of every
    character other than word characters, '-', '_' and '.', and limited to
    255 characters while keeping the extension where possible.

    Args:
        filename (str | None): The client-supplied file name.

    Returns:
        str: The sanitized name. If nothing usable remains (the input was
        None, empty, or reduced to only dots such as '.' or '..'), the
        placeholder 'unnamed_file' is returned so the result never refers to
        a directory.
    """
    fallback_name = 'unnamed_file'
    if not filename:
        return fallback_name

    # Treat Windows separators as directory separators on every platform,
    # then remove any directory components
    filename = os.path.basename(filename.replace('\\', '/'))
    # Remove any non-alphanumeric characters except for .-_
    filename = re.sub(r'[^\w\-_\.]', '', filename)

    # '', '.', '..' (or any dot-only name) would address a directory, not a file
    if not filename.strip('.'):
        return fallback_name

    # Limit the filename length
    max_length = 255
    if len(filename) > max_length:
        name, ext = os.path.splitext(filename)
        if len(ext) < max_length:
            filename = name[: max_length - len(ext)] + ext
        else:
            # The extension alone exceeds the limit; a negative slice on the
            # name would not shorten the result, so cut the whole name instead
            filename = filename[:max_length]
    return filename
@app.post('/api/upload-files')
async def upload_file(request: Request, files: list[UploadFile]):
"""
@ -302,24 +452,68 @@ async def upload_file(request: Request, files: list[UploadFile]):
```
"""
try:
uploaded_files = []
skipped_files = []
for file in files:
safe_filename = sanitize_filename(file.filename)
file_contents = await file.read()
if (
MAX_FILE_SIZE_MB > 0
and len(file_contents) > MAX_FILE_SIZE_MB * 1024 * 1024
):
skipped_files.append(
{
'name': safe_filename,
'reason': f'Exceeds maximum size limit of {MAX_FILE_SIZE_MB}MB',
}
)
continue
if not is_extension_allowed(safe_filename):
skipped_files.append(
{'name': safe_filename, 'reason': 'File type not allowed'}
)
continue
request.state.session.agent_session.runtime.file_store.write(
file.filename, file_contents
safe_filename, file_contents
)
uploaded_files.append(safe_filename)
response_content = {
'message': 'File upload process completed',
'uploaded_files': uploaded_files,
'skipped_files': skipped_files,
}
if not uploaded_files and skipped_files:
return JSONResponse(
status_code=status.HTTP_400_BAD_REQUEST,
content={
**response_content,
'error': 'No files were uploaded successfully',
},
)
return JSONResponse(status_code=status.HTTP_200_OK, content=response_content)
except Exception as e:
logger.error(f'Error saving files: {e}', exc_info=True)
logger.error(f'Error during file upload: {e}', exc_info=True)
return JSONResponse(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
content={'error': f'Error saving file:s {e}'},
content={
'error': f'Error during file upload: {str(e)}',
'uploaded_files': [],
'skipped_files': [],
},
)
return {'message': 'Files uploaded successfully', 'file_count': len(files)}
@app.post('/api/submit-feedback')
async def submit_feedback(request: Request, feedback: FeedbackDataModel):
"""
Upload files to the workspace.
Upload feedback data to the feedback site.
To upload files:
```sh
@ -327,7 +521,7 @@ async def submit_feedback(request: Request, feedback: FeedbackDataModel):
```
"""
# Assuming the storage service is already configured in the backend
# and there is a function to handle the storage.
# and there is a function to handle the storage.
try:
feedback_data = store_feedback(feedback)
return JSONResponse(status_code=200, content=feedback_data)

View File

@ -15,10 +15,11 @@ class LocalFileStore(FileStore):
path = path[1:]
return os.path.join(self.root, path)
def write(self, path: str, contents: str) -> None:
def write(self, path: str, contents: str | bytes):
full_path = self.get_full_path(path)
os.makedirs(os.path.dirname(full_path), exist_ok=True)
with open(full_path, 'w') as f:
mode = 'w' if isinstance(contents, str) else 'wb'
with open(full_path, mode) as f:
f.write(contents)
def read(self, path: str) -> str:

16
poetry.lock generated
View File

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]]
name = "aenum"
@ -4114,7 +4114,6 @@ description = "Nvidia JIT LTO Library"
optional = false
python-versions = ">=3"
files = [
{file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_aarch64.whl", hash = "sha256:004186d5ea6a57758fd6d57052a123c73a4815adf365eb8dd6a85c9eaa7535ff"},
{file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d9714f27c1d0f0895cd8915c07a87a1d0029a0aa36acaf9156952ec2a8a12189"},
{file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-win_amd64.whl", hash = "sha256:c3401dc8543b52d3a8158007a0c1ab4e9c768fcbd24153a48c86972102197ddd"},
]
@ -4544,6 +4543,17 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d
test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"]
xml = ["lxml (>=4.9.2)"]
[[package]]
name = "pathspec"
version = "0.12.1"
description = "Utility library for gitignore style pattern matching of file paths."
optional = false
python-versions = ">=3.8"
files = [
{file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"},
{file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
]
[[package]]
name = "pexpect"
version = "4.9.0"
@ -7737,4 +7747,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "dba3c8c3812d657e413a57e3bd87ad6f80adadc08857948ff1fd6e1c62692ca7"
content-hash = "d30ba49e7737bdacfb1c08a821ab1d41f97e00c19b691bec504e6eae301ee0e7"

View File

@ -34,6 +34,7 @@ gevent = "^24.2.1"
pyarrow = "16.1.0" # transitive dependency, pinned here to avoid conflicts
tenacity = "^8.4.2"
zope-interface = "6.4.post2"
pathspec = "^0.12.1"
[tool.poetry.group.llama-index.dependencies]
llama-index = "*"