From e9cafb0372d2b0926cbb6e35567fefb0a35cadd7 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Thu, 19 Dec 2024 12:28:29 -0500 Subject: [PATCH] chore: Cleanup runtime exception handling (#5696) --- evaluation/benchmarks/swe_bench/run_infer.py | 7 +- evaluation/utils/shared.py | 32 +++++ openhands/controller/agent_controller.py | 7 +- openhands/core/exceptions.py | 113 +++++++++++++++--- openhands/runtime/base.py | 19 +-- openhands/runtime/builder/base.py | 2 +- openhands/runtime/builder/docker.py | 13 +- openhands/runtime/builder/remote.py | 15 ++- .../impl/eventstream/eventstream_runtime.py | 27 +++-- .../runtime/impl/remote/remote_runtime.py | 40 ++++--- .../runtime/impl/runloop/runloop_runtime.py | 8 +- openhands/runtime/utils/runtime_build.py | 3 +- openhands/server/routes/files.py | 15 +-- openhands/server/session/agent_session.py | 5 +- openhands/server/session/manager.py | 4 +- tests/unit/test_agent_controller.py | 4 +- 16 files changed, 219 insertions(+), 95 deletions(-) diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py index b97b5d9361..8da11d517f 100644 --- a/evaluation/benchmarks/swe_bench/run_infer.py +++ b/evaluation/benchmarks/swe_bench/run_infer.py @@ -15,6 +15,7 @@ from evaluation.utils.shared import ( EvalOutput, assert_and_raise, codeact_user_response, + is_fatal_evaluation_error, make_metadata, prepare_dataset, reset_logger_for_multiprocessing, @@ -400,11 +401,7 @@ def process_instance( ) # if fatal error, throw EvalError to trigger re-run - if ( - state.last_error - and 'fatal error during agent execution' in state.last_error - and 'stuck in a loop' not in state.last_error - ): + if is_fatal_evaluation_error(state.last_error): raise EvalException('Fatal error detected: ' + state.last_error) # ======= THIS IS SWE-Bench specific ======= diff --git a/evaluation/utils/shared.py b/evaluation/utils/shared.py index 517ecc5235..5a4fa2a2ad 100644 --- a/evaluation/utils/shared.py +++ b/evaluation/utils/shared.py @@ -16,6 +16,16 @@ from tqdm import tqdm from openhands.controller.state.state import State from openhands.core.config import LLMConfig +from openhands.core.exceptions import ( + AgentRuntimeBuildError, + AgentRuntimeDisconnectedError, + AgentRuntimeError, + AgentRuntimeNotFoundError, + AgentRuntimeNotReadyError, + AgentRuntimeTimeoutError, + AgentRuntimeUnavailableError, + AgentStuckInLoopError, +) from openhands.core.logger import get_console_handler from openhands.core.logger import openhands_logger as logger from openhands.events.action import Action @@ -503,3 +513,25 @@ def compatibility_for_eval_history_pairs( history_pairs.append((event_to_dict(action), event_to_dict(observation))) return history_pairs + + +def is_fatal_evaluation_error(error: str | None) -> bool: + if not error: + return False + + FATAL_EXCEPTIONS = [ + AgentRuntimeError, + AgentRuntimeBuildError, + AgentRuntimeTimeoutError, + AgentRuntimeUnavailableError, + AgentRuntimeNotReadyError, + AgentRuntimeDisconnectedError, + AgentRuntimeNotFoundError, + AgentStuckInLoopError, + ] + + if any(exception.__name__ in error for exception in FATAL_EXCEPTIONS): + logger.error(f'Fatal evaluation error detected: {error}') + return True + + return False diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index e749334187..a6b666f136 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -12,6 +12,7 @@ from openhands.controller.state.state import State, TrafficControlState from openhands.controller.stuck import StuckDetector from openhands.core.config import AgentConfig, LLMConfig from openhands.core.exceptions import ( + AgentStuckInLoopError, FunctionCallNotExistsError, FunctionCallValidationError, LLMMalformedActionError, @@ -196,7 +197,7 @@ class AgentController: err_id = '' if isinstance(e, litellm.AuthenticationError): err_id = 'STATUS$ERROR_LLM_AUTHENTICATION' - self.status_callback('error', err_id, str(e)) + self.status_callback('error', err_id, type(e).__name__ + ': ' + str(e)) async def start_step_loop(self): """The main loop for the agent's step-by-step execution.""" @@ -502,7 +503,9 @@ class AgentController: return if self._is_stuck(): - await self._react_to_exception(RuntimeError('Agent got stuck in a loop')) + await self._react_to_exception( + AgentStuckInLoopError('Agent got stuck in a loop') + ) return self.update_state_before_step() diff --git a/openhands/core/exceptions.py b/openhands/core/exceptions.py index bf5a29f607..a33c821b7e 100644 --- a/openhands/core/exceptions.py +++ b/openhands/core/exceptions.py @@ -1,14 +1,25 @@ -class AgentNoInstructionError(Exception): +# ============================================ +# Agent Exceptions +# ============================================ + + +class AgentError(Exception): + """Base class for all agent exceptions.""" + + pass + + +class AgentNoInstructionError(AgentError): def __init__(self, message='Instruction must be provided'): super().__init__(message) -class AgentEventTypeError(Exception): +class AgentEventTypeError(AgentError): def __init__(self, message='Event must be a dictionary'): super().__init__(message) -class AgentAlreadyRegisteredError(Exception): +class AgentAlreadyRegisteredError(AgentError): def __init__(self, name=None): if name is not None: message = f"Agent class already registered under '{name}'" @@ -17,7 +28,7 @@ class AgentAlreadyRegisteredError(Exception): super().__init__(message) -class AgentNotRegisteredError(Exception): +class AgentNotRegisteredError(AgentError): def __init__(self, name=None): if name is not None: message = f"No agent class registered under '{name}'" @@ -26,6 +37,16 @@ class AgentNotRegisteredError(Exception): super().__init__(message) +class AgentStuckInLoopError(AgentError): + def __init__(self, message='Agent got stuck in a loop'): + super().__init__(message) + + +# ============================================ +# Agent Controller Exceptions +# ============================================ + + class TaskInvalidStateError(Exception): def __init__(self, state=None): if state is not None: @@ -35,17 +56,9 @@ class TaskInvalidStateError(Exception): super().__init__(message) -class BrowserInitException(Exception): - def __init__(self, message='Failed to initialize browser environment'): - super().__init__(message) - - -class BrowserUnavailableException(Exception): - def __init__( - self, - message='Browser environment is not available, please check if has been initialized', - ): - super().__init__(message) +# ============================================ +# LLM Exceptions +# ============================================ # This exception gets sent back to the LLM @@ -96,6 +109,11 @@ class CloudFlareBlockageError(Exception): pass +# ============================================ +# LLM function calling Exceptions +# ============================================ + + class FunctionCallConversionError(Exception): """Exception raised when FunctionCallingConverter failed to convert a non-function call message to a function call message. @@ -121,3 +139,68 @@ class FunctionCallNotExistsError(Exception): def __init__(self, message): super().__init__(message) + + +# ============================================ +# Agent Runtime Exceptions +# ============================================ + + +class AgentRuntimeError(Exception): + """Base class for all agent runtime exceptions.""" + + pass + + +class AgentRuntimeBuildError(AgentRuntimeError): + """Exception raised when an agent runtime build operation fails.""" + + pass + + +class AgentRuntimeTimeoutError(AgentRuntimeError): + """Exception raised when an agent runtime operation times out.""" + + pass + + +class AgentRuntimeUnavailableError(AgentRuntimeError): + """Exception raised when an agent runtime is unavailable.""" + + pass + + +class AgentRuntimeNotReadyError(AgentRuntimeUnavailableError): + """Exception raised when an agent runtime is not ready.""" + + pass + + +class AgentRuntimeDisconnectedError(AgentRuntimeUnavailableError): + """Exception raised when an agent runtime is disconnected.""" + + pass + + +class AgentRuntimeNotFoundError(AgentRuntimeUnavailableError): + """Exception raised when an agent runtime is not found.""" + + pass + + +# ============================================ +# Browser Exceptions +# ============================================ + + +class BrowserInitException(Exception): + def __init__(self, message='Failed to initialize browser environment'): + super().__init__(message) + + +class BrowserUnavailableException(Exception): + def __init__( + self, + message='Browser environment is not available, please check if has been initialized', + ): + super().__init__(message) diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py index e2d8044ba7..1dcafdcb75 100644 --- a/openhands/runtime/base.py +++ b/openhands/runtime/base.py @@ -9,6 +9,7 @@ from typing import Callable from requests.exceptions import ConnectionError from openhands.core.config import AppConfig, SandboxConfig +from openhands.core.exceptions import AgentRuntimeDisconnectedError from openhands.core.logger import openhands_logger as logger from openhands.events import EventSource, EventStream, EventStreamSubscriber from openhands.events.action import ( @@ -47,22 +48,6 @@ STATUS_MESSAGES = { } -class RuntimeUnavailableError(Exception): - pass - - -class RuntimeNotReadyError(RuntimeUnavailableError): - pass - - -class RuntimeDisconnectedError(RuntimeUnavailableError): - pass - - -class RuntimeNotFoundError(RuntimeUnavailableError): - pass - - def _default_env_vars(sandbox_config: SandboxConfig) -> dict[str, str]: ret = {} for key in os.environ: @@ -193,7 +178,7 @@ class Runtime(FileEditRuntimeMixin): except Exception as e: err_id = '' if isinstance(e, ConnectionError) or isinstance( - e, RuntimeDisconnectedError + e, AgentRuntimeDisconnectedError ): err_id = 'STATUS$ERROR_RUNTIME_DISCONNECTED' logger.error( diff --git a/openhands/runtime/builder/base.py b/openhands/runtime/builder/base.py index acfe3c60fb..6bc1155d7f 100644 --- a/openhands/runtime/builder/base.py +++ b/openhands/runtime/builder/base.py @@ -24,7 +24,7 @@ class RuntimeBuilder(abc.ABC): registry prefix). This should be used for subsequent use (e.g., `docker run`). Raises: - RuntimeError: If the build failed. + AgentRuntimeBuildError: If the build failed. """ pass diff --git a/openhands/runtime/builder/docker.py b/openhands/runtime/builder/docker.py index 880b1c73c5..d15aa4fa4e 100644 --- a/openhands/runtime/builder/docker.py +++ b/openhands/runtime/builder/docker.py @@ -6,6 +6,7 @@ import time import docker from openhands import __version__ as oh_version +from openhands.core.exceptions import AgentRuntimeBuildError from openhands.core.logger import RollingLogger from openhands.core.logger import openhands_logger as logger from openhands.runtime.builder.base import RuntimeBuilder @@ -19,7 +20,9 @@ class DockerRuntimeBuilder(RuntimeBuilder): version_info = self.docker_client.version() server_version = version_info.get('Version', '').replace('-', '.') if tuple(map(int, server_version.split('.')[:2])) < (18, 9): - raise RuntimeError('Docker server version must be >= 18.09 to use BuildKit') + raise AgentRuntimeBuildError( + 'Docker server version must be >= 18.09 to use BuildKit' + ) self.rolling_logger = RollingLogger(max_lines=10) @@ -44,7 +47,7 @@ class DockerRuntimeBuilder(RuntimeBuilder): str: The name of the built Docker image. Raises: - RuntimeError: If the Docker server version is incompatible or if the build process fails. + AgentRuntimeBuildError: If the Docker server version is incompatible or if the build process fails. Note: This method uses Docker BuildKit for improved build performance and caching capabilities. @@ -55,7 +58,9 @@ class DockerRuntimeBuilder(RuntimeBuilder): version_info = self.docker_client.version() server_version = version_info.get('Version', '').replace('-', '.') if tuple(map(int, server_version.split('.'))) < (18, 9): - raise RuntimeError('Docker server version must be >= 18.09 to use BuildKit') + raise AgentRuntimeBuildError( + 'Docker server version must be >= 18.09 to use BuildKit' + ) target_image_hash_name = tags[0] target_image_repo, target_image_source_tag = target_image_hash_name.split(':') @@ -154,7 +159,7 @@ class DockerRuntimeBuilder(RuntimeBuilder): # Check if the image is built successfully image = self.docker_client.images.get(target_image_hash_name) if image is None: - raise RuntimeError( + raise AgentRuntimeBuildError( f'Build failed: Image {target_image_hash_name} not found' ) diff --git a/openhands/runtime/builder/remote.py b/openhands/runtime/builder/remote.py index 5cfe1a4943..2e2c67c5a8 100644 --- a/openhands/runtime/builder/remote.py +++ b/openhands/runtime/builder/remote.py @@ -5,6 +5,7 @@ import time import requests +from openhands.core.exceptions import AgentRuntimeBuildError from openhands.core.logger import openhands_logger as logger from openhands.runtime.builder import RuntimeBuilder from openhands.runtime.utils.request import send_request @@ -77,7 +78,7 @@ class RemoteRuntimeBuilder(RuntimeBuilder): while should_continue(): if time.time() - start_time > timeout: logger.error('Build timed out after 30 minutes') - raise RuntimeError('Build timed out after 30 minutes') + raise AgentRuntimeBuildError('Build timed out after 30 minutes') status_response = send_request( self.session, @@ -88,7 +89,7 @@ class RemoteRuntimeBuilder(RuntimeBuilder): if status_response.status_code != 200: logger.error(f'Failed to get build status: {status_response.text}') - raise RuntimeError( + raise AgentRuntimeBuildError( f'Failed to get build status: {status_response.text}' ) @@ -110,12 +111,14 @@ class RemoteRuntimeBuilder(RuntimeBuilder): 'error', f'Build failed with status: {status}. Build ID: {build_id}' ) logger.error(error_message) - raise RuntimeError(error_message) + raise AgentRuntimeBuildError(error_message) # Wait before polling again sleep_if_should_continue(30) - raise RuntimeError('Build interrupted (likely received SIGTERM or SIGINT).') + raise AgentRuntimeBuildError( + 'Build interrupted (likely received SIGTERM or SIGINT).' + ) def image_exists(self, image_name: str, pull_from_repo: bool = True) -> bool: """Checks if an image exists in the remote registry using the /image_exists endpoint.""" @@ -129,7 +132,9 @@ class RemoteRuntimeBuilder(RuntimeBuilder): if response.status_code != 200: logger.error(f'Failed to check image existence: {response.text}') - raise RuntimeError(f'Failed to check image existence: {response.text}') + raise AgentRuntimeBuildError( + f'Failed to check image existence: {response.text}' + ) result = response.json() diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py index becff94fb1..384cfd5e48 100644 --- a/openhands/runtime/impl/eventstream/eventstream_runtime.py +++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py @@ -12,6 +12,13 @@ import requests import tenacity from openhands.core.config import AppConfig +from openhands.core.exceptions import ( + AgentRuntimeDisconnectedError, + AgentRuntimeError, + AgentRuntimeNotFoundError, + AgentRuntimeNotReadyError, + AgentRuntimeTimeoutError, +) from openhands.core.logger import DEBUG from openhands.core.logger import openhands_logger as logger from openhands.events import EventStream @@ -34,11 +41,7 @@ from openhands.events.observation import ( ) from openhands.events.serialization import event_to_dict, observation_from_dict from openhands.events.serialization.action import ACTION_TYPE_TO_CLASS -from openhands.runtime.base import ( - Runtime, - RuntimeDisconnectedError, - RuntimeNotFoundError, -) +from openhands.runtime.base import Runtime from openhands.runtime.builder import DockerRuntimeBuilder from openhands.runtime.impl.eventstream.containers import remove_all_containers from openhands.runtime.plugins import PluginRequirement @@ -358,14 +361,16 @@ class EventStreamRuntime(Runtime): try: container = self.docker_client.containers.get(self.container_name) if container.status == 'exited': - raise RuntimeDisconnectedError( + raise AgentRuntimeDisconnectedError( f'Container {self.container_name} has exited.' ) except docker.errors.NotFound: - raise RuntimeNotFoundError(f'Container {self.container_name} not found.') + raise AgentRuntimeNotFoundError( + f'Container {self.container_name} not found.' + ) if not self.log_streamer: - raise RuntimeError('Runtime client is not ready.') + raise AgentRuntimeNotReadyError('Runtime client is not ready.') with send_request( self.session, @@ -445,7 +450,7 @@ class EventStreamRuntime(Runtime): obs = observation_from_dict(output) obs._cause = action.id # type: ignore[attr-defined] except requests.Timeout: - raise RuntimeError( + raise AgentRuntimeTimeoutError( f'Runtime failed to return execute_action before the requested timeout of {action.timeout}s' ) @@ -514,9 +519,9 @@ class EventStreamRuntime(Runtime): pass except requests.Timeout: - raise TimeoutError('Copy operation timed out') + raise AgentRuntimeTimeoutError('Copy operation timed out') except Exception as e: - raise RuntimeError(f'Copy operation failed: {str(e)}') + raise AgentRuntimeError(f'Copy operation failed: {str(e)}') finally: if recursive: os.unlink(temp_zip_path) diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index ab3aac4ec9..9dc3827268 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -10,6 +10,14 @@ import requests import tenacity from openhands.core.config import AppConfig +from openhands.core.exceptions import ( + AgentRuntimeDisconnectedError, + AgentRuntimeError, + AgentRuntimeNotFoundError, + AgentRuntimeNotReadyError, + AgentRuntimeTimeoutError, + AgentRuntimeUnavailableError, +) from openhands.events import EventStream from openhands.events.action import ( BrowseInteractiveAction, @@ -28,13 +36,7 @@ from openhands.events.observation import ( ) from openhands.events.serialization import event_to_dict, observation_from_dict from openhands.events.serialization.action import ACTION_TYPE_TO_CLASS -from openhands.runtime.base import ( - Runtime, - RuntimeDisconnectedError, - RuntimeNotFoundError, - RuntimeNotReadyError, - RuntimeUnavailableError, -) +from openhands.runtime.base import Runtime from openhands.runtime.builder.remote import RemoteRuntimeBuilder from openhands.runtime.plugins import PluginRequirement from openhands.runtime.utils.command import get_remote_startup_command @@ -100,7 +102,7 @@ class RemoteRuntime(Runtime): async def connect(self): try: await call_sync_from_async(self._start_or_attach_to_runtime) - except RuntimeNotReadyError: + except AgentRuntimeNotReadyError: self.log('error', 'Runtime failed to start, timed out before ready') raise await call_sync_from_async(self.setup_initial_env) @@ -111,7 +113,7 @@ class RemoteRuntime(Runtime): if existing_runtime: self.log('debug', f'Using existing runtime with ID: {self.runtime_id}') elif self.attach_to_existing: - raise RuntimeNotFoundError( + raise AgentRuntimeNotFoundError( f'Could not find existing runtime for SID: {self.sid}' ) else: @@ -215,7 +217,7 @@ class RemoteRuntime(Runtime): timeout=60, ) as response: if not response.json()['exists']: - raise RuntimeError( + raise AgentRuntimeError( f'Container image {self.container_image} does not exist' ) @@ -262,7 +264,7 @@ class RemoteRuntime(Runtime): ) except requests.HTTPError as e: self.log('error', f'Unable to start runtime: {e}') - raise RuntimeUnavailableError() from e + raise AgentRuntimeUnavailableError() from e def _resume_runtime(self): with self._send_request( @@ -322,7 +324,7 @@ class RemoteRuntime(Runtime): ) | stop_if_should_exit(), reraise=True, - retry=tenacity.retry_if_exception_type(RuntimeNotReadyError), + retry=tenacity.retry_if_exception_type(AgentRuntimeNotReadyError), wait=tenacity.wait_fixed(2), ) return retry_decorator(self._wait_until_alive_impl)() @@ -356,7 +358,7 @@ class RemoteRuntime(Runtime): self.log( 'warning', f"Runtime /alive failed, but pod says it's ready: {e}" ) - raise RuntimeNotReadyError( + raise AgentRuntimeNotReadyError( f'Runtime /alive failed to respond with 200: {e}' ) return @@ -365,14 +367,14 @@ class RemoteRuntime(Runtime): or pod_status == 'pending' or pod_status == 'running' ): # nb: Running is not yet Ready - raise RuntimeNotReadyError( + raise AgentRuntimeNotReadyError( f'Runtime (ID={self.runtime_id}) is not yet ready. Status: {pod_status}' ) elif pod_status in ('failed', 'unknown', 'crashloopbackoff'): # clean up the runtime self.close() - raise RuntimeError( - f'Runtime (ID={self.runtime_id}) failed to start. Current status: {pod_status}' + raise AgentRuntimeUnavailableError( + f'Runtime (ID={self.runtime_id}) failed to start. Current status: {pod_status}. Pod Logs:\n{runtime_data.get("pod_logs", "N/A")}' ) else: # Maybe this should be a hard failure, but passing through in case the API changes @@ -382,7 +384,7 @@ class RemoteRuntime(Runtime): 'debug', f'Waiting for runtime pod to be active. Current status: {pod_status}', ) - raise RuntimeNotReadyError() + raise AgentRuntimeNotReadyError() def close(self, timeout: int = 10): if self.config.sandbox.keep_runtime_alive or self.attach_to_existing: @@ -437,7 +439,7 @@ class RemoteRuntime(Runtime): obs = observation_from_dict(output) obs._cause = action.id # type: ignore[attr-defined] except requests.Timeout: - raise RuntimeError( + raise AgentRuntimeTimeoutError( f'Runtime failed to return execute_action before the requested timeout of {action.timeout}s' ) return obs @@ -451,7 +453,7 @@ class RemoteRuntime(Runtime): raise except requests.HTTPError as e: if is_runtime_request and e.response.status_code == 404: - raise RuntimeDisconnectedError( + raise AgentRuntimeDisconnectedError( f'404 error while connecting to {self.runtime_url}' ) elif is_runtime_request and e.response.status_code == 503: diff --git a/openhands/runtime/impl/runloop/runloop_runtime.py b/openhands/runtime/impl/runloop/runloop_runtime.py index 064aa104c3..368244a03c 100644 --- a/openhands/runtime/impl/runloop/runloop_runtime.py +++ b/openhands/runtime/impl/runloop/runloop_runtime.py @@ -10,6 +10,10 @@ from runloop_api_client.types import DevboxView from runloop_api_client.types.shared_params import LaunchParameters from openhands.core.config import AppConfig +from openhands.core.exceptions import ( + AgentRuntimeNotReadyError, + AgentRuntimeUnavailableError, +) from openhands.core.logger import openhands_logger as logger from openhands.events import EventStream from openhands.runtime.impl.eventstream.eventstream_runtime import EventStreamRuntime @@ -227,7 +231,7 @@ class RunloopRuntime(EventStreamRuntime): ) def _wait_until_alive(self): if not self.log_streamer: - raise RuntimeError('Runtime client is not ready.') + raise AgentRuntimeNotReadyError('Runtime client is not ready.') response = send_request( self.session, 'GET', @@ -239,7 +243,7 @@ class RunloopRuntime(EventStreamRuntime): else: msg = f'Action execution API is not alive. Response: {response}' logger.error(msg) - raise RuntimeError(msg) + raise AgentRuntimeUnavailableError(msg) def close(self, rm_all_containers: bool | None = True): if self.log_streamer: diff --git a/openhands/runtime/utils/runtime_build.py b/openhands/runtime/utils/runtime_build.py index de939efd9a..bbb83ac7f9 100644 --- a/openhands/runtime/utils/runtime_build.py +++ b/openhands/runtime/utils/runtime_build.py @@ -14,6 +14,7 @@ from jinja2 import Environment, FileSystemLoader import openhands from openhands import __version__ as oh_version +from openhands.core.exceptions import AgentRuntimeBuildError from openhands.core.logger import openhands_logger as logger from openhands.runtime.builder import DockerRuntimeBuilder, RuntimeBuilder @@ -364,7 +365,7 @@ def _build_sandbox_image( extra_build_args=extra_build_args, ) if not image_name: - raise RuntimeError(f'Build failed for image {names}') + raise AgentRuntimeBuildError(f'Build failed for image {names}') return image_name diff --git a/openhands/server/routes/files.py b/openhands/server/routes/files.py index c2d37350c8..3193376286 100644 --- a/openhands/server/routes/files.py +++ b/openhands/server/routes/files.py @@ -13,6 +13,7 @@ from fastapi.responses import FileResponse, JSONResponse from pathspec import PathSpec from pathspec.patterns import GitWildMatchPattern +from openhands.core.exceptions import AgentRuntimeUnavailableError from openhands.core.logger import openhands_logger as logger from openhands.events.action import ( FileReadAction, @@ -23,7 +24,7 @@ from openhands.events.observation import ( FileReadObservation, FileWriteObservation, ) -from openhands.runtime.base import Runtime, RuntimeUnavailableError +from openhands.runtime.base import Runtime from openhands.server.file_config import ( FILES_TO_IGNORE, MAX_FILE_SIZE_MB, @@ -66,7 +67,7 @@ async def list_files(request: Request, path: str | None = None): runtime: Runtime = request.state.conversation.runtime try: file_list = await call_sync_from_async(runtime.list_files, path) - except RuntimeUnavailableError as e: + except AgentRuntimeUnavailableError as e: logger.error(f'Error listing files: {e}', exc_info=True) return JSONResponse( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, @@ -93,7 +94,7 @@ async def list_files(request: Request, path: str | None = None): try: file_list = await filter_for_gitignore(file_list, '') - except RuntimeUnavailableError as e: + except AgentRuntimeUnavailableError as e: logger.error(f'Error filtering files: {e}', exc_info=True) return JSONResponse( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, @@ -129,7 +130,7 @@ async def select_file(file: str, request: Request): read_action = FileReadAction(file) try: observation = await call_sync_from_async(runtime.run_action, read_action) - except RuntimeUnavailableError as e: + except AgentRuntimeUnavailableError as e: logger.error(f'Error opening file {file}: {e}', exc_info=True) return JSONResponse( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, @@ -205,7 +206,7 @@ async def upload_file(request: Request, files: list[UploadFile]): tmp_file_path, runtime.config.workspace_mount_path_in_sandbox, ) - except RuntimeUnavailableError as e: + except AgentRuntimeUnavailableError as e: logger.error( f'Error saving file {safe_filename}: {e}', exc_info=True ) @@ -282,7 +283,7 @@ async def save_file(request: Request): write_action = FileWriteAction(file_path, content) try: observation = await call_sync_from_async(runtime.run_action, write_action) - except RuntimeUnavailableError as e: + except AgentRuntimeUnavailableError as e: logger.error(f'Error saving file: {e}', exc_info=True) return JSONResponse( status_code=500, @@ -317,7 +318,7 @@ async def zip_current_workspace(request: Request, background_tasks: BackgroundTa path = runtime.config.workspace_mount_path_in_sandbox try: zip_file = await call_sync_from_async(runtime.copy_from, path) - except RuntimeUnavailableError as e: + except AgentRuntimeUnavailableError as e: logger.error(f'Error zipping workspace: {e}', exc_info=True) return JSONResponse( status_code=500, diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py index 17bd3c6d1c..7a7108c1f1 100644 --- a/openhands/server/session/agent_session.py +++ b/openhands/server/session/agent_session.py @@ -5,13 +5,14 @@ from openhands.controller import AgentController from openhands.controller.agent import Agent from openhands.controller.state.state import State from openhands.core.config import AgentConfig, AppConfig, LLMConfig +from openhands.core.exceptions import AgentRuntimeUnavailableError from openhands.core.logger import openhands_logger as logger from openhands.core.schema.agent import AgentState from openhands.events.action import ChangeAgentStateAction from openhands.events.event import EventSource from openhands.events.stream import EventStream from openhands.runtime import get_runtime_cls -from openhands.runtime.base import Runtime, RuntimeUnavailableError +from openhands.runtime.base import Runtime from openhands.security import SecurityAnalyzer, options from openhands.storage.files import FileStore from openhands.utils.async_utils import call_async_from_sync @@ -222,7 +223,7 @@ class AgentSession: try: await self.runtime.connect() - except RuntimeUnavailableError as e: + except AgentRuntimeUnavailableError as e: logger.error(f'Runtime initialization failed: {e}', exc_info=True) if self._status_callback: self._status_callback( diff --git a/openhands/server/session/manager.py b/openhands/server/session/manager.py index 7ab8d2a817..1a90cc48fd 100644 --- a/openhands/server/session/manager.py +++ b/openhands/server/session/manager.py @@ -6,9 +6,9 @@ from dataclasses import dataclass, field import socketio from openhands.core.config import AppConfig +from openhands.core.exceptions import AgentRuntimeUnavailableError from openhands.core.logger import openhands_logger as logger from openhands.events.stream import EventStream, session_exists -from openhands.runtime.base import RuntimeUnavailableError from openhands.server.session.conversation import Conversation from openhands.server.session.session import ROOM_KEY, Session from openhands.server.session.session_init_data import SessionInitData @@ -160,7 +160,7 @@ class SessionManager: c = Conversation(sid, file_store=self.file_store, config=self.config) try: await c.connect() - except RuntimeUnavailableError as e: + except AgentRuntimeUnavailableError as e: logger.error(f'Error connecting to conversation {c.sid}: {e}') return None end_time = time.time() diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py index 08fe0e0f55..48c9d633c0 100644 --- a/tests/unit/test_agent_controller.py +++ b/tests/unit/test_agent_controller.py @@ -161,7 +161,7 @@ async def test_run_controller_with_fatal_error(mock_agent, mock_event_stream): print(f'event_stream: {list(event_stream.get_events())}') assert state.iteration == 4 assert state.agent_state == AgentState.ERROR - assert state.last_error == 'Agent got stuck in a loop' + assert state.last_error == 'AgentStuckInLoopError: Agent got stuck in a loop' assert len(list(event_stream.get_events())) == 11 @@ -227,7 +227,7 @@ async def test_run_controller_stop_with_stuck(): assert last_event['observation'] == 'agent_state_changed' assert state.agent_state == AgentState.ERROR - assert state.last_error == 'Agent got stuck in a loop' + assert state.last_error == 'AgentStuckInLoopError: Agent got stuck in a loop' @pytest.mark.asyncio