mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
chore: Cleanup runtime exception handling (#5696)
This commit is contained in:
parent
13097f9d1d
commit
e9cafb0372
@ -15,6 +15,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
assert_and_raise,
|
||||
codeact_user_response,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@ -400,11 +401,7 @@ def process_instance(
|
||||
)
|
||||
|
||||
# if fatal error, throw EvalError to trigger re-run
|
||||
if (
|
||||
state.last_error
|
||||
and 'fatal error during agent execution' in state.last_error
|
||||
and 'stuck in a loop' not in state.last_error
|
||||
):
|
||||
if is_fatal_evaluation_error(state.last_error):
|
||||
raise EvalException('Fatal error detected: ' + state.last_error)
|
||||
|
||||
# ======= THIS IS SWE-Bench specific =======
|
||||
|
||||
@ -16,6 +16,16 @@ from tqdm import tqdm
|
||||
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import LLMConfig
|
||||
from openhands.core.exceptions import (
|
||||
AgentRuntimeBuildError,
|
||||
AgentRuntimeDisconnectedError,
|
||||
AgentRuntimeError,
|
||||
AgentRuntimeNotFoundError,
|
||||
AgentRuntimeNotReadyError,
|
||||
AgentRuntimeTimeoutError,
|
||||
AgentRuntimeUnavailableError,
|
||||
AgentStuckInLoopError,
|
||||
)
|
||||
from openhands.core.logger import get_console_handler
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action import Action
|
||||
@ -503,3 +513,25 @@ def compatibility_for_eval_history_pairs(
|
||||
history_pairs.append((event_to_dict(action), event_to_dict(observation)))
|
||||
|
||||
return history_pairs
|
||||
|
||||
|
||||
def is_fatal_evaluation_error(error: str | None) -> bool:
|
||||
if not error:
|
||||
return False
|
||||
|
||||
FATAL_EXCEPTIONS = [
|
||||
AgentRuntimeError,
|
||||
AgentRuntimeBuildError,
|
||||
AgentRuntimeTimeoutError,
|
||||
AgentRuntimeUnavailableError,
|
||||
AgentRuntimeNotReadyError,
|
||||
AgentRuntimeDisconnectedError,
|
||||
AgentRuntimeNotFoundError,
|
||||
AgentStuckInLoopError,
|
||||
]
|
||||
|
||||
if any(exception.__name__ in error for exception in FATAL_EXCEPTIONS):
|
||||
logger.error(f'Fatal evaluation error detected: {error}')
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
@ -12,6 +12,7 @@ from openhands.controller.state.state import State, TrafficControlState
|
||||
from openhands.controller.stuck import StuckDetector
|
||||
from openhands.core.config import AgentConfig, LLMConfig
|
||||
from openhands.core.exceptions import (
|
||||
AgentStuckInLoopError,
|
||||
FunctionCallNotExistsError,
|
||||
FunctionCallValidationError,
|
||||
LLMMalformedActionError,
|
||||
@ -196,7 +197,7 @@ class AgentController:
|
||||
err_id = ''
|
||||
if isinstance(e, litellm.AuthenticationError):
|
||||
err_id = 'STATUS$ERROR_LLM_AUTHENTICATION'
|
||||
self.status_callback('error', err_id, str(e))
|
||||
self.status_callback('error', err_id, type(e).__name__ + ': ' + str(e))
|
||||
|
||||
async def start_step_loop(self):
|
||||
"""The main loop for the agent's step-by-step execution."""
|
||||
@ -502,7 +503,9 @@ class AgentController:
|
||||
return
|
||||
|
||||
if self._is_stuck():
|
||||
await self._react_to_exception(RuntimeError('Agent got stuck in a loop'))
|
||||
await self._react_to_exception(
|
||||
AgentStuckInLoopError('Agent got stuck in a loop')
|
||||
)
|
||||
return
|
||||
|
||||
self.update_state_before_step()
|
||||
|
||||
@ -1,14 +1,25 @@
|
||||
class AgentNoInstructionError(Exception):
|
||||
# ============================================
|
||||
# Agent Exceptions
|
||||
# ============================================
|
||||
|
||||
|
||||
class AgentError(Exception):
    """Common base class from which the agent exceptions derive."""
|
||||
|
||||
|
||||
class AgentNoInstructionError(AgentError):
    """Agent error whose default message says an instruction is required."""

    def __init__(self, message: str = 'Instruction must be provided') -> None:
        super().__init__(message)
|
||||
|
||||
|
||||
class AgentEventTypeError(Exception):
|
||||
class AgentEventTypeError(AgentError):
    """Agent error whose default message says an event must be a dictionary."""

    def __init__(self, message: str = 'Event must be a dictionary') -> None:
        super().__init__(message)
|
||||
|
||||
|
||||
class AgentAlreadyRegisteredError(Exception):
|
||||
class AgentAlreadyRegisteredError(AgentError):
|
||||
def __init__(self, name=None):
|
||||
if name is not None:
|
||||
message = f"Agent class already registered under '{name}'"
|
||||
@ -17,7 +28,7 @@ class AgentAlreadyRegisteredError(Exception):
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class AgentNotRegisteredError(Exception):
|
||||
class AgentNotRegisteredError(AgentError):
|
||||
def __init__(self, name=None):
|
||||
if name is not None:
|
||||
message = f"No agent class registered under '{name}'"
|
||||
@ -26,6 +37,16 @@ class AgentNotRegisteredError(Exception):
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class AgentStuckInLoopError(AgentError):
    """Agent error reporting that the agent got stuck in a loop."""

    def __init__(self, message: str = 'Agent got stuck in a loop') -> None:
        super().__init__(message)
|
||||
|
||||
|
||||
# ============================================
|
||||
# Agent Controller Exceptions
|
||||
# ============================================
|
||||
|
||||
|
||||
class TaskInvalidStateError(Exception):
|
||||
def __init__(self, state=None):
|
||||
if state is not None:
|
||||
@ -35,17 +56,9 @@ class TaskInvalidStateError(Exception):
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class BrowserInitException(Exception):
|
||||
def __init__(self, message='Failed to initialize browser environment'):
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class BrowserUnavailableException(Exception):
|
||||
def __init__(
|
||||
self,
|
||||
message='Browser environment is not available, please check if has been initialized',
|
||||
):
|
||||
super().__init__(message)
|
||||
# ============================================
|
||||
# LLM Exceptions
|
||||
# ============================================
|
||||
|
||||
|
||||
# This exception gets sent back to the LLM
|
||||
@ -96,6 +109,11 @@ class CloudFlareBlockageError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
# ============================================
|
||||
# LLM function calling Exceptions
|
||||
# ============================================
|
||||
|
||||
|
||||
class FunctionCallConversionError(Exception):
|
||||
"""Exception raised when FunctionCallingConverter failed to convert a non-function call message to a function call message.
|
||||
|
||||
@ -121,3 +139,68 @@ class FunctionCallNotExistsError(Exception):
|
||||
|
||||
def __init__(self, message):
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
# ============================================
|
||||
# Agent Runtime Exceptions
|
||||
# ============================================
|
||||
|
||||
|
||||
class AgentRuntimeError(Exception):
    """Root of the agent runtime exception hierarchy."""


class AgentRuntimeBuildError(AgentRuntimeError):
    """An agent runtime build operation failed."""


class AgentRuntimeTimeoutError(AgentRuntimeError):
    """An agent runtime operation timed out."""


class AgentRuntimeUnavailableError(AgentRuntimeError):
    """The agent runtime is unavailable."""


class AgentRuntimeNotReadyError(AgentRuntimeUnavailableError):
    """The agent runtime is not ready."""


class AgentRuntimeDisconnectedError(AgentRuntimeUnavailableError):
    """The agent runtime is disconnected."""


class AgentRuntimeNotFoundError(AgentRuntimeUnavailableError):
    """The agent runtime was not found."""
|
||||
|
||||
|
||||
# ============================================
|
||||
# Browser Exceptions
|
||||
# ============================================
|
||||
|
||||
|
||||
class BrowserInitException(Exception):
    """Exception signalling that the browser environment failed to initialize."""

    def __init__(
        self, message: str = 'Failed to initialize browser environment'
    ) -> None:
        super().__init__(message)
|
||||
|
||||
|
||||
class BrowserUnavailableException(Exception):
    """Exception signalling that the browser environment is not available."""

    def __init__(
        self,
        message: str = 'Browser environment is not available, please check if has been initialized',
    ) -> None:
        super().__init__(message)
|
||||
|
||||
@ -9,6 +9,7 @@ from typing import Callable
|
||||
from requests.exceptions import ConnectionError
|
||||
|
||||
from openhands.core.config import AppConfig, SandboxConfig
|
||||
from openhands.core.exceptions import AgentRuntimeDisconnectedError
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events import EventSource, EventStream, EventStreamSubscriber
|
||||
from openhands.events.action import (
|
||||
@ -47,22 +48,6 @@ STATUS_MESSAGES = {
|
||||
}
|
||||
|
||||
|
||||
class RuntimeUnavailableError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class RuntimeNotReadyError(RuntimeUnavailableError):
|
||||
pass
|
||||
|
||||
|
||||
class RuntimeDisconnectedError(RuntimeUnavailableError):
|
||||
pass
|
||||
|
||||
|
||||
class RuntimeNotFoundError(RuntimeUnavailableError):
|
||||
pass
|
||||
|
||||
|
||||
def _default_env_vars(sandbox_config: SandboxConfig) -> dict[str, str]:
|
||||
ret = {}
|
||||
for key in os.environ:
|
||||
@ -193,7 +178,7 @@ class Runtime(FileEditRuntimeMixin):
|
||||
except Exception as e:
|
||||
err_id = ''
|
||||
if isinstance(e, ConnectionError) or isinstance(
|
||||
e, RuntimeDisconnectedError
|
||||
e, AgentRuntimeDisconnectedError
|
||||
):
|
||||
err_id = 'STATUS$ERROR_RUNTIME_DISCONNECTED'
|
||||
logger.error(
|
||||
|
||||
@ -24,7 +24,7 @@ class RuntimeBuilder(abc.ABC):
|
||||
registry prefix). This should be used for subsequent use (e.g., `docker run`).
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the build failed.
|
||||
AgentRuntimeBuildError: If the build failed.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@ -6,6 +6,7 @@ import time
|
||||
import docker
|
||||
|
||||
from openhands import __version__ as oh_version
|
||||
from openhands.core.exceptions import AgentRuntimeBuildError
|
||||
from openhands.core.logger import RollingLogger
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.runtime.builder.base import RuntimeBuilder
|
||||
@ -19,7 +20,9 @@ class DockerRuntimeBuilder(RuntimeBuilder):
|
||||
version_info = self.docker_client.version()
|
||||
server_version = version_info.get('Version', '').replace('-', '.')
|
||||
if tuple(map(int, server_version.split('.')[:2])) < (18, 9):
|
||||
raise RuntimeError('Docker server version must be >= 18.09 to use BuildKit')
|
||||
raise AgentRuntimeBuildError(
|
||||
'Docker server version must be >= 18.09 to use BuildKit'
|
||||
)
|
||||
|
||||
self.rolling_logger = RollingLogger(max_lines=10)
|
||||
|
||||
@ -44,7 +47,7 @@ class DockerRuntimeBuilder(RuntimeBuilder):
|
||||
str: The name of the built Docker image.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the Docker server version is incompatible or if the build process fails.
|
||||
AgentRuntimeBuildError: If the Docker server version is incompatible or if the build process fails.
|
||||
|
||||
Note:
|
||||
This method uses Docker BuildKit for improved build performance and caching capabilities.
|
||||
@ -55,7 +58,9 @@ class DockerRuntimeBuilder(RuntimeBuilder):
|
||||
version_info = self.docker_client.version()
|
||||
server_version = version_info.get('Version', '').replace('-', '.')
|
||||
if tuple(map(int, server_version.split('.'))) < (18, 9):
|
||||
raise RuntimeError('Docker server version must be >= 18.09 to use BuildKit')
|
||||
raise AgentRuntimeBuildError(
|
||||
'Docker server version must be >= 18.09 to use BuildKit'
|
||||
)
|
||||
|
||||
target_image_hash_name = tags[0]
|
||||
target_image_repo, target_image_source_tag = target_image_hash_name.split(':')
|
||||
@ -154,7 +159,7 @@ class DockerRuntimeBuilder(RuntimeBuilder):
|
||||
# Check if the image is built successfully
|
||||
image = self.docker_client.images.get(target_image_hash_name)
|
||||
if image is None:
|
||||
raise RuntimeError(
|
||||
raise AgentRuntimeBuildError(
|
||||
f'Build failed: Image {target_image_hash_name} not found'
|
||||
)
|
||||
|
||||
|
||||
@ -5,6 +5,7 @@ import time
|
||||
|
||||
import requests
|
||||
|
||||
from openhands.core.exceptions import AgentRuntimeBuildError
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.runtime.builder import RuntimeBuilder
|
||||
from openhands.runtime.utils.request import send_request
|
||||
@ -77,7 +78,7 @@ class RemoteRuntimeBuilder(RuntimeBuilder):
|
||||
while should_continue():
|
||||
if time.time() - start_time > timeout:
|
||||
logger.error('Build timed out after 30 minutes')
|
||||
raise RuntimeError('Build timed out after 30 minutes')
|
||||
raise AgentRuntimeBuildError('Build timed out after 30 minutes')
|
||||
|
||||
status_response = send_request(
|
||||
self.session,
|
||||
@ -88,7 +89,7 @@ class RemoteRuntimeBuilder(RuntimeBuilder):
|
||||
|
||||
if status_response.status_code != 200:
|
||||
logger.error(f'Failed to get build status: {status_response.text}')
|
||||
raise RuntimeError(
|
||||
raise AgentRuntimeBuildError(
|
||||
f'Failed to get build status: {status_response.text}'
|
||||
)
|
||||
|
||||
@ -110,12 +111,14 @@ class RemoteRuntimeBuilder(RuntimeBuilder):
|
||||
'error', f'Build failed with status: {status}. Build ID: {build_id}'
|
||||
)
|
||||
logger.error(error_message)
|
||||
raise RuntimeError(error_message)
|
||||
raise AgentRuntimeBuildError(error_message)
|
||||
|
||||
# Wait before polling again
|
||||
sleep_if_should_continue(30)
|
||||
|
||||
raise RuntimeError('Build interrupted (likely received SIGTERM or SIGINT).')
|
||||
raise AgentRuntimeBuildError(
|
||||
'Build interrupted (likely received SIGTERM or SIGINT).'
|
||||
)
|
||||
|
||||
def image_exists(self, image_name: str, pull_from_repo: bool = True) -> bool:
|
||||
"""Checks if an image exists in the remote registry using the /image_exists endpoint."""
|
||||
@ -129,7 +132,9 @@ class RemoteRuntimeBuilder(RuntimeBuilder):
|
||||
|
||||
if response.status_code != 200:
|
||||
logger.error(f'Failed to check image existence: {response.text}')
|
||||
raise RuntimeError(f'Failed to check image existence: {response.text}')
|
||||
raise AgentRuntimeBuildError(
|
||||
f'Failed to check image existence: {response.text}'
|
||||
)
|
||||
|
||||
result = response.json()
|
||||
|
||||
|
||||
@ -12,6 +12,13 @@ import requests
|
||||
import tenacity
|
||||
|
||||
from openhands.core.config import AppConfig
|
||||
from openhands.core.exceptions import (
|
||||
AgentRuntimeDisconnectedError,
|
||||
AgentRuntimeError,
|
||||
AgentRuntimeNotFoundError,
|
||||
AgentRuntimeNotReadyError,
|
||||
AgentRuntimeTimeoutError,
|
||||
)
|
||||
from openhands.core.logger import DEBUG
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events import EventStream
|
||||
@ -34,11 +41,7 @@ from openhands.events.observation import (
|
||||
)
|
||||
from openhands.events.serialization import event_to_dict, observation_from_dict
|
||||
from openhands.events.serialization.action import ACTION_TYPE_TO_CLASS
|
||||
from openhands.runtime.base import (
|
||||
Runtime,
|
||||
RuntimeDisconnectedError,
|
||||
RuntimeNotFoundError,
|
||||
)
|
||||
from openhands.runtime.base import Runtime
|
||||
from openhands.runtime.builder import DockerRuntimeBuilder
|
||||
from openhands.runtime.impl.eventstream.containers import remove_all_containers
|
||||
from openhands.runtime.plugins import PluginRequirement
|
||||
@ -358,14 +361,16 @@ class EventStreamRuntime(Runtime):
|
||||
try:
|
||||
container = self.docker_client.containers.get(self.container_name)
|
||||
if container.status == 'exited':
|
||||
raise RuntimeDisconnectedError(
|
||||
raise AgentRuntimeDisconnectedError(
|
||||
f'Container {self.container_name} has exited.'
|
||||
)
|
||||
except docker.errors.NotFound:
|
||||
raise RuntimeNotFoundError(f'Container {self.container_name} not found.')
|
||||
raise AgentRuntimeNotFoundError(
|
||||
f'Container {self.container_name} not found.'
|
||||
)
|
||||
|
||||
if not self.log_streamer:
|
||||
raise RuntimeError('Runtime client is not ready.')
|
||||
raise AgentRuntimeNotReadyError('Runtime client is not ready.')
|
||||
|
||||
with send_request(
|
||||
self.session,
|
||||
@ -445,7 +450,7 @@ class EventStreamRuntime(Runtime):
|
||||
obs = observation_from_dict(output)
|
||||
obs._cause = action.id # type: ignore[attr-defined]
|
||||
except requests.Timeout:
|
||||
raise RuntimeError(
|
||||
raise AgentRuntimeTimeoutError(
|
||||
f'Runtime failed to return execute_action before the requested timeout of {action.timeout}s'
|
||||
)
|
||||
|
||||
@ -514,9 +519,9 @@ class EventStreamRuntime(Runtime):
|
||||
pass
|
||||
|
||||
except requests.Timeout:
|
||||
raise TimeoutError('Copy operation timed out')
|
||||
raise AgentRuntimeTimeoutError('Copy operation timed out')
|
||||
except Exception as e:
|
||||
raise RuntimeError(f'Copy operation failed: {str(e)}')
|
||||
raise AgentRuntimeError(f'Copy operation failed: {str(e)}')
|
||||
finally:
|
||||
if recursive:
|
||||
os.unlink(temp_zip_path)
|
||||
|
||||
@ -10,6 +10,14 @@ import requests
|
||||
import tenacity
|
||||
|
||||
from openhands.core.config import AppConfig
|
||||
from openhands.core.exceptions import (
|
||||
AgentRuntimeDisconnectedError,
|
||||
AgentRuntimeError,
|
||||
AgentRuntimeNotFoundError,
|
||||
AgentRuntimeNotReadyError,
|
||||
AgentRuntimeTimeoutError,
|
||||
AgentRuntimeUnavailableError,
|
||||
)
|
||||
from openhands.events import EventStream
|
||||
from openhands.events.action import (
|
||||
BrowseInteractiveAction,
|
||||
@ -28,13 +36,7 @@ from openhands.events.observation import (
|
||||
)
|
||||
from openhands.events.serialization import event_to_dict, observation_from_dict
|
||||
from openhands.events.serialization.action import ACTION_TYPE_TO_CLASS
|
||||
from openhands.runtime.base import (
|
||||
Runtime,
|
||||
RuntimeDisconnectedError,
|
||||
RuntimeNotFoundError,
|
||||
RuntimeNotReadyError,
|
||||
RuntimeUnavailableError,
|
||||
)
|
||||
from openhands.runtime.base import Runtime
|
||||
from openhands.runtime.builder.remote import RemoteRuntimeBuilder
|
||||
from openhands.runtime.plugins import PluginRequirement
|
||||
from openhands.runtime.utils.command import get_remote_startup_command
|
||||
@ -100,7 +102,7 @@ class RemoteRuntime(Runtime):
|
||||
async def connect(self):
|
||||
try:
|
||||
await call_sync_from_async(self._start_or_attach_to_runtime)
|
||||
except RuntimeNotReadyError:
|
||||
except AgentRuntimeNotReadyError:
|
||||
self.log('error', 'Runtime failed to start, timed out before ready')
|
||||
raise
|
||||
await call_sync_from_async(self.setup_initial_env)
|
||||
@ -111,7 +113,7 @@ class RemoteRuntime(Runtime):
|
||||
if existing_runtime:
|
||||
self.log('debug', f'Using existing runtime with ID: {self.runtime_id}')
|
||||
elif self.attach_to_existing:
|
||||
raise RuntimeNotFoundError(
|
||||
raise AgentRuntimeNotFoundError(
|
||||
f'Could not find existing runtime for SID: {self.sid}'
|
||||
)
|
||||
else:
|
||||
@ -215,7 +217,7 @@ class RemoteRuntime(Runtime):
|
||||
timeout=60,
|
||||
) as response:
|
||||
if not response.json()['exists']:
|
||||
raise RuntimeError(
|
||||
raise AgentRuntimeError(
|
||||
f'Container image {self.container_image} does not exist'
|
||||
)
|
||||
|
||||
@ -262,7 +264,7 @@ class RemoteRuntime(Runtime):
|
||||
)
|
||||
except requests.HTTPError as e:
|
||||
self.log('error', f'Unable to start runtime: {e}')
|
||||
raise RuntimeUnavailableError() from e
|
||||
raise AgentRuntimeUnavailableError() from e
|
||||
|
||||
def _resume_runtime(self):
|
||||
with self._send_request(
|
||||
@ -322,7 +324,7 @@ class RemoteRuntime(Runtime):
|
||||
)
|
||||
| stop_if_should_exit(),
|
||||
reraise=True,
|
||||
retry=tenacity.retry_if_exception_type(RuntimeNotReadyError),
|
||||
retry=tenacity.retry_if_exception_type(AgentRuntimeNotReadyError),
|
||||
wait=tenacity.wait_fixed(2),
|
||||
)
|
||||
return retry_decorator(self._wait_until_alive_impl)()
|
||||
@ -356,7 +358,7 @@ class RemoteRuntime(Runtime):
|
||||
self.log(
|
||||
'warning', f"Runtime /alive failed, but pod says it's ready: {e}"
|
||||
)
|
||||
raise RuntimeNotReadyError(
|
||||
raise AgentRuntimeNotReadyError(
|
||||
f'Runtime /alive failed to respond with 200: {e}'
|
||||
)
|
||||
return
|
||||
@ -365,14 +367,14 @@ class RemoteRuntime(Runtime):
|
||||
or pod_status == 'pending'
|
||||
or pod_status == 'running'
|
||||
): # nb: Running is not yet Ready
|
||||
raise RuntimeNotReadyError(
|
||||
raise AgentRuntimeNotReadyError(
|
||||
f'Runtime (ID={self.runtime_id}) is not yet ready. Status: {pod_status}'
|
||||
)
|
||||
elif pod_status in ('failed', 'unknown', 'crashloopbackoff'):
|
||||
# clean up the runtime
|
||||
self.close()
|
||||
raise RuntimeError(
|
||||
f'Runtime (ID={self.runtime_id}) failed to start. Current status: {pod_status}'
|
||||
raise AgentRuntimeUnavailableError(
|
||||
f'Runtime (ID={self.runtime_id}) failed to start. Current status: {pod_status}. Pod Logs:\n{runtime_data.get("pod_logs", "N/A")}'
|
||||
)
|
||||
else:
|
||||
# Maybe this should be a hard failure, but passing through in case the API changes
|
||||
@ -382,7 +384,7 @@ class RemoteRuntime(Runtime):
|
||||
'debug',
|
||||
f'Waiting for runtime pod to be active. Current status: {pod_status}',
|
||||
)
|
||||
raise RuntimeNotReadyError()
|
||||
raise AgentRuntimeNotReadyError()
|
||||
|
||||
def close(self, timeout: int = 10):
|
||||
if self.config.sandbox.keep_runtime_alive or self.attach_to_existing:
|
||||
@ -437,7 +439,7 @@ class RemoteRuntime(Runtime):
|
||||
obs = observation_from_dict(output)
|
||||
obs._cause = action.id # type: ignore[attr-defined]
|
||||
except requests.Timeout:
|
||||
raise RuntimeError(
|
||||
raise AgentRuntimeTimeoutError(
|
||||
f'Runtime failed to return execute_action before the requested timeout of {action.timeout}s'
|
||||
)
|
||||
return obs
|
||||
@ -451,7 +453,7 @@ class RemoteRuntime(Runtime):
|
||||
raise
|
||||
except requests.HTTPError as e:
|
||||
if is_runtime_request and e.response.status_code == 404:
|
||||
raise RuntimeDisconnectedError(
|
||||
raise AgentRuntimeDisconnectedError(
|
||||
f'404 error while connecting to {self.runtime_url}'
|
||||
)
|
||||
elif is_runtime_request and e.response.status_code == 503:
|
||||
|
||||
@ -10,6 +10,10 @@ from runloop_api_client.types import DevboxView
|
||||
from runloop_api_client.types.shared_params import LaunchParameters
|
||||
|
||||
from openhands.core.config import AppConfig
|
||||
from openhands.core.exceptions import (
|
||||
AgentRuntimeNotReadyError,
|
||||
AgentRuntimeUnavailableError,
|
||||
)
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events import EventStream
|
||||
from openhands.runtime.impl.eventstream.eventstream_runtime import EventStreamRuntime
|
||||
@ -227,7 +231,7 @@ class RunloopRuntime(EventStreamRuntime):
|
||||
)
|
||||
def _wait_until_alive(self):
|
||||
if not self.log_streamer:
|
||||
raise RuntimeError('Runtime client is not ready.')
|
||||
raise AgentRuntimeNotReadyError('Runtime client is not ready.')
|
||||
response = send_request(
|
||||
self.session,
|
||||
'GET',
|
||||
@ -239,7 +243,7 @@ class RunloopRuntime(EventStreamRuntime):
|
||||
else:
|
||||
msg = f'Action execution API is not alive. Response: {response}'
|
||||
logger.error(msg)
|
||||
raise RuntimeError(msg)
|
||||
raise AgentRuntimeUnavailableError(msg)
|
||||
|
||||
def close(self, rm_all_containers: bool | None = True):
|
||||
if self.log_streamer:
|
||||
|
||||
@ -14,6 +14,7 @@ from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
import openhands
|
||||
from openhands import __version__ as oh_version
|
||||
from openhands.core.exceptions import AgentRuntimeBuildError
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.runtime.builder import DockerRuntimeBuilder, RuntimeBuilder
|
||||
|
||||
@ -364,7 +365,7 @@ def _build_sandbox_image(
|
||||
extra_build_args=extra_build_args,
|
||||
)
|
||||
if not image_name:
|
||||
raise RuntimeError(f'Build failed for image {names}')
|
||||
raise AgentRuntimeBuildError(f'Build failed for image {names}')
|
||||
|
||||
return image_name
|
||||
|
||||
|
||||
@ -13,6 +13,7 @@ from fastapi.responses import FileResponse, JSONResponse
|
||||
from pathspec import PathSpec
|
||||
from pathspec.patterns import GitWildMatchPattern
|
||||
|
||||
from openhands.core.exceptions import AgentRuntimeUnavailableError
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action import (
|
||||
FileReadAction,
|
||||
@ -23,7 +24,7 @@ from openhands.events.observation import (
|
||||
FileReadObservation,
|
||||
FileWriteObservation,
|
||||
)
|
||||
from openhands.runtime.base import Runtime, RuntimeUnavailableError
|
||||
from openhands.runtime.base import Runtime
|
||||
from openhands.server.file_config import (
|
||||
FILES_TO_IGNORE,
|
||||
MAX_FILE_SIZE_MB,
|
||||
@ -66,7 +67,7 @@ async def list_files(request: Request, path: str | None = None):
|
||||
runtime: Runtime = request.state.conversation.runtime
|
||||
try:
|
||||
file_list = await call_sync_from_async(runtime.list_files, path)
|
||||
except RuntimeUnavailableError as e:
|
||||
except AgentRuntimeUnavailableError as e:
|
||||
logger.error(f'Error listing files: {e}', exc_info=True)
|
||||
return JSONResponse(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
@ -93,7 +94,7 @@ async def list_files(request: Request, path: str | None = None):
|
||||
|
||||
try:
|
||||
file_list = await filter_for_gitignore(file_list, '')
|
||||
except RuntimeUnavailableError as e:
|
||||
except AgentRuntimeUnavailableError as e:
|
||||
logger.error(f'Error filtering files: {e}', exc_info=True)
|
||||
return JSONResponse(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
@ -129,7 +130,7 @@ async def select_file(file: str, request: Request):
|
||||
read_action = FileReadAction(file)
|
||||
try:
|
||||
observation = await call_sync_from_async(runtime.run_action, read_action)
|
||||
except RuntimeUnavailableError as e:
|
||||
except AgentRuntimeUnavailableError as e:
|
||||
logger.error(f'Error opening file {file}: {e}', exc_info=True)
|
||||
return JSONResponse(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
@ -205,7 +206,7 @@ async def upload_file(request: Request, files: list[UploadFile]):
|
||||
tmp_file_path,
|
||||
runtime.config.workspace_mount_path_in_sandbox,
|
||||
)
|
||||
except RuntimeUnavailableError as e:
|
||||
except AgentRuntimeUnavailableError as e:
|
||||
logger.error(
|
||||
f'Error saving file {safe_filename}: {e}', exc_info=True
|
||||
)
|
||||
@ -282,7 +283,7 @@ async def save_file(request: Request):
|
||||
write_action = FileWriteAction(file_path, content)
|
||||
try:
|
||||
observation = await call_sync_from_async(runtime.run_action, write_action)
|
||||
except RuntimeUnavailableError as e:
|
||||
except AgentRuntimeUnavailableError as e:
|
||||
logger.error(f'Error saving file: {e}', exc_info=True)
|
||||
return JSONResponse(
|
||||
status_code=500,
|
||||
@ -317,7 +318,7 @@ async def zip_current_workspace(request: Request, background_tasks: BackgroundTa
|
||||
path = runtime.config.workspace_mount_path_in_sandbox
|
||||
try:
|
||||
zip_file = await call_sync_from_async(runtime.copy_from, path)
|
||||
except RuntimeUnavailableError as e:
|
||||
except AgentRuntimeUnavailableError as e:
|
||||
logger.error(f'Error zipping workspace: {e}', exc_info=True)
|
||||
return JSONResponse(
|
||||
status_code=500,
|
||||
|
||||
@ -5,13 +5,14 @@ from openhands.controller import AgentController
|
||||
from openhands.controller.agent import Agent
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import AgentConfig, AppConfig, LLMConfig
|
||||
from openhands.core.exceptions import AgentRuntimeUnavailableError
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.schema.agent import AgentState
|
||||
from openhands.events.action import ChangeAgentStateAction
|
||||
from openhands.events.event import EventSource
|
||||
from openhands.events.stream import EventStream
|
||||
from openhands.runtime import get_runtime_cls
|
||||
from openhands.runtime.base import Runtime, RuntimeUnavailableError
|
||||
from openhands.runtime.base import Runtime
|
||||
from openhands.security import SecurityAnalyzer, options
|
||||
from openhands.storage.files import FileStore
|
||||
from openhands.utils.async_utils import call_async_from_sync
|
||||
@ -222,7 +223,7 @@ class AgentSession:
|
||||
|
||||
try:
|
||||
await self.runtime.connect()
|
||||
except RuntimeUnavailableError as e:
|
||||
except AgentRuntimeUnavailableError as e:
|
||||
logger.error(f'Runtime initialization failed: {e}', exc_info=True)
|
||||
if self._status_callback:
|
||||
self._status_callback(
|
||||
|
||||
@ -6,9 +6,9 @@ from dataclasses import dataclass, field
|
||||
import socketio
|
||||
|
||||
from openhands.core.config import AppConfig
|
||||
from openhands.core.exceptions import AgentRuntimeUnavailableError
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.stream import EventStream, session_exists
|
||||
from openhands.runtime.base import RuntimeUnavailableError
|
||||
from openhands.server.session.conversation import Conversation
|
||||
from openhands.server.session.session import ROOM_KEY, Session
|
||||
from openhands.server.session.session_init_data import SessionInitData
|
||||
@ -160,7 +160,7 @@ class SessionManager:
|
||||
c = Conversation(sid, file_store=self.file_store, config=self.config)
|
||||
try:
|
||||
await c.connect()
|
||||
except RuntimeUnavailableError as e:
|
||||
except AgentRuntimeUnavailableError as e:
|
||||
logger.error(f'Error connecting to conversation {c.sid}: {e}')
|
||||
return None
|
||||
end_time = time.time()
|
||||
|
||||
@ -161,7 +161,7 @@ async def test_run_controller_with_fatal_error(mock_agent, mock_event_stream):
|
||||
print(f'event_stream: {list(event_stream.get_events())}')
|
||||
assert state.iteration == 4
|
||||
assert state.agent_state == AgentState.ERROR
|
||||
assert state.last_error == 'Agent got stuck in a loop'
|
||||
assert state.last_error == 'AgentStuckInLoopError: Agent got stuck in a loop'
|
||||
assert len(list(event_stream.get_events())) == 11
|
||||
|
||||
|
||||
@ -227,7 +227,7 @@ async def test_run_controller_stop_with_stuck():
|
||||
assert last_event['observation'] == 'agent_state_changed'
|
||||
|
||||
assert state.agent_state == AgentState.ERROR
|
||||
assert state.last_error == 'Agent got stuck in a loop'
|
||||
assert state.last_error == 'AgentStuckInLoopError: Agent got stuck in a loop'
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user