mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
feat(eval): reliability improvement for SWE-Bench eval_infer (#6347)
This commit is contained in:
parent
4383be1ab4
commit
2b04ee2e62
@@ -355,7 +355,9 @@ def _process_instance_wrapper(
|
||||
)
|
||||
# e is likely an EvalException, so we can't directly infer it from type
|
||||
# but rather check if it's a fatal error
|
||||
if is_fatal_runtime_error(str(e)):
|
||||
# But it can also be AgentRuntime**Error (e.g., swe_bench/eval_infer.py)
|
||||
_error_str = type(e).__name__ + ': ' + str(e)
|
||||
if is_fatal_runtime_error(_error_str):
|
||||
runtime_failure_count += 1
|
||||
msg += f'Runtime disconnected error detected for instance {instance.instance_id}, runtime failure count: {runtime_failure_count}'
|
||||
msg += '\n' + '-' * 10 + '\n'
|
||||
@@ -531,6 +533,7 @@ def is_fatal_runtime_error(error: str | None) -> bool:
|
||||
return False
|
||||
|
||||
FATAL_RUNTIME_ERRORS = [
|
||||
AgentRuntimeTimeoutError,
|
||||
AgentRuntimeUnavailableError,
|
||||
AgentRuntimeDisconnectedError,
|
||||
AgentRuntimeNotFoundError,
|
||||
|
||||
@@ -60,6 +60,12 @@ class RemoteRuntime(ActionExecutionClient):
|
||||
)
|
||||
self.session.headers.update({'X-API-Key': self.config.sandbox.api_key})
|
||||
|
||||
if self.config.workspace_base is not None:
|
||||
self.log(
|
||||
'debug',
|
||||
'Setting workspace_base is not supported in the remote runtime.',
|
||||
)
|
||||
|
||||
self.runtime_builder = RemoteRuntimeBuilder(
|
||||
self.config.sandbox.remote_runtime_api_url,
|
||||
self.config.sandbox.api_key,
|
||||
@@ -70,12 +76,6 @@ class RemoteRuntime(ActionExecutionClient):
|
||||
self.available_hosts: dict[str, int] = {}
|
||||
self._runtime_initialized: bool = False
|
||||
|
||||
if self.config.workspace_base is not None:
|
||||
self.log(
|
||||
'debug',
|
||||
'Setting workspace_base is not supported in the remote runtime.',
|
||||
)
|
||||
|
||||
def log(self, level: str, message: str) -> None:
|
||||
message = f'[runtime session_id={self.sid} runtime_id={self.runtime_id or "unknown"}] {message}'
|
||||
getattr(logger, level)(message, stacklevel=2)
|
||||
@@ -230,7 +230,7 @@ class RemoteRuntime(ActionExecutionClient):
|
||||
f'Runtime started. URL: {self.runtime_url}',
|
||||
)
|
||||
except requests.HTTPError as e:
|
||||
self.log('error', f'Unable to start runtime: {e}')
|
||||
self.log('error', f'Unable to start runtime: {str(e)}')
|
||||
raise AgentRuntimeUnavailableError() from e
|
||||
|
||||
def _resume_runtime(self):
|
||||
@@ -315,10 +315,11 @@ class RemoteRuntime(ActionExecutionClient):
|
||||
self.check_if_alive()
|
||||
except requests.HTTPError as e:
|
||||
self.log(
|
||||
'warning', f"Runtime /alive failed, but pod says it's ready: {e}"
|
||||
'warning',
|
||||
f"Runtime /alive failed, but pod says it's ready: {str(e)}",
|
||||
)
|
||||
raise AgentRuntimeNotReadyError(
|
||||
f'Runtime /alive failed to respond with 200: {e}'
|
||||
f'Runtime /alive failed to respond with 200: {str(e)}'
|
||||
)
|
||||
return
|
||||
elif (
|
||||
@@ -363,6 +364,7 @@ class RemoteRuntime(ActionExecutionClient):
|
||||
):
|
||||
self.log('debug', 'Runtime stopped.')
|
||||
except Exception as e:
|
||||
self.log('error', f'Unable to stop runtime: {str(e)}')
|
||||
raise e
|
||||
finally:
|
||||
super().close()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user