Major logging overhaul (#4563)

This commit is contained in:
Robert Brennan
2024-10-29 02:30:50 -04:00
committed by GitHub
parent ae13171194
commit 30eeaa641c
36 changed files with 384 additions and 294 deletions

View File

@@ -160,7 +160,7 @@ def get_prompt_and_images(
# the hint, based on the last action
hint = get_hint(event_to_memory(latest_action, max_message_chars).get('action', ''))
logger.info('HINT:\n' + hint, extra={'msg_type': 'DETAIL'})
logger.debug('HINT:\n' + hint, extra={'msg_type': 'DETAIL'})
# the last relevant user message (the task)
message, image_urls = state.get_current_user_intent()

View File

@@ -125,6 +125,15 @@ class AgentController:
await self.set_agent_state_to(AgentState.STOPPED)
self.event_stream.unsubscribe(EventStreamSubscriber.AGENT_CONTROLLER)
def log(self, level: str, message: str, extra: dict | None = None):
    """Log a message via the module logger, prefixed with this controller's id.

    Args:
        level (str): Name of a logger method to invoke (e.g. 'info', 'debug', 'error').
        message (str): The message to log.
        extra (dict | None): Optional extra fields forwarded to the logger call.
    """
    prefixed = f'[Agent Controller {self.id}] {message}'
    getattr(logger, level)(prefixed, extra=extra)
def update_state_before_step(self):
self.state.iteration += 1
self.state.local_iteration += 1
@@ -153,17 +162,17 @@ class AgentController:
async def start_step_loop(self):
"""The main loop for the agent's step-by-step execution."""
logger.info(f'[Agent Controller {self.id}] Starting step loop...')
self.log('info', 'Starting step loop...')
while should_continue():
try:
await self._step()
except asyncio.CancelledError:
logger.info('AgentController task was cancelled')
self.log('debug', 'AgentController task was cancelled')
break
except Exception as e:
traceback.print_exc()
logger.error(f'Error while running the agent: {e}')
logger.error(traceback.format_exc())
self.log('error', f'Error while running the agent: {e}')
self.log('error', traceback.format_exc())
await self.report_error(
'There was an unexpected error while running the agent', exception=e
)
@@ -232,7 +241,7 @@ class AgentController:
observation_to_print.content = truncate_content(
observation_to_print.content, self.agent.llm.config.max_message_chars
)
logger.info(observation_to_print, extra={'msg_type': 'OBSERVATION'})
self.log('debug', str(observation_to_print), extra={'msg_type': 'OBSERVATION'})
# Merge with the metrics from the LLM - it will be synced to the controller's local metrics in update_state_after_step()
if observation.llm_metrics is not None:
@@ -267,8 +276,10 @@ class AgentController:
action (MessageAction): The message action to handle.
"""
if action.source == EventSource.USER:
logger.info(
action, extra={'msg_type': 'ACTION', 'event_source': EventSource.USER}
self.log(
'debug',
str(action),
extra={'msg_type': 'ACTION', 'event_source': EventSource.USER},
)
if self.get_agent_state() != AgentState.RUNNING:
await self.set_agent_state_to(AgentState.RUNNING)
@@ -287,8 +298,9 @@ class AgentController:
Args:
new_state (AgentState): The new state to set for the agent.
"""
logger.debug(
f'[Agent Controller {self.id}] Setting agent({self.agent.name}) state from {self.state.agent_state} to {new_state}'
self.log(
'info',
f'Setting agent({self.agent.name}) state from {self.state.agent_state} to {new_state}',
)
if new_state == self.state.agent_state:
@@ -379,8 +391,9 @@ class AgentController:
# global metrics should be shared between parent and child
metrics=self.state.metrics,
)
logger.info(
f'[Agent Controller {self.id}]: start delegate, creating agent {delegate_agent.name} using LLM {llm}'
self.log(
'debug',
f'start delegate, creating agent {delegate_agent.name} using LLM {llm}',
)
self.delegate = AgentController(
sid=self.id + '-delegate',
@@ -422,8 +435,9 @@ class AgentController:
await self._delegate_step()
return
logger.info(
f'{self.agent.name} LEVEL {self.state.delegate_level} LOCAL STEP {self.state.local_iteration} GLOBAL STEP {self.state.iteration}',
self.log(
'info',
f'LEVEL {self.state.delegate_level} LOCAL STEP {self.state.local_iteration} GLOBAL STEP {self.state.iteration}',
extra={'msg_type': 'STEP'},
)
@@ -479,16 +493,16 @@ class AgentController:
self.event_stream.add_event(action, EventSource.AGENT)
await self.update_state_after_step()
logger.info(action, extra={'msg_type': 'ACTION'})
self.log('debug', str(action), extra={'msg_type': 'ACTION'})
async def _delegate_step(self):
"""Executes a single step of the delegate agent."""
logger.debug(f'[Agent Controller {self.id}] Delegate not none, awaiting...')
self.log('debug', 'Delegate not none, awaiting...')
await self.delegate._step() # type: ignore[union-attr]
logger.debug(f'[Agent Controller {self.id}] Delegate step done')
self.log('debug', 'Delegate step done')
assert self.delegate is not None
delegate_state = self.delegate.get_agent_state()
logger.debug(f'[Agent Controller {self.id}] Delegate state: {delegate_state}')
self.log('debug', f'Delegate state: {delegate_state}')
if delegate_state == AgentState.ERROR:
# update iteration that shall be shared across agents
self.state.iteration = self.delegate.state.iteration
@@ -500,9 +514,7 @@ class AgentController:
await self.report_error('Delegator agent encountered an error')
elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED):
logger.info(
f'[Agent Controller {self.id}] Delegate agent has finished execution'
)
self.log('debug', 'Delegate agent has finished execution')
# retrieve delegate result
outputs = self.delegate.state.outputs if self.delegate.state else {}
@@ -542,7 +554,9 @@ class AgentController:
"""
stop_step = False
if self.state.traffic_control_state == TrafficControlState.PAUSED:
logger.info('Hitting traffic control, temporarily resume upon user request')
self.log(
'debug', 'Hitting traffic control, temporarily resume upon user request'
)
self.state.traffic_control_state = TrafficControlState.NORMAL
else:
self.state.traffic_control_state = TrafficControlState.THROTTLING
@@ -606,7 +620,9 @@ class AgentController:
if start_id == -1:
start_id = self.event_stream.get_latest_event_id() + 1
else:
logger.debug(f'AgentController {self.id} restoring from event {start_id}')
self.log(
'debug', f'AgentController {self.id} restoring from event {start_id}'
)
# make sure history is in sync
self.state.start_id = start_id

View File

@@ -115,7 +115,7 @@ class State:
pickled = base64.b64decode(encoded)
state = pickle.loads(pickled)
except Exception as e:
logger.error(f'Failed to restore state from session: {e}')
logger.warning(f'Failed to restore state from session: {e}')
raise e
# update state

View File

@@ -252,7 +252,7 @@ def get_llm_config_arg(
if llm_config_arg.startswith('llm.'):
llm_config_arg = llm_config_arg[4:]
logger.openhands_logger.info(f'Loading llm config from {llm_config_arg}')
logger.openhands_logger.debug(f'Loading llm config from {llm_config_arg}')
# load the toml file
try:

View File

@@ -80,6 +80,69 @@ file_formatter = logging.Formatter(
llm_formatter = logging.Formatter('%(message)s')
class RollingLogger:
    """Render a fixed-height rolling window of log lines in the terminal.

    Keeps the most recent ``max_lines`` lines and redraws them in place using
    ANSI escape sequences, producing a scrolling-display effect. Output is
    only emitted when DEBUG mode is on and stdout is a TTY.
    """

    max_lines: int  # height of the rolling window
    char_limit: int  # maximum characters kept per line
    log_lines: list[str]

    def __init__(self, max_lines=10, char_limit=80):
        self.max_lines = max_lines
        self.char_limit = char_limit
        self.log_lines = [''] * self.max_lines

    def is_enabled(self):
        """Return True when rolling output should actually be written."""
        return DEBUG and sys.stdout.isatty()

    def start(self, message=''):
        """Print an optional header and reserve the window's vertical space."""
        if message:
            print(message)
        self._write('\n' * self.max_lines)
        self._flush()

    def add_line(self, line):
        """Push a new (truncated) line into the window and redraw it."""
        self.log_lines.pop(0)
        self.log_lines.append(line[: self.char_limit])
        self.print_lines()

    def write_immediately(self, line):
        """Write a line directly, bypassing the rolling window."""
        self._write(line)
        self._flush()

    def print_lines(self):
        """Display the last n log_lines in the console (not for file logging).

        This will create the effect of a rolling display in the console.
        """
        self.move_back()
        for line in self.log_lines:
            self.replace_current_line(line)

    def move_back(self, amount=-1):
        """Move the cursor up ``amount`` lines ('\\033[F' moves up one line).

        Args:
            amount (int): Number of lines to move up; -1 (the default) means
                the full window height (``max_lines``).
        """
        if amount == -1:
            amount = self.max_lines
        # Bug fix: honor the requested ``amount`` instead of always moving
        # back ``max_lines`` (callers pass e.g. a layer count here).
        self._write('\033[F' * amount)
        self._flush()

    def replace_current_line(self, line=''):
        """Clear the current line and rewrite it ('\\033[2K' clears the line,
        the trailing newline advances to the next one)."""
        self._write('\033[2K' + line + '\n')
        self._flush()

    def _write(self, line):
        # No-op unless DEBUG + TTY, so non-interactive runs stay clean.
        if not self.is_enabled():
            return
        sys.stdout.write(line)

    def _flush(self):
        if not self.is_enabled():
            return
        sys.stdout.flush()
class SensitiveDataFilter(logging.Filter):
def filter(self, record):
# start with attributes
@@ -168,7 +231,7 @@ openhands_logger.setLevel(current_log_level)
if current_log_level == logging.DEBUG:
LOG_TO_FILE = True
openhands_logger.info('DEBUG mode enabled.')
openhands_logger.debug('DEBUG mode enabled.')
openhands_logger.addHandler(get_console_handler(current_log_level))
openhands_logger.addFilter(SensitiveDataFilter(openhands_logger.name))
@@ -185,7 +248,7 @@ if LOG_TO_FILE:
openhands_logger.addHandler(
get_file_handler(LOG_DIR, current_log_level)
) # default log to project root
openhands_logger.info(f'Logging to file in: {LOG_DIR}')
openhands_logger.debug(f'Logging to file in: {LOG_DIR}')
# Exclude LiteLLM from logging output
logging.getLogger('LiteLLM').disabled = True

View File

@@ -73,7 +73,7 @@ def create_runtime(
# runtime and tools
runtime_cls = get_runtime_cls(config.runtime)
logger.info(f'Initializing runtime: {runtime_cls.__name__}')
logger.debug(f'Initializing runtime: {runtime_cls.__name__}')
runtime: Runtime = runtime_cls(
config=config,
event_stream=event_stream,
@@ -129,12 +129,12 @@ async def run_controller(
initial_state = None
if config.enable_cli_session:
try:
logger.info(f'Restoring agent state from cli session {event_stream.sid}')
logger.debug(f'Restoring agent state from cli session {event_stream.sid}')
initial_state = State.restore_from_session(
event_stream.sid, event_stream.file_store
)
except Exception as e:
logger.info(f'Error restoring state: {e}')
logger.debug(f'Error restoring state: {e}')
# init controller with this initial state
controller = AgentController(
@@ -154,7 +154,7 @@ async def run_controller(
initial_user_action, Action
), f'initial user actions must be an Action, got {type(initial_user_action)}'
# Logging
logger.info(
logger.debug(
f'Agent Controller Initialized: Running agent {agent.name}, model '
f'{agent.llm.config.model}, with actions: {initial_user_action}'
)

View File

@@ -92,7 +92,7 @@ class AsyncLLM(LLM):
return resp
except UserCancelledError:
logger.info('LLM request cancelled by user.')
logger.debug('LLM request cancelled by user.')
raise
except Exception as e:
logger.error(f'Completion Error occurred:\n{e}')

View File

@@ -122,7 +122,7 @@ class LLM(RetryMixin, DebugMixin):
# noinspection PyBroadException
except Exception:
pass
logger.info(f'Model info: {self.model_info}')
logger.debug(f'Model info: {self.model_info}')
if self.config.log_completions:
if self.config.log_completions_folder is None:
@@ -378,7 +378,7 @@ class LLM(RetryMixin, DebugMixin):
# log the stats
if stats:
logger.info(stats)
logger.debug(stats)
def get_token_count(self, messages):
"""Get the number of tokens in a list of messages.
@@ -432,7 +432,7 @@ class LLM(RetryMixin, DebugMixin):
input_cost_per_token=self.config.input_cost_per_token,
output_cost_per_token=self.config.output_cost_per_token,
)
logger.info(f'Using custom cost per token: {cost_per_token}')
logger.debug(f'Using custom cost per token: {cost_per_token}')
extra_kwargs['custom_cost_per_token'] = cost_per_token
try:
@@ -446,7 +446,7 @@ class LLM(RetryMixin, DebugMixin):
return cost
except Exception:
self.cost_metric_supported = False
logger.warning('Cost calculation not supported for this model.')
logger.debug('Cost calculation not supported for this model.')
return 0.0
def __str__(self):

View File

@@ -87,7 +87,7 @@ class StreamingLLM(AsyncLLM):
yield chunk
except UserCancelledError:
logger.info('LLM request cancelled by user.')
logger.debug('LLM request cancelled by user.')
raise
except Exception as e:
logger.error(f'Completion Error occurred:\n{e}')

View File

@@ -19,7 +19,7 @@ def get_runtime_cls(name: str):
return RemoteRuntime
elif name == 'modal':
logger.info('Using ModalRuntime')
logger.debug('Using ModalRuntime')
from openhands.runtime.impl.modal.modal_runtime import ModalRuntime
return ModalRuntime

View File

@@ -126,15 +126,15 @@ class ActionExecutor:
code='from openhands.runtime.plugins.agent_skills.agentskills import *\n'
)
)
logger.info(f'AgentSkills initialized: {obs}')
logger.debug(f'AgentSkills initialized: {obs}')
await self._init_bash_commands()
logger.info('Runtime client initialized.')
logger.debug('Runtime client initialized.')
async def _init_plugin(self, plugin: Plugin):
await plugin.initialize(self.username)
self.plugins[plugin.name] = plugin
logger.info(f'Initializing plugin: {plugin.name}')
logger.debug(f'Initializing plugin: {plugin.name}')
if isinstance(plugin, JupyterPlugin):
await self.run_ipython(
@@ -144,7 +144,7 @@ class ActionExecutor:
)
async def _init_bash_commands(self):
logger.info(f'Initializing by running {len(INIT_COMMANDS)} bash commands...')
logger.debug(f'Initializing by running {len(INIT_COMMANDS)} bash commands...')
for command in INIT_COMMANDS:
action = CmdRunAction(command=command)
action.timeout = 300
@@ -156,7 +156,7 @@ class ActionExecutor:
)
assert obs.exit_code == 0
logger.info('Bash init commands completed')
logger.debug('Bash init commands completed')
async def run_action(self, action) -> Observation:
action_type = action.action
@@ -445,7 +445,7 @@ if __name__ == '__main__':
shutil.unpack_archive(zip_path, full_dest_path)
os.remove(zip_path) # Remove the zip file after extraction
logger.info(
logger.debug(
f'Uploaded file {file.filename} and extracted to {destination}'
)
else:
@@ -453,7 +453,7 @@ if __name__ == '__main__':
file_path = os.path.join(full_dest_path, file.filename)
with open(file_path, 'wb') as buffer:
shutil.copyfileobj(file.file, buffer)
logger.info(f'Uploaded file {file.filename} to {destination}')
logger.debug(f'Uploaded file {file.filename} to {destination}')
return JSONResponse(
content={
@@ -469,7 +469,7 @@ if __name__ == '__main__':
@app.get('/download_files')
async def download_file(path: str):
logger.info('Downloading files')
logger.debug('Downloading files')
try:
if not os.path.isabs(path):
raise HTTPException(
@@ -587,7 +587,5 @@ if __name__ == '__main__':
logger.error(f'Error listing files: {e}', exc_info=True)
return []
logger.info('Runtime client initialized.')
logger.info(f'Starting action execution API on port {args.port}')
logger.debug(f'Starting action execution API on port {args.port}')
run(app, host='0.0.0.0', port=args.port)

View File

@@ -93,6 +93,10 @@ class Runtime(FileEditRuntimeMixin):
def close(self) -> None:
pass
def log(self, level: str, message: str) -> None:
    """Log ``message`` at ``level``, prefixed with this runtime's session id."""
    getattr(logger, level)(f'[runtime {self.sid}] {message}')
# ====================================================================
def add_env_vars(self, env_vars: dict[str, str]) -> None:
@@ -104,7 +108,7 @@ class Runtime(FileEditRuntimeMixin):
code += f'os.environ["{key}"] = {json.dumps(value)}\n'
code += '\n'
obs = self.run_ipython(IPythonRunCellAction(code))
logger.info(f'Added env vars to IPython: code={code}, obs={obs}')
self.log('debug', f'Added env vars to IPython: code={code}, obs={obs}')
# Add env vars to the Bash shell
cmd = ''

View File

@@ -57,7 +57,7 @@ class BrowserEnv:
retry=tenacity.retry_if_exception_type(BrowserInitException),
)
def init_browser(self):
logger.info('Starting browser env...')
logger.debug('Starting browser env...')
try:
self.process = multiprocessing.Process(target=self.browser_process)
self.process.start()
@@ -72,7 +72,7 @@ class BrowserEnv:
def browser_process(self):
if self.eval_mode:
assert self.browsergym_eval_env is not None
logger.info('Initializing browser env for web browsing evaluation.')
logger.debug('Initializing browser env for web browsing evaluation.')
if 'webarena' in self.browsergym_eval_env:
import browsergym.webarena # noqa F401 register webarena tasks as gym environments
elif 'miniwob' in self.browsergym_eval_env:
@@ -97,10 +97,10 @@ class BrowserEnv:
self.eval_goal = None
self.eval_rewards: list[float] = []
if self.eval_mode:
logger.info(f"Browsing goal: {obs['goal']}")
logger.debug(f"Browsing goal: {obs['goal']}")
self.eval_goal = obs['goal']
logger.info('Browser env started.')
logger.debug('Browser env started.')
while should_continue():
try:
if self.browser_side.poll(timeout=0.01):
@@ -108,7 +108,7 @@ class BrowserEnv:
# shutdown the browser environment
if unique_request_id == 'SHUTDOWN':
logger.info('SHUTDOWN recv, shutting down browser env...')
logger.debug('SHUTDOWN recv, shutting down browser env...')
env.close()
return
elif unique_request_id == 'IS_ALIVE':
@@ -146,7 +146,7 @@ class BrowserEnv:
obs['elapsed_time'] = obs['elapsed_time'].item()
self.browser_side.send((unique_request_id, obs))
except KeyboardInterrupt:
logger.info('Browser env process interrupted by user.')
logger.debug('Browser env process interrupted by user.')
try:
env.close()
except Exception:
@@ -172,7 +172,7 @@ class BrowserEnv:
response_id, _ = self.agent_side.recv()
if response_id == 'ALIVE':
return True
logger.info(f'Browser env is not alive. Response ID: {response_id}')
logger.debug(f'Browser env is not alive. Response ID: {response_id}')
def close(self):
if not self.process.is_alive():

View File

@@ -1,12 +1,12 @@
import datetime
import os
import subprocess
import sys
import time
import docker
from openhands import __version__ as oh_version
from openhands.core.logger import RollingLogger
from openhands.core.logger import openhands_logger as logger
from openhands.runtime.builder.base import RuntimeBuilder
@@ -20,8 +20,7 @@ class DockerRuntimeBuilder(RuntimeBuilder):
if tuple(map(int, server_version.split('.'))) < (18, 9):
raise RuntimeError('Docker server version must be >= 18.09 to use BuildKit')
self.max_lines = 10
self.log_lines = [''] * self.max_lines
self.rolling_logger = RollingLogger(max_lines=10)
def build(
self,
@@ -90,10 +89,9 @@ class DockerRuntimeBuilder(RuntimeBuilder):
buildx_cmd.append(path) # must be last!
print('================ DOCKER BUILD STARTED ================')
if sys.stdout.isatty():
sys.stdout.write('\n' * self.max_lines)
sys.stdout.flush()
self.rolling_logger.start(
'================ DOCKER BUILD STARTED ================'
)
try:
process = subprocess.Popen(
@@ -226,25 +224,10 @@ class DockerRuntimeBuilder(RuntimeBuilder):
return False
def _output_logs(self, new_line: str) -> None:
"""Display the last 10 log_lines in the console (not for file logging).
This will create the effect of a rolling display in the console.
'\033[F' moves the cursor up one line.
'\033[2K\r' clears the line and moves the cursor to the beginning of the line.
"""
if not sys.stdout.isatty():
if not self.rolling_logger.is_enabled():
logger.debug(new_line)
return
self.log_lines.pop(0)
self.log_lines.append(new_line[:80])
sys.stdout.write('\033[F' * (self.max_lines))
sys.stdout.flush()
for line in self.log_lines:
sys.stdout.write('\033[2K' + line + '\n')
sys.stdout.flush()
else:
self.rolling_logger.add_line(new_line)
def _output_build_progress(
self, current_line: dict, layers: dict, previous_layer_count: int
@@ -273,19 +256,24 @@ class DockerRuntimeBuilder(RuntimeBuilder):
100 if layers[layer_id]['status'] == 'Download complete' else 0
)
if sys.stdout.isatty():
sys.stdout.write('\033[F' * previous_layer_count)
if self.rolling_logger.is_enabled():
self.rolling_logger.move_back(previous_layer_count)
for lid, layer_data in sorted(layers.items()):
sys.stdout.write('\033[2K\r')
self.rolling_logger.replace_current_line()
status = layer_data['status']
progress = layer_data['progress']
if status == 'Download complete':
print(f'Layer {lid}: Download complete')
self.rolling_logger.write_immediately(
f'Layer {lid}: Download complete'
)
elif status == 'Already exists':
print(f'Layer {lid}: Already exists')
self.rolling_logger.write_immediately(
f'Layer {lid}: Already exists'
)
else:
print(f'Layer {lid}: {progress} {status}')
sys.stdout.flush()
self.rolling_logger.write_immediately(
f'Layer {lid}: {progress} {status}'
)
elif percentage != 0 and (
percentage - layers[layer_id]['last_logged'] >= 10 or percentage == 100
):

View File

@@ -89,7 +89,7 @@ class RemoteRuntimeBuilder(RuntimeBuilder):
logger.info(f'Build status: {status}')
if status == 'SUCCESS':
logger.info(f"Successfully built {status_data['image']}")
logger.debug(f"Successfully built {status_data['image']}")
return status_data['image']
elif status in [
'FAILURE',
@@ -127,12 +127,12 @@ class RemoteRuntimeBuilder(RuntimeBuilder):
result = response.json()
if result['exists']:
logger.info(
logger.debug(
f"Image {image_name} exists. "
f"Uploaded at: {result['image']['upload_time']}, "
f"Size: {result['image']['image_size_bytes'] / 1024 / 1024:.2f} MB"
)
else:
logger.info(f'Image {image_name} does not exist.')
logger.debug(f'Image {image_name} does not exist.')
return result['exists']

View File

@@ -7,6 +7,7 @@ from e2b import Sandbox as E2BSandbox
from e2b.sandbox.exception import (
TimeoutException,
)
from openhands.core.config import SandboxConfig
from openhands.core.logger import openhands_logger as logger
@@ -29,11 +30,11 @@ class E2BBox:
api_key=e2b_api_key,
template=template,
# It's possible to stream stdout and stderr from sandbox and from each process
on_stderr=lambda x: logger.info(f'E2B sandbox stderr: {x}'),
on_stdout=lambda x: logger.info(f'E2B sandbox stdout: {x}'),
on_stderr=lambda x: logger.debug(f'E2B sandbox stderr: {x}'),
on_stdout=lambda x: logger.debug(f'E2B sandbox stdout: {x}'),
cwd=self._cwd, # Default workdir inside sandbox
)
logger.info(f'Started E2B sandbox with ID "{self.sandbox.id}"')
logger.debug(f'Started E2B sandbox with ID "{self.sandbox.id}"')
@property
def filesystem(self):
@@ -68,7 +69,7 @@ class E2BBox:
try:
process_output = process.wait(timeout=timeout)
except TimeoutException:
logger.info('Command timed out, killing process...')
logger.debug('Command timed out, killing process...')
process.kill()
return -1, f'Command: "{cmd}" timed out'

View File

@@ -1,7 +1,6 @@
import os
import tempfile
import threading
import traceback
from functools import lru_cache
from typing import Callable
from zipfile import ZipFile
@@ -50,8 +49,7 @@ class LogBuffer:
for appending, retrieving, and clearing logs.
"""
def __init__(self, container: docker.models.containers.Container):
self.client_ready = False
def __init__(self, container: docker.models.containers.Container, logFn: Callable):
self.init_msg = 'Runtime client initialized.'
self.buffer: list[str] = []
@@ -61,6 +59,7 @@ class LogBuffer:
self.log_stream_thread = threading.Thread(target=self.stream_logs)
self.log_stream_thread.daemon = True
self.log_stream_thread.start()
self.log = logFn
def append(self, log_line: str):
with self.lock:
@@ -85,15 +84,14 @@ class LogBuffer:
if log_line:
decoded_line = log_line.decode('utf-8').rstrip()
self.append(decoded_line)
if self.init_msg in decoded_line:
self.client_ready = True
except Exception as e:
logger.error(f'Error streaming docker logs: {e}')
self.log('error', f'Error streaming docker logs: {e}')
def __del__(self):
if self.log_stream_thread.is_alive():
logger.warn(
"LogBuffer was not properly closed. Use 'log_buffer.close()' for clean shutdown."
self.log(
'warn',
"LogBuffer was not properly closed. Use 'log_buffer.close()' for clean shutdown.",
)
self.close(timeout=5)
@@ -168,8 +166,9 @@ class EventStreamRuntime(Runtime):
self.log_buffer: LogBuffer | None = None
if self.config.sandbox.runtime_extra_deps:
logger.debug(
f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}'
self.log(
'debug',
f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}',
)
self.skip_container_logs = (
@@ -194,7 +193,6 @@ class EventStreamRuntime(Runtime):
raise ValueError(
'Neither runtime container image nor base container image is set'
)
logger.info('Preparing container, this might take a few minutes...')
self.send_status_message('STATUS$STARTING_CONTAINER')
self.runtime_container_image = build_runtime_image(
self.base_container_image,
@@ -204,24 +202,32 @@ class EventStreamRuntime(Runtime):
force_rebuild=self.config.sandbox.force_rebuild_runtime,
)
self.log(
'info', f'Starting runtime with image: {self.runtime_container_image}'
)
self._init_container(
sandbox_workspace_dir=self.config.workspace_mount_path_in_sandbox, # e.g. /workspace
mount_dir=self.config.workspace_mount_path, # e.g. /opt/openhands/_test_workspace
plugins=self.plugins,
)
self.log('info', f'Container started: {self.container_name}')
else:
self._attach_to_container()
logger.info('Waiting for client to become ready...')
if not self.attach_to_existing:
self.log('info', f'Waiting for client to become ready at {self.api_url}...')
self.send_status_message('STATUS$WAITING_FOR_CLIENT')
self._wait_until_alive()
if not self.attach_to_existing:
self.log('info', 'Runtime is ready.')
if not self.attach_to_existing:
self.setup_initial_env()
logger.info(
f'Container initialized with plugins: {[plugin.name for plugin in self.plugins]}'
self.log(
'debug',
f'Container initialized with plugins: {[plugin.name for plugin in self.plugins]}',
)
self.send_status_message(' ')
@@ -232,7 +238,7 @@ class EventStreamRuntime(Runtime):
return docker.from_env()
except Exception as ex:
logger.error(
'Launch docker client failed. Please make sure you have installed docker and started docker desktop/daemon.'
'Launch docker client failed. Please make sure you have installed docker and started docker desktop/daemon.',
)
raise ex
@@ -247,7 +253,7 @@ class EventStreamRuntime(Runtime):
plugins: list[PluginRequirement] | None = None,
):
try:
logger.info('Preparing to start container...')
self.log('debug', 'Preparing to start container...')
self.send_status_message('STATUS$PREPARING_CONTAINER')
plugin_arg = ''
if plugins is not None and len(plugins) > 0:
@@ -274,8 +280,9 @@ class EventStreamRuntime(Runtime):
)
if use_host_network:
logger.warn(
'Using host network mode. If you are using MacOS, please make sure you have the latest version of Docker Desktop and enabled host network feature: https://docs.docker.com/network/drivers/host/#docker-desktop'
self.log(
'warn',
'Using host network mode. If you are using MacOS, please make sure you have the latest version of Docker Desktop and enabled host network feature: https://docs.docker.com/network/drivers/host/#docker-desktop',
)
# Combine environment variables
@@ -286,17 +293,18 @@ class EventStreamRuntime(Runtime):
if self.config.debug or DEBUG:
environment['DEBUG'] = 'true'
logger.debug(f'Workspace Base: {self.config.workspace_base}')
self.log('debug', f'Workspace Base: {self.config.workspace_base}')
if mount_dir is not None and sandbox_workspace_dir is not None:
# e.g. result would be: {"/home/user/openhands/workspace": {'bind': "/workspace", 'mode': 'rw'}}
volumes = {mount_dir: {'bind': sandbox_workspace_dir, 'mode': 'rw'}}
logger.debug(f'Mount dir: {mount_dir}')
self.log('debug', f'Mount dir: {mount_dir}')
else:
logger.warn(
'Warning: Mount dir is not set, will not mount the workspace directory to the container!\n'
self.log(
'warn',
'Warning: Mount dir is not set, will not mount the workspace directory to the container!\n',
)
volumes = None
logger.debug(f'Sandbox workspace: {sandbox_workspace_dir}')
self.log('debug', f'Sandbox workspace: {sandbox_workspace_dir}')
if self.config.sandbox.browsergym_eval_env is not None:
browsergym_arg = (
@@ -325,20 +333,21 @@ class EventStreamRuntime(Runtime):
environment=environment,
volumes=volumes,
)
self.log_buffer = LogBuffer(self.container)
logger.info(f'Container started. Server url: {self.api_url}')
self.log_buffer = LogBuffer(self.container, self.log)
self.log('debug', f'Container started. Server url: {self.api_url}')
self.send_status_message('STATUS$CONTAINER_STARTED')
except Exception as e:
logger.error(
f'Error: Instance {self.container_name} FAILED to start container!\n'
self.log(
'error',
f'Error: Instance {self.container_name} FAILED to start container!\n',
)
logger.exception(e)
self.log('error', str(e))
self.close()
raise e
def _attach_to_container(self):
container = self.docker_client.containers.get(self.container_name)
self.log_buffer = LogBuffer(container)
self.log_buffer = LogBuffer(container, self.log)
self.container = container
self._container_port = 0
for port in container.attrs['NetworkSettings']['Ports']:
@@ -346,12 +355,13 @@ class EventStreamRuntime(Runtime):
break
self._host_port = self._container_port
self.api_url = f'{self.config.sandbox.local_runtime_url}:{self._container_port}'
logger.info(
f'attached to container: {self.container_name} {self._container_port} {self.api_url}'
self.log(
'debug',
f'attached to container: {self.container_name} {self._container_port} {self.api_url}',
)
def _refresh_logs(self):
logger.debug('Getting container logs...')
self.log('debug', 'Getting container logs...')
assert (
self.log_buffer is not None
@@ -360,14 +370,15 @@ class EventStreamRuntime(Runtime):
logs = self.log_buffer.get_and_clear()
if logs:
formatted_logs = '\n'.join([f' |{log}' for log in logs])
logger.info(
self.log(
'debug',
'\n'
+ '-' * 35
+ 'Container logs:'
+ '-' * 35
+ f'\n{formatted_logs}'
+ '\n'
+ '-' * 80
+ '-' * 80,
)
@tenacity.retry(
@@ -377,7 +388,7 @@ class EventStreamRuntime(Runtime):
)
def _wait_until_alive(self):
self._refresh_logs()
if not (self.log_buffer and self.log_buffer.client_ready):
if not self.log_buffer:
raise RuntimeError('Runtime client is not ready.')
response = send_request_with_retry(
@@ -391,7 +402,7 @@ class EventStreamRuntime(Runtime):
return
else:
msg = f'Action execution API is not alive. Response: {response}'
logger.error(msg)
self.log('error', msg)
raise RuntimeError(msg)
def close(self, rm_all_containers: bool = True):
@@ -423,8 +434,9 @@ class EventStreamRuntime(Runtime):
elif container.name == self.container_name:
if not self.skip_container_logs:
logs = container.logs(tail=1000).decode('utf-8')
logger.debug(
f'==== Container logs on close ====\n{logs}\n==== End of container logs ===='
self.log(
'debug',
f'==== Container logs on close ====\n{logs}\n==== End of container logs ====',
)
container.remove(force=True)
except docker.errors.APIError:
@@ -483,23 +495,21 @@ class EventStreamRuntime(Runtime):
obs = observation_from_dict(output)
obs._cause = action.id # type: ignore[attr-defined]
else:
logger.debug(f'action: {action}')
logger.debug(f'response: {response}')
self.log('debug', f'action: {action}')
self.log('debug', f'response: {response}')
error_message = response.text
logger.error(f'Error from server: {error_message}')
self.log('error', f'Error from server: {error_message}')
obs = FatalErrorObservation(
f'Action execution failed: {error_message}'
)
except requests.Timeout:
logger.error('No response received within the timeout period.')
self.log('error', 'No response received within the timeout period.')
obs = FatalErrorObservation(
f'Action execution timed out after {action.timeout} seconds.'
)
except Exception as e:
logger.error(f'Error during action execution: {e}')
obs = FatalErrorObservation(
f'Action execution failed: {str(e)}.\n{traceback.format_exc()}'
)
self.log('error', f'Error during action execution: {e}')
obs = FatalErrorObservation(f'Action execution failed: {str(e)}')
self._refresh_logs()
return obs
@@ -577,7 +587,9 @@ class EventStreamRuntime(Runtime):
finally:
if recursive:
os.unlink(temp_zip_path)
logger.info(f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}')
self.log(
'debug', f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}'
)
self._refresh_logs()
def list_files(self, path: str | None = None) -> list[str]:

View File

@@ -9,7 +9,6 @@ import requests
import tenacity
from openhands.core.config import AppConfig
from openhands.core.logger import openhands_logger as logger
from openhands.events import EventStream
from openhands.runtime.impl.eventstream.eventstream_runtime import (
EventStreamRuntime,
@@ -42,7 +41,6 @@ class ModalLogBuffer(LogBuffer):
"""
def __init__(self, sandbox: modal.Sandbox):
self.client_ready = False
self.init_msg = 'Runtime client initialized.'
self.buffer: list[str] = []
@@ -95,8 +93,9 @@ class ModalRuntime(EventStreamRuntime):
# workspace_base cannot be used because we can't bind mount into a sandbox.
if self.config.workspace_base is not None:
logger.warning(
'Setting workspace_base is not supported in the modal runtime.'
self.log(
'warning',
'Setting workspace_base is not supported in the modal runtime.',
)
# This value is arbitrary as it's private to the container
@@ -112,8 +111,9 @@ class ModalRuntime(EventStreamRuntime):
self.log_buffer: LogBuffer | None = None
if self.config.sandbox.runtime_extra_deps:
logger.debug(
f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}'
self.log(
'debug',
f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}',
)
self.init_base_runtime(
@@ -129,7 +129,7 @@ class ModalRuntime(EventStreamRuntime):
async def connect(self):
self.send_status_message('STATUS$STARTING_RUNTIME')
logger.info(f'ModalRuntime `{self.sid}`')
self.log('debug', f'ModalRuntime `{self.sid}`')
self.image = self._get_image_definition(
self.base_container_image_id,
@@ -140,7 +140,7 @@ class ModalRuntime(EventStreamRuntime):
if self.attach_to_existing:
if self.sid in MODAL_RUNTIME_IDS:
sandbox_id = MODAL_RUNTIME_IDS[self.sid]
logger.info(f'Attaching to existing Modal sandbox: {sandbox_id}')
self.log('debug', f'Attaching to existing Modal sandbox: {sandbox_id}')
self.sandbox = modal.Sandbox.from_id(
sandbox_id, client=self.modal_client
)
@@ -159,10 +159,10 @@ class ModalRuntime(EventStreamRuntime):
raise Exception('Sandbox not initialized')
tunnel = self.sandbox.tunnels()[self.container_port]
self.api_url = tunnel.url
logger.info(f'Container started. Server url: {self.api_url}')
self.log('debug', f'Container started. Server url: {self.api_url}')
if not self.attach_to_existing:
logger.info('Waiting for client to become ready...')
self.log('debug', 'Waiting for client to become ready...')
self.send_status_message('STATUS$WAITING_FOR_CLIENT')
self._wait_until_alive()
@@ -219,7 +219,7 @@ echo 'export INPUTRC=/etc/inputrc' >> /etc/bash.bashrc
plugins: list[PluginRequirement] | None = None,
):
try:
logger.info('Preparing to start container...')
self.log('debug', 'Preparing to start container...')
plugin_args = []
if plugins is not None and len(plugins) > 0:
plugin_args.append('--plugins')
@@ -242,7 +242,7 @@ echo 'export INPUTRC=/etc/inputrc' >> /etc/bash.bashrc
env_secret = modal.Secret.from_dict(environment)
logger.debug(f'Sandbox workspace: {sandbox_workspace_dir}')
self.log('debug', f'Sandbox workspace: {sandbox_workspace_dir}')
sandbox_start_cmd = get_remote_startup_command(
self.container_port,
sandbox_workspace_dir,
@@ -251,7 +251,7 @@ echo 'export INPUTRC=/etc/inputrc' >> /etc/bash.bashrc
plugin_args,
browsergym_args,
)
logger.debug(f'Starting container with command: {sandbox_start_cmd}')
self.log('debug', f'Starting container with command: {sandbox_start_cmd}')
self.sandbox = modal.Sandbox.create(
*sandbox_start_cmd,
secrets=[env_secret],
@@ -263,11 +263,13 @@ echo 'export INPUTRC=/etc/inputrc' >> /etc/bash.bashrc
timeout=60 * 60,
)
MODAL_RUNTIME_IDS[self.sid] = self.sandbox.object_id
logger.info('Container started')
self.log('debug', 'Container started')
except Exception as e:
logger.error(f'Error: Instance {self.sid} FAILED to start container!\n')
logger.exception(e)
self.log(
'error', f'Error: Instance {self.sid} FAILED to start container!\n'
)
self.log('error', str(e))
self.close()
raise e

View File

@@ -9,7 +9,6 @@ import requests
from requests.exceptions import Timeout
from openhands.core.config import AppConfig
from openhands.core.logger import openhands_logger as logger
from openhands.events import EventStream
from openhands.events.action import (
BrowseInteractiveAction,
@@ -55,8 +54,15 @@ class RemoteRuntime(Runtime):
status_message_callback: Optional[Callable] = None,
attach_to_existing: bool = False,
):
self.config = config
self.status_message_callback = status_message_callback
super().__init__(
config,
event_stream,
sid,
plugins,
env_vars,
status_message_callback,
attach_to_existing,
)
if self.config.sandbox.api_key is None:
raise ValueError(
@@ -68,8 +74,9 @@ class RemoteRuntime(Runtime):
self.action_semaphore = threading.Semaphore(1)
if self.config.workspace_base is not None:
logger.warning(
'Setting workspace_base is not supported in the remote runtime.'
self.log(
'warning',
'Setting workspace_base is not supported in the remote runtime.',
)
self.runtime_builder = RemoteRuntimeBuilder(
@@ -78,16 +85,6 @@ class RemoteRuntime(Runtime):
self.runtime_id: str | None = None
self.runtime_url: str | None = None
super().__init__(
config,
event_stream,
sid,
plugins,
env_vars,
status_message_callback,
attach_to_existing,
)
async def connect(self):
self._start_or_attach_to_runtime()
self._wait_until_alive()
@@ -96,19 +93,21 @@ class RemoteRuntime(Runtime):
def _start_or_attach_to_runtime(self):
existing_runtime = self._check_existing_runtime()
if existing_runtime:
logger.info(f'Using existing runtime with ID: {self.runtime_id}')
self.log('debug', f'Using existing runtime with ID: {self.runtime_id}')
elif self.attach_to_existing:
raise RuntimeError('Could not find existing runtime to attach to.')
else:
self.send_status_message('STATUS$STARTING_CONTAINER')
if self.config.sandbox.runtime_container_image is None:
logger.info(
f'Building remote runtime with base image: {self.config.sandbox.base_container_image}'
self.log(
'info',
f'Building remote runtime with base image: {self.config.sandbox.base_container_image}',
)
self._build_runtime()
else:
logger.info(
f'Running remote runtime with image: {self.config.sandbox.runtime_container_image}'
self.log(
'info',
f'Starting remote runtime with image: {self.config.sandbox.runtime_container_image}',
)
self.container_image = self.config.sandbox.runtime_container_image
self._start_runtime()
@@ -119,7 +118,12 @@ class RemoteRuntime(Runtime):
self.runtime_url is not None
), 'Runtime URL is not set. This should never happen.'
self.send_status_message('STATUS$WAITING_FOR_CLIENT')
if not self.attach_to_existing:
self.log('info', 'Waiting for runtime to be alive...')
self._wait_until_alive()
if not self.attach_to_existing:
self.log('info', 'Runtime is ready.')
self.send_status_message(' ')
def _check_existing_runtime(self) -> bool:
try:
@@ -130,7 +134,7 @@ class RemoteRuntime(Runtime):
timeout=5,
)
except Exception as e:
logger.debug(f'Error while looking for remote runtime: {e}')
self.log('debug', f'Error while looking for remote runtime: {e}')
return False
if response.status_code == 200:
@@ -140,22 +144,22 @@ class RemoteRuntime(Runtime):
self._parse_runtime_response(response)
return True
elif status == 'stopped':
logger.info('Found existing remote runtime, but it is stopped')
self.log('debug', 'Found existing remote runtime, but it is stopped')
return False
elif status == 'paused':
logger.info('Found existing remote runtime, but it is paused')
self.log('debug', 'Found existing remote runtime, but it is paused')
self._parse_runtime_response(response)
self._resume_runtime()
return True
else:
logger.error(f'Invalid response from runtime API: {data}')
self.log('error', f'Invalid response from runtime API: {data}')
return False
else:
logger.info('Could not find existing remote runtime')
self.log('debug', 'Could not find existing remote runtime')
return False
def _build_runtime(self):
logger.debug(f'RemoteRuntime `{self.sid}` config:\n{self.config}')
self.log('debug', f'Building RemoteRuntime config:\n{self.config}')
response = send_request_with_retry(
self.session,
'GET',
@@ -167,13 +171,15 @@ class RemoteRuntime(Runtime):
os.environ['OH_RUNTIME_RUNTIME_IMAGE_REPO'] = (
registry_prefix.rstrip('/') + '/runtime'
)
logger.info(
f'Runtime image repo: {os.environ["OH_RUNTIME_RUNTIME_IMAGE_REPO"]}'
self.log(
'debug',
f'Runtime image repo: {os.environ["OH_RUNTIME_RUNTIME_IMAGE_REPO"]}',
)
if self.config.sandbox.runtime_extra_deps:
logger.info(
f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}'
self.log(
'debug',
f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}',
)
# Build the container image
@@ -234,8 +240,9 @@ class RemoteRuntime(Runtime):
f'[Runtime (ID={self.runtime_id})] Failed to start runtime: {response.text}'
)
self._parse_runtime_response(response)
logger.info(
f'[Runtime (ID={self.runtime_id})] Runtime started. URL: {self.runtime_url}'
self.log(
'debug',
f'Runtime started. URL: {self.runtime_url}',
)
def _resume_runtime(self):
@@ -250,7 +257,7 @@ class RemoteRuntime(Runtime):
raise RuntimeError(
f'[Runtime (ID={self.runtime_id})] Failed to resume runtime: {response.text}'
)
logger.info(f'[Runtime (ID={self.runtime_id})] Runtime resumed.')
self.log('debug', 'Runtime resumed.')
def _parse_runtime_response(self, response: requests.Response):
start_response = response.json()
@@ -262,7 +269,7 @@ class RemoteRuntime(Runtime):
)
def _wait_until_alive(self):
logger.info(f'Waiting for runtime to be alive at url: {self.runtime_url}')
self.log('debug', f'Waiting for runtime to be alive at url: {self.runtime_url}')
# send GET request to /runtime/<id>
pod_running = False
max_not_found_count = 12 # 2 minutes
@@ -281,16 +288,18 @@ class RemoteRuntime(Runtime):
runtime_data = runtime_info_response.json()
assert runtime_data['runtime_id'] == self.runtime_id
pod_status = runtime_data['pod_status']
logger.info(
f'Waiting for runtime pod to be active. Current status: {pod_status}'
self.log(
'debug',
f'Waiting for runtime pod to be active. Current status: {pod_status}',
)
if pod_status == 'Ready':
pod_running = True
break
elif pod_status == 'Not Found' and not_found_count < max_not_found_count:
not_found_count += 1
logger.info(
f'Runtime pod not found. Count: {not_found_count} / {max_not_found_count}'
self.log(
'debug',
f'Runtime pod not found. Count: {not_found_count} / {max_not_found_count}',
)
elif pod_status in ('Failed', 'Unknown', 'Not Found'):
# clean up the runtime
@@ -314,7 +323,7 @@ class RemoteRuntime(Runtime):
)
if response.status_code != 200:
msg = f'Runtime (ID={self.runtime_id}) is not alive yet. Status: {response.status_code}.'
logger.warning(msg)
self.log('warning', msg)
raise RuntimeError(msg)
def close(self, timeout: int = 10):
@@ -331,11 +340,12 @@ class RemoteRuntime(Runtime):
timeout=timeout,
)
if response.status_code != 200:
logger.error(
f'[Runtime (ID={self.runtime_id})] Failed to stop runtime: {response.text}'
self.log(
'error',
f'Failed to stop runtime: {response.text}',
)
else:
logger.info(f'[Runtime (ID={self.runtime_id})] Runtime stopped.')
self.log('debug', 'Runtime stopped.')
except Exception as e:
raise e
finally:
@@ -363,7 +373,7 @@ class RemoteRuntime(Runtime):
try:
request_body = {'action': event_to_dict(action)}
logger.debug(f'Request body: {request_body}')
self.log('debug', f'Request body: {request_body}')
response = send_request_with_retry(
self.session,
'POST',
@@ -378,17 +388,17 @@ class RemoteRuntime(Runtime):
return obs
else:
error_message = response.text
logger.error(f'Error from server: {error_message}')
self.log('error', f'Error from server: {error_message}')
obs = FatalErrorObservation(
f'Action execution failed: {error_message}'
)
except Timeout:
logger.error('No response received within the timeout period.')
self.log('error', 'No response received within the timeout period.')
obs = FatalErrorObservation(
f'[Runtime (ID={self.runtime_id})] Action execution timed out'
)
except Exception as e:
logger.error(f'Error during action execution: {e}')
self.log('error', f'Error during action execution: {e}')
obs = FatalErrorObservation(
f'[Runtime (ID={self.runtime_id})] Action execution failed: {str(e)}'
)
@@ -449,8 +459,9 @@ class RemoteRuntime(Runtime):
timeout=300,
)
if response.status_code == 200:
logger.info(
f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}. Response: {response.text}'
self.log(
'debug',
f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}. Response: {response.text}',
)
return
else:
@@ -469,7 +480,9 @@ class RemoteRuntime(Runtime):
finally:
if recursive:
os.unlink(temp_zip_path)
logger.info(f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}')
self.log(
'debug', f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}'
)
def list_files(self, path: str | None = None) -> list[str]:
try:

View File

@@ -48,7 +48,7 @@ class JupyterPlugin(Plugin):
time.sleep(1)
logger.debug('Waiting for jupyter kernel gateway to start...')
logger.info(
logger.debug(
f'Jupyter kernel gateway started at port {self.kernel_gateway_port}. Output: {output}'
)
_obs = await self.run(

0
openhands/runtime/plugins/jupyter/execute_server.py Executable file → Normal file
View File

View File

@@ -111,14 +111,14 @@ class FileEditRuntimeMixin(FileEditRuntimeInterface):
model_name='draft_editor:' + llm_config.draft_editor.model
)
if llm_config.draft_editor.caching_prompt:
logger.info(
logger.debug(
'It is not recommended to cache draft editor LLM prompts as it may incur high costs for the same prompt. '
'Automatically setting caching_prompt=false.'
)
llm_config.draft_editor.caching_prompt = False
self.draft_editor_llm = LLM(llm_config.draft_editor, metrics=llm_metrics)
logger.info(
logger.debug(
f'[Draft edit functionality] enabled with LLM: {self.draft_editor_llm}'
)

View File

@@ -70,7 +70,7 @@ def get_runtime_image_repo_and_tag(base_image: str) -> tuple[str, str]:
"""
if get_runtime_image_repo() in base_image:
logger.info(
logger.debug(
f'The provided image [{base_image}] is already a valid runtime image.\n'
f'Will try to reuse it as is.'
)
@@ -173,8 +173,9 @@ def build_runtime_image_in_folder(
source_tag = f'{lock_tag}_{get_hash_for_source_files()}'
hash_image_name = f'{runtime_image_repo}:{source_tag}'
logger.info(f'Building image: {hash_image_name}')
if force_rebuild:
logger.info(f'Force rebuild: [{runtime_image_repo}:{source_tag}] from scratch.')
logger.debug(f'Force rebuild: [{runtime_image_repo}:{source_tag}] from scratch.')
prep_build_folder(
build_folder,
base_image,
@@ -198,14 +199,14 @@ def build_runtime_image_in_folder(
# If the exact image already exists, we do not need to build it
if runtime_builder.image_exists(hash_image_name, False):
logger.info(f'Reusing Image [{hash_image_name}]')
logger.debug(f'Reusing Image [{hash_image_name}]')
return hash_image_name
# We look for an existing image that shares the same lock_tag. If such an image exists, we
# can use it as the base image for the build and just copy source files. This makes the build
# much faster.
if runtime_builder.image_exists(lock_image_name):
logger.info(f'Build [{hash_image_name}] from lock image [{lock_image_name}]')
logger.debug(f'Build [{hash_image_name}] from lock image [{lock_image_name}]')
build_from = BuildFromImageType.LOCK
base_image = lock_image_name
elif runtime_builder.image_exists(versioned_image_name):
@@ -215,7 +216,7 @@ def build_runtime_image_in_folder(
build_from = BuildFromImageType.VERSIONED
base_image = versioned_image_name
else:
logger.info(f'Build [{hash_image_name}] from scratch')
logger.debug(f'Build [{hash_image_name}] from scratch')
prep_build_folder(build_folder, base_image, build_from, extra_deps)
if not dry_run:
@@ -246,7 +247,7 @@ def prep_build_folder(
# If package is not found, build from source code
openhands_source_dir = Path(openhands.__file__).parent
project_root = openhands_source_dir.parent
logger.info(f'Building source distribution using project root: {project_root}')
logger.debug(f'Building source distribution using project root: {project_root}')
# Copy the 'openhands' directory (Source code)
shutil.copytree(
@@ -373,14 +374,14 @@ if __name__ == '__main__':
assert os.path.exists(
build_folder
), f'Build folder {build_folder} does not exist'
logger.info(
logger.debug(
f'Copying the source code and generating the Dockerfile in the build folder: {build_folder}'
)
runtime_image_repo, runtime_image_tag = get_runtime_image_repo_and_tag(
args.base_image
)
logger.info(
logger.debug(
f'Runtime image repo: {runtime_image_repo} and runtime image tag: {runtime_image_tag}'
)
@@ -402,7 +403,7 @@ if __name__ == '__main__':
# Move contents of temp_dir to build_folder
shutil.copytree(temp_dir, build_folder, dirs_exist_ok=True)
logger.info(
logger.debug(
f'Build folder [{build_folder}] is ready: {os.listdir(build_folder)}'
)
@@ -416,18 +417,19 @@ if __name__ == '__main__':
f'DOCKER_IMAGE_SOURCE_TAG={runtime_image_source_tag}\n'
)
)
logger.info(
logger.debug(
f'`config.sh` is updated with the image repo[{runtime_image_repo}] and tags [{runtime_image_tag}, {runtime_image_source_tag}]'
)
logger.info(
logger.debug(
f'Dockerfile, source code and config.sh are ready in {build_folder}'
)
else:
# If a build_folder is not provided, after copying the required source code and dynamically creating the
# Dockerfile, we actually build the Docker image
logger.info('Building image in a temporary folder')
logger.debug('Building image in a temporary folder')
docker_builder = DockerRuntimeBuilder(docker.from_env())
image_name = build_runtime_image(
args.base_image, docker_builder, platform=args.platform
)
print(f'\nBUILT Image: {image_name}\n')
logger.debug(f'\nBuilt image: {image_name}\n')

View File

@@ -33,7 +33,7 @@ def init_user_and_working_directory(
"""
# First create the working directory, independent of the user
logger.info(f'Client working directory: {initial_pwd}')
logger.debug(f'Client working directory: {initial_pwd}')
command = f'umask 002; mkdir -p {initial_pwd}'
output = subprocess.run(command, shell=True, capture_output=True)
out_str = output.stdout.decode()

View File

@@ -24,7 +24,7 @@ class SecurityAnalyzer:
async def on_event(self, event: Event) -> None:
"""Handles the incoming event, and when Action is received, analyzes it for security risks."""
logger.info(f'SecurityAnalyzer received event: {event}')
logger.debug(f'SecurityAnalyzer received event: {event}')
await self.log_event(event)
if not isinstance(event, Action):
return

View File

@@ -82,7 +82,7 @@ class InvariantAnalyzer(SecurityAnalyzer):
while self.container.status != 'running':
self.container = self.docker_client.containers.get(self.container_name)
elapsed += 1
logger.info(
logger.debug(
f'waiting for container to start: {elapsed}, container status: {self.container.status}'
)
if elapsed > self.timeout:
@@ -109,7 +109,7 @@ class InvariantAnalyzer(SecurityAnalyzer):
self.trace.extend(element)
self.input.extend([e.model_dump(exclude_none=True) for e in element]) # type: ignore [call-overload]
else:
logger.info('Invariant skipping element: event')
logger.debug('Invariant skipping element: event')
def get_risk(self, results: list[str]) -> ActionSecurityRisk:
mapping = {
@@ -151,7 +151,7 @@ class InvariantAnalyzer(SecurityAnalyzer):
await call_sync_from_async(self.event_stream.add_event, new_event, event_source)
async def security_risk(self, event: Action) -> ActionSecurityRisk:
logger.info('Calling security_risk on InvariantAnalyzer')
logger.debug('Calling security_risk on InvariantAnalyzer')
new_elements = parse_element(self.trace, event)
input = [e.model_dump(exclude_none=True) for e in new_elements] # type: ignore [call-overload]
self.trace.extend(new_elements)

View File

@@ -28,7 +28,7 @@ def store_feedback(feedback: FeedbackDataModel) -> dict[str, str]:
)
if 'token' in display_feedback:
display_feedback['token'] = 'elided'
logger.info(f'Got feedback: {display_feedback}')
logger.debug(f'Got feedback: {display_feedback}')
# Start actual request
response = requests.post(
FEEDBACK_URL,
@@ -38,5 +38,5 @@ def store_feedback(feedback: FeedbackDataModel) -> dict[str, str]:
if response.status_code != 200:
raise ValueError(f'Failed to store feedback: {response.text}')
response_data = json.loads(response.text)
logger.info(f'Stored feedback: {response.text}')
logger.debug(f'Stored feedback: {response.text}')
return response_data

View File

@@ -325,6 +325,7 @@ async def websocket_endpoint(websocket: WebSocket):
sid = str(uuid.uuid4())
token = sign_token({'sid': sid}, config.jwt_secret)
logger.info(f'New session: {sid}')
session = session_manager.add_or_restart_session(sid, websocket)
await websocket.send_json({'token': token, 'status': 'ok'})
@@ -488,7 +489,7 @@ async def list_files(request: Request, path: str | None = None):
GitWildMatchPattern, observation.content.splitlines()
)
except Exception as e:
print(e)
logger.warning(e)
return file_list
file_list = [entry for entry in file_list if not spec.match_file(entry)]
return file_list
@@ -767,7 +768,7 @@ async def security_api(request: Request):
@app.get('/api/zip-directory')
async def zip_current_workspace(request: Request):
try:
logger.info('Zipping workspace')
logger.debug('Zipping workspace')
runtime: Runtime = request.state.conversation.runtime
path = runtime.config.workspace_mount_path_in_sandbox
@@ -801,7 +802,7 @@ def github_callback(auth_code: AuthCode):
'code': auth_code.code,
}
logger.info('Exchanging code for GitHub token')
logger.debug('Exchanging code for GitHub token')
headers = {'Accept': 'application/json'}
response = requests.post(

View File

@@ -1,6 +1,7 @@
import uvicorn
from fastapi import FastAPI, WebSocket
from openhands.core.logger import openhands_logger as logger
from openhands.core.schema import ActionType
from openhands.runtime.utils.shutdown_listener import should_continue
@@ -19,14 +20,14 @@ async def websocket_endpoint(websocket: WebSocket):
while should_continue():
# receive message
data = await websocket.receive_json()
print(f'Received message: {data}')
logger.debug(f'Received message: {data}')
# send mock response to client
response = {'message': f'receive {data}'}
await websocket.send_json(response)
print(f'Sent message: {response}')
logger.debug(f'Sent message: {response}')
except Exception as e:
print(f'WebSocket Error: {e}')
logger.debug(f'WebSocket Error: {e}')
@app.get('/')

View File

@@ -88,7 +88,7 @@ class AgentSession:
asyncio.run(self._start(*args), debug=True)
except RuntimeError:
logger.error(f'Error starting session: {RuntimeError}', exc_info=True)
logger.info('Session Finished')
logger.debug('Session Finished')
async def _start(
self,
@@ -174,7 +174,7 @@ class AgentSession:
if self.runtime is not None:
raise RuntimeError('Runtime already created')
logger.info(f'Initializing runtime `{runtime_name}` now...')
logger.debug(f'Initializing runtime `{runtime_name}` now...')
runtime_cls = get_runtime_cls(runtime_name)
self.runtime = runtime_cls(
config=config,
@@ -240,7 +240,7 @@ class AgentSession:
f'Plugins: {agent.sandbox_plugins}\n'
'-------------------------------------------------------------------------------------------'
)
logger.info(msg)
logger.debug(msg)
self.controller = AgentController(
sid=self.sid,
@@ -260,7 +260,7 @@ class AgentSession:
self.controller.set_initial_state(
agent_state, max_iterations, confirmation_mode
)
logger.info(f'Restored agent state from session, sid: {self.sid}')
logger.debug(f'Restored agent state from session, sid: {self.sid}')
except Exception as e:
logger.info(f'State could not be restored: {e}')
logger.info('Agent controller initialized.')
logger.debug(f'State could not be restored: {e}')
logger.debug('Agent controller initialized.')

View File

@@ -88,7 +88,7 @@ class SessionManager:
to_del_session: Session | None = self._sessions.pop(sid, None)
if to_del_session is not None:
await to_del_session.close()
logger.info(
logger.debug(
f'Session {sid} and related resource have been removed due to inactivity.'
)

View File

@@ -66,7 +66,7 @@ class Session:
await self.dispatch(data)
except WebSocketDisconnect:
await self.close()
logger.info('WebSocket disconnected, sid: %s', self.sid)
logger.debug('WebSocket disconnected, sid: %s', self.sid)
except RuntimeError as e:
await self.close()
logger.exception('Error in loop_recv: %s', e)

View File

@@ -8,6 +8,8 @@ import pylcs
from pydantic import BaseModel
from tree_sitter_languages import get_parser
from openhands.core.logger import openhands_logger as logger
class Chunk(BaseModel):
text: str
@@ -43,7 +45,7 @@ def create_chunks(
try:
parser = get_parser(language) if language is not None else None
except AttributeError:
# print(f"Language {language} not supported. Falling back to raw string.")
logger.debug(f'Language {language} not supported. Falling back to raw string.')
parser = None
if parser is None:

View File

@@ -4,6 +4,7 @@ import os
from joblib import Parallel, delayed
from openhands.core.config import LLMConfig
from openhands.core.logger import openhands_logger as logger
try:
# check if those we need later are available using importlib
@@ -139,7 +140,7 @@ class EmbeddingsLoader:
torch.backends.mps.is_built = False
# the device being used
print(f'Using device for embeddings: {device}')
logger.debug(f'Using device for embeddings: {device}')
return local_embed_model

View File

@@ -30,7 +30,7 @@ class MicroAgent:
return self._content
def _validate_micro_agent(self):
logger.info(
logger.debug(
f'Loading and validating micro agent [{self._metadata.name}] based on [{self._metadata.agent}]'
)
# Make sure the agent is registered

View File

@@ -405,32 +405,20 @@ def test_build_runtime_image_exact_hash_not_exist_and_lock_not_exist_and_version
# ==============================
def test_output_progress(docker_runtime_builder):
with patch('sys.stdout.isatty', return_value=True):
with patch('sys.stdout.write') as mock_write, patch('sys.stdout.flush'):
docker_runtime_builder._output_logs('new log line')
mock_write.assert_any_call('\033[F' * 10)
mock_write.assert_any_call('\033[2Knew log line\n')
def test_output_build_progress(docker_runtime_builder):
with patch('sys.stdout.isatty', return_value=True):
with patch('sys.stdout.write') as mock_write, patch('sys.stdout.flush'):
layers = {}
docker_runtime_builder._output_build_progress(
{
'id': 'layer1',
'status': 'Downloading',
'progressDetail': {'current': 50, 'total': 100},
},
layers,
0,
)
mock_write.assert_any_call('\033[F' * 0)
mock_write.assert_any_call('\033[2K\r')
assert layers['layer1']['status'] == 'Downloading'
assert layers['layer1']['progress'] == ''
assert layers['layer1']['last_logged'] == 50.0
layers = {}
docker_runtime_builder._output_build_progress(
{
'id': 'layer1',
'status': 'Downloading',
'progressDetail': {'current': 50, 'total': 100},
},
layers,
0,
)
assert layers['layer1']['status'] == 'Downloading'
assert layers['layer1']['progress'] == ''
assert layers['layer1']['last_logged'] == 50.0
@pytest.fixture(scope='function')
@@ -493,8 +481,8 @@ def live_docker_image():
def test_init(docker_runtime_builder):
assert isinstance(docker_runtime_builder.docker_client, docker.DockerClient)
assert docker_runtime_builder.max_lines == 10
assert docker_runtime_builder.log_lines == [''] * 10
assert docker_runtime_builder.rolling_logger.max_lines == 10
assert docker_runtime_builder.rolling_logger.log_lines == [''] * 10
def test_build_image_from_scratch(docker_runtime_builder, tmp_path):
@@ -510,17 +498,16 @@ CMD ["sh", "-c", "echo 'Hello, World!'"]
container = None
client = docker.from_env()
try:
with patch('sys.stdout.isatty', return_value=False):
built_image_name = docker_runtime_builder.build(
context_path,
tags,
use_local_cache=False,
)
assert built_image_name == f'{tags[0]}'
built_image_name = docker_runtime_builder.build(
context_path,
tags,
use_local_cache=False,
)
assert built_image_name == f'{tags[0]}'
# Verify the image was created
image = client.images.get(tags[0])
assert image is not None
# Verify the image was created
image = client.images.get(tags[0])
assert image is not None
except docker.errors.ImageNotFound:
pytest.fail('test_build_image_from_scratch: test image not found!')
@@ -583,16 +570,15 @@ CMD ["sh", "-c", "echo 'Hello, World!'"]
container = None
client = docker.from_env()
try:
with patch('sys.stdout.isatty', return_value=False):
built_image_name = docker_runtime_builder.build(
context_path,
tags,
use_local_cache=False,
)
assert built_image_name == f'{tags[0]}'
built_image_name = docker_runtime_builder.build(
context_path,
tags,
use_local_cache=False,
)
assert built_image_name == f'{tags[0]}'
image = client.images.get(tags[0])
assert image is not None
image = client.images.get(tags[0])
assert image is not None
except docker.errors.ImageNotFound:
pytest.fail('test_build_image_from_repo: test image not found!')