tobitege 554636cf2a
(fix) Fix runtime (RT) tests and split tests in 2 actions (openhands/root) (#3791)
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2024-09-14 21:51:30 +02:00

553 lines
21 KiB
Python

import os
import tempfile
import threading
import time
import uuid
from zipfile import ZipFile
import docker
import requests
import tenacity
from openhands.core.config import AppConfig
from openhands.core.logger import openhands_logger as logger
from openhands.events import EventStream
from openhands.events.action import (
ActionConfirmationStatus,
BrowseInteractiveAction,
BrowseURLAction,
CmdRunAction,
FileReadAction,
FileWriteAction,
IPythonRunCellAction,
)
from openhands.events.action.action import Action
from openhands.events.observation import (
ErrorObservation,
NullObservation,
Observation,
UserRejectObservation,
)
from openhands.events.serialization import event_to_dict, observation_from_dict
from openhands.events.serialization.action import ACTION_TYPE_TO_CLASS
from openhands.runtime.builder import DockerRuntimeBuilder
from openhands.runtime.plugins import PluginRequirement
from openhands.runtime.runtime import Runtime
from openhands.runtime.utils import find_available_tcp_port
from openhands.runtime.utils.runtime_build import build_runtime_image
class LogBuffer:
    """Synchronous buffer for Docker container logs.

    A background daemon thread reads the container's (blocking) log stream and
    appends every decoded line to an in-memory list guarded by a lock. Callers
    drain the list with :meth:`get_and_clear`. ``client_ready`` flips to True
    once a log line containing ``init_msg`` has been seen.
    """

    def __init__(self, container: 'docker.models.containers.Container'):
        self.client_ready = False
        self.init_msg = 'Runtime client initialized.'

        self.buffer: list[str] = []
        self.lock = threading.Lock()
        # The stop event MUST exist before the reader thread starts, because
        # stream_logs() reads it on every iteration. Creating it afterwards
        # races with the thread and can kill log streaming with an
        # AttributeError that the broad `except` in stream_logs() swallows.
        self._stop_event = threading.Event()
        self.log_generator = container.logs(stream=True, follow=True)
        self.log_stream_thread = threading.Thread(
            target=self.stream_logs, daemon=True
        )
        self.log_stream_thread.start()

    def append(self, log_line: str):
        """Append one log line to the buffer under the lock."""
        with self.lock:
            self.buffer.append(log_line)

    def get_and_clear(self) -> list[str]:
        """Return a copy of all buffered lines and empty the buffer atomically."""
        with self.lock:
            logs = list(self.buffer)
            self.buffer.clear()
        return logs

    def stream_logs(self):
        """Stream logs from the Docker container in a separate thread.

        This method runs in its own thread to handle the blocking
        operation of reading log lines from the Docker SDK's synchronous generator.
        The stop event is only checked when a new chunk arrives from the generator.
        """
        try:
            for log_line in self.log_generator:
                if self._stop_event.is_set():
                    break
                if log_line:
                    # errors='replace': one undecodable byte must not abort the
                    # whole stream (which would leave client_ready False forever).
                    decoded_line = log_line.decode('utf-8', errors='replace').rstrip()
                    self.append(decoded_line)
                    if self.init_msg in decoded_line:
                        self.client_ready = True
        except Exception as e:
            logger.error(f'Error streaming docker logs: {e}')

    def __del__(self):
        # __init__ may have raised before the thread attribute was assigned.
        thread = getattr(self, 'log_stream_thread', None)
        if thread is not None and thread.is_alive():
            logger.warning(
                "LogBuffer was not properly closed. Use 'log_buffer.close()' for clean shutdown."
            )
            self.close(timeout=5)

    def close(self, timeout: float = 5.0):
        """Signal the reader thread to stop and wait up to ``timeout`` seconds for it.

        The thread only notices the stop event when the log generator yields
        again, so the join may time out while the thread is still blocked.
        """
        self._stop_event.set()
        self.log_stream_thread.join(timeout)
class EventStreamRuntime(Runtime):
    """This runtime will subscribe to the event stream.

    When it receives an event, it sends the event to the runtime client, which runs
    inside the docker environment. From the sid an instance_id is also generated in
    combination with a UUID; the container name is the class prefix plus that id.

    Args:
        config (AppConfig): The application configuration.
        event_stream (EventStream): The event stream to subscribe to.
        sid (str, optional): The session ID. Defaults to 'default'.
        plugins (list[PluginRequirement] | None, optional): List of plugin requirements. Defaults to None.
        env_vars (dict[str, str] | None, optional): Environment variables to set. Defaults to None.
    """

    container_name_prefix = 'openhands-sandbox-'

    def __init__(
        self,
        config: AppConfig,
        event_stream: EventStream,
        sid: str = 'default',
        plugins: list[PluginRequirement] | None = None,
        env_vars: dict[str, str] | None = None,
    ):
        """Build the runtime image if needed and start the sandbox container.

        Raises:
            ValueError: If neither a runtime image nor a base image is configured.
        """
        self.config = config
        self._host_port = 30000  # initial dummy value
        self._container_port = 30001  # initial dummy value
        self.api_url = (
            f'http://{self.config.sandbox.api_hostname}:{self._container_port}'
        )
        self.session = requests.Session()

        self.instance_id = (
            sid + '_' + str(uuid.uuid4()) if sid is not None else str(uuid.uuid4())
        )
        self.docker_client: docker.DockerClient = self._init_docker_client()
        self.base_container_image = self.config.sandbox.base_container_image
        self.runtime_container_image = self.config.sandbox.runtime_container_image
        self.container_name = self.container_name_prefix + self.instance_id

        self.container = None
        self.action_semaphore = threading.Semaphore(1)  # Ensure one action at a time

        self.runtime_builder = DockerRuntimeBuilder(self.docker_client)
        logger.debug(f'EventStreamRuntime `{self.instance_id}`')

        # Buffer for container logs
        self.log_buffer: LogBuffer | None = None

        if self.config.sandbox.runtime_extra_deps:
            logger.info(
                f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}'
            )

        # Must be set before _init_container(): its failure path calls close(),
        # which reads this attribute.
        self.skip_container_logs = (
            os.environ.get('SKIP_CONTAINER_LOGS', 'false').lower() == 'true'
        )

        if self.runtime_container_image is None:
            if self.base_container_image is None:
                raise ValueError(
                    'Neither runtime container image nor base container image is set'
                )
            self.runtime_container_image = build_runtime_image(
                self.base_container_image,
                self.runtime_builder,
                extra_deps=self.config.sandbox.runtime_extra_deps,
            )

        self.container = self._init_container(
            sandbox_workspace_dir=self.config.workspace_mount_path_in_sandbox,  # e.g. /workspace
            mount_dir=self.config.workspace_mount_path,  # e.g. /opt/openhands/_test_workspace
            plugins=plugins,
        )

        # will initialize both the event stream and the env vars
        super().__init__(config, event_stream, sid, plugins, env_vars)

        logger.info(
            f'Container initialized with plugins: {[plugin.name for plugin in self.plugins]}'
        )
        # NOTE(review): this logs env var VALUES at info level; if callers pass
        # secrets through env_vars, consider logging only the names.
        logger.info(f'Container initialized with env vars: {env_vars}')
        time.sleep(1)

    @staticmethod
    def _init_docker_client() -> docker.DockerClient:
        """Create a Docker client from the environment, logging a hint on failure."""
        try:
            return docker.from_env()
        except Exception:
            logger.error(
                'Launch docker client failed. Please make sure you have installed docker and started docker desktop/daemon.'
            )
            raise

    @tenacity.retry(
        stop=tenacity.stop_after_attempt(5),
        wait=tenacity.wait_exponential(multiplier=1, min=4, max=60),
    )
    def _init_container(
        self,
        sandbox_workspace_dir: str,
        mount_dir: str | None = None,
        plugins: list[PluginRequirement] | None = None,
    ):
        """Start the sandbox container and attach a LogBuffer to it.

        Picks a free port, updates ``api_url``, and runs the runtime client inside
        the container. On any failure the runtime is closed (keeping the Docker
        client open) and the exception is re-raised so tenacity can retry.

        Returns:
            The started container object returned by the Docker SDK.
        """
        try:
            logger.info(
                f'Starting container with image: {self.runtime_container_image} and name: {self.container_name}'
            )
            plugin_arg = ''
            if plugins is not None and len(plugins) > 0:
                plugin_arg = (
                    f'--plugins {" ".join([plugin.name for plugin in plugins])} '
                )

            self._host_port = self._find_available_port()
            self._container_port = (
                self._host_port
            )  # in future this might differ from host port
            self.api_url = (
                f'http://{self.config.sandbox.api_hostname}:{self._container_port}'
            )

            use_host_network = self.config.sandbox.use_host_network
            network_mode: str | None = 'host' if use_host_network else None
            port_mapping: dict[str, list[dict[str, str]]] | None = (
                None
                if use_host_network
                else {
                    f'{self._container_port}/tcp': [{'HostPort': str(self._host_port)}]
                }
            )

            if use_host_network:
                logger.warning(
                    'Using host network mode. If you are using MacOS, please make sure you have the latest version of Docker Desktop and enabled host network feature: https://docs.docker.com/network/drivers/host/#docker-desktop'
                )

            # Combine environment variables (all values kept as strings)
            environment = {
                'port': str(self._container_port),
                'PYTHONUNBUFFERED': '1',
            }
            if self.config.debug:
                environment['DEBUG'] = 'true'

            logger.info(f'Workspace Base: {self.config.workspace_base}')
            if mount_dir is not None and sandbox_workspace_dir is not None:
                # e.g. result would be: {"/home/user/openhands/workspace": {'bind': "/workspace", 'mode': 'rw'}}
                volumes = {mount_dir: {'bind': sandbox_workspace_dir, 'mode': 'rw'}}
                logger.info(f'Mount dir: {mount_dir}')
            else:
                logger.warning(
                    'Warning: Mount dir is not set, will not mount the workspace directory to the container!\n'
                )
                volumes = None
            logger.info(f'Sandbox workspace: {sandbox_workspace_dir}')

            if self.config.sandbox.browsergym_eval_env is not None:
                browsergym_arg = (
                    f'--browsergym-eval-env {self.config.sandbox.browsergym_eval_env}'
                )
            else:
                browsergym_arg = ''

            container = self.docker_client.containers.run(
                self.runtime_container_image,
                command=(
                    f'/openhands/miniforge3/bin/mamba run --no-capture-output -n base '
                    f'poetry run '
                    f'python -u -m openhands.runtime.client.client {self._container_port} '
                    f'--working-dir "{sandbox_workspace_dir}" '
                    f'{plugin_arg}'
                    f'--username {"openhands" if self.config.run_as_openhands else "root"} '
                    f'--user-id {self.config.sandbox.user_id} '
                    f'{browsergym_arg}'
                ),
                network_mode=network_mode,
                ports=port_mapping,
                working_dir='/openhands/code/',  # do not change this!
                name=self.container_name,
                detach=True,
                environment=environment,
                volumes=volumes,
            )
            self.log_buffer = LogBuffer(container)
            logger.info(f'Container started. Server url: {self.api_url}')
            return container
        except Exception as e:
            logger.error(
                f'Error: Instance {self.instance_id} FAILED to start container!\n'
            )
            logger.exception(e)
            self.close(close_client=False)
            raise

    def _log_container_output(self) -> None:
        """Drain the log buffer and, if anything was buffered, log it framed."""
        assert (
            self.log_buffer is not None
        ), 'Log buffer is expected to be initialized when container is started'
        logs = self.log_buffer.get_and_clear()
        if logs:
            formatted_logs = '\n'.join([f' |{log}' for log in logs])
            logger.info(
                '\n'
                + '-' * 35
                + 'Container logs:'
                + '-' * 35
                + f'\n{formatted_logs}'
                + '\n'
                + '-' * 80
            )

    @tenacity.retry(
        stop=tenacity.stop_after_attempt(10),
        wait=tenacity.wait_exponential(multiplier=2, min=1, max=20),
        # `reraise` is a boolean flag in tenacity; the previous tuple value was
        # merely truthy, so this keeps the same behavior (re-raise the last error).
        reraise=True,
    )
    def _wait_until_alive(self):
        """Dump pending container logs, then check the client's /alive endpoint.

        Waits briefly (up to 1s + 4 * 2s) for the client's init message to show up
        in the logs before probing the endpoint.

        Raises:
            RuntimeError: If /alive does not answer with HTTP 200.
        """
        logger.debug('Getting container logs...')

        # Print and clear the log buffer
        assert (
            self.log_buffer is not None
        ), 'Log buffer is expected to be initialized when container is started'

        # Always process logs, regardless of client_ready status
        self._log_container_output()

        if not self.log_buffer.client_ready:
            time.sleep(1)
            attempts = 0
            while not self.log_buffer.client_ready and attempts < 4:
                attempts += 1
                time.sleep(2)
                self._log_container_output()

        response = self.session.get(f'{self.api_url}/alive')
        if response.status_code == 200:
            return
        msg = f'Action execution API is not alive. Response: {response}'
        logger.error(msg)
        raise RuntimeError(msg)

    def close(self, close_client: bool = True, rm_all_containers: bool = True):
        """Closes the EventStreamRuntime and associated objects

        Parameters:
        - close_client (bool): Whether to close the DockerClient
        - rm_all_containers (bool): Whether to remove all containers with the 'openhands-sandbox-' prefix

        Note: this instance's container name starts with the prefix too, so when
        rm_all_containers is true it is removed by the first branch and its logs
        are not dumped.
        """
        if self.log_buffer:
            self.log_buffer.close()

        if self.session:
            self.session.close()

        try:
            containers = self.docker_client.containers.list(all=True)
            for container in containers:
                try:
                    # If the app doesn't shut down properly, it can leave runtime containers on the system. This ensures
                    # that all 'openhands-sandbox-' containers are removed as well.
                    if rm_all_containers and container.name.startswith(
                        self.container_name_prefix
                    ):
                        container.remove(force=True)
                    elif container.name == self.container_name:
                        if not self.skip_container_logs:
                            logs = container.logs(tail=1000).decode('utf-8')
                            logger.debug(
                                f'==== Container logs on close ====\n{logs}\n==== End of container logs ===='
                            )
                        container.remove(force=True)
                except docker.errors.APIError:
                    # Best-effort cleanup. NotFound is a subclass of APIError,
                    # so an already-removed container is covered here as well.
                    pass
        except docker.errors.NotFound:  # yes, this can happen!
            pass

        if close_client:
            self.docker_client.close()

    def run_action(self, action: Action) -> Observation:
        """Send one action to the runtime client and return its observation.

        Non-runnable or not-yet-confirmed actions yield a NullObservation, unknown
        or unsupported ones an ErrorObservation, rejected ones a
        UserRejectObservation. Transport errors are converted to ErrorObservation
        rather than raised.
        """
        # set timeout to default if not set
        if action.timeout is None:
            action.timeout = self.config.sandbox.timeout

        with self.action_semaphore:
            if not action.runnable:
                return NullObservation('')
            if (
                hasattr(action, 'is_confirmed')
                and action.is_confirmed
                == ActionConfirmationStatus.AWAITING_CONFIRMATION
            ):
                return NullObservation('')
            action_type = action.action  # type: ignore[attr-defined]
            if action_type not in ACTION_TYPE_TO_CLASS:
                return ErrorObservation(f'Action {action_type} does not exist.')
            if not hasattr(self, action_type):
                return ErrorObservation(
                    f'Action {action_type} is not supported in the current runtime.'
                )
            if (
                hasattr(action, 'is_confirmed')
                and action.is_confirmed == ActionConfirmationStatus.REJECTED
            ):
                return UserRejectObservation(
                    'Action has been rejected by the user! Waiting for further user input.'
                )

            logger.info('Awaiting session')
            self._wait_until_alive()

            assert action.timeout is not None

            try:
                response = self.session.post(
                    f'{self.api_url}/execute_action',
                    json={'action': event_to_dict(action)},
                    timeout=action.timeout,
                )
                if response.status_code == 200:
                    output = response.json()
                    obs = observation_from_dict(output)
                    obs._cause = action.id  # type: ignore[attr-defined]
                    return obs
                else:
                    error_message = response.text
                    logger.error(f'Error from server: {error_message}')
                    obs = ErrorObservation(f'Command execution failed: {error_message}')
            except requests.Timeout:
                logger.error('No response received within the timeout period.')
                obs = ErrorObservation('Command execution timed out')
            except Exception as e:
                logger.error(f'Error during command execution: {e}')
                obs = ErrorObservation(f'Command execution failed: {str(e)}')
            return obs

    def run(self, action: CmdRunAction) -> Observation:
        return self.run_action(action)

    def run_ipython(self, action: IPythonRunCellAction) -> Observation:
        return self.run_action(action)

    def read(self, action: FileReadAction) -> Observation:
        return self.run_action(action)

    def write(self, action: FileWriteAction) -> Observation:
        return self.run_action(action)

    def browse(self, action: BrowseURLAction) -> Observation:
        return self.run_action(action)

    def browse_interactive(self, action: BrowseInteractiveAction) -> Observation:
        return self.run_action(action)

    # ====================================================================
    # Implement these methods (for file operations) in the subclass
    # ====================================================================

    def copy_to(
        self, host_src: str, sandbox_dest: str, recursive: bool = False
    ) -> None:
        """Upload a host file (or, if ``recursive``, a zipped directory tree) to the sandbox.

        Raises:
            FileNotFoundError: If ``host_src`` does not exist.
            TimeoutError: If the upload request times out.
            RuntimeError: On any other failure, including a non-200 response.
        """
        if not os.path.exists(host_src):
            raise FileNotFoundError(f'Source file {host_src} does not exist')

        self._wait_until_alive()
        # Stays None unless a temporary archive was actually created, so the
        # cleanup below can never touch an unbound name.
        temp_zip_path: str | None = None
        try:
            if recursive:
                # For recursive copy, create a zip file
                with tempfile.NamedTemporaryFile(
                    suffix='.zip', delete=False
                ) as temp_zip:
                    temp_zip_path = temp_zip.name

                with ZipFile(temp_zip_path, 'w') as zipf:
                    for root, _, files in os.walk(host_src):
                        for file in files:
                            file_path = os.path.join(root, file)
                            arcname = os.path.relpath(
                                file_path, os.path.dirname(host_src)
                            )
                            zipf.write(file_path, arcname)
                upload_path = temp_zip_path
            else:
                # For single file copy
                upload_path = host_src

            params = {'destination': sandbox_dest, 'recursive': str(recursive).lower()}

            # Context manager guarantees the handle is closed after the upload.
            with open(upload_path, 'rb') as upload_file:
                response = self.session.post(
                    f'{self.api_url}/upload_file',
                    files={'file': upload_file},
                    params=params,
                )
            if response.status_code != 200:
                error_message = response.text
                raise Exception(f'Copy operation failed: {error_message}')
        except requests.Timeout as e:
            raise TimeoutError('Copy operation timed out') from e
        except Exception as e:
            raise RuntimeError(f'Copy operation failed: {str(e)}') from e
        finally:
            if temp_zip_path is not None:
                os.unlink(temp_zip_path)
        # Only reached on success; failures raise above.
        logger.info(f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}')

    def list_files(self, path: str | None = None) -> list[str]:
        """List files in the sandbox.

        If path is None, list files in the sandbox's initial working directory (e.g., /workspace).

        Raises:
            TimeoutError: If the request times out.
            RuntimeError: On any other failure, including a non-200 or non-list response.
        """
        self._wait_until_alive()
        try:
            data = {}
            if path is not None:
                data['path'] = path

            response = self.session.post(f'{self.api_url}/list_files', json=data)
            if response.status_code != 200:
                error_message = response.text
                raise Exception(f'List files operation failed: {error_message}')
            response_json = response.json()
            # Explicit check instead of `assert`, which is stripped under -O.
            if not isinstance(response_json, list):
                raise TypeError(
                    f'expected a list in response, got {type(response_json).__name__}'
                )
            return response_json
        except requests.Timeout as e:
            raise TimeoutError('List files operation timed out') from e
        except Exception as e:
            raise RuntimeError(f'List files operation failed: {str(e)}') from e

    def _is_port_in_use_docker(self, port: int) -> bool:
        """Return True if ``port`` appears in the port mapping of any running container.

        This is a textual (substring) match against the stringified mapping.
        """
        needle = str(port)
        return any(
            needle in str(container.ports)
            for container in self.docker_client.containers.list()
        )

    def _find_available_port(self, max_attempts: int = 5) -> int:
        """Pick a TCP port in 30000-39999 that no running container reports using."""
        port = 39999
        for _ in range(max_attempts):
            port = find_available_tcp_port(30000, 39999)
            if not self._is_port_in_use_docker(port):
                return port
        # If no port is found after max_attempts, return the last tried port
        return port