From 554636cf2aa03d16015db56ed583eff61cbb585e Mon Sep 17 00:00:00 2001 From: tobitege <10787084+tobitege@users.noreply.github.com> Date: Sat, 14 Sep 2024 21:51:30 +0200 Subject: [PATCH] (fix) Fix runtime (RT) tests and split tests in 2 actions (openhands/root) (#3791) Co-authored-by: Engel Nyst --- .github/workflows/ghcr_runtime.yml | 89 ++- openhands/core/config.py | 12 +- openhands/runtime/builder/docker.py | 4 + openhands/runtime/client/client.py | 113 ++- openhands/runtime/client/runtime.py | 162 +++-- openhands/runtime/plugins/jupyter/__init__.py | 2 +- openhands/runtime/remote/runtime.py | 6 +- openhands/runtime/runtime.py | 1 - openhands/runtime/utils/__init__.py | 7 +- openhands/runtime/utils/runtime_build.py | 15 +- .../utils/runtime_templates/Dockerfile.j2 | 1 + openhands/runtime/utils/system.py | 39 +- pytest.ini | 1 + tests/integration/regenerate.sh | 74 +- tests/integration/test_agent.py | 35 +- tests/runtime/conftest.py | 198 +++-- tests/runtime/test_bash.py | 679 +++++++----------- tests/runtime/test_browsing.py | 9 +- tests/runtime/test_env_vars.py | 9 +- tests/runtime/test_images.py | 15 +- tests/runtime/test_ipython.py | 98 +-- 21 files changed, 867 insertions(+), 702 deletions(-) diff --git a/.github/workflows/ghcr_runtime.yml b/.github/workflows/ghcr_runtime.yml index 055c5db31a..4efe5293c6 100644 --- a/.github/workflows/ghcr_runtime.yml +++ b/.github/workflows/ghcr_runtime.yml @@ -1,5 +1,5 @@ # Workflow that builds, tests and then pushes the runtime docker images to the ghcr.io repository -name: Build, Test and Publish Runtime Image +name: Build, Test and Publish RT Image # Only run one workflow of the same group at a time. # There can be at most one running and one pending job in a concurrency group at any time. @@ -104,9 +104,9 @@ jobs: name: runtime-${{ matrix.base_image.tag }} path: /tmp/runtime-${{ matrix.base_image.tag }}.tar - # Run unit tests with the EventStream runtime Docker images - test_runtime: - name: Test Runtime + # Run unit tests with the EventStream runtime Docker images as root + test_runtime_root: + name: RT Unit Tests (Root) needs: [ghcr_build_runtime] runs-on: ubuntu-latest strategy: @@ -164,11 +164,84 @@ jobs: image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }} image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]') + SKIP_CONTAINER_LOGS=true \ TEST_RUNTIME=eventstream \ SANDBOX_USER_ID=$(id -u) \ SANDBOX_BASE_CONTAINER_IMAGE=$image_name \ TEST_IN_CI=true \ - poetry run pytest -n 2 --reruns 2 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime + RUN_AS_OPENHANDS=false \ + poetry run pytest -n 3 --reruns 1 --reruns-delay 3 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + + # Run unit tests with the EventStream runtime Docker images as openhands user + test_runtime_oh: + name: RT Unit Tests (openhands) + runs-on: ubuntu-latest + needs: [ghcr_build_runtime] + strategy: + matrix: + base_image: ['nikolaik'] + steps: + - uses: actions/checkout@v4 + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + tool-cache: true + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: true + # Forked repos can't push to GHCR, so we need to download the image as an artifact + - name: Download runtime image for fork + if: github.event.pull_request.head.repo.fork + uses: actions/download-artifact@v4 + with: + name: runtime-${{ matrix.base_image }} + path: /tmp + - name: Load runtime image for fork + if: github.event.pull_request.head.repo.fork + run: | + docker load --input /tmp/runtime-${{ matrix.base_image }}.tar + - name: Cache Poetry dependencies + uses: actions/cache@v4 + with: + path: | + ~/.cache/pypoetry + ~/.virtualenvs + key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }} + restore-keys: | + ${{ runner.os }}-poetry- + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Install poetry via pipx + run: pipx install poetry + - name: Install Python dependencies using Poetry + run: make install-python-dependencies + - name: Run runtime tests + run: | + # We install pytest-xdist in order to run tests across CPUs. However, tests start to fail when we run + # then across more than 2 CPUs for some reason + poetry run pip install pytest-xdist + + # Install to be able to retry on failures for flaky tests + poetry run pip install pytest-rerunfailures + + image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }} + image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]') + + SKIP_CONTAINER_LOGS=true \ + TEST_RUNTIME=eventstream \ + SANDBOX_USER_ID=$(id -u) \ + SANDBOX_BASE_CONTAINER_IMAGE=$image_name \ + TEST_IN_CI=true \ + RUN_AS_OPENHANDS=true \ + poetry run pytest -n 3 --reruns 1 --reruns-delay 3 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 env: @@ -176,7 +249,7 @@ jobs: # Run integration tests with the eventstream runtime Docker image runtime_integration_tests_on_linux: - name: Runtime Integration Tests on Linux + name: RT Integration Tests (Linux) runs-on: ubuntu-latest needs: [ghcr_build_runtime] strategy: @@ -237,7 +310,7 @@ jobs: name: All Runtime Tests Passed if: ${{ !cancelled() && !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }} runs-on: ubuntu-latest - needs: [test_runtime, runtime_integration_tests_on_linux] + needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux] steps: - name: All tests passed run: echo "All runtime tests have passed successfully!" @@ -246,7 +319,7 @@ jobs: name: All Runtime Tests Passed if: ${{ cancelled() || contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }} runs-on: ubuntu-latest - needs: [test_runtime, runtime_integration_tests_on_linux] + needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux] steps: - name: Some tests failed run: | diff --git a/openhands/core/config.py b/openhands/core/config.py index 108523adc2..6d4dacb8af 100644 --- a/openhands/core/config.py +++ b/openhands/core/config.py @@ -507,7 +507,7 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'): if isinstance(value, dict): try: if key is not None and key.lower() == 'agent': - logger.openhands_logger.info( + logger.openhands_logger.debug( 'Attempt to load default agent config from config toml' ) non_dict_fields = { @@ -517,13 +517,13 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'): cfg.set_agent_config(agent_config, 'agent') for nested_key, nested_value in value.items(): if isinstance(nested_value, dict): - logger.openhands_logger.info( + logger.openhands_logger.debug( f'Attempt to load group {nested_key} from config toml as agent config' ) agent_config = AgentConfig(**nested_value) cfg.set_agent_config(agent_config, nested_key) elif key is not None and key.lower() == 'llm': - logger.openhands_logger.info( + logger.openhands_logger.debug( 'Attempt to load default LLM config from config toml' ) non_dict_fields = { @@ -533,7 +533,7 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'): cfg.set_llm_config(llm_config, 'llm') for nested_key, nested_value in value.items(): if isinstance(nested_value, dict): - logger.openhands_logger.info( + logger.openhands_logger.debug( f'Attempt to load group {nested_key} from config toml as llm config' ) llm_config = LLMConfig(**nested_value) @@ -584,10 +584,10 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'): def finalize_config(cfg: AppConfig): """More tweaks to the config after it's been loaded.""" + cfg.workspace_base = os.path.abspath(cfg.workspace_base) # Set workspace_mount_path if not set by the user if cfg.workspace_mount_path is UndefinedString.UNDEFINED: - cfg.workspace_mount_path = os.path.abspath(cfg.workspace_base) - cfg.workspace_base = os.path.abspath(cfg.workspace_base) + cfg.workspace_mount_path = cfg.workspace_base if cfg.workspace_mount_rewrite: # and not config.workspace_mount_path: # TODO why do we need to check if workspace_mount_path is None? diff --git a/openhands/runtime/builder/docker.py b/openhands/runtime/builder/docker.py index 441a253fdd..3dcd7cb0a6 100644 --- a/openhands/runtime/builder/docker.py +++ b/openhands/runtime/builder/docker.py @@ -68,6 +68,10 @@ class DockerRuntimeBuilder(RuntimeBuilder): Returns: bool: Whether the Docker image exists in the registry or in the local store """ + if not image_name: + logger.error(f'Invalid image name: `{image_name}`') + return False + try: logger.info(f'Checking, if image exists locally:\n{image_name}') self.docker_client.images.get(image_name) diff --git a/openhands/runtime/client/client.py b/openhands/runtime/client/client.py index f26b739ca8..5d5f63993a 100644 --- a/openhands/runtime/client/client.py +++ b/openhands/runtime/client/client.py @@ -84,7 +84,6 @@ class RuntimeClient: self.lock = asyncio.Lock() self.plugins: dict[str, Plugin] = {} self.browser = BrowserEnv(browsergym_eval_env) - self._initial_pwd = work_dir @property def initial_pwd(self): @@ -116,27 +115,85 @@ class RuntimeClient: logger.info('Runtime client initialized.') def _init_user(self, username: str, user_id: int) -> None: - """Create user if not exists.""" + """Create working directory and user if not exists. + It performs the following steps effectively: + * Creates the Working Directory: + - Uses mkdir -p to create the directory. + - Sets ownership to username:root. + - Adjusts permissions to be readable and writable by group and others. + * User Verification and Creation: + - Checks if the user exists using id -u. + - If the user exists with the correct UID, it skips creation. + - If the UID differs, it logs a warning and updates self.user_id. + - If the user doesn't exist, it proceeds to create the user. + * Sudo Configuration: + - Appends %sudo ALL=(ALL) NOPASSWD:ALL to /etc/sudoers to grant + passwordless sudo access to the sudo group. + - Adds the user to the sudo group with the useradd command, handling + UID conflicts by incrementing the UID if necessary. + """ + + # First create the working directory, independent of the user + logger.info(f'Client working directory: {self.initial_pwd}') + command = f'umask 002; mkdir -p {self.initial_pwd}' + output = subprocess.run(command, shell=True, capture_output=True) + out_str = output.stdout.decode() + + command = f'chown -R {username}:root {self.initial_pwd}' + output = subprocess.run(command, shell=True, capture_output=True) + out_str += output.stdout.decode() + + command = f'chmod g+rw {self.initial_pwd}' + output = subprocess.run(command, shell=True, capture_output=True) + out_str += output.stdout.decode() + logger.debug(f'Created working directory. Output: [{out_str}]') + # Skip root since it is already created if username == 'root': return # Check if the username already exists + existing_user_id = -1 try: - subprocess.run( + result = subprocess.run( f'id -u {username}', shell=True, check=True, capture_output=True ) - logger.debug(f'User {username} already exists. Skipping creation.') + existing_user_id = int(result.stdout.decode().strip()) + + # The user ID already exists, skip setup + if existing_user_id == user_id: + logger.debug( + f'User `{username}` already has the provided UID {user_id}. Skipping user setup.' + ) + else: + logger.warning( + f'User `{username}` already exists with UID {existing_user_id}. Skipping user setup.' + ) + self.user_id = existing_user_id return - except subprocess.CalledProcessError: - pass # User does not exist, continue with creation + except subprocess.CalledProcessError as e: + # Returncode 1 indicates, that the user does not exist yet + if e.returncode == 1: + logger.debug( + f'User `{username}` does not exist. Proceeding with user creation.' + ) + else: + logger.error( + f'Error checking user `{username}`, skipping setup:\n{e}\n' + ) + raise # Add sudoer - sudoer_line = r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers" - output = subprocess.run(sudoer_line, shell=True, capture_output=True) - if output.returncode != 0: - raise RuntimeError(f'Failed to add sudoer: {output.stderr.decode()}') - logger.debug(f'Added sudoer successfully. Output: [{output.stdout.decode()}]') + sudoer_line = r'%sudo ALL=(ALL) NOPASSWD:ALL\n' + sudoers_path = '/etc/sudoers.d/99_sudo' + if not Path(sudoers_path).exists(): + with open(sudoers_path, 'w') as f: + f.write(sudoer_line) + output = subprocess.run(['chmod', '0440', sudoers_path]) + if output.returncode != 0: + logger.error('Failed to chmod 99_sudo file!') + else: + logger.debug('Added sudoer successfully.') # Attempt to add the user, retrying with incremented user_id if necessary while True: @@ -144,16 +201,10 @@ class RuntimeClient: f'useradd -rm -d /home/{username} -s /bin/bash ' f'-g root -G sudo -u {user_id} {username}' ) - - if not os.path.exists(self.initial_pwd): - command += f' && mkdir -p {self.initial_pwd}' - command += f' && chown -R {username}:root {self.initial_pwd}' - command += f' && chmod g+s {self.initial_pwd}' - output = subprocess.run(command, shell=True, capture_output=True) if output.returncode == 0: logger.debug( - f'Added user {username} successfully with UID {user_id}. Output: [{output.stdout.decode()}]' + f'Added user `{username}` successfully with UID {user_id}. Output: [{output.stdout.decode()}]' ) break elif f'UID {user_id} is not unique' in output.stderr.decode(): @@ -163,7 +214,7 @@ class RuntimeClient: user_id += 1 else: raise RuntimeError( - f'Failed to create user {username}: {output.stderr.decode()}' + f'Failed to create user `{username}`! Output: [{output.stderr.decode()}]' ) def _init_bash_shell(self, work_dir: str, username: str) -> None: @@ -181,8 +232,8 @@ class RuntimeClient: # This should NOT match "PS1=\u@\h:\w [PEXPECT]$" when `env` is executed self.__bash_expect_regex = r'\[PEXPECT_BEGIN\]\s*(.*?)\s*([a-z0-9_-]*)@([a-zA-Z0-9.-]*):(.+)\s*\[PEXPECT_END\]' - - self.shell.sendline(f'export PS1="{self.__bash_PS1}"; export PS2=""') + # Set umask to allow group write permissions + self.shell.sendline(f'umask 002; export PS1="{self.__bash_PS1}"; export PS2=""') self.shell.expect(self.__bash_expect_regex) self.shell.sendline( @@ -190,8 +241,11 @@ class RuntimeClient: ) self.shell.expect(self.__bash_expect_regex) logger.debug( - f'Bash initialized. Working directory: {work_dir}. Output: {self.shell.before}' + f'Bash initialized. Working directory: {work_dir}. Output: [{self.shell.before}]' ) + # Ensure the group has write permissions on the working directory + self.shell.sendline(f'chmod g+rw "{work_dir}"') + self.shell.expect(self.__bash_expect_regex) async def _init_bash_commands(self): logger.info(f'Initializing by running {len(INIT_COMMANDS)} bash commands...') @@ -295,14 +349,14 @@ class RuntimeClient: bash_prompt = self._get_bash_prompt_and_update_pwd() if keep_prompt: output += '\r\n' + bash_prompt - logger.debug(f'Command output: {output}') + # logger.debug(f'Command output:\n{output}') return output, exit_code async def run_action(self, action) -> Observation: action_type = action.action - logger.debug(f'Running action: {action}') + logger.debug(f'Running action:\n{action}') observation = await getattr(self, action_type)(action) - logger.debug(f'Action output: {observation}') + logger.debug(f'Action output:\n{observation}') return observation async def run(self, action: CmdRunAction) -> CmdOutputObservation: @@ -355,10 +409,9 @@ class RuntimeClient: _jupyter_plugin: JupyterPlugin = self.plugins['jupyter'] # type: ignore # This is used to make AgentSkills in Jupyter aware of the # current working directory in Bash - if self.pwd != getattr(self, '_jupyter_pwd', None): - logger.debug( - f"{self.pwd} != {getattr(self, '_jupyter_pwd', None)} -> reset Jupyter PWD" - ) + jupyter_pwd = getattr(self, '_jupyter_pwd', None) + if self.pwd != jupyter_pwd: + logger.debug(f'{self.pwd} != {jupyter_pwd} -> reset Jupyter PWD') reset_jupyter_pwd_code = f'import os; os.chdir("{self.pwd}")' _aux_action = IPythonRunCellAction(code=reset_jupyter_pwd_code) _reset_obs = await _jupyter_plugin.run(_aux_action) @@ -450,7 +503,7 @@ class RuntimeClient: os.chown(filepath, file_stat.st_uid, file_stat.st_gid) else: # set the new file permissions if the file is new - os.chmod(filepath, 0o644) + os.chmod(filepath, 0o664) os.chown(filepath, self.user_id, self.user_id) except FileNotFoundError: diff --git a/openhands/runtime/client/runtime.py b/openhands/runtime/client/runtime.py index 5767632f37..73ce7d548d 100644 --- a/openhands/runtime/client/runtime.py +++ b/openhands/runtime/client/runtime.py @@ -38,8 +38,7 @@ from openhands.runtime.utils.runtime_build import build_runtime_image class LogBuffer: - """ - Synchronous buffer for Docker container logs. + """Synchronous buffer for Docker container logs. This class provides a thread-safe way to collect, store, and retrieve logs from a Docker container. It uses a list to store log lines and provides methods @@ -94,7 +93,7 @@ class LogBuffer: ) self.close(timeout=5) - def close(self, timeout: float = 10.0): + def close(self, timeout: float = 5.0): self._stop_event.set() self.log_stream_thread.join(timeout) @@ -102,6 +101,14 @@ class LogBuffer: class EventStreamRuntime(Runtime): """This runtime will subscribe the event stream. When receive an event, it will send the event to runtime-client which run inside the docker environment. + From the sid also an instance_id is generated in combination with a UID. + + Args: + config (AppConfig): The application configuration. + event_stream (EventStream): The event stream to subscribe to. + sid (str, optional): The session ID. Defaults to 'default'. + plugins (list[PluginRequirement] | None, optional): List of plugin requirements. Defaults to None. + env_vars (dict[str, str] | None, optional): Environment variables to set. Defaults to None. """ container_name_prefix = 'openhands-sandbox-' @@ -115,13 +122,16 @@ class EventStreamRuntime(Runtime): env_vars: dict[str, str] | None = None, ): self.config = config - self._port = find_available_tcp_port() - self.api_url = f'http://{self.config.sandbox.api_hostname}:{self._port}' + self._host_port = 30000 # initial dummy value + self._container_port = 30001 # initial dummy value + self.api_url = ( + f'http://{self.config.sandbox.api_hostname}:{self._container_port}' + ) self.session = requests.Session() - self.instance_id = ( sid + '_' + str(uuid.uuid4()) if sid is not None else str(uuid.uuid4()) ) + self.docker_client: docker.DockerClient = self._init_docker_client() self.base_container_image = self.config.sandbox.base_container_image self.runtime_container_image = self.config.sandbox.runtime_container_image @@ -131,7 +141,7 @@ class EventStreamRuntime(Runtime): self.action_semaphore = threading.Semaphore(1) # Ensure one action at a time self.runtime_builder = DockerRuntimeBuilder(self.docker_client) - logger.debug(f'EventStreamRuntime `{sid}`') + logger.debug(f'EventStreamRuntime `{self.instance_id}`') # Buffer for container logs self.log_buffer: LogBuffer | None = None @@ -140,7 +150,9 @@ class EventStreamRuntime(Runtime): logger.info( f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}' ) - + self.skip_container_logs = ( + os.environ.get('SKIP_CONTAINER_LOGS', 'false').lower() == 'true' + ) if self.runtime_container_image is None: if self.base_container_image is None: raise ValueError( @@ -152,19 +164,18 @@ class EventStreamRuntime(Runtime): extra_deps=self.config.sandbox.runtime_extra_deps, ) self.container = self._init_container( - self.sandbox_workspace_dir, - mount_dir=self.config.workspace_mount_path, + sandbox_workspace_dir=self.config.workspace_mount_path_in_sandbox, # e.g. /workspace + mount_dir=self.config.workspace_mount_path, # e.g. /opt/openhands/_test_workspace plugins=plugins, ) # will initialize both the event stream and the env vars super().__init__(config, event_stream, sid, plugins, env_vars) - self._wait_until_alive() - logger.info( f'Container initialized with plugins: {[plugin.name for plugin in self.plugins]}' ) logger.info(f'Container initialized with env vars: {env_vars}') + time.sleep(1) @staticmethod def _init_docker_client() -> docker.DockerClient: @@ -196,24 +207,48 @@ class EventStreamRuntime(Runtime): f'--plugins {" ".join([plugin.name for plugin in plugins])} ' ) - network_mode: str | None = None - port_mapping: dict[str, int] | None = None - if self.config.sandbox.use_host_network: - network_mode = 'host' + self._host_port = self._find_available_port() + self._container_port = ( + self._host_port + ) # in future this might differ from host port + self.api_url = ( + f'http://{self.config.sandbox.api_hostname}:{self._container_port}' + ) + + use_host_network = self.config.sandbox.use_host_network + network_mode: str | None = 'host' if use_host_network else None + port_mapping: dict[str, list[dict[str, str]]] | None = ( + None + if use_host_network + else { + f'{self._container_port}/tcp': [{'HostPort': str(self._host_port)}] + } + ) + + if use_host_network: logger.warn( 'Using host network mode. If you are using MacOS, please make sure you have the latest version of Docker Desktop and enabled host network feature: https://docs.docker.com/network/drivers/host/#docker-desktop' ) - else: - port_mapping = {f'{self._port}/tcp': self._port} - if mount_dir is not None: + # Combine environment variables + environment = { + 'port': str(self._container_port), + 'PYTHONUNBUFFERED': 1, + } + if self.config.debug: + environment['DEBUG'] = 'true' + + logger.info(f'Workspace Base: {self.config.workspace_base}') + if mount_dir is not None and sandbox_workspace_dir is not None: + # e.g. result would be: {"/home/user/openhands/workspace": {'bind': "/workspace", 'mode': 'rw'}} volumes = {mount_dir: {'bind': sandbox_workspace_dir, 'mode': 'rw'}} - logger.info(f'Mount dir: {sandbox_workspace_dir}') + logger.info(f'Mount dir: {mount_dir}') else: logger.warn( - 'Mount dir is not set, will not mount the workspace directory to the container.' + 'Warning: Mount dir is not set, will not mount the workspace directory to the container!\n' ) volumes = None + logger.info(f'Sandbox workspace: {sandbox_workspace_dir}') if self.config.sandbox.browsergym_eval_env is not None: browsergym_arg = ( @@ -225,9 +260,9 @@ class EventStreamRuntime(Runtime): self.runtime_container_image, command=( f'/openhands/miniforge3/bin/mamba run --no-capture-output -n base ' - 'PYTHONUNBUFFERED=1 poetry run ' - f'python -u -m openhands.runtime.client.client {self._port} ' - f'--working-dir {sandbox_workspace_dir} ' + f'poetry run ' + f'python -u -m openhands.runtime.client.client {self._container_port} ' + f'--working-dir "{sandbox_workspace_dir}" ' f'{plugin_arg}' f'--username {"openhands" if self.config.run_as_openhands else "root"} ' f'--user-id {self.config.sandbox.user_id} ' @@ -235,24 +270,26 @@ class EventStreamRuntime(Runtime): ), network_mode=network_mode, ports=port_mapping, - working_dir='/openhands/code/', + working_dir='/openhands/code/', # do not change this! name=self.container_name, detach=True, - environment={'DEBUG': 'true'} if self.config.debug else None, + environment=environment, volumes=volumes, ) self.log_buffer = LogBuffer(container) logger.info(f'Container started. Server url: {self.api_url}') return container except Exception as e: - logger.error('Failed to start container') + logger.error( + f'Error: Instance {self.instance_id} FAILED to start container!\n' + ) logger.exception(e) self.close(close_client=False) raise e @tenacity.retry( stop=tenacity.stop_after_attempt(10), - wait=tenacity.wait_exponential(multiplier=2, min=10, max=60), + wait=tenacity.wait_exponential(multiplier=2, min=1, max=20), reraise=(ConnectionRefusedError,), ) def _wait_until_alive(self): @@ -278,10 +315,11 @@ class EventStreamRuntime(Runtime): ) if not self.log_buffer.client_ready: + time.sleep(1) attempts = 0 - while not self.log_buffer.client_ready and attempts < 5: + while not self.log_buffer.client_ready and attempts < 4: attempts += 1 - time.sleep(1) + time.sleep(2) logs = self.log_buffer.get_and_clear() if logs: formatted_logs = '\n'.join([f' |{log}' for log in logs]) @@ -303,13 +341,8 @@ class EventStreamRuntime(Runtime): logger.error(msg) raise RuntimeError(msg) - @property - def sandbox_workspace_dir(self): - return self.config.workspace_mount_path_in_sandbox - def close(self, close_client: bool = True, rm_all_containers: bool = True): - """ - Closes the EventStreamRuntime and associated objects + """Closes the EventStreamRuntime and associated objects Parameters: - close_client (bool): Whether to close the DockerClient @@ -322,23 +355,29 @@ class EventStreamRuntime(Runtime): if self.session: self.session.close() - containers = self.docker_client.containers.list(all=True) - for container in containers: - try: - # If the app doesn't shut down properly, it can leave runtime containers on the system. This ensures - # that all 'openhands-sandbox-' containers are removed as well. - if rm_all_containers and container.name.startswith( - self.container_name_prefix - ): - container.remove(force=True) - elif container.name == self.container_name: - logs = container.logs(tail=1000).decode('utf-8') - logger.debug( - f'==== Container logs ====\n{logs}\n==== End of container logs ====' - ) - container.remove(force=True) - except docker.errors.NotFound: - pass + try: + containers = self.docker_client.containers.list(all=True) + for container in containers: + try: + # If the app doesn't shut down properly, it can leave runtime containers on the system. This ensures + # that all 'openhands-sandbox-' containers are removed as well. + if rm_all_containers and container.name.startswith( + self.container_name_prefix + ): + container.remove(force=True) + elif container.name == self.container_name: + if not self.skip_container_logs: + logs = container.logs(tail=1000).decode('utf-8') + logger.debug( + f'==== Container logs on close ====\n{logs}\n==== End of container logs ====' + ) + container.remove(force=True) + except docker.errors.APIError: + pass + except docker.errors.NotFound: + pass + except docker.errors.NotFound: # yes, this can happen! + pass if close_client: self.docker_client.close() @@ -494,3 +533,20 @@ class EventStreamRuntime(Runtime): raise TimeoutError('List files operation timed out') except Exception as e: raise RuntimeError(f'List files operation failed: {str(e)}') + + def _is_port_in_use_docker(self, port): + containers = self.docker_client.containers.list() + for container in containers: + container_ports = container.ports + if str(port) in str(container_ports): + return True + return False + + def _find_available_port(self, max_attempts=5): + port = 39999 + for _ in range(max_attempts): + port = find_available_tcp_port(30000, 39999) + if not self._is_port_in_use_docker(port): + return port + # If no port is found after max_attempts, return the last tried port + return port diff --git a/openhands/runtime/plugins/jupyter/__init__.py b/openhands/runtime/plugins/jupyter/__init__.py index 0feb5caef0..26eda35054 100644 --- a/openhands/runtime/plugins/jupyter/__init__.py +++ b/openhands/runtime/plugins/jupyter/__init__.py @@ -19,7 +19,7 @@ class JupyterPlugin(Plugin): name: str = 'jupyter' async def initialize(self, username: str, kernel_id: str = 'openhands-default'): - self.kernel_gateway_port = find_available_tcp_port() + self.kernel_gateway_port = find_available_tcp_port(40000, 49999) self.kernel_id = kernel_id self.gateway_process = subprocess.Popen( ( diff --git a/openhands/runtime/remote/runtime.py b/openhands/runtime/remote/runtime.py index 6c3e755608..48862d28aa 100644 --- a/openhands/runtime/remote/runtime.py +++ b/openhands/runtime/remote/runtime.py @@ -142,7 +142,7 @@ class RemoteRuntime(Runtime): f'/openhands/miniforge3/bin/mamba run --no-capture-output -n base ' 'PYTHONUNBUFFERED=1 poetry run ' f'python -u -m openhands.runtime.client.client {self.port} ' - f'--working-dir {self.sandbox_workspace_dir} ' + f'--working-dir {self.config.workspace_mount_path_in_sandbox} ' f'{plugin_arg}' f'--username {"openhands" if self.config.run_as_openhands else "root"} ' f'--user-id {self.config.sandbox.user_id} ' @@ -203,10 +203,6 @@ class RemoteRuntime(Runtime): logger.warning(msg) raise RuntimeError(msg) - @property - def sandbox_workspace_dir(self): - return self.config.workspace_mount_path_in_sandbox - def close(self): if self.runtime_id: try: diff --git a/openhands/runtime/runtime.py b/openhands/runtime/runtime.py index 97b5464d49..ae54f4b177 100644 --- a/openhands/runtime/runtime.py +++ b/openhands/runtime/runtime.py @@ -67,7 +67,6 @@ class Runtime: self.config = copy.deepcopy(config) self.DEFAULT_ENV_VARS = _default_env_vars(config.sandbox) atexit.register(self.close) - logger.debug(f'Runtime `{sid}`') if self.DEFAULT_ENV_VARS: logger.debug(f'Adding default env vars: {self.DEFAULT_ENV_VARS}') diff --git a/openhands/runtime/utils/__init__.py b/openhands/runtime/utils/__init__.py index eca9436da6..71d9d4198c 100644 --- a/openhands/runtime/utils/__init__.py +++ b/openhands/runtime/utils/__init__.py @@ -1,4 +1,7 @@ from openhands.runtime.utils.bash import split_bash_commands -from openhands.runtime.utils.system import find_available_tcp_port +from openhands.runtime.utils.system import ( + display_number_matrix, + find_available_tcp_port, +) -__all__ = ['find_available_tcp_port', 'split_bash_commands'] +__all__ = ['display_number_matrix', 'find_available_tcp_port', 'split_bash_commands'] diff --git a/openhands/runtime/utils/runtime_build.py b/openhands/runtime/utils/runtime_build.py index 2baa6732d0..83df63819f 100644 --- a/openhands/runtime/utils/runtime_build.py +++ b/openhands/runtime/utils/runtime_build.py @@ -55,7 +55,7 @@ def _put_source_code_to_dir(temp_dir: str): ' ', r'\ ' ) # escape spaces in the project root result = subprocess.run( - f'python -m build -s -o {temp_dir} {_cleaned_project_root}', + f'python -m build -s -o "{temp_dir}" {_cleaned_project_root}', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, @@ -142,13 +142,14 @@ def prep_docker_build_folder( skip_init=skip_init, extra_deps=extra_deps, ) - logger.debug( - ( - f'===== Dockerfile content start =====\n' - f'{dockerfile_content}\n' - f'===== Dockerfile content end =====' + if os.getenv('SKIP_CONTAINER_LOGS', 'false') != 'true': + logger.debug( + ( + f'===== Dockerfile content start =====\n' + f'{dockerfile_content}\n' + f'===== Dockerfile content end =====' + ) ) - ) with open(os.path.join(dir_path, 'Dockerfile'), 'w') as file: file.write(dockerfile_content) diff --git a/openhands/runtime/utils/runtime_templates/Dockerfile.j2 b/openhands/runtime/utils/runtime_templates/Dockerfile.j2 index 519760a2ef..1f49b9eb7c 100644 --- a/openhands/runtime/utils/runtime_templates/Dockerfile.j2 +++ b/openhands/runtime/utils/runtime_templates/Dockerfile.j2 @@ -60,6 +60,7 @@ RUN cd /openhands/code && \ {{ extra_deps }} {% if extra_deps %} && {% endif %} \ /openhands/miniforge3/bin/mamba run -n base poetry cache clear --all . && \ {% if not skip_init %}chmod -R g+rws /openhands/poetry && {% endif %} \ + mkdir -p /openhands/workspace && chmod -R g+rws,o+rw /openhands/workspace && \ apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ /openhands/miniforge3/bin/mamba clean --all diff --git a/openhands/runtime/utils/system.py b/openhands/runtime/utils/system.py index 09c8408add..921a8bf94b 100644 --- a/openhands/runtime/utils/system.py +++ b/openhands/runtime/utils/system.py @@ -1,17 +1,34 @@ +import random import socket +import time -def find_available_tcp_port() -> int: - """Find an available TCP port, return -1 if none available.""" - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - try: - sock.bind(('localhost', 0)) - port = sock.getsockname()[1] - return port - except Exception: - return -1 - finally: - sock.close() +def find_available_tcp_port(min_port=30000, max_port=39999, max_attempts=10) -> int: + """Find an available TCP port in a specified range. + + Args: + min_port (int): The lower bound of the port range (default: 30000) + max_port (int): The upper bound of the port range (default: 39999) + max_attempts (int): Maximum number of attempts to find an available port (default: 10) + + Returns: + int: An available port number, or -1 if none found after max_attempts + """ + rng = random.SystemRandom() + ports = list(range(min_port, max_port + 1)) + rng.shuffle(ports) + + for port in ports[:max_attempts]: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + sock.bind(('localhost', port)) + return port + except OSError: + time.sleep(0.1) # Short delay to further reduce chance of collisions + continue + finally: + sock.close() + return -1 def display_number_matrix(number: int) -> str | None: diff --git a/pytest.ini b/pytest.ini index 1ceab94295..7a222b2d34 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,2 +1,3 @@ [pytest] addopts = -p no:warnings +asyncio_default_fixture_loop_scope = function diff --git a/tests/integration/regenerate.sh b/tests/integration/regenerate.sh index 25bfad49f4..e88a203476 100755 --- a/tests/integration/regenerate.sh +++ b/tests/integration/regenerate.sh @@ -5,6 +5,8 @@ set -eo pipefail ## CONSTANTS AND ENVIRONMENTAL VARIABLES ## ############################################################## +echo -e "\n\n============================================================" + # unset environmental variables that might disturb testing unset OPENAI_API_KEY unset SANDBOX_ENV_OPENAI_API_KEY @@ -16,7 +18,7 @@ get_script_dir() { local source="${BASH_SOURCE[0]}" while [ -h "$source" ]; do local dir="$( cd -P "$( dirname "$source" )" && pwd )" - source="$(readlink "$source")" + source="$(readlink -f "$source" 2>/dev/null || echo "$source")" [[ $source != /* ]] && source="$dir/$source" done echo "$( cd -P "$( dirname "$source" )" && pwd )" @@ -27,9 +29,6 @@ TMP_FILE="${TMP_FILE:-tmp.log}" if [ -z "$WORKSPACE_BASE" ]; then WORKSPACE_BASE=$(pwd) fi -if [ -z "$WORKSPACE_MOUNT_PATH" ]; then - WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE -fi DEBUG=true # needed for llm logging to create mock files! @@ -39,7 +38,7 @@ fi export SCRIPT_DIR=$(get_script_dir) export PROJECT_ROOT=$(realpath "$SCRIPT_DIR/../..") -export LOG_DIR=$PROJECT_ROOT/logs +export LOG_DIR="$PROJECT_ROOT/logs" echo "Current working directory: $(pwd)" echo "SCRIPT_DIR: $SCRIPT_DIR" echo "PROJECT_ROOT: $PROJECT_ROOT" @@ -47,22 +46,29 @@ echo "LOG_DIR: $LOG_DIR" echo "LOG_TO_FILE: $LOG_TO_FILE" WORKSPACE_BASE=${WORKSPACE_BASE}/_test_workspace -mkdir -p $WORKSPACE_BASE -chmod -R 777 $WORKSPACE_BASE -WORKSPACE_BASE=$(realpath $WORKSPACE_BASE) +mkdir -p "$WORKSPACE_BASE" +chmod -R 777 "$WORKSPACE_BASE" +WORKSPACE_BASE=$(realpath "$WORKSPACE_BASE") -WORKSPACE_MOUNT_PATH=${WORKSPACE_MOUNT_PATH}/_test_workspace -mkdir -p $WORKSPACE_MOUNT_PATH -chmod -R 777 $WORKSPACE_MOUNT_PATH -WORKSPACE_MOUNT_PATH=$(realpath $WORKSPACE_MOUNT_PATH) +if [ -z "$WORKSPACE_MOUNT_PATH" ]; then + WORKSPACE_MOUNT_PATH="$WORKSPACE_BASE" +else + WORKSPACE_MOUNT_PATH="${WORKSPACE_MOUNT_PATH}/_test_workspace" + mkdir -p "$WORKSPACE_MOUNT_PATH" + chmod -R 755 "$WORKSPACE_MOUNT_PATH" + WORKSPACE_MOUNT_PATH=$(realpath "$WORKSPACE_MOUNT_PATH") +fi + +WORKSPACE_MOUNT_PATH_IN_SANDBOX="${WORKSPACE_MOUNT_PATH_IN_SANDBOX:-/workspace}" echo "WORKSPACE_BASE: $WORKSPACE_BASE" echo "WORKSPACE_MOUNT_PATH: $WORKSPACE_MOUNT_PATH" +echo "WORKSPACE_MOUNT_PATH_IN_SANDBOX: $WORKSPACE_MOUNT_PATH_IN_SANDBOX" # Ensure we're in the correct directory cd "$PROJECT_ROOT" || exit 1 -mkdir -p $WORKSPACE_BASE +mkdir -p "$WORKSPACE_BASE" # use environmental variable if exists TEST_RUNTIME="${TEST_RUNTIME:-eventstream}" @@ -178,7 +184,7 @@ cleanup() { kill $HTTP_SERVER_PID || true unset HTTP_SERVER_PID fi - [ -f $TMP_FILE ] && rm $TMP_FILE + [ -f "$TMP_FILE" ] && rm "$TMP_FILE" echo "Cleanup done!" } @@ -200,14 +206,14 @@ regenerate_without_llm() { PROJECT_ROOT="$PROJECT_ROOT" \ WORKSPACE_BASE="$WORKSPACE_BASE" \ WORKSPACE_MOUNT_PATH="$WORKSPACE_MOUNT_PATH" \ - MAX_ITERATIONS=$MAX_ITERATIONS \ + MAX_ITERATIONS="$MAX_ITERATIONS" \ FORCE_APPLY_PROMPTS=true \ - DEFAULT_AGENT=$agent \ + DEFAULT_AGENT="$agent" \ TEST_RUNTIME="$TEST_RUNTIME" \ - LLM=$LLM \ - DEBUG=$DEBUG \ - LOG_TO_FILE=$LOG_TO_FILE \ - FORCE_REGENERATE=$FORCE_REGENERATE \ + LLM="$LLM" \ + DEBUG="$DEBUG" \ + LOG_TO_FILE="$LOG_TO_FILE" \ + FORCE_REGENERATE="$FORCE_REGENERATE" \ SANDBOX_BASE_CONTAINER_IMAGE="$SANDBOX_BASE_CONTAINER_IMAGE" \ poetry run pytest -s "$SCRIPT_DIR/test_agent.py::$test_name" set +x @@ -216,12 +222,12 @@ regenerate_without_llm() { regenerate_with_llm() { cd "$PROJECT_ROOT" - rm -rf $WORKSPACE_BASE/* + rm -rf "$WORKSPACE_BASE/*" if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then - cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE + cp -r "$SCRIPT_DIR/workspace/$test_name"/* "$WORKSPACE_BASE" fi - rm -rf logs + rm -rf "$LOG_DIR" rm -rf "$SCRIPT_DIR/mock/${TEST_RUNTIME}_runtime/$agent/$test_name/*" # set -x to print the command being executed set -x @@ -233,12 +239,12 @@ regenerate_with_llm() { DEFAULT_AGENT=$agent \ RUNTIME="$TEST_RUNTIME" \ SANDBOX_BASE_CONTAINER_IMAGE="$SANDBOX_BASE_CONTAINER_IMAGE" \ - LLM=$LLM \ - DEBUG=$DEBUG \ - LOG_TO_FILE=$LOG_TO_FILE \ - FORCE_REGENERATE=$FORCE_REGENERATE \ + LLM="$LLM" \ + DEBUG="$DEBUG" \ + LOG_TO_FILE="$LOG_TO_FILE" \ + FORCE_REGENERATE="$FORCE_REGENERATE" \ poetry run python "$PROJECT_ROOT/openhands/core/main.py" \ - -i $MAX_ITERATIONS \ + -i "$MAX_ITERATIONS" \ -t "$task Do not ask me for confirmation at any point." \ -c $agent set +x @@ -256,8 +262,8 @@ if [ "$num_of_tests" -ne "${#test_names[@]}" ]; then exit 1 fi -rm -rf logs -rm -rf $WORKSPACE_BASE/* +rm -rf "$LOG_DIR" +rm -rf "$WORKSPACE_BASE/*" for ((i = 0; i < num_of_tests; i++)); do task=${tasks[i]} test_name=${test_names[i]} @@ -286,9 +292,9 @@ for ((i = 0; i < num_of_tests; i++)); do cd "$PROJECT_ROOT/tests" cd "$PROJECT_ROOT" - rm -rf $WORKSPACE_BASE/* + rm -rf "$WORKSPACE_BASE/*" if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then - cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE + cp -r "$SCRIPT_DIR/workspace/$test_name"/* "$WORKSPACE_BASE" fi if [ "$TEST_ONLY" ]; then @@ -395,7 +401,7 @@ for ((i = 0; i < num_of_tests; i++)); do fi done -rm -rf logs -rm -rf $WORKSPACE_BASE +rm -rf "$LOG_DIR" +rm -rf "$WORKSPACE_BASE" echo "Done!" cd "$PROJECT_ROOT" diff --git a/tests/integration/test_agent.py b/tests/integration/test_agent.py index 9199943fcb..c89a8e8b7b 100644 --- a/tests/integration/test_agent.py +++ b/tests/integration/test_agent.py @@ -6,7 +6,7 @@ import subprocess import pytest from openhands.controller.state.state import State -from openhands.core.config import AppConfig, SandboxConfig, load_from_env +from openhands.core.config import load_app_config from openhands.core.main import run_controller from openhands.core.schema import AgentState from openhands.events.action import ( @@ -21,36 +21,23 @@ TEST_RUNTIME = os.getenv('TEST_RUNTIME') assert TEST_RUNTIME in ['eventstream', 'remote'] _ = get_runtime_cls(TEST_RUNTIME) # make sure it does not raise an error -CONFIG = AppConfig( - max_iterations=int(os.getenv('MAX_ITERATIONS', 20)), - max_budget_per_task=int(os.getenv('MAX_BUDGET_PER_TASK', 15)), - runtime=TEST_RUNTIME, - default_agent=os.getenv('DEFAULT_AGENT'), - workspace_base=os.getenv('WORKSPACE_BASE'), - workspace_mount_path=os.getenv('WORKSPACE_MOUNT_PATH'), - sandbox=SandboxConfig( - use_host_network=True, - ), +CONFIG = load_app_config() +CONFIG.max_iterations = int(os.getenv('MAX_ITERATIONS', 20)) +CONFIG.max_budget_per_task = int(os.getenv('MAX_BUDGET_PER_TASK', 15)) +CONFIG.runtime = TEST_RUNTIME +CONFIG.default_agent = os.getenv('DEFAULT_AGENT') +CONFIG.workspace_base = os.getenv('WORKSPACE_BASE') +CONFIG.workspace_mount_path = os.getenv('WORKSPACE_MOUNT_PATH') +CONFIG.workspace_mount_path_in_sandbox = os.getenv( + 'WORKSPACE_MOUNT_PATH_IN_SANDBOX', '/workspace' ) -load_from_env(CONFIG, os.environ) +CONFIG.sandbox.use_host_network = True print('\nPaths used:') print(f'workspace_base: {CONFIG.workspace_base}') print(f'workspace_mount_path: {CONFIG.workspace_mount_path}') print(f'workspace_mount_path_in_sandbox: {CONFIG.workspace_mount_path_in_sandbox}') -# Check if running in WSL environment -if 'WSL_DISTRO_NAME' in os.environ: - if ( - CONFIG.workspace_base - and CONFIG.workspace_mount_path - and CONFIG.workspace_base != CONFIG.workspace_mount_path - ): - print( - '\n**********\nWARNING: if WORKSPACE_MOUNT_PATH is set differently to' - '\nWORKSPACE_BASE some file operation tests may fail!\n**********\n' - ) - def get_number_of_prompts(test_name: str): mock_dir = os.path.join( diff --git a/tests/runtime/conftest.py b/tests/runtime/conftest.py index 6cec0f9c4b..6ce93256e4 100644 --- a/tests/runtime/conftest.py +++ b/tests/runtime/conftest.py @@ -1,11 +1,15 @@ import os import random +import shutil +import stat import time +from pathlib import Path import pytest from pytest import TempPathFactory -from openhands.core.config import AppConfig, SandboxConfig, load_from_env +from openhands.core.config import load_app_config +from openhands.core.logger import openhands_logger as logger from openhands.events import EventStream from openhands.runtime.client.runtime import EventStreamRuntime from openhands.runtime.plugins import AgentSkillsRequirement, JupyterRequirement @@ -13,19 +17,86 @@ from openhands.runtime.remote.runtime import RemoteRuntime from openhands.runtime.runtime import Runtime from openhands.storage import get_file_store +TEST_IN_CI = os.getenv('TEST_IN_CI', 'False').lower() in ['true', '1', 'yes'] +TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'eventstream').lower() +RUN_AS_OPENHANDS = os.getenv('RUN_AS_OPENHANDS', 'True').lower() in ['true', '1', 'yes'] +test_mount_path = '' +project_dir = os.path.dirname( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +) +sandbox_test_folder = '/openhands/workspace' + + +def _get_runtime_sid(runtime: Runtime): + logger.debug(f'\nruntime.sid: {runtime.sid}') + return runtime.sid + + +def _get_host_folder(runtime: Runtime): + return runtime.config.workspace_mount_path + + +def _get_sandbox_folder(runtime: Runtime): + sid = _get_runtime_sid(runtime) + if sid: + return Path(os.path.join(sandbox_test_folder, sid)) + return None + + +def _remove_folder(folder: str) -> bool: + success = False + if folder and os.path.isdir(folder): + try: + os.rmdir(folder) + success = True + except OSError: + try: + shutil.rmtree(folder) + success = True + except OSError: + pass + logger.debug(f'\nCleanup: `{folder}`: ' + ('[OK]' if success else '[FAILED]')) + return success + + +def _close_test_runtime(runtime: Runtime): + if isinstance(runtime, EventStreamRuntime): + runtime.close(rm_all_containers=False) + else: + runtime.close() + time.sleep(1) + + +def _reset_pwd(): + global project_dir + # Try to change back to project directory + try: + os.chdir(project_dir) + logger.info(f'Changed back to project directory `{project_dir}') + except Exception as e: + logger.error(f'Failed to change back to project directory: {e}') + + +# ***************************************************************************** +# ***************************************************************************** + @pytest.fixture(autouse=True) def print_method_name(request): - print('\n########################################################################') + print( + '\n\n########################################################################' + ) print(f'Running test: {request.node.name}') - print('########################################################################') - yield + print( + '########################################################################\n\n' + ) @pytest.fixture -def temp_dir(tmp_path_factory: TempPathFactory) -> str: - """ - Creates a unique temporary directory +def temp_dir(tmp_path_factory: TempPathFactory, request) -> str: + """Creates a unique temporary directory. + Upon finalization, the temporary directory and its content is removed. + The cleanup function is also called upon KeyboardInterrupt. Parameters: - tmp_path_factory (TempPathFactory): A TempPathFactory class @@ -33,15 +104,23 @@ def temp_dir(tmp_path_factory: TempPathFactory) -> str: Returns: - str: The temporary directory path that was created """ - - unique_suffix = random.randint(10000, 99999) - temp_directory = tmp_path_factory.mktemp( - f'test_runtime_{unique_suffix}', numbered=False + temp_dir = tmp_path_factory.mktemp( + 'rt_' + str(random.randint(100000, 999999)), numbered=False ) - return str(temp_directory) + logger.info(f'\n*** {request.node.name}\n>> temp folder: {temp_dir}\n') -TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'eventstream') + # Set permissions to ensure the directory is writable and deletable + os.chmod(temp_dir, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) # 0777 permissions + + def cleanup(): + global project_dir + os.chdir(project_dir) + _remove_folder(temp_dir) + + request.addfinalizer(cleanup) + + return str(temp_dir) # Depending on TEST_RUNTIME, feed the appropriate box class(es) to the test. @@ -55,28 +134,47 @@ def get_box_classes(): raise ValueError(f'Invalid runtime: {runtime}') +def get_run_as_openhands(): + print( + '\n\n########################################################################' + ) + print('USER: ' + 'openhands' if RUN_AS_OPENHANDS else 'root') + print( + '########################################################################\n\n' + ) + return [RUN_AS_OPENHANDS] + + +@pytest.fixture(scope='module') # for xdist +def runtime_setup_module(): + _reset_pwd() + yield + _reset_pwd() + + +@pytest.fixture(scope='session') # not for xdist +def runtime_setup_session(): + _reset_pwd() + yield + _reset_pwd() + + # This assures that all tests run together per runtime, not alternating between them, # which cause errors (especially outside GitHub actions). @pytest.fixture(scope='module', params=get_box_classes()) def box_class(request): - time.sleep(2) + time.sleep(1) return request.param # TODO: We will change this to `run_as_user` when `ServerRuntime` is deprecated. # since `EventStreamRuntime` supports running as an arbitrary user. -@pytest.fixture(scope='module', params=[True, False]) +@pytest.fixture(scope='module', params=get_run_as_openhands()) def run_as_openhands(request): time.sleep(1) return request.param -@pytest.fixture(scope='module', params=[True, False]) -def enable_auto_lint(request): - time.sleep(1) - return request.param - - @pytest.fixture(scope='module', params=None) def base_container_image(request): time.sleep(1) @@ -96,21 +194,12 @@ def base_container_image(request): if request.param is None: request.param = pytest.param( 'nikolaik/python-nodejs:python3.11-nodejs22', - 'python:3.11-bookworm', - 'node:22-bookworm', 'golang:1.23-bookworm', ) print(f'Container image: {request.param}') return request.param -@pytest.fixture -def runtime(temp_dir, box_class, run_as_openhands): - runtime = _load_runtime(temp_dir, box_class, run_as_openhands) - yield runtime - time.sleep(1) - - def _load_runtime( temp_dir, box_class, @@ -118,29 +207,45 @@ def _load_runtime( enable_auto_lint: bool = False, base_container_image: str | None = None, browsergym_eval_env: str | None = None, + use_workspace: bool | None = None, ) -> Runtime: - sid = 'test' - cli_session = 'main_test' + sid = 'rt_' + str(random.randint(100000, 999999)) # AgentSkills need to be initialized **before** Jupyter # otherwise Jupyter will not access the proper dependencies installed by AgentSkills plugins = [AgentSkillsRequirement(), JupyterRequirement()] - config = AppConfig( - workspace_base=temp_dir, - workspace_mount_path=temp_dir, - sandbox=SandboxConfig( - use_host_network=True, - browsergym_eval_env=browsergym_eval_env, - ), - ) - load_from_env(config, os.environ) + + config = load_app_config() config.run_as_openhands = run_as_openhands + + # Folder where all tests create their own folder + global test_mount_path + if use_workspace: + test_mount_path = os.path.join(config.workspace_base, 'rt') + else: + test_mount_path = os.path.join( + temp_dir, sid + ) # need a subfolder to avoid conflicts + config.workspace_mount_path = test_mount_path + + # Mounting folder specific for this test inside the sandbox + config.workspace_mount_path_in_sandbox = f'{sandbox_test_folder}/{sid}' + print('\nPaths used:') + print(f'use_host_network: {config.sandbox.use_host_network}') + print(f'workspace_base: {config.workspace_base}') + print(f'workspace_mount_path: {config.workspace_mount_path}') + print( + f'workspace_mount_path_in_sandbox: {config.workspace_mount_path_in_sandbox}\n' + ) + + config.sandbox.browsergym_eval_env = browsergym_eval_env config.sandbox.enable_auto_lint = enable_auto_lint + if base_container_image is not None: config.sandbox.base_container_image = base_container_image file_store = get_file_store(config.file_store, config.file_store_path) - event_stream = EventStream(cli_session, file_store) + event_stream = EventStream(sid, file_store) runtime = box_class( config=config, @@ -148,9 +253,14 @@ def _load_runtime( sid=sid, plugins=plugins, ) - time.sleep(1) + time.sleep(2) return runtime # Export necessary function -__all__ = ['_load_runtime'] +__all__ = [ + '_load_runtime', + '_get_host_folder', + '_get_sandbox_folder', + '_remove_folder', +] diff --git a/tests/runtime/test_bash.py b/tests/runtime/test_bash.py index 02301ebb14..69ddfe8eca 100644 --- a/tests/runtime/test_bash.py +++ b/tests/runtime/test_bash.py @@ -1,11 +1,14 @@ """Bash-related tests for the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox.""" import os -import tempfile -import time import pytest -from conftest import _load_runtime +from conftest import ( + TEST_IN_CI, + _close_test_runtime, + _get_sandbox_folder, + _load_runtime, +) from openhands.core.logger import openhands_logger as logger from openhands.events.action import CmdRunAction @@ -16,77 +19,63 @@ from openhands.events.observation import CmdOutputObservation # ============================================================================================================================ +def _run_cmd_action(runtime, custom_command: str, keep_prompt=True): + action = CmdRunAction(command=custom_command, keep_prompt=keep_prompt) + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + assert isinstance(obs, CmdOutputObservation) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + return obs + + def test_bash_command_pexcept(temp_dir, box_class, run_as_openhands): runtime = _load_runtime(temp_dir, box_class, run_as_openhands) + try: + # We set env var PS1="\u@\h:\w $" + # and construct the PEXCEPT prompt base on it. + # When run `env`, bad implementation of CmdRunAction will be pexcepted by this + # and failed to pexcept the right content, causing it fail to get error code. + obs = runtime.run_action(CmdRunAction(command='env')) - # We set env var PS1="\u@\h:\w $" - # and construct the PEXCEPT prompt base on it. - # When run `env`, bad implementation of CmdRunAction will be pexcepted by this - # and failed to pexcept the right content, causing it fail to get error code. - obs = runtime.run_action(CmdRunAction(command='env')) + # For example: + # 02:16:13 - openhands:DEBUG: client.py:78 - Executing command: env + # 02:16:13 - openhands:DEBUG: client.py:82 - Command output: PYTHONUNBUFFERED=1 + # CONDA_EXE=/openhands/miniforge3/bin/conda + # [...] + # LC_CTYPE=C.UTF-8 + # PS1=\u@\h:\w $ + # 02:16:13 - openhands:DEBUG: client.py:89 - Executing command for exit code: env + # 02:16:13 - openhands:DEBUG: client.py:92 - Exit code Output: + # CONDA_DEFAULT_ENV=base - # For example: - # 02:16:13 - openhands:DEBUG: client.py:78 - Executing command: env - # 02:16:13 - openhands:DEBUG: client.py:82 - Command output: PYTHONUNBUFFERED=1 - # CONDA_EXE=/openhands/miniforge3/bin/conda - # [...] - # LC_CTYPE=C.UTF-8 - # PS1=\u@\h:\w $ - # 02:16:13 - openhands:DEBUG: client.py:89 - Executing command for exit code: env - # 02:16:13 - openhands:DEBUG: client.py:92 - Exit code Output: - # CONDA_DEFAULT_ENV=base - - # As long as the exit code is 0, the test will pass. - assert isinstance( - obs, CmdOutputObservation - ), 'The observation should be a CmdOutputObservation.' - assert obs.exit_code == 0, 'The exit code should be 0.' - - runtime.close(rm_all_containers=False) - time.sleep(1) + # As long as the exit code is 0, the test will pass. + assert isinstance( + obs, CmdOutputObservation + ), 'The observation should be a CmdOutputObservation.' + assert obs.exit_code == 0, 'The exit code should be 0.' + finally: + _close_test_runtime(runtime) -def test_single_multiline_command(temp_dir, box_class): +def test_multiline_commands(temp_dir, box_class): runtime = _load_runtime(temp_dir, box_class) + try: + # single multiline command + obs = _run_cmd_action(runtime, 'echo \\\n -e "foo"') + assert obs.exit_code == 0, 'The exit code should be 0.' + assert 'foo' in obs.content - action = CmdRunAction(command='echo \\\n -e "foo"') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert obs.exit_code == 0, 'The exit code should be 0.' - assert 'foo' in obs.content + # test multiline echo + obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"') + assert obs.exit_code == 0, 'The exit code should be 0.' + assert 'hello\r\nworld' in obs.content - runtime.close(rm_all_containers=False) - time.sleep(1) - - -def test_multiline_echo(temp_dir, box_class): - runtime = _load_runtime(temp_dir, box_class) - - action = CmdRunAction(command='echo -e "hello\nworld"') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert obs.exit_code == 0, 'The exit code should be 0.' - assert 'hello\r\nworld' in obs.content - - runtime.close(rm_all_containers=False) - time.sleep(1) - - -def test_runtime_whitespace(temp_dir, box_class): - runtime = _load_runtime(temp_dir, box_class) - - action = CmdRunAction(command='echo -e "\\n\\n\\n"') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - - assert obs.exit_code == 0, 'The exit code should be 0.' - assert '\r\n\r\n\r\n' in obs.content - - runtime.close(rm_all_containers=False) - time.sleep(1) + # test whitespace + obs = _run_cmd_action(runtime, 'echo -e "\\n\\n\\n"') + assert obs.exit_code == 0, 'The exit code should be 0.' + assert '\r\n\r\n\r\n' in obs.content + finally: + _close_test_runtime(runtime) def test_multiple_multiline_commands(temp_dir, box_class, run_as_openhands): @@ -120,48 +109,36 @@ world " joined_cmds = '\n'.join(cmds) runtime = _load_runtime(temp_dir, box_class, run_as_openhands) + try: + obs = _run_cmd_action(runtime, joined_cmds) + assert obs.exit_code == 0, 'The exit code should be 0.' - action = CmdRunAction(command=joined_cmds) - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0, 'The exit code should be 0.' - - assert 'total 0' in obs.content - assert 'hello\r\nworld' in obs.content - assert "hello it\\'s me" in obs.content - assert 'hello -v' in obs.content - assert 'hello\r\nworld\r\nare\r\nyou\r\nthere?' in obs.content - assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content - assert 'hello\r\nworld "\r\n' in obs.content - - runtime.close(rm_all_containers=False) - time.sleep(1) + assert 'total 0' in obs.content + assert 'hello\r\nworld' in obs.content + assert "hello it\\'s me" in obs.content + assert 'hello -v' in obs.content + assert 'hello\r\nworld\r\nare\r\nyou\r\nthere?' in obs.content + assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content + assert 'hello\r\nworld "\r\n' in obs.content + finally: + _close_test_runtime(runtime) def test_no_ps2_in_output(temp_dir, box_class, run_as_openhands): """Test that the PS2 sign is not added to the output of a multiline command.""" runtime = _load_runtime(temp_dir, box_class, run_as_openhands) + try: + obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"') + assert obs.exit_code == 0, 'The exit code should be 0.' - action = CmdRunAction(command='echo -e "hello\nworld"') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - - assert 'hello\r\nworld' in obs.content - assert '>' not in obs.content - - runtime.close(rm_all_containers=False) - time.sleep(1) + assert 'hello\r\nworld' in obs.content + assert '>' not in obs.content + finally: + _close_test_runtime(runtime) def test_multiline_command_loop(temp_dir, box_class): # https://github.com/All-Hands-AI/OpenHands/issues/3143 - - runtime = _load_runtime(temp_dir, box_class) - init_cmd = """ mkdir -p _modules && \ for month in {01..04}; do @@ -171,15 +148,6 @@ for month in {01..04}; do done echo "created files" """ - action = CmdRunAction(command=init_cmd) - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0, 'The exit code should be 0.' - assert 'created files' in obs.content - follow_up_cmd = """ for file in _modules/*.md; do new_date=$(echo $file | sed -E 's/2024-(01|02|03|04)-/2024-/;s/2024-01/2024-08/;s/2024-02/2024-09/;s/2024-03/2024-10/;s/2024-04/2024-11/') @@ -187,153 +155,104 @@ for file in _modules/*.md; do done echo "success" """ - action = CmdRunAction(command=follow_up_cmd) - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + runtime = _load_runtime(temp_dir, box_class) + try: + obs = _run_cmd_action(runtime, init_cmd) + assert obs.exit_code == 0, 'The exit code should be 0.' + assert 'created files' in obs.content - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0, 'The exit code should be 0.' - assert 'success' in obs.content - - runtime.close(rm_all_containers=False) - time.sleep(1) + obs = _run_cmd_action(runtime, follow_up_cmd) + assert obs.exit_code == 0, 'The exit code should be 0.' + assert 'success' in obs.content + finally: + _close_test_runtime(runtime) def test_cmd_run(temp_dir, box_class, run_as_openhands): runtime = _load_runtime(temp_dir, box_class, run_as_openhands) + try: + obs = _run_cmd_action(runtime, 'ls -l /openhands/workspace') + assert obs.exit_code == 0 - action = CmdRunAction(command='ls -l') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - assert 'total 0' in obs.content + obs = _run_cmd_action(runtime, 'ls -l') + assert obs.exit_code == 0 + assert 'total 0' in obs.content - action = CmdRunAction(command='mkdir test') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 + obs = _run_cmd_action(runtime, 'mkdir test') + assert obs.exit_code == 0 - action = CmdRunAction(command='ls -l') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - if run_as_openhands: - assert 'openhands' in obs.content - else: - assert 'root' in obs.content - assert 'test' in obs.content + obs = _run_cmd_action(runtime, 'ls -l') + assert obs.exit_code == 0 + if run_as_openhands: + assert 'openhands' in obs.content + else: + assert 'root' in obs.content + assert 'test' in obs.content - action = CmdRunAction(command='touch test/foo.txt') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 + obs = _run_cmd_action(runtime, 'touch test/foo.txt') + assert obs.exit_code == 0 - action = CmdRunAction(command='ls -l test') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - assert 'foo.txt' in obs.content + obs = _run_cmd_action(runtime, 'ls -l test') + assert obs.exit_code == 0 + assert 'foo.txt' in obs.content - # clean up: this is needed, since CI will not be - # run as root, and this test may leave a file - # owned by root - action = CmdRunAction(command='rm -rf test') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - - runtime.close(rm_all_containers=False) - time.sleep(1) + # clean up: this is needed, since CI will not be + # run as root, and this test may leave a file + # owned by root + _run_cmd_action(runtime, 'rm -rf test') + assert obs.exit_code == 0 + finally: + _close_test_runtime(runtime) def test_run_as_user_correct_home_dir(temp_dir, box_class, run_as_openhands): runtime = _load_runtime(temp_dir, box_class, run_as_openhands) - - action = CmdRunAction(command='cd ~ && pwd') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - if run_as_openhands: - assert '/home/openhands' in obs.content - else: - assert '/root' in obs.content - - runtime.close(rm_all_containers=False) - time.sleep(1) + try: + obs = _run_cmd_action(runtime, 'cd ~ && pwd') + assert obs.exit_code == 0 + if run_as_openhands: + assert '/home/openhands' in obs.content + else: + assert '/root' in obs.content + finally: + _close_test_runtime(runtime) def test_multi_cmd_run_in_single_line(temp_dir, box_class): runtime = _load_runtime(temp_dir, box_class) - - action = CmdRunAction(command='pwd && ls -l') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - assert '/workspace' in obs.content - assert 'total 0' in obs.content - - runtime.close(rm_all_containers=False) - time.sleep(1) + try: + obs = _run_cmd_action(runtime, 'pwd && ls -l') + assert obs.exit_code == 0 + assert '/workspace' in obs.content + assert 'total 0' in obs.content + finally: + _close_test_runtime(runtime) def test_stateful_cmd(temp_dir, box_class): runtime = _load_runtime(temp_dir, box_class) + sandbox_dir = _get_sandbox_folder(runtime) + try: + obs = _run_cmd_action(runtime, 'mkdir -p test') + assert obs.exit_code == 0, 'The exit code should be 0.' - action = CmdRunAction(command='mkdir test') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0, 'The exit code should be 0.' + obs = _run_cmd_action(runtime, 'cd test') + assert obs.exit_code == 0, 'The exit code should be 0.' - action = CmdRunAction(command='cd test') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0, 'The exit code should be 0.' - - action = CmdRunAction(command='pwd') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0, 'The exit code should be 0.' - assert '/workspace/test' in obs.content - - runtime.close(rm_all_containers=False) - time.sleep(1) + obs = _run_cmd_action(runtime, 'pwd') + assert obs.exit_code == 0, 'The exit code should be 0.' + assert f'{sandbox_dir}/test' in obs.content + finally: + _close_test_runtime(runtime) def test_failed_cmd(temp_dir, box_class): runtime = _load_runtime(temp_dir, box_class) - - action = CmdRunAction(command='non_existing_command') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code != 0, 'The exit code should not be 0 for a failed command.' - - runtime.close(rm_all_containers=False) - time.sleep(1) + try: + obs = _run_cmd_action(runtime, 'non_existing_command') + assert obs.exit_code != 0, 'The exit code should not be 0 for a failed command.' + finally: + _close_test_runtime(runtime) def _create_test_file(host_temp_dir): @@ -344,154 +263,121 @@ def _create_test_file(host_temp_dir): def test_copy_single_file(temp_dir, box_class): runtime = _load_runtime(temp_dir, box_class) + try: + sandbox_dir = _get_sandbox_folder(runtime) + sandbox_file = os.path.join(sandbox_dir, 'test_file.txt') + _create_test_file(temp_dir) + runtime.copy_to(os.path.join(temp_dir, 'test_file.txt'), sandbox_dir) - with tempfile.TemporaryDirectory() as host_temp_dir: - _create_test_file(host_temp_dir) - runtime.copy_to(os.path.join(host_temp_dir, 'test_file.txt'), '/workspace') + obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}') + assert obs.exit_code == 0 + assert 'test_file.txt' in obs.content - action = CmdRunAction(command='ls -alh /workspace') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - assert 'test_file.txt' in obs.content - - action = CmdRunAction(command='cat /workspace/test_file.txt') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - assert 'Hello, World!' in obs.content - - runtime.close(rm_all_containers=False) - time.sleep(1) + obs = _run_cmd_action(runtime, f'cat {sandbox_file}') + assert obs.exit_code == 0 + assert 'Hello, World!' in obs.content + finally: + _close_test_runtime(runtime) -def _create_test_dir_with_files(host_temp_dir): - os.mkdir(os.path.join(host_temp_dir, 'test_dir')) - with open(os.path.join(host_temp_dir, 'test_dir', 'file1.txt'), 'w') as f: +def _create_host_test_dir_with_files(test_dir): + logger.debug(f'creating `{test_dir}`') + if not os.path.isdir(test_dir): + os.makedirs(test_dir, exist_ok=True) + logger.debug('creating test files in `test_dir`') + with open(os.path.join(test_dir, 'file1.txt'), 'w') as f: f.write('File 1 content') - with open(os.path.join(host_temp_dir, 'test_dir', 'file2.txt'), 'w') as f: + with open(os.path.join(test_dir, 'file2.txt'), 'w') as f: f.write('File 2 content') def test_copy_directory_recursively(temp_dir, box_class): runtime = _load_runtime(temp_dir, box_class) - with tempfile.TemporaryDirectory() as host_temp_dir: + sandbox_dir = _get_sandbox_folder(runtime) + try: + temp_dir_copy = os.path.join(temp_dir, 'test_dir') # We need a separate directory, since temp_dir is mounted to /workspace - _create_test_dir_with_files(host_temp_dir) - runtime.copy_to( - os.path.join(host_temp_dir, 'test_dir'), '/workspace', recursive=True - ) + _create_host_test_dir_with_files(temp_dir_copy) - action = CmdRunAction(command='ls -alh /workspace') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - assert 'test_dir' in obs.content - assert 'file1.txt' not in obs.content - assert 'file2.txt' not in obs.content + runtime.copy_to(temp_dir_copy, sandbox_dir, recursive=True) - action = CmdRunAction(command='ls -alh /workspace/test_dir') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - assert 'file1.txt' in obs.content - assert 'file2.txt' in obs.content + obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}') + assert obs.exit_code == 0 + assert 'test_dir' in obs.content + assert 'file1.txt' not in obs.content + assert 'file2.txt' not in obs.content - action = CmdRunAction(command='cat /workspace/test_dir/file1.txt') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - assert 'File 1 content' in obs.content + obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}/test_dir') + assert obs.exit_code == 0 + assert 'file1.txt' in obs.content + assert 'file2.txt' in obs.content - runtime.close(rm_all_containers=False) - time.sleep(1) + obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/test_dir/file1.txt') + assert obs.exit_code == 0 + assert 'File 1 content' in obs.content + finally: + _close_test_runtime(runtime) def test_copy_to_non_existent_directory(temp_dir, box_class): runtime = _load_runtime(temp_dir, box_class) - - with tempfile.TemporaryDirectory() as host_temp_dir: - _create_test_file(host_temp_dir) + try: + sandbox_dir = _get_sandbox_folder(runtime) + _create_test_file(temp_dir) runtime.copy_to( - os.path.join(host_temp_dir, 'test_file.txt'), '/workspace/new_dir' + os.path.join(temp_dir, 'test_file.txt'), f'{sandbox_dir}/new_dir' ) - action = CmdRunAction(command='cat /workspace/new_dir/test_file.txt') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - assert 'Hello, World!' in obs.content - - runtime.close(rm_all_containers=False) - time.sleep(1) + obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/new_dir/test_file.txt') + assert obs.exit_code == 0 + assert 'Hello, World!' in obs.content + finally: + _close_test_runtime(runtime) def test_overwrite_existing_file(temp_dir, box_class): runtime = _load_runtime(temp_dir, box_class) + try: + sandbox_dir = _get_sandbox_folder(runtime) - # touch a file in /workspace - action = CmdRunAction(command='touch /workspace/test_file.txt') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 + obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}') + assert obs.exit_code == 0 - action = CmdRunAction(command='cat /workspace/test_file.txt') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - assert 'Hello, World!' not in obs.content + obs = _run_cmd_action(runtime, f'touch {sandbox_dir}/test_file.txt') + assert obs.exit_code == 0 - with tempfile.TemporaryDirectory() as host_temp_dir: - _create_test_file(host_temp_dir) - runtime.copy_to(os.path.join(host_temp_dir, 'test_file.txt'), '/workspace') + obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}') + assert obs.exit_code == 0 - action = CmdRunAction(command='cat /workspace/test_file.txt') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - assert 'Hello, World!' in obs.content + obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/test_file.txt') + assert obs.exit_code == 0 + assert 'Hello, World!' not in obs.content - runtime.close(rm_all_containers=False) - time.sleep(1) + _create_test_file(temp_dir) + runtime.copy_to(os.path.join(temp_dir, 'test_file.txt'), sandbox_dir) + + obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/test_file.txt') + assert obs.exit_code == 0 + assert 'Hello, World!' in obs.content + finally: + _close_test_runtime(runtime) def test_copy_non_existent_file(temp_dir, box_class): runtime = _load_runtime(temp_dir, box_class) + try: + sandbox_dir = _get_sandbox_folder(runtime) + with pytest.raises(FileNotFoundError): + runtime.copy_to( + os.path.join(sandbox_dir, 'non_existent_file.txt'), + f'{sandbox_dir}/should_not_exist.txt', + ) - with pytest.raises(FileNotFoundError): - runtime.copy_to( - os.path.join(temp_dir, 'non_existent_file.txt'), - '/workspace/should_not_exist.txt', - ) - - action = CmdRunAction(command='ls /workspace/should_not_exist.txt') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code != 0 # File should not exist - - runtime.close(rm_all_containers=False) - time.sleep(1) + obs = _run_cmd_action(runtime, f'ls {sandbox_dir}/should_not_exist.txt') + assert obs.exit_code != 0 # File should not exist + finally: + _close_test_runtime(runtime) def test_keep_prompt(box_class, temp_dir): @@ -500,27 +386,26 @@ def test_keep_prompt(box_class, temp_dir): box_class=box_class, run_as_openhands=False, ) + try: + sandbox_dir = _get_sandbox_folder(runtime) - action = CmdRunAction(command='touch /workspace/test_file.txt') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - assert 'root@' in obs.content + obs = _run_cmd_action(runtime, f'touch {sandbox_dir}/test_file.txt') + assert obs.exit_code == 0 + assert 'root@' in obs.content - action = CmdRunAction(command='cat /workspace/test_file.txt', keep_prompt=False) - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - assert 'root@' not in obs.content - - runtime.close(rm_all_containers=False) - time.sleep(1) + obs = _run_cmd_action( + runtime, f'cat {sandbox_dir}/test_file.txt', keep_prompt=False + ) + assert obs.exit_code == 0 + assert 'root@' not in obs.content + finally: + _close_test_runtime(runtime) +@pytest.mark.skipif( + TEST_IN_CI != 'True', + reason='This test is not working in WSL (file ownership)', +) def test_git_operation(box_class): # do not mount workspace, since workspace mount by tests will be owned by root # while the user_id we get via os.getuid() is different from root @@ -531,69 +416,43 @@ def test_git_operation(box_class): # Need to use non-root user to expose issues run_as_openhands=True, ) - # this will happen if permission of runtime is not properly configured # fatal: detected dubious ownership in repository at '/workspace' + try: + # check the ownership of the current directory + obs = _run_cmd_action(runtime, 'ls -alh .') + assert obs.exit_code == 0 + # drwx--S--- 2 openhands root 64 Aug 7 23:32 . + # drwxr-xr-x 1 root root 4.0K Aug 7 23:33 .. + for line in obs.content.split('\r\n'): + if ' ..' in line: + # parent directory should be owned by root + assert 'root' in line + assert 'openhands' not in line + elif ' .' in line: + # current directory should be owned by openhands + # and its group should be root + assert 'openhands' in line + assert 'root' in line - # check the ownership of the current directory - action = CmdRunAction(command='ls -alh .') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - # drwx--S--- 2 openhands root 64 Aug 7 23:32 . - # drwxr-xr-x 1 root root 4.0K Aug 7 23:33 .. - for line in obs.content.split('\r\n'): - if ' ..' in line: - # parent directory should be owned by root - assert 'root' in line - assert 'openhands' not in line - elif ' .' in line: - # current directory should be owned by openhands - # and its group should be root - assert 'openhands' in line - assert 'root' in line + # make sure all git operations are allowed + obs = _run_cmd_action(runtime, 'git init') + assert obs.exit_code == 0 - # make sure all git operations are allowed - action = CmdRunAction(command='git init') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 + # create a file + obs = _run_cmd_action(runtime, 'echo "hello" > test_file.txt') + assert obs.exit_code == 0 - # create a file - action = CmdRunAction(command='echo "hello" > test_file.txt') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 + # git add + obs = _run_cmd_action(runtime, 'git add test_file.txt') + assert obs.exit_code == 0 - # git add - action = CmdRunAction(command='git add test_file.txt') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 + # git diff + obs = _run_cmd_action(runtime, 'git diff') + assert obs.exit_code == 0 - # git diff - action = CmdRunAction(command='git diff') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - - # git commit - action = CmdRunAction(command='git commit -m "test commit"') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - - runtime.close(rm_all_containers=False) - time.sleep(1) + # git commit + obs = _run_cmd_action(runtime, 'git commit -m "test commit"') + assert obs.exit_code == 0 + finally: + _close_test_runtime(runtime) diff --git a/tests/runtime/test_browsing.py b/tests/runtime/test_browsing.py index 1108482262..c0ea3e1810 100644 --- a/tests/runtime/test_browsing.py +++ b/tests/runtime/test_browsing.py @@ -1,9 +1,8 @@ """Browsing-related tests for the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox.""" import json -import time -from conftest import _load_runtime +from conftest import _close_test_runtime, _load_runtime from openhands.core.logger import openhands_logger as logger from openhands.events.action import ( @@ -66,8 +65,7 @@ def test_simple_browse(temp_dir, box_class, run_as_openhands): logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert obs.exit_code == 0 - runtime.close(rm_all_containers=False) - time.sleep(1) + _close_test_runtime(runtime) def test_browsergym_eval_env(box_class, temp_dir): @@ -111,5 +109,4 @@ def test_browsergym_eval_env(box_class, temp_dir): logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert json.loads(obs.content) == [0.0] - runtime.close(rm_all_containers=False) - time.sleep(1) + _close_test_runtime(runtime) diff --git a/tests/runtime/test_env_vars.py b/tests/runtime/test_env_vars.py index 316bea63d2..c049404901 100644 --- a/tests/runtime/test_env_vars.py +++ b/tests/runtime/test_env_vars.py @@ -1,10 +1,9 @@ """Env vars related tests for the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox.""" import os -import time from unittest.mock import patch -from conftest import _load_runtime +from conftest import _close_test_runtime, _load_runtime from openhands.events.action import CmdRunAction from openhands.events.observation import CmdOutputObservation @@ -30,8 +29,7 @@ def test_env_vars_os_environ(temp_dir, box_class, run_as_openhands): obs.content.strip().split('\n\r')[0].strip() == 'BAZ' ), f'Output: [{obs.content}] for {box_class}' - runtime.close(rm_all_containers=False) - time.sleep(1) + _close_test_runtime(runtime) def test_env_vars_runtime_operations(temp_dir, box_class): @@ -66,5 +64,4 @@ def test_env_vars_runtime_operations(temp_dir, box_class): and obs.content.strip().split('\r\n')[0].strip() == 'new_value' ) - runtime.close(rm_all_containers=False) - time.sleep(1) + _close_test_runtime(runtime) diff --git a/tests/runtime/test_images.py b/tests/runtime/test_images.py index 4660b363ab..99ff28e969 100644 --- a/tests/runtime/test_images.py +++ b/tests/runtime/test_images.py @@ -1,9 +1,7 @@ """Image-related tests for the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox.""" -import time - import pytest -from conftest import _load_runtime +from conftest import _close_test_runtime, _load_runtime from openhands.core.logger import openhands_logger as logger from openhands.events.action import CmdRunAction @@ -17,7 +15,6 @@ def test_bash_python_version(temp_dir, box_class, base_container_image): """Make sure Python is available in bash.""" if base_container_image not in [ 'python:3.11-bookworm', - 'nikolaik/python-nodejs:python3.11-nodejs22', ]: pytest.skip('This test is only for python-related images') @@ -45,15 +42,13 @@ def test_bash_python_version(temp_dir, box_class, base_container_image): assert obs.exit_code == 0 assert 'pip' in obs.content # Check that pip is available - runtime.close(rm_all_containers=False) - time.sleep(1) + _close_test_runtime(runtime) def test_nodejs_22_version(temp_dir, box_class, base_container_image): """Make sure Node.js is available in bash.""" if base_container_image not in [ 'node:22-bookworm', - 'nikolaik/python-nodejs:python3.11-nodejs22', ]: pytest.skip('This test is only for nodejs-related images') @@ -68,8 +63,7 @@ def test_nodejs_22_version(temp_dir, box_class, base_container_image): assert obs.exit_code == 0 assert 'v22' in obs.content # Check for specific version - runtime.close(rm_all_containers=False) - time.sleep(1) + _close_test_runtime(runtime) def test_go_version(temp_dir, box_class, base_container_image): @@ -90,5 +84,4 @@ def test_go_version(temp_dir, box_class, base_container_image): assert obs.exit_code == 0 assert 'go1.23' in obs.content # Check for specific version - runtime.close(rm_all_containers=False) - time.sleep(1) + _close_test_runtime(runtime) diff --git a/tests/runtime/test_ipython.py b/tests/runtime/test_ipython.py index b648198a27..e818e2636f 100644 --- a/tests/runtime/test_ipython.py +++ b/tests/runtime/test_ipython.py @@ -1,8 +1,12 @@ """Test the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox.""" -import time - -from conftest import _load_runtime +import pytest +from conftest import ( + TEST_IN_CI, + _close_test_runtime, + _get_sandbox_folder, + _load_runtime, +) from openhands.core.logger import openhands_logger as logger from openhands.events.action import ( @@ -28,6 +32,8 @@ from openhands.runtime.client.runtime import EventStreamRuntime def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands): runtime = _load_runtime(temp_dir, box_class, run_as_openhands) + sandbox_dir = _get_sandbox_folder(runtime) + # Test run command action_cmd = CmdRunAction(command='ls -l') logger.info(action_cmd, extra={'msg_type': 'ACTION'}) @@ -48,7 +54,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands): logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert obs.content.strip() == ( 'Hello, `World`!\n' - '[Jupyter current working directory: /workspace]\n' + f'[Jupyter current working directory: {sandbox_dir}]\n' '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]' ) @@ -69,7 +75,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands): assert obs.content == '' # event stream runtime will always use absolute path - assert obs.path == '/workspace/hello.sh' + assert obs.path == f'{sandbox_dir}/hello.sh' # Test read file (file should exist) action_read = FileReadAction(path='hello.sh') @@ -81,7 +87,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands): logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert obs.content == 'echo "Hello, World!"\n' - assert obs.path == '/workspace/hello.sh' + assert obs.path == f'{sandbox_dir}/hello.sh' # clean up action = CmdRunAction(command='rm -rf hello.sh') @@ -90,10 +96,13 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands): logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert obs.exit_code == 0 - runtime.close(rm_all_containers=False) - time.sleep(1) + _close_test_runtime(runtime) +@pytest.mark.skipif( + TEST_IN_CI != 'True', + reason='This test is not working in WSL (file ownership)', +) def test_ipython_multi_user(temp_dir, box_class, run_as_openhands): runtime = _load_runtime(temp_dir, box_class, run_as_openhands) @@ -111,7 +120,7 @@ def test_ipython_multi_user(temp_dir, box_class, run_as_openhands): else: assert 'root' in obs.content - # print pwd + # print the current working directory test_code = 'import os; print(os.getcwd())' action_ipython = IPythonRunCellAction(code=test_code) logger.info(action_ipython, extra={'msg_type': 'ACTION'}) @@ -152,7 +161,6 @@ def test_ipython_multi_user(temp_dir, box_class, run_as_openhands): if run_as_openhands: # -rw-r--r-- 1 openhands root 13 Jul 28 03:53 test.txt assert 'openhands' in obs.content.split('\r\n')[0] - assert 'root' in obs.content.split('\r\n')[0] else: # -rw-r--r-- 1 root root 13 Jul 28 03:53 test.txt assert 'root' in obs.content.split('\r\n')[0] @@ -164,12 +172,12 @@ def test_ipython_multi_user(temp_dir, box_class, run_as_openhands): logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert obs.exit_code == 0 - runtime.close(rm_all_containers=False) - time.sleep(1) + _close_test_runtime(runtime) def test_ipython_simple(temp_dir, box_class): runtime = _load_runtime(temp_dir, box_class) + sandbox_dir = _get_sandbox_folder(runtime) # Test run ipython # get username @@ -183,20 +191,20 @@ def test_ipython_simple(temp_dir, box_class): obs.content.strip() == ( '1\n' - '[Jupyter current working directory: /workspace]\n' + f'[Jupyter current working directory: {sandbox_dir}]\n' '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]' ).strip() ) - runtime.close(rm_all_containers=False) - time.sleep(1) + _close_test_runtime(runtime) def _test_ipython_agentskills_fileop_pwd_impl( runtime: EventStreamRuntime, enable_auto_lint: bool ): + sandbox_dir = _get_sandbox_folder(runtime) # remove everything in /workspace - action = CmdRunAction(command='rm -rf /workspace/*') + action = CmdRunAction(command=f'rm -rf {sandbox_dir}/*') logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) @@ -215,12 +223,12 @@ def _test_ipython_agentskills_fileop_pwd_impl( logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert isinstance(obs, IPythonRunCellObservation) assert obs.content.replace('\r\n', '\n').strip().split('\n') == ( - '[File: /workspace/hello.py (1 lines total)]\n' + f'[File: {sandbox_dir}/hello.py (1 lines total)]\n' '(this is the beginning of the file)\n' '1|\n' '(this is the end of the file)\n' '[File hello.py created.]\n' - '[Jupyter current working directory: /workspace]\n' + f'[Jupyter current working directory: {sandbox_dir}]\n' '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]' ).strip().split('\n') @@ -239,12 +247,12 @@ def _test_ipython_agentskills_fileop_pwd_impl( logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert isinstance(obs, IPythonRunCellObservation) assert obs.content.replace('\r\n', '\n').strip().split('\n') == ( - '[File: /workspace/test/hello.py (1 lines total)]\n' + f'[File: {sandbox_dir}/test/hello.py (1 lines total)]\n' '(this is the beginning of the file)\n' '1|\n' '(this is the end of the file)\n' '[File hello.py created.]\n' - '[Jupyter current working directory: /workspace/test]\n' + f'[Jupyter current working directory: {sandbox_dir}/test]\n' '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]' ).strip().split('\n') @@ -258,10 +266,10 @@ def _test_ipython_agentskills_fileop_pwd_impl( logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert isinstance(obs, IPythonRunCellObservation) assert obs.content.replace('\r\n', '\n').strip().split('\n') == ( - """ + f""" [Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.] ERRORS: -/workspace/test/hello.py:1:3: E999 IndentationError: unexpected indent +{sandbox_dir}/test/hello.py:1:3: E999 IndentationError: unexpected indent [This is how your edit would have looked if applied] ------------------------------------------------- (this is the beginning of the file) @@ -278,7 +286,7 @@ ERRORS: Your changes have NOT been applied. Please fix your edit command and try again. You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code. DO NOT re-run the same failed edit command. Running it again will lead to the same error. -[Jupyter current working directory: /workspace/test] +[Jupyter current working directory: {sandbox_dir}/test] [Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python] """ ).strip().split('\n') @@ -292,39 +300,44 @@ DO NOT re-run the same failed edit command. Running it again will lead to the sa logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert isinstance(obs, IPythonRunCellObservation) assert obs.content.replace('\r\n', '\n').strip().split('\n') == ( - """ -[File: /workspace/test/hello.py (1 lines total after edit)] + f""" +[File: {sandbox_dir}/test/hello.py (1 lines total after edit)] (this is the beginning of the file) 1|print("hello world") (this is the end of the file) [File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] -[Jupyter current working directory: /workspace/test] +[Jupyter current working directory: {sandbox_dir}/test] [Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python] """ ).strip().split('\n') - action = CmdRunAction(command='rm -rf /workspace/*') + action = CmdRunAction(command=f'rm -rf {sandbox_dir}/*') logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert obs.exit_code == 0 - runtime.close(rm_all_containers=False) - time.sleep(1) - -def test_ipython_agentskills_fileop_pwd( - temp_dir, box_class, run_as_openhands, enable_auto_lint +def test_ipython_agentskills_fileop_pwd_with_lint( + temp_dir, box_class, run_as_openhands ): - """Make sure that cd in bash also update the current working directory in ipython.""" - runtime = _load_runtime( - temp_dir, box_class, run_as_openhands, enable_auto_lint=enable_auto_lint + temp_dir, box_class, run_as_openhands, enable_auto_lint=True ) - _test_ipython_agentskills_fileop_pwd_impl(runtime, enable_auto_lint) + _test_ipython_agentskills_fileop_pwd_impl(runtime, True) - runtime.close(rm_all_containers=False) - time.sleep(1) + _close_test_runtime(runtime) + + +def test_ipython_agentskills_fileop_pwd_without_lint( + temp_dir, box_class, run_as_openhands +): + runtime = _load_runtime( + temp_dir, box_class, run_as_openhands, enable_auto_lint=False + ) + _test_ipython_agentskills_fileop_pwd_impl(runtime, False) + + _close_test_runtime(runtime) def test_ipython_agentskills_fileop_pwd_with_userdir(temp_dir, box_class): @@ -392,13 +405,13 @@ def test_ipython_agentskills_fileop_pwd_with_userdir(temp_dir, box_class): '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]' ).strip().split('\n') - runtime.close(rm_all_containers=False) - time.sleep(1) + _close_test_runtime(runtime) def test_ipython_package_install(temp_dir, box_class, run_as_openhands): """Make sure that cd in bash also update the current working directory in ipython.""" runtime = _load_runtime(temp_dir, box_class, run_as_openhands) + sandbox_dir = _get_sandbox_folder(runtime) # It should error out since pymsgbox is not installed action = IPythonRunCellAction(code='import pymsgbox') @@ -424,9 +437,8 @@ def test_ipython_package_install(temp_dir, box_class, run_as_openhands): # import should not error out assert obs.content.strip() == ( '[Code executed successfully with no output]\n' - '[Jupyter current working directory: /workspace]\n' + f'[Jupyter current working directory: {sandbox_dir}]\n' '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]' ) - runtime.close(rm_all_containers=False) - time.sleep(1) + _close_test_runtime(runtime)