mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
(fix) Fix runtime (RT) tests and split tests in 2 actions (openhands/root) (#3791)
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
This commit is contained in:
parent
57390eb26b
commit
554636cf2a
89
.github/workflows/ghcr_runtime.yml
vendored
89
.github/workflows/ghcr_runtime.yml
vendored
@ -1,5 +1,5 @@
|
||||
# Workflow that builds, tests and then pushes the runtime docker images to the ghcr.io repository
|
||||
name: Build, Test and Publish Runtime Image
|
||||
name: Build, Test and Publish RT Image
|
||||
|
||||
# Only run one workflow of the same group at a time.
|
||||
# There can be at most one running and one pending job in a concurrency group at any time.
|
||||
@ -104,9 +104,9 @@ jobs:
|
||||
name: runtime-${{ matrix.base_image.tag }}
|
||||
path: /tmp/runtime-${{ matrix.base_image.tag }}.tar
|
||||
|
||||
# Run unit tests with the EventStream runtime Docker images
|
||||
test_runtime:
|
||||
name: Test Runtime
|
||||
# Run unit tests with the EventStream runtime Docker images as root
|
||||
test_runtime_root:
|
||||
name: RT Unit Tests (Root)
|
||||
needs: [ghcr_build_runtime]
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
@ -164,11 +164,84 @@ jobs:
|
||||
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }}
|
||||
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
SKIP_CONTAINER_LOGS=true \
|
||||
TEST_RUNTIME=eventstream \
|
||||
SANDBOX_USER_ID=$(id -u) \
|
||||
SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
|
||||
TEST_IN_CI=true \
|
||||
poetry run pytest -n 2 --reruns 2 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
|
||||
RUN_AS_OPENHANDS=false \
|
||||
poetry run pytest -n 3 --reruns 1 --reruns-delay 3 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v4
|
||||
env:
|
||||
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
||||
|
||||
# Run unit tests with the EventStream runtime Docker images as openhands user
|
||||
test_runtime_oh:
|
||||
name: RT Unit Tests (openhands)
|
||||
runs-on: ubuntu-latest
|
||||
needs: [ghcr_build_runtime]
|
||||
strategy:
|
||||
matrix:
|
||||
base_image: ['nikolaik']
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Free Disk Space (Ubuntu)
|
||||
uses: jlumbroso/free-disk-space@main
|
||||
with:
|
||||
tool-cache: true
|
||||
android: true
|
||||
dotnet: true
|
||||
haskell: true
|
||||
large-packages: true
|
||||
swap-storage: true
|
||||
# Forked repos can't push to GHCR, so we need to download the image as an artifact
|
||||
- name: Download runtime image for fork
|
||||
if: github.event.pull_request.head.repo.fork
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: runtime-${{ matrix.base_image }}
|
||||
path: /tmp
|
||||
- name: Load runtime image for fork
|
||||
if: github.event.pull_request.head.repo.fork
|
||||
run: |
|
||||
docker load --input /tmp/runtime-${{ matrix.base_image }}.tar
|
||||
- name: Cache Poetry dependencies
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pypoetry
|
||||
~/.virtualenvs
|
||||
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-poetry-
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Install Python dependencies using Poetry
|
||||
run: make install-python-dependencies
|
||||
- name: Run runtime tests
|
||||
run: |
|
||||
# We install pytest-xdist in order to run tests across CPUs. However, tests start to fail when we run
|
||||
# then across more than 2 CPUs for some reason
|
||||
poetry run pip install pytest-xdist
|
||||
|
||||
# Install to be able to retry on failures for flaky tests
|
||||
poetry run pip install pytest-rerunfailures
|
||||
|
||||
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }}
|
||||
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
SKIP_CONTAINER_LOGS=true \
|
||||
TEST_RUNTIME=eventstream \
|
||||
SANDBOX_USER_ID=$(id -u) \
|
||||
SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
|
||||
TEST_IN_CI=true \
|
||||
RUN_AS_OPENHANDS=true \
|
||||
poetry run pytest -n 3 --reruns 1 --reruns-delay 3 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v4
|
||||
env:
|
||||
@ -176,7 +249,7 @@ jobs:
|
||||
|
||||
# Run integration tests with the eventstream runtime Docker image
|
||||
runtime_integration_tests_on_linux:
|
||||
name: Runtime Integration Tests on Linux
|
||||
name: RT Integration Tests (Linux)
|
||||
runs-on: ubuntu-latest
|
||||
needs: [ghcr_build_runtime]
|
||||
strategy:
|
||||
@ -237,7 +310,7 @@ jobs:
|
||||
name: All Runtime Tests Passed
|
||||
if: ${{ !cancelled() && !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test_runtime, runtime_integration_tests_on_linux]
|
||||
needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux]
|
||||
steps:
|
||||
- name: All tests passed
|
||||
run: echo "All runtime tests have passed successfully!"
|
||||
@ -246,7 +319,7 @@ jobs:
|
||||
name: All Runtime Tests Passed
|
||||
if: ${{ cancelled() || contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test_runtime, runtime_integration_tests_on_linux]
|
||||
needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux]
|
||||
steps:
|
||||
- name: Some tests failed
|
||||
run: |
|
||||
|
||||
@ -507,7 +507,7 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
|
||||
if isinstance(value, dict):
|
||||
try:
|
||||
if key is not None and key.lower() == 'agent':
|
||||
logger.openhands_logger.info(
|
||||
logger.openhands_logger.debug(
|
||||
'Attempt to load default agent config from config toml'
|
||||
)
|
||||
non_dict_fields = {
|
||||
@ -517,13 +517,13 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
|
||||
cfg.set_agent_config(agent_config, 'agent')
|
||||
for nested_key, nested_value in value.items():
|
||||
if isinstance(nested_value, dict):
|
||||
logger.openhands_logger.info(
|
||||
logger.openhands_logger.debug(
|
||||
f'Attempt to load group {nested_key} from config toml as agent config'
|
||||
)
|
||||
agent_config = AgentConfig(**nested_value)
|
||||
cfg.set_agent_config(agent_config, nested_key)
|
||||
elif key is not None and key.lower() == 'llm':
|
||||
logger.openhands_logger.info(
|
||||
logger.openhands_logger.debug(
|
||||
'Attempt to load default LLM config from config toml'
|
||||
)
|
||||
non_dict_fields = {
|
||||
@ -533,7 +533,7 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
|
||||
cfg.set_llm_config(llm_config, 'llm')
|
||||
for nested_key, nested_value in value.items():
|
||||
if isinstance(nested_value, dict):
|
||||
logger.openhands_logger.info(
|
||||
logger.openhands_logger.debug(
|
||||
f'Attempt to load group {nested_key} from config toml as llm config'
|
||||
)
|
||||
llm_config = LLMConfig(**nested_value)
|
||||
@ -584,10 +584,10 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
|
||||
|
||||
def finalize_config(cfg: AppConfig):
|
||||
"""More tweaks to the config after it's been loaded."""
|
||||
cfg.workspace_base = os.path.abspath(cfg.workspace_base)
|
||||
# Set workspace_mount_path if not set by the user
|
||||
if cfg.workspace_mount_path is UndefinedString.UNDEFINED:
|
||||
cfg.workspace_mount_path = os.path.abspath(cfg.workspace_base)
|
||||
cfg.workspace_base = os.path.abspath(cfg.workspace_base)
|
||||
cfg.workspace_mount_path = cfg.workspace_base
|
||||
|
||||
if cfg.workspace_mount_rewrite: # and not config.workspace_mount_path:
|
||||
# TODO why do we need to check if workspace_mount_path is None?
|
||||
|
||||
@ -68,6 +68,10 @@ class DockerRuntimeBuilder(RuntimeBuilder):
|
||||
Returns:
|
||||
bool: Whether the Docker image exists in the registry or in the local store
|
||||
"""
|
||||
if not image_name:
|
||||
logger.error(f'Invalid image name: `{image_name}`')
|
||||
return False
|
||||
|
||||
try:
|
||||
logger.info(f'Checking, if image exists locally:\n{image_name}')
|
||||
self.docker_client.images.get(image_name)
|
||||
|
||||
@ -84,7 +84,6 @@ class RuntimeClient:
|
||||
self.lock = asyncio.Lock()
|
||||
self.plugins: dict[str, Plugin] = {}
|
||||
self.browser = BrowserEnv(browsergym_eval_env)
|
||||
self._initial_pwd = work_dir
|
||||
|
||||
@property
|
||||
def initial_pwd(self):
|
||||
@ -116,27 +115,85 @@ class RuntimeClient:
|
||||
logger.info('Runtime client initialized.')
|
||||
|
||||
def _init_user(self, username: str, user_id: int) -> None:
|
||||
"""Create user if not exists."""
|
||||
"""Create working directory and user if not exists.
|
||||
It performs the following steps effectively:
|
||||
* Creates the Working Directory:
|
||||
- Uses mkdir -p to create the directory.
|
||||
- Sets ownership to username:root.
|
||||
- Adjusts permissions to be readable and writable by group and others.
|
||||
* User Verification and Creation:
|
||||
- Checks if the user exists using id -u.
|
||||
- If the user exists with the correct UID, it skips creation.
|
||||
- If the UID differs, it logs a warning and updates self.user_id.
|
||||
- If the user doesn't exist, it proceeds to create the user.
|
||||
* Sudo Configuration:
|
||||
- Appends %sudo ALL=(ALL) NOPASSWD:ALL to /etc/sudoers to grant
|
||||
passwordless sudo access to the sudo group.
|
||||
- Adds the user to the sudo group with the useradd command, handling
|
||||
UID conflicts by incrementing the UID if necessary.
|
||||
"""
|
||||
|
||||
# First create the working directory, independent of the user
|
||||
logger.info(f'Client working directory: {self.initial_pwd}')
|
||||
command = f'umask 002; mkdir -p {self.initial_pwd}'
|
||||
output = subprocess.run(command, shell=True, capture_output=True)
|
||||
out_str = output.stdout.decode()
|
||||
|
||||
command = f'chown -R {username}:root {self.initial_pwd}'
|
||||
output = subprocess.run(command, shell=True, capture_output=True)
|
||||
out_str += output.stdout.decode()
|
||||
|
||||
command = f'chmod g+rw {self.initial_pwd}'
|
||||
output = subprocess.run(command, shell=True, capture_output=True)
|
||||
out_str += output.stdout.decode()
|
||||
logger.debug(f'Created working directory. Output: [{out_str}]')
|
||||
|
||||
# Skip root since it is already created
|
||||
if username == 'root':
|
||||
return
|
||||
|
||||
# Check if the username already exists
|
||||
existing_user_id = -1
|
||||
try:
|
||||
subprocess.run(
|
||||
result = subprocess.run(
|
||||
f'id -u {username}', shell=True, check=True, capture_output=True
|
||||
)
|
||||
logger.debug(f'User {username} already exists. Skipping creation.')
|
||||
existing_user_id = int(result.stdout.decode().strip())
|
||||
|
||||
# The user ID already exists, skip setup
|
||||
if existing_user_id == user_id:
|
||||
logger.debug(
|
||||
f'User `{username}` already has the provided UID {user_id}. Skipping user setup.'
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f'User `{username}` already exists with UID {existing_user_id}. Skipping user setup.'
|
||||
)
|
||||
self.user_id = existing_user_id
|
||||
return
|
||||
except subprocess.CalledProcessError:
|
||||
pass # User does not exist, continue with creation
|
||||
except subprocess.CalledProcessError as e:
|
||||
# Returncode 1 indicates, that the user does not exist yet
|
||||
if e.returncode == 1:
|
||||
logger.debug(
|
||||
f'User `{username}` does not exist. Proceeding with user creation.'
|
||||
)
|
||||
else:
|
||||
logger.error(
|
||||
f'Error checking user `{username}`, skipping setup:\n{e}\n'
|
||||
)
|
||||
raise
|
||||
|
||||
# Add sudoer
|
||||
sudoer_line = r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers"
|
||||
output = subprocess.run(sudoer_line, shell=True, capture_output=True)
|
||||
if output.returncode != 0:
|
||||
raise RuntimeError(f'Failed to add sudoer: {output.stderr.decode()}')
|
||||
logger.debug(f'Added sudoer successfully. Output: [{output.stdout.decode()}]')
|
||||
sudoer_line = r'%sudo ALL=(ALL) NOPASSWD:ALL\n'
|
||||
sudoers_path = '/etc/sudoers.d/99_sudo'
|
||||
if not Path(sudoers_path).exists():
|
||||
with open(sudoers_path, 'w') as f:
|
||||
f.write(sudoer_line)
|
||||
output = subprocess.run(['chmod', '0440', sudoers_path])
|
||||
if output.returncode != 0:
|
||||
logger.error('Failed to chmod 99_sudo file!')
|
||||
else:
|
||||
logger.debug('Added sudoer successfully.')
|
||||
|
||||
# Attempt to add the user, retrying with incremented user_id if necessary
|
||||
while True:
|
||||
@ -144,16 +201,10 @@ class RuntimeClient:
|
||||
f'useradd -rm -d /home/{username} -s /bin/bash '
|
||||
f'-g root -G sudo -u {user_id} {username}'
|
||||
)
|
||||
|
||||
if not os.path.exists(self.initial_pwd):
|
||||
command += f' && mkdir -p {self.initial_pwd}'
|
||||
command += f' && chown -R {username}:root {self.initial_pwd}'
|
||||
command += f' && chmod g+s {self.initial_pwd}'
|
||||
|
||||
output = subprocess.run(command, shell=True, capture_output=True)
|
||||
if output.returncode == 0:
|
||||
logger.debug(
|
||||
f'Added user {username} successfully with UID {user_id}. Output: [{output.stdout.decode()}]'
|
||||
f'Added user `{username}` successfully with UID {user_id}. Output: [{output.stdout.decode()}]'
|
||||
)
|
||||
break
|
||||
elif f'UID {user_id} is not unique' in output.stderr.decode():
|
||||
@ -163,7 +214,7 @@ class RuntimeClient:
|
||||
user_id += 1
|
||||
else:
|
||||
raise RuntimeError(
|
||||
f'Failed to create user {username}: {output.stderr.decode()}'
|
||||
f'Failed to create user `{username}`! Output: [{output.stderr.decode()}]'
|
||||
)
|
||||
|
||||
def _init_bash_shell(self, work_dir: str, username: str) -> None:
|
||||
@ -181,8 +232,8 @@ class RuntimeClient:
|
||||
|
||||
# This should NOT match "PS1=\u@\h:\w [PEXPECT]$" when `env` is executed
|
||||
self.__bash_expect_regex = r'\[PEXPECT_BEGIN\]\s*(.*?)\s*([a-z0-9_-]*)@([a-zA-Z0-9.-]*):(.+)\s*\[PEXPECT_END\]'
|
||||
|
||||
self.shell.sendline(f'export PS1="{self.__bash_PS1}"; export PS2=""')
|
||||
# Set umask to allow group write permissions
|
||||
self.shell.sendline(f'umask 002; export PS1="{self.__bash_PS1}"; export PS2=""')
|
||||
self.shell.expect(self.__bash_expect_regex)
|
||||
|
||||
self.shell.sendline(
|
||||
@ -190,8 +241,11 @@ class RuntimeClient:
|
||||
)
|
||||
self.shell.expect(self.__bash_expect_regex)
|
||||
logger.debug(
|
||||
f'Bash initialized. Working directory: {work_dir}. Output: {self.shell.before}'
|
||||
f'Bash initialized. Working directory: {work_dir}. Output: [{self.shell.before}]'
|
||||
)
|
||||
# Ensure the group has write permissions on the working directory
|
||||
self.shell.sendline(f'chmod g+rw "{work_dir}"')
|
||||
self.shell.expect(self.__bash_expect_regex)
|
||||
|
||||
async def _init_bash_commands(self):
|
||||
logger.info(f'Initializing by running {len(INIT_COMMANDS)} bash commands...')
|
||||
@ -295,14 +349,14 @@ class RuntimeClient:
|
||||
bash_prompt = self._get_bash_prompt_and_update_pwd()
|
||||
if keep_prompt:
|
||||
output += '\r\n' + bash_prompt
|
||||
logger.debug(f'Command output: {output}')
|
||||
# logger.debug(f'Command output:\n{output}')
|
||||
return output, exit_code
|
||||
|
||||
async def run_action(self, action) -> Observation:
|
||||
action_type = action.action
|
||||
logger.debug(f'Running action: {action}')
|
||||
logger.debug(f'Running action:\n{action}')
|
||||
observation = await getattr(self, action_type)(action)
|
||||
logger.debug(f'Action output: {observation}')
|
||||
logger.debug(f'Action output:\n{observation}')
|
||||
return observation
|
||||
|
||||
async def run(self, action: CmdRunAction) -> CmdOutputObservation:
|
||||
@ -355,10 +409,9 @@ class RuntimeClient:
|
||||
_jupyter_plugin: JupyterPlugin = self.plugins['jupyter'] # type: ignore
|
||||
# This is used to make AgentSkills in Jupyter aware of the
|
||||
# current working directory in Bash
|
||||
if self.pwd != getattr(self, '_jupyter_pwd', None):
|
||||
logger.debug(
|
||||
f"{self.pwd} != {getattr(self, '_jupyter_pwd', None)} -> reset Jupyter PWD"
|
||||
)
|
||||
jupyter_pwd = getattr(self, '_jupyter_pwd', None)
|
||||
if self.pwd != jupyter_pwd:
|
||||
logger.debug(f'{self.pwd} != {jupyter_pwd} -> reset Jupyter PWD')
|
||||
reset_jupyter_pwd_code = f'import os; os.chdir("{self.pwd}")'
|
||||
_aux_action = IPythonRunCellAction(code=reset_jupyter_pwd_code)
|
||||
_reset_obs = await _jupyter_plugin.run(_aux_action)
|
||||
@ -450,7 +503,7 @@ class RuntimeClient:
|
||||
os.chown(filepath, file_stat.st_uid, file_stat.st_gid)
|
||||
else:
|
||||
# set the new file permissions if the file is new
|
||||
os.chmod(filepath, 0o644)
|
||||
os.chmod(filepath, 0o664)
|
||||
os.chown(filepath, self.user_id, self.user_id)
|
||||
|
||||
except FileNotFoundError:
|
||||
|
||||
@ -38,8 +38,7 @@ from openhands.runtime.utils.runtime_build import build_runtime_image
|
||||
|
||||
|
||||
class LogBuffer:
|
||||
"""
|
||||
Synchronous buffer for Docker container logs.
|
||||
"""Synchronous buffer for Docker container logs.
|
||||
|
||||
This class provides a thread-safe way to collect, store, and retrieve logs
|
||||
from a Docker container. It uses a list to store log lines and provides methods
|
||||
@ -94,7 +93,7 @@ class LogBuffer:
|
||||
)
|
||||
self.close(timeout=5)
|
||||
|
||||
def close(self, timeout: float = 10.0):
|
||||
def close(self, timeout: float = 5.0):
|
||||
self._stop_event.set()
|
||||
self.log_stream_thread.join(timeout)
|
||||
|
||||
@ -102,6 +101,14 @@ class LogBuffer:
|
||||
class EventStreamRuntime(Runtime):
|
||||
"""This runtime will subscribe the event stream.
|
||||
When receive an event, it will send the event to runtime-client which run inside the docker environment.
|
||||
From the sid also an instance_id is generated in combination with a UID.
|
||||
|
||||
Args:
|
||||
config (AppConfig): The application configuration.
|
||||
event_stream (EventStream): The event stream to subscribe to.
|
||||
sid (str, optional): The session ID. Defaults to 'default'.
|
||||
plugins (list[PluginRequirement] | None, optional): List of plugin requirements. Defaults to None.
|
||||
env_vars (dict[str, str] | None, optional): Environment variables to set. Defaults to None.
|
||||
"""
|
||||
|
||||
container_name_prefix = 'openhands-sandbox-'
|
||||
@ -115,13 +122,16 @@ class EventStreamRuntime(Runtime):
|
||||
env_vars: dict[str, str] | None = None,
|
||||
):
|
||||
self.config = config
|
||||
self._port = find_available_tcp_port()
|
||||
self.api_url = f'http://{self.config.sandbox.api_hostname}:{self._port}'
|
||||
self._host_port = 30000 # initial dummy value
|
||||
self._container_port = 30001 # initial dummy value
|
||||
self.api_url = (
|
||||
f'http://{self.config.sandbox.api_hostname}:{self._container_port}'
|
||||
)
|
||||
self.session = requests.Session()
|
||||
|
||||
self.instance_id = (
|
||||
sid + '_' + str(uuid.uuid4()) if sid is not None else str(uuid.uuid4())
|
||||
)
|
||||
|
||||
self.docker_client: docker.DockerClient = self._init_docker_client()
|
||||
self.base_container_image = self.config.sandbox.base_container_image
|
||||
self.runtime_container_image = self.config.sandbox.runtime_container_image
|
||||
@ -131,7 +141,7 @@ class EventStreamRuntime(Runtime):
|
||||
self.action_semaphore = threading.Semaphore(1) # Ensure one action at a time
|
||||
|
||||
self.runtime_builder = DockerRuntimeBuilder(self.docker_client)
|
||||
logger.debug(f'EventStreamRuntime `{sid}`')
|
||||
logger.debug(f'EventStreamRuntime `{self.instance_id}`')
|
||||
|
||||
# Buffer for container logs
|
||||
self.log_buffer: LogBuffer | None = None
|
||||
@ -140,7 +150,9 @@ class EventStreamRuntime(Runtime):
|
||||
logger.info(
|
||||
f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}'
|
||||
)
|
||||
|
||||
self.skip_container_logs = (
|
||||
os.environ.get('SKIP_CONTAINER_LOGS', 'false').lower() == 'true'
|
||||
)
|
||||
if self.runtime_container_image is None:
|
||||
if self.base_container_image is None:
|
||||
raise ValueError(
|
||||
@ -152,19 +164,18 @@ class EventStreamRuntime(Runtime):
|
||||
extra_deps=self.config.sandbox.runtime_extra_deps,
|
||||
)
|
||||
self.container = self._init_container(
|
||||
self.sandbox_workspace_dir,
|
||||
mount_dir=self.config.workspace_mount_path,
|
||||
sandbox_workspace_dir=self.config.workspace_mount_path_in_sandbox, # e.g. /workspace
|
||||
mount_dir=self.config.workspace_mount_path, # e.g. /opt/openhands/_test_workspace
|
||||
plugins=plugins,
|
||||
)
|
||||
# will initialize both the event stream and the env vars
|
||||
super().__init__(config, event_stream, sid, plugins, env_vars)
|
||||
|
||||
self._wait_until_alive()
|
||||
|
||||
logger.info(
|
||||
f'Container initialized with plugins: {[plugin.name for plugin in self.plugins]}'
|
||||
)
|
||||
logger.info(f'Container initialized with env vars: {env_vars}')
|
||||
time.sleep(1)
|
||||
|
||||
@staticmethod
|
||||
def _init_docker_client() -> docker.DockerClient:
|
||||
@ -196,24 +207,48 @@ class EventStreamRuntime(Runtime):
|
||||
f'--plugins {" ".join([plugin.name for plugin in plugins])} '
|
||||
)
|
||||
|
||||
network_mode: str | None = None
|
||||
port_mapping: dict[str, int] | None = None
|
||||
if self.config.sandbox.use_host_network:
|
||||
network_mode = 'host'
|
||||
self._host_port = self._find_available_port()
|
||||
self._container_port = (
|
||||
self._host_port
|
||||
) # in future this might differ from host port
|
||||
self.api_url = (
|
||||
f'http://{self.config.sandbox.api_hostname}:{self._container_port}'
|
||||
)
|
||||
|
||||
use_host_network = self.config.sandbox.use_host_network
|
||||
network_mode: str | None = 'host' if use_host_network else None
|
||||
port_mapping: dict[str, list[dict[str, str]]] | None = (
|
||||
None
|
||||
if use_host_network
|
||||
else {
|
||||
f'{self._container_port}/tcp': [{'HostPort': str(self._host_port)}]
|
||||
}
|
||||
)
|
||||
|
||||
if use_host_network:
|
||||
logger.warn(
|
||||
'Using host network mode. If you are using MacOS, please make sure you have the latest version of Docker Desktop and enabled host network feature: https://docs.docker.com/network/drivers/host/#docker-desktop'
|
||||
)
|
||||
else:
|
||||
port_mapping = {f'{self._port}/tcp': self._port}
|
||||
|
||||
if mount_dir is not None:
|
||||
# Combine environment variables
|
||||
environment = {
|
||||
'port': str(self._container_port),
|
||||
'PYTHONUNBUFFERED': 1,
|
||||
}
|
||||
if self.config.debug:
|
||||
environment['DEBUG'] = 'true'
|
||||
|
||||
logger.info(f'Workspace Base: {self.config.workspace_base}')
|
||||
if mount_dir is not None and sandbox_workspace_dir is not None:
|
||||
# e.g. result would be: {"/home/user/openhands/workspace": {'bind': "/workspace", 'mode': 'rw'}}
|
||||
volumes = {mount_dir: {'bind': sandbox_workspace_dir, 'mode': 'rw'}}
|
||||
logger.info(f'Mount dir: {sandbox_workspace_dir}')
|
||||
logger.info(f'Mount dir: {mount_dir}')
|
||||
else:
|
||||
logger.warn(
|
||||
'Mount dir is not set, will not mount the workspace directory to the container.'
|
||||
'Warning: Mount dir is not set, will not mount the workspace directory to the container!\n'
|
||||
)
|
||||
volumes = None
|
||||
logger.info(f'Sandbox workspace: {sandbox_workspace_dir}')
|
||||
|
||||
if self.config.sandbox.browsergym_eval_env is not None:
|
||||
browsergym_arg = (
|
||||
@ -225,9 +260,9 @@ class EventStreamRuntime(Runtime):
|
||||
self.runtime_container_image,
|
||||
command=(
|
||||
f'/openhands/miniforge3/bin/mamba run --no-capture-output -n base '
|
||||
'PYTHONUNBUFFERED=1 poetry run '
|
||||
f'python -u -m openhands.runtime.client.client {self._port} '
|
||||
f'--working-dir {sandbox_workspace_dir} '
|
||||
f'poetry run '
|
||||
f'python -u -m openhands.runtime.client.client {self._container_port} '
|
||||
f'--working-dir "{sandbox_workspace_dir}" '
|
||||
f'{plugin_arg}'
|
||||
f'--username {"openhands" if self.config.run_as_openhands else "root"} '
|
||||
f'--user-id {self.config.sandbox.user_id} '
|
||||
@ -235,24 +270,26 @@ class EventStreamRuntime(Runtime):
|
||||
),
|
||||
network_mode=network_mode,
|
||||
ports=port_mapping,
|
||||
working_dir='/openhands/code/',
|
||||
working_dir='/openhands/code/', # do not change this!
|
||||
name=self.container_name,
|
||||
detach=True,
|
||||
environment={'DEBUG': 'true'} if self.config.debug else None,
|
||||
environment=environment,
|
||||
volumes=volumes,
|
||||
)
|
||||
self.log_buffer = LogBuffer(container)
|
||||
logger.info(f'Container started. Server url: {self.api_url}')
|
||||
return container
|
||||
except Exception as e:
|
||||
logger.error('Failed to start container')
|
||||
logger.error(
|
||||
f'Error: Instance {self.instance_id} FAILED to start container!\n'
|
||||
)
|
||||
logger.exception(e)
|
||||
self.close(close_client=False)
|
||||
raise e
|
||||
|
||||
@tenacity.retry(
|
||||
stop=tenacity.stop_after_attempt(10),
|
||||
wait=tenacity.wait_exponential(multiplier=2, min=10, max=60),
|
||||
wait=tenacity.wait_exponential(multiplier=2, min=1, max=20),
|
||||
reraise=(ConnectionRefusedError,),
|
||||
)
|
||||
def _wait_until_alive(self):
|
||||
@ -278,10 +315,11 @@ class EventStreamRuntime(Runtime):
|
||||
)
|
||||
|
||||
if not self.log_buffer.client_ready:
|
||||
time.sleep(1)
|
||||
attempts = 0
|
||||
while not self.log_buffer.client_ready and attempts < 5:
|
||||
while not self.log_buffer.client_ready and attempts < 4:
|
||||
attempts += 1
|
||||
time.sleep(1)
|
||||
time.sleep(2)
|
||||
logs = self.log_buffer.get_and_clear()
|
||||
if logs:
|
||||
formatted_logs = '\n'.join([f' |{log}' for log in logs])
|
||||
@ -303,13 +341,8 @@ class EventStreamRuntime(Runtime):
|
||||
logger.error(msg)
|
||||
raise RuntimeError(msg)
|
||||
|
||||
@property
|
||||
def sandbox_workspace_dir(self):
|
||||
return self.config.workspace_mount_path_in_sandbox
|
||||
|
||||
def close(self, close_client: bool = True, rm_all_containers: bool = True):
|
||||
"""
|
||||
Closes the EventStreamRuntime and associated objects
|
||||
"""Closes the EventStreamRuntime and associated objects
|
||||
|
||||
Parameters:
|
||||
- close_client (bool): Whether to close the DockerClient
|
||||
@ -322,23 +355,29 @@ class EventStreamRuntime(Runtime):
|
||||
if self.session:
|
||||
self.session.close()
|
||||
|
||||
containers = self.docker_client.containers.list(all=True)
|
||||
for container in containers:
|
||||
try:
|
||||
# If the app doesn't shut down properly, it can leave runtime containers on the system. This ensures
|
||||
# that all 'openhands-sandbox-' containers are removed as well.
|
||||
if rm_all_containers and container.name.startswith(
|
||||
self.container_name_prefix
|
||||
):
|
||||
container.remove(force=True)
|
||||
elif container.name == self.container_name:
|
||||
logs = container.logs(tail=1000).decode('utf-8')
|
||||
logger.debug(
|
||||
f'==== Container logs ====\n{logs}\n==== End of container logs ===='
|
||||
)
|
||||
container.remove(force=True)
|
||||
except docker.errors.NotFound:
|
||||
pass
|
||||
try:
|
||||
containers = self.docker_client.containers.list(all=True)
|
||||
for container in containers:
|
||||
try:
|
||||
# If the app doesn't shut down properly, it can leave runtime containers on the system. This ensures
|
||||
# that all 'openhands-sandbox-' containers are removed as well.
|
||||
if rm_all_containers and container.name.startswith(
|
||||
self.container_name_prefix
|
||||
):
|
||||
container.remove(force=True)
|
||||
elif container.name == self.container_name:
|
||||
if not self.skip_container_logs:
|
||||
logs = container.logs(tail=1000).decode('utf-8')
|
||||
logger.debug(
|
||||
f'==== Container logs on close ====\n{logs}\n==== End of container logs ===='
|
||||
)
|
||||
container.remove(force=True)
|
||||
except docker.errors.APIError:
|
||||
pass
|
||||
except docker.errors.NotFound:
|
||||
pass
|
||||
except docker.errors.NotFound: # yes, this can happen!
|
||||
pass
|
||||
|
||||
if close_client:
|
||||
self.docker_client.close()
|
||||
@ -494,3 +533,20 @@ class EventStreamRuntime(Runtime):
|
||||
raise TimeoutError('List files operation timed out')
|
||||
except Exception as e:
|
||||
raise RuntimeError(f'List files operation failed: {str(e)}')
|
||||
|
||||
def _is_port_in_use_docker(self, port):
|
||||
containers = self.docker_client.containers.list()
|
||||
for container in containers:
|
||||
container_ports = container.ports
|
||||
if str(port) in str(container_ports):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _find_available_port(self, max_attempts=5):
|
||||
port = 39999
|
||||
for _ in range(max_attempts):
|
||||
port = find_available_tcp_port(30000, 39999)
|
||||
if not self._is_port_in_use_docker(port):
|
||||
return port
|
||||
# If no port is found after max_attempts, return the last tried port
|
||||
return port
|
||||
|
||||
@ -19,7 +19,7 @@ class JupyterPlugin(Plugin):
|
||||
name: str = 'jupyter'
|
||||
|
||||
async def initialize(self, username: str, kernel_id: str = 'openhands-default'):
|
||||
self.kernel_gateway_port = find_available_tcp_port()
|
||||
self.kernel_gateway_port = find_available_tcp_port(40000, 49999)
|
||||
self.kernel_id = kernel_id
|
||||
self.gateway_process = subprocess.Popen(
|
||||
(
|
||||
|
||||
@ -142,7 +142,7 @@ class RemoteRuntime(Runtime):
|
||||
f'/openhands/miniforge3/bin/mamba run --no-capture-output -n base '
|
||||
'PYTHONUNBUFFERED=1 poetry run '
|
||||
f'python -u -m openhands.runtime.client.client {self.port} '
|
||||
f'--working-dir {self.sandbox_workspace_dir} '
|
||||
f'--working-dir {self.config.workspace_mount_path_in_sandbox} '
|
||||
f'{plugin_arg}'
|
||||
f'--username {"openhands" if self.config.run_as_openhands else "root"} '
|
||||
f'--user-id {self.config.sandbox.user_id} '
|
||||
@ -203,10 +203,6 @@ class RemoteRuntime(Runtime):
|
||||
logger.warning(msg)
|
||||
raise RuntimeError(msg)
|
||||
|
||||
@property
|
||||
def sandbox_workspace_dir(self):
|
||||
return self.config.workspace_mount_path_in_sandbox
|
||||
|
||||
def close(self):
|
||||
if self.runtime_id:
|
||||
try:
|
||||
|
||||
@ -67,7 +67,6 @@ class Runtime:
|
||||
self.config = copy.deepcopy(config)
|
||||
self.DEFAULT_ENV_VARS = _default_env_vars(config.sandbox)
|
||||
atexit.register(self.close)
|
||||
logger.debug(f'Runtime `{sid}`')
|
||||
|
||||
if self.DEFAULT_ENV_VARS:
|
||||
logger.debug(f'Adding default env vars: {self.DEFAULT_ENV_VARS}')
|
||||
|
||||
@ -1,4 +1,7 @@
|
||||
from openhands.runtime.utils.bash import split_bash_commands
|
||||
from openhands.runtime.utils.system import find_available_tcp_port
|
||||
from openhands.runtime.utils.system import (
|
||||
display_number_matrix,
|
||||
find_available_tcp_port,
|
||||
)
|
||||
|
||||
__all__ = ['find_available_tcp_port', 'split_bash_commands']
|
||||
__all__ = ['display_number_matrix', 'find_available_tcp_port', 'split_bash_commands']
|
||||
|
||||
@ -55,7 +55,7 @@ def _put_source_code_to_dir(temp_dir: str):
|
||||
' ', r'\ '
|
||||
) # escape spaces in the project root
|
||||
result = subprocess.run(
|
||||
f'python -m build -s -o {temp_dir} {_cleaned_project_root}',
|
||||
f'python -m build -s -o "{temp_dir}" {_cleaned_project_root}',
|
||||
shell=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
@ -142,13 +142,14 @@ def prep_docker_build_folder(
|
||||
skip_init=skip_init,
|
||||
extra_deps=extra_deps,
|
||||
)
|
||||
logger.debug(
|
||||
(
|
||||
f'===== Dockerfile content start =====\n'
|
||||
f'{dockerfile_content}\n'
|
||||
f'===== Dockerfile content end ====='
|
||||
if os.getenv('SKIP_CONTAINER_LOGS', 'false') != 'true':
|
||||
logger.debug(
|
||||
(
|
||||
f'===== Dockerfile content start =====\n'
|
||||
f'{dockerfile_content}\n'
|
||||
f'===== Dockerfile content end ====='
|
||||
)
|
||||
)
|
||||
)
|
||||
with open(os.path.join(dir_path, 'Dockerfile'), 'w') as file:
|
||||
file.write(dockerfile_content)
|
||||
|
||||
|
||||
@ -60,6 +60,7 @@ RUN cd /openhands/code && \
|
||||
{{ extra_deps }} {% if extra_deps %} && {% endif %} \
|
||||
/openhands/miniforge3/bin/mamba run -n base poetry cache clear --all . && \
|
||||
{% if not skip_init %}chmod -R g+rws /openhands/poetry && {% endif %} \
|
||||
mkdir -p /openhands/workspace && chmod -R g+rws,o+rw /openhands/workspace && \
|
||||
apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||
/openhands/miniforge3/bin/mamba clean --all
|
||||
|
||||
|
||||
@ -1,17 +1,34 @@
|
||||
import random
|
||||
import socket
|
||||
import time
|
||||
|
||||
|
||||
def find_available_tcp_port() -> int:
|
||||
"""Find an available TCP port, return -1 if none available."""
|
||||
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
try:
|
||||
sock.bind(('localhost', 0))
|
||||
port = sock.getsockname()[1]
|
||||
return port
|
||||
except Exception:
|
||||
return -1
|
||||
finally:
|
||||
sock.close()
|
||||
def find_available_tcp_port(min_port=30000, max_port=39999, max_attempts=10) -> int:
|
||||
"""Find an available TCP port in a specified range.
|
||||
|
||||
Args:
|
||||
min_port (int): The lower bound of the port range (default: 30000)
|
||||
max_port (int): The upper bound of the port range (default: 39999)
|
||||
max_attempts (int): Maximum number of attempts to find an available port (default: 10)
|
||||
|
||||
Returns:
|
||||
int: An available port number, or -1 if none found after max_attempts
|
||||
"""
|
||||
rng = random.SystemRandom()
|
||||
ports = list(range(min_port, max_port + 1))
|
||||
rng.shuffle(ports)
|
||||
|
||||
for port in ports[:max_attempts]:
|
||||
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
try:
|
||||
sock.bind(('localhost', port))
|
||||
return port
|
||||
except OSError:
|
||||
time.sleep(0.1) # Short delay to further reduce chance of collisions
|
||||
continue
|
||||
finally:
|
||||
sock.close()
|
||||
return -1
|
||||
|
||||
|
||||
def display_number_matrix(number: int) -> str | None:
|
||||
|
||||
@ -1,2 +1,3 @@
|
||||
[pytest]
|
||||
addopts = -p no:warnings
|
||||
asyncio_default_fixture_loop_scope = function
|
||||
|
||||
@ -5,6 +5,8 @@ set -eo pipefail
|
||||
## CONSTANTS AND ENVIRONMENTAL VARIABLES ##
|
||||
##############################################################
|
||||
|
||||
echo -e "\n\n============================================================"
|
||||
|
||||
# unset environmental variables that might disturb testing
|
||||
unset OPENAI_API_KEY
|
||||
unset SANDBOX_ENV_OPENAI_API_KEY
|
||||
@ -16,7 +18,7 @@ get_script_dir() {
|
||||
local source="${BASH_SOURCE[0]}"
|
||||
while [ -h "$source" ]; do
|
||||
local dir="$( cd -P "$( dirname "$source" )" && pwd )"
|
||||
source="$(readlink "$source")"
|
||||
source="$(readlink -f "$source" 2>/dev/null || echo "$source")"
|
||||
[[ $source != /* ]] && source="$dir/$source"
|
||||
done
|
||||
echo "$( cd -P "$( dirname "$source" )" && pwd )"
|
||||
@ -27,9 +29,6 @@ TMP_FILE="${TMP_FILE:-tmp.log}"
|
||||
if [ -z "$WORKSPACE_BASE" ]; then
|
||||
WORKSPACE_BASE=$(pwd)
|
||||
fi
|
||||
if [ -z "$WORKSPACE_MOUNT_PATH" ]; then
|
||||
WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE
|
||||
fi
|
||||
|
||||
DEBUG=true # needed for llm logging to create mock files!
|
||||
|
||||
@ -39,7 +38,7 @@ fi
|
||||
|
||||
export SCRIPT_DIR=$(get_script_dir)
|
||||
export PROJECT_ROOT=$(realpath "$SCRIPT_DIR/../..")
|
||||
export LOG_DIR=$PROJECT_ROOT/logs
|
||||
export LOG_DIR="$PROJECT_ROOT/logs"
|
||||
echo "Current working directory: $(pwd)"
|
||||
echo "SCRIPT_DIR: $SCRIPT_DIR"
|
||||
echo "PROJECT_ROOT: $PROJECT_ROOT"
|
||||
@ -47,22 +46,29 @@ echo "LOG_DIR: $LOG_DIR"
|
||||
echo "LOG_TO_FILE: $LOG_TO_FILE"
|
||||
|
||||
WORKSPACE_BASE=${WORKSPACE_BASE}/_test_workspace
|
||||
mkdir -p $WORKSPACE_BASE
|
||||
chmod -R 777 $WORKSPACE_BASE
|
||||
WORKSPACE_BASE=$(realpath $WORKSPACE_BASE)
|
||||
mkdir -p "$WORKSPACE_BASE"
|
||||
chmod -R 777 "$WORKSPACE_BASE"
|
||||
WORKSPACE_BASE=$(realpath "$WORKSPACE_BASE")
|
||||
|
||||
WORKSPACE_MOUNT_PATH=${WORKSPACE_MOUNT_PATH}/_test_workspace
|
||||
mkdir -p $WORKSPACE_MOUNT_PATH
|
||||
chmod -R 777 $WORKSPACE_MOUNT_PATH
|
||||
WORKSPACE_MOUNT_PATH=$(realpath $WORKSPACE_MOUNT_PATH)
|
||||
if [ -z "$WORKSPACE_MOUNT_PATH" ]; then
|
||||
WORKSPACE_MOUNT_PATH="$WORKSPACE_BASE"
|
||||
else
|
||||
WORKSPACE_MOUNT_PATH="${WORKSPACE_MOUNT_PATH}/_test_workspace"
|
||||
mkdir -p "$WORKSPACE_MOUNT_PATH"
|
||||
chmod -R 755 "$WORKSPACE_MOUNT_PATH"
|
||||
WORKSPACE_MOUNT_PATH=$(realpath "$WORKSPACE_MOUNT_PATH")
|
||||
fi
|
||||
|
||||
WORKSPACE_MOUNT_PATH_IN_SANDBOX="${WORKSPACE_MOUNT_PATH_IN_SANDBOX:-/workspace}"
|
||||
|
||||
echo "WORKSPACE_BASE: $WORKSPACE_BASE"
|
||||
echo "WORKSPACE_MOUNT_PATH: $WORKSPACE_MOUNT_PATH"
|
||||
echo "WORKSPACE_MOUNT_PATH_IN_SANDBOX: $WORKSPACE_MOUNT_PATH_IN_SANDBOX"
|
||||
|
||||
# Ensure we're in the correct directory
|
||||
cd "$PROJECT_ROOT" || exit 1
|
||||
|
||||
mkdir -p $WORKSPACE_BASE
|
||||
mkdir -p "$WORKSPACE_BASE"
|
||||
|
||||
# use environmental variable if exists
|
||||
TEST_RUNTIME="${TEST_RUNTIME:-eventstream}"
|
||||
@ -178,7 +184,7 @@ cleanup() {
|
||||
kill $HTTP_SERVER_PID || true
|
||||
unset HTTP_SERVER_PID
|
||||
fi
|
||||
[ -f $TMP_FILE ] && rm $TMP_FILE
|
||||
[ -f "$TMP_FILE" ] && rm "$TMP_FILE"
|
||||
echo "Cleanup done!"
|
||||
}
|
||||
|
||||
@ -200,14 +206,14 @@ regenerate_without_llm() {
|
||||
PROJECT_ROOT="$PROJECT_ROOT" \
|
||||
WORKSPACE_BASE="$WORKSPACE_BASE" \
|
||||
WORKSPACE_MOUNT_PATH="$WORKSPACE_MOUNT_PATH" \
|
||||
MAX_ITERATIONS=$MAX_ITERATIONS \
|
||||
MAX_ITERATIONS="$MAX_ITERATIONS" \
|
||||
FORCE_APPLY_PROMPTS=true \
|
||||
DEFAULT_AGENT=$agent \
|
||||
DEFAULT_AGENT="$agent" \
|
||||
TEST_RUNTIME="$TEST_RUNTIME" \
|
||||
LLM=$LLM \
|
||||
DEBUG=$DEBUG \
|
||||
LOG_TO_FILE=$LOG_TO_FILE \
|
||||
FORCE_REGENERATE=$FORCE_REGENERATE \
|
||||
LLM="$LLM" \
|
||||
DEBUG="$DEBUG" \
|
||||
LOG_TO_FILE="$LOG_TO_FILE" \
|
||||
FORCE_REGENERATE="$FORCE_REGENERATE" \
|
||||
SANDBOX_BASE_CONTAINER_IMAGE="$SANDBOX_BASE_CONTAINER_IMAGE" \
|
||||
poetry run pytest -s "$SCRIPT_DIR/test_agent.py::$test_name"
|
||||
set +x
|
||||
@ -216,12 +222,12 @@ regenerate_without_llm() {
|
||||
regenerate_with_llm() {
|
||||
cd "$PROJECT_ROOT"
|
||||
|
||||
rm -rf $WORKSPACE_BASE/*
|
||||
rm -rf "$WORKSPACE_BASE/*"
|
||||
if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then
|
||||
cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE
|
||||
cp -r "$SCRIPT_DIR/workspace/$test_name"/* "$WORKSPACE_BASE"
|
||||
fi
|
||||
|
||||
rm -rf logs
|
||||
rm -rf "$LOG_DIR"
|
||||
rm -rf "$SCRIPT_DIR/mock/${TEST_RUNTIME}_runtime/$agent/$test_name/*"
|
||||
# set -x to print the command being executed
|
||||
set -x
|
||||
@ -233,12 +239,12 @@ regenerate_with_llm() {
|
||||
DEFAULT_AGENT=$agent \
|
||||
RUNTIME="$TEST_RUNTIME" \
|
||||
SANDBOX_BASE_CONTAINER_IMAGE="$SANDBOX_BASE_CONTAINER_IMAGE" \
|
||||
LLM=$LLM \
|
||||
DEBUG=$DEBUG \
|
||||
LOG_TO_FILE=$LOG_TO_FILE \
|
||||
FORCE_REGENERATE=$FORCE_REGENERATE \
|
||||
LLM="$LLM" \
|
||||
DEBUG="$DEBUG" \
|
||||
LOG_TO_FILE="$LOG_TO_FILE" \
|
||||
FORCE_REGENERATE="$FORCE_REGENERATE" \
|
||||
poetry run python "$PROJECT_ROOT/openhands/core/main.py" \
|
||||
-i $MAX_ITERATIONS \
|
||||
-i "$MAX_ITERATIONS" \
|
||||
-t "$task Do not ask me for confirmation at any point." \
|
||||
-c $agent
|
||||
set +x
|
||||
@ -256,8 +262,8 @@ if [ "$num_of_tests" -ne "${#test_names[@]}" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
rm -rf logs
|
||||
rm -rf $WORKSPACE_BASE/*
|
||||
rm -rf "$LOG_DIR"
|
||||
rm -rf "$WORKSPACE_BASE/*"
|
||||
for ((i = 0; i < num_of_tests; i++)); do
|
||||
task=${tasks[i]}
|
||||
test_name=${test_names[i]}
|
||||
@ -286,9 +292,9 @@ for ((i = 0; i < num_of_tests; i++)); do
|
||||
cd "$PROJECT_ROOT/tests"
|
||||
cd "$PROJECT_ROOT"
|
||||
|
||||
rm -rf $WORKSPACE_BASE/*
|
||||
rm -rf "$WORKSPACE_BASE/*"
|
||||
if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then
|
||||
cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE
|
||||
cp -r "$SCRIPT_DIR/workspace/$test_name"/* "$WORKSPACE_BASE"
|
||||
fi
|
||||
|
||||
if [ "$TEST_ONLY" ]; then
|
||||
@ -395,7 +401,7 @@ for ((i = 0; i < num_of_tests; i++)); do
|
||||
fi
|
||||
done
|
||||
|
||||
rm -rf logs
|
||||
rm -rf $WORKSPACE_BASE
|
||||
rm -rf "$LOG_DIR"
|
||||
rm -rf "$WORKSPACE_BASE"
|
||||
echo "Done!"
|
||||
cd "$PROJECT_ROOT"
|
||||
|
||||
@ -6,7 +6,7 @@ import subprocess
|
||||
import pytest
|
||||
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import AppConfig, SandboxConfig, load_from_env
|
||||
from openhands.core.config import load_app_config
|
||||
from openhands.core.main import run_controller
|
||||
from openhands.core.schema import AgentState
|
||||
from openhands.events.action import (
|
||||
@ -21,36 +21,23 @@ TEST_RUNTIME = os.getenv('TEST_RUNTIME')
|
||||
assert TEST_RUNTIME in ['eventstream', 'remote']
|
||||
_ = get_runtime_cls(TEST_RUNTIME) # make sure it does not raise an error
|
||||
|
||||
CONFIG = AppConfig(
|
||||
max_iterations=int(os.getenv('MAX_ITERATIONS', 20)),
|
||||
max_budget_per_task=int(os.getenv('MAX_BUDGET_PER_TASK', 15)),
|
||||
runtime=TEST_RUNTIME,
|
||||
default_agent=os.getenv('DEFAULT_AGENT'),
|
||||
workspace_base=os.getenv('WORKSPACE_BASE'),
|
||||
workspace_mount_path=os.getenv('WORKSPACE_MOUNT_PATH'),
|
||||
sandbox=SandboxConfig(
|
||||
use_host_network=True,
|
||||
),
|
||||
CONFIG = load_app_config()
|
||||
CONFIG.max_iterations = int(os.getenv('MAX_ITERATIONS', 20))
|
||||
CONFIG.max_budget_per_task = int(os.getenv('MAX_BUDGET_PER_TASK', 15))
|
||||
CONFIG.runtime = TEST_RUNTIME
|
||||
CONFIG.default_agent = os.getenv('DEFAULT_AGENT')
|
||||
CONFIG.workspace_base = os.getenv('WORKSPACE_BASE')
|
||||
CONFIG.workspace_mount_path = os.getenv('WORKSPACE_MOUNT_PATH')
|
||||
CONFIG.workspace_mount_path_in_sandbox = os.getenv(
|
||||
'WORKSPACE_MOUNT_PATH_IN_SANDBOX', '/workspace'
|
||||
)
|
||||
load_from_env(CONFIG, os.environ)
|
||||
CONFIG.sandbox.use_host_network = True
|
||||
|
||||
print('\nPaths used:')
|
||||
print(f'workspace_base: {CONFIG.workspace_base}')
|
||||
print(f'workspace_mount_path: {CONFIG.workspace_mount_path}')
|
||||
print(f'workspace_mount_path_in_sandbox: {CONFIG.workspace_mount_path_in_sandbox}')
|
||||
|
||||
# Check if running in WSL environment
|
||||
if 'WSL_DISTRO_NAME' in os.environ:
|
||||
if (
|
||||
CONFIG.workspace_base
|
||||
and CONFIG.workspace_mount_path
|
||||
and CONFIG.workspace_base != CONFIG.workspace_mount_path
|
||||
):
|
||||
print(
|
||||
'\n**********\nWARNING: if WORKSPACE_MOUNT_PATH is set differently to'
|
||||
'\nWORKSPACE_BASE some file operation tests may fail!\n**********\n'
|
||||
)
|
||||
|
||||
|
||||
def get_number_of_prompts(test_name: str):
|
||||
mock_dir = os.path.join(
|
||||
|
||||
@ -1,11 +1,15 @@
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
import stat
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from pytest import TempPathFactory
|
||||
|
||||
from openhands.core.config import AppConfig, SandboxConfig, load_from_env
|
||||
from openhands.core.config import load_app_config
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events import EventStream
|
||||
from openhands.runtime.client.runtime import EventStreamRuntime
|
||||
from openhands.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
|
||||
@ -13,19 +17,86 @@ from openhands.runtime.remote.runtime import RemoteRuntime
|
||||
from openhands.runtime.runtime import Runtime
|
||||
from openhands.storage import get_file_store
|
||||
|
||||
TEST_IN_CI = os.getenv('TEST_IN_CI', 'False').lower() in ['true', '1', 'yes']
|
||||
TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'eventstream').lower()
|
||||
RUN_AS_OPENHANDS = os.getenv('RUN_AS_OPENHANDS', 'True').lower() in ['true', '1', 'yes']
|
||||
test_mount_path = ''
|
||||
project_dir = os.path.dirname(
|
||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
)
|
||||
sandbox_test_folder = '/openhands/workspace'
|
||||
|
||||
|
||||
def _get_runtime_sid(runtime: Runtime):
|
||||
logger.debug(f'\nruntime.sid: {runtime.sid}')
|
||||
return runtime.sid
|
||||
|
||||
|
||||
def _get_host_folder(runtime: Runtime):
|
||||
return runtime.config.workspace_mount_path
|
||||
|
||||
|
||||
def _get_sandbox_folder(runtime: Runtime):
|
||||
sid = _get_runtime_sid(runtime)
|
||||
if sid:
|
||||
return Path(os.path.join(sandbox_test_folder, sid))
|
||||
return None
|
||||
|
||||
|
||||
def _remove_folder(folder: str) -> bool:
|
||||
success = False
|
||||
if folder and os.path.isdir(folder):
|
||||
try:
|
||||
os.rmdir(folder)
|
||||
success = True
|
||||
except OSError:
|
||||
try:
|
||||
shutil.rmtree(folder)
|
||||
success = True
|
||||
except OSError:
|
||||
pass
|
||||
logger.debug(f'\nCleanup: `{folder}`: ' + ('[OK]' if success else '[FAILED]'))
|
||||
return success
|
||||
|
||||
|
||||
def _close_test_runtime(runtime: Runtime):
|
||||
if isinstance(runtime, EventStreamRuntime):
|
||||
runtime.close(rm_all_containers=False)
|
||||
else:
|
||||
runtime.close()
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
def _reset_pwd():
|
||||
global project_dir
|
||||
# Try to change back to project directory
|
||||
try:
|
||||
os.chdir(project_dir)
|
||||
logger.info(f'Changed back to project directory `{project_dir}')
|
||||
except Exception as e:
|
||||
logger.error(f'Failed to change back to project directory: {e}')
|
||||
|
||||
|
||||
# *****************************************************************************
|
||||
# *****************************************************************************
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def print_method_name(request):
|
||||
print('\n########################################################################')
|
||||
print(
|
||||
'\n\n########################################################################'
|
||||
)
|
||||
print(f'Running test: {request.node.name}')
|
||||
print('########################################################################')
|
||||
yield
|
||||
print(
|
||||
'########################################################################\n\n'
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def temp_dir(tmp_path_factory: TempPathFactory) -> str:
|
||||
"""
|
||||
Creates a unique temporary directory
|
||||
def temp_dir(tmp_path_factory: TempPathFactory, request) -> str:
|
||||
"""Creates a unique temporary directory.
|
||||
Upon finalization, the temporary directory and its content is removed.
|
||||
The cleanup function is also called upon KeyboardInterrupt.
|
||||
|
||||
Parameters:
|
||||
- tmp_path_factory (TempPathFactory): A TempPathFactory class
|
||||
@ -33,15 +104,23 @@ def temp_dir(tmp_path_factory: TempPathFactory) -> str:
|
||||
Returns:
|
||||
- str: The temporary directory path that was created
|
||||
"""
|
||||
|
||||
unique_suffix = random.randint(10000, 99999)
|
||||
temp_directory = tmp_path_factory.mktemp(
|
||||
f'test_runtime_{unique_suffix}', numbered=False
|
||||
temp_dir = tmp_path_factory.mktemp(
|
||||
'rt_' + str(random.randint(100000, 999999)), numbered=False
|
||||
)
|
||||
return str(temp_directory)
|
||||
|
||||
logger.info(f'\n*** {request.node.name}\n>> temp folder: {temp_dir}\n')
|
||||
|
||||
TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'eventstream')
|
||||
# Set permissions to ensure the directory is writable and deletable
|
||||
os.chmod(temp_dir, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) # 0777 permissions
|
||||
|
||||
def cleanup():
|
||||
global project_dir
|
||||
os.chdir(project_dir)
|
||||
_remove_folder(temp_dir)
|
||||
|
||||
request.addfinalizer(cleanup)
|
||||
|
||||
return str(temp_dir)
|
||||
|
||||
|
||||
# Depending on TEST_RUNTIME, feed the appropriate box class(es) to the test.
|
||||
@ -55,28 +134,47 @@ def get_box_classes():
|
||||
raise ValueError(f'Invalid runtime: {runtime}')
|
||||
|
||||
|
||||
def get_run_as_openhands():
|
||||
print(
|
||||
'\n\n########################################################################'
|
||||
)
|
||||
print('USER: ' + 'openhands' if RUN_AS_OPENHANDS else 'root')
|
||||
print(
|
||||
'########################################################################\n\n'
|
||||
)
|
||||
return [RUN_AS_OPENHANDS]
|
||||
|
||||
|
||||
@pytest.fixture(scope='module') # for xdist
|
||||
def runtime_setup_module():
|
||||
_reset_pwd()
|
||||
yield
|
||||
_reset_pwd()
|
||||
|
||||
|
||||
@pytest.fixture(scope='session') # not for xdist
|
||||
def runtime_setup_session():
|
||||
_reset_pwd()
|
||||
yield
|
||||
_reset_pwd()
|
||||
|
||||
|
||||
# This assures that all tests run together per runtime, not alternating between them,
|
||||
# which cause errors (especially outside GitHub actions).
|
||||
@pytest.fixture(scope='module', params=get_box_classes())
|
||||
def box_class(request):
|
||||
time.sleep(2)
|
||||
time.sleep(1)
|
||||
return request.param
|
||||
|
||||
|
||||
# TODO: We will change this to `run_as_user` when `ServerRuntime` is deprecated.
|
||||
# since `EventStreamRuntime` supports running as an arbitrary user.
|
||||
@pytest.fixture(scope='module', params=[True, False])
|
||||
@pytest.fixture(scope='module', params=get_run_as_openhands())
|
||||
def run_as_openhands(request):
|
||||
time.sleep(1)
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(scope='module', params=[True, False])
|
||||
def enable_auto_lint(request):
|
||||
time.sleep(1)
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(scope='module', params=None)
|
||||
def base_container_image(request):
|
||||
time.sleep(1)
|
||||
@ -96,21 +194,12 @@ def base_container_image(request):
|
||||
if request.param is None:
|
||||
request.param = pytest.param(
|
||||
'nikolaik/python-nodejs:python3.11-nodejs22',
|
||||
'python:3.11-bookworm',
|
||||
'node:22-bookworm',
|
||||
'golang:1.23-bookworm',
|
||||
)
|
||||
print(f'Container image: {request.param}')
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def runtime(temp_dir, box_class, run_as_openhands):
|
||||
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
|
||||
yield runtime
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
def _load_runtime(
|
||||
temp_dir,
|
||||
box_class,
|
||||
@ -118,29 +207,45 @@ def _load_runtime(
|
||||
enable_auto_lint: bool = False,
|
||||
base_container_image: str | None = None,
|
||||
browsergym_eval_env: str | None = None,
|
||||
use_workspace: bool | None = None,
|
||||
) -> Runtime:
|
||||
sid = 'test'
|
||||
cli_session = 'main_test'
|
||||
sid = 'rt_' + str(random.randint(100000, 999999))
|
||||
|
||||
# AgentSkills need to be initialized **before** Jupyter
|
||||
# otherwise Jupyter will not access the proper dependencies installed by AgentSkills
|
||||
plugins = [AgentSkillsRequirement(), JupyterRequirement()]
|
||||
config = AppConfig(
|
||||
workspace_base=temp_dir,
|
||||
workspace_mount_path=temp_dir,
|
||||
sandbox=SandboxConfig(
|
||||
use_host_network=True,
|
||||
browsergym_eval_env=browsergym_eval_env,
|
||||
),
|
||||
)
|
||||
load_from_env(config, os.environ)
|
||||
|
||||
config = load_app_config()
|
||||
config.run_as_openhands = run_as_openhands
|
||||
|
||||
# Folder where all tests create their own folder
|
||||
global test_mount_path
|
||||
if use_workspace:
|
||||
test_mount_path = os.path.join(config.workspace_base, 'rt')
|
||||
else:
|
||||
test_mount_path = os.path.join(
|
||||
temp_dir, sid
|
||||
) # need a subfolder to avoid conflicts
|
||||
config.workspace_mount_path = test_mount_path
|
||||
|
||||
# Mounting folder specific for this test inside the sandbox
|
||||
config.workspace_mount_path_in_sandbox = f'{sandbox_test_folder}/{sid}'
|
||||
print('\nPaths used:')
|
||||
print(f'use_host_network: {config.sandbox.use_host_network}')
|
||||
print(f'workspace_base: {config.workspace_base}')
|
||||
print(f'workspace_mount_path: {config.workspace_mount_path}')
|
||||
print(
|
||||
f'workspace_mount_path_in_sandbox: {config.workspace_mount_path_in_sandbox}\n'
|
||||
)
|
||||
|
||||
config.sandbox.browsergym_eval_env = browsergym_eval_env
|
||||
config.sandbox.enable_auto_lint = enable_auto_lint
|
||||
|
||||
if base_container_image is not None:
|
||||
config.sandbox.base_container_image = base_container_image
|
||||
|
||||
file_store = get_file_store(config.file_store, config.file_store_path)
|
||||
event_stream = EventStream(cli_session, file_store)
|
||||
event_stream = EventStream(sid, file_store)
|
||||
|
||||
runtime = box_class(
|
||||
config=config,
|
||||
@ -148,9 +253,14 @@ def _load_runtime(
|
||||
sid=sid,
|
||||
plugins=plugins,
|
||||
)
|
||||
time.sleep(1)
|
||||
time.sleep(2)
|
||||
return runtime
|
||||
|
||||
|
||||
# Export necessary function
|
||||
__all__ = ['_load_runtime']
|
||||
__all__ = [
|
||||
'_load_runtime',
|
||||
'_get_host_folder',
|
||||
'_get_sandbox_folder',
|
||||
'_remove_folder',
|
||||
]
|
||||
|
||||
@ -1,11 +1,14 @@
|
||||
"""Bash-related tests for the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import time
|
||||
|
||||
import pytest
|
||||
from conftest import _load_runtime
|
||||
from conftest import (
|
||||
TEST_IN_CI,
|
||||
_close_test_runtime,
|
||||
_get_sandbox_folder,
|
||||
_load_runtime,
|
||||
)
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action import CmdRunAction
|
||||
@ -16,77 +19,63 @@ from openhands.events.observation import CmdOutputObservation
|
||||
# ============================================================================================================================
|
||||
|
||||
|
||||
def _run_cmd_action(runtime, custom_command: str, keep_prompt=True):
|
||||
action = CmdRunAction(command=custom_command, keep_prompt=keep_prompt)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
return obs
|
||||
|
||||
|
||||
def test_bash_command_pexcept(temp_dir, box_class, run_as_openhands):
|
||||
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
|
||||
try:
|
||||
# We set env var PS1="\u@\h:\w $"
|
||||
# and construct the PEXCEPT prompt base on it.
|
||||
# When run `env`, bad implementation of CmdRunAction will be pexcepted by this
|
||||
# and failed to pexcept the right content, causing it fail to get error code.
|
||||
obs = runtime.run_action(CmdRunAction(command='env'))
|
||||
|
||||
# We set env var PS1="\u@\h:\w $"
|
||||
# and construct the PEXCEPT prompt base on it.
|
||||
# When run `env`, bad implementation of CmdRunAction will be pexcepted by this
|
||||
# and failed to pexcept the right content, causing it fail to get error code.
|
||||
obs = runtime.run_action(CmdRunAction(command='env'))
|
||||
# For example:
|
||||
# 02:16:13 - openhands:DEBUG: client.py:78 - Executing command: env
|
||||
# 02:16:13 - openhands:DEBUG: client.py:82 - Command output: PYTHONUNBUFFERED=1
|
||||
# CONDA_EXE=/openhands/miniforge3/bin/conda
|
||||
# [...]
|
||||
# LC_CTYPE=C.UTF-8
|
||||
# PS1=\u@\h:\w $
|
||||
# 02:16:13 - openhands:DEBUG: client.py:89 - Executing command for exit code: env
|
||||
# 02:16:13 - openhands:DEBUG: client.py:92 - Exit code Output:
|
||||
# CONDA_DEFAULT_ENV=base
|
||||
|
||||
# For example:
|
||||
# 02:16:13 - openhands:DEBUG: client.py:78 - Executing command: env
|
||||
# 02:16:13 - openhands:DEBUG: client.py:82 - Command output: PYTHONUNBUFFERED=1
|
||||
# CONDA_EXE=/openhands/miniforge3/bin/conda
|
||||
# [...]
|
||||
# LC_CTYPE=C.UTF-8
|
||||
# PS1=\u@\h:\w $
|
||||
# 02:16:13 - openhands:DEBUG: client.py:89 - Executing command for exit code: env
|
||||
# 02:16:13 - openhands:DEBUG: client.py:92 - Exit code Output:
|
||||
# CONDA_DEFAULT_ENV=base
|
||||
|
||||
# As long as the exit code is 0, the test will pass.
|
||||
assert isinstance(
|
||||
obs, CmdOutputObservation
|
||||
), 'The observation should be a CmdOutputObservation.'
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
# As long as the exit code is 0, the test will pass.
|
||||
assert isinstance(
|
||||
obs, CmdOutputObservation
|
||||
), 'The observation should be a CmdOutputObservation.'
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_single_multiline_command(temp_dir, box_class):
|
||||
def test_multiline_commands(temp_dir, box_class):
|
||||
runtime = _load_runtime(temp_dir, box_class)
|
||||
try:
|
||||
# single multiline command
|
||||
obs = _run_cmd_action(runtime, 'echo \\\n -e "foo"')
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert 'foo' in obs.content
|
||||
|
||||
action = CmdRunAction(command='echo \\\n -e "foo"')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert 'foo' in obs.content
|
||||
# test multiline echo
|
||||
obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"')
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert 'hello\r\nworld' in obs.content
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
def test_multiline_echo(temp_dir, box_class):
|
||||
runtime = _load_runtime(temp_dir, box_class)
|
||||
|
||||
action = CmdRunAction(command='echo -e "hello\nworld"')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert 'hello\r\nworld' in obs.content
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
def test_runtime_whitespace(temp_dir, box_class):
|
||||
runtime = _load_runtime(temp_dir, box_class)
|
||||
|
||||
action = CmdRunAction(command='echo -e "\\n\\n\\n"')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert '\r\n\r\n\r\n' in obs.content
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
# test whitespace
|
||||
obs = _run_cmd_action(runtime, 'echo -e "\\n\\n\\n"')
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert '\r\n\r\n\r\n' in obs.content
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_multiple_multiline_commands(temp_dir, box_class, run_as_openhands):
|
||||
@ -120,48 +109,36 @@ world "
|
||||
joined_cmds = '\n'.join(cmds)
|
||||
|
||||
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
|
||||
try:
|
||||
obs = _run_cmd_action(runtime, joined_cmds)
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
|
||||
action = CmdRunAction(command=joined_cmds)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
|
||||
assert 'total 0' in obs.content
|
||||
assert 'hello\r\nworld' in obs.content
|
||||
assert "hello it\\'s me" in obs.content
|
||||
assert 'hello -v' in obs.content
|
||||
assert 'hello\r\nworld\r\nare\r\nyou\r\nthere?' in obs.content
|
||||
assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content
|
||||
assert 'hello\r\nworld "\r\n' in obs.content
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
assert 'total 0' in obs.content
|
||||
assert 'hello\r\nworld' in obs.content
|
||||
assert "hello it\\'s me" in obs.content
|
||||
assert 'hello -v' in obs.content
|
||||
assert 'hello\r\nworld\r\nare\r\nyou\r\nthere?' in obs.content
|
||||
assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content
|
||||
assert 'hello\r\nworld "\r\n' in obs.content
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_no_ps2_in_output(temp_dir, box_class, run_as_openhands):
|
||||
"""Test that the PS2 sign is not added to the output of a multiline command."""
|
||||
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
|
||||
try:
|
||||
obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"')
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
|
||||
action = CmdRunAction(command='echo -e "hello\nworld"')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
assert 'hello\r\nworld' in obs.content
|
||||
assert '>' not in obs.content
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
assert 'hello\r\nworld' in obs.content
|
||||
assert '>' not in obs.content
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_multiline_command_loop(temp_dir, box_class):
|
||||
# https://github.com/All-Hands-AI/OpenHands/issues/3143
|
||||
|
||||
runtime = _load_runtime(temp_dir, box_class)
|
||||
|
||||
init_cmd = """
|
||||
mkdir -p _modules && \
|
||||
for month in {01..04}; do
|
||||
@ -171,15 +148,6 @@ for month in {01..04}; do
|
||||
done
|
||||
echo "created files"
|
||||
"""
|
||||
action = CmdRunAction(command=init_cmd)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert 'created files' in obs.content
|
||||
|
||||
follow_up_cmd = """
|
||||
for file in _modules/*.md; do
|
||||
new_date=$(echo $file | sed -E 's/2024-(01|02|03|04)-/2024-/;s/2024-01/2024-08/;s/2024-02/2024-09/;s/2024-03/2024-10/;s/2024-04/2024-11/')
|
||||
@ -187,153 +155,104 @@ for file in _modules/*.md; do
|
||||
done
|
||||
echo "success"
|
||||
"""
|
||||
action = CmdRunAction(command=follow_up_cmd)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
runtime = _load_runtime(temp_dir, box_class)
|
||||
try:
|
||||
obs = _run_cmd_action(runtime, init_cmd)
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert 'created files' in obs.content
|
||||
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert 'success' in obs.content
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
obs = _run_cmd_action(runtime, follow_up_cmd)
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert 'success' in obs.content
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_cmd_run(temp_dir, box_class, run_as_openhands):
|
||||
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
|
||||
try:
|
||||
obs = _run_cmd_action(runtime, 'ls -l /openhands/workspace')
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(command='ls -l')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert 'total 0' in obs.content
|
||||
obs = _run_cmd_action(runtime, 'ls -l')
|
||||
assert obs.exit_code == 0
|
||||
assert 'total 0' in obs.content
|
||||
|
||||
action = CmdRunAction(command='mkdir test')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
obs = _run_cmd_action(runtime, 'mkdir test')
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(command='ls -l')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
if run_as_openhands:
|
||||
assert 'openhands' in obs.content
|
||||
else:
|
||||
assert 'root' in obs.content
|
||||
assert 'test' in obs.content
|
||||
obs = _run_cmd_action(runtime, 'ls -l')
|
||||
assert obs.exit_code == 0
|
||||
if run_as_openhands:
|
||||
assert 'openhands' in obs.content
|
||||
else:
|
||||
assert 'root' in obs.content
|
||||
assert 'test' in obs.content
|
||||
|
||||
action = CmdRunAction(command='touch test/foo.txt')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
obs = _run_cmd_action(runtime, 'touch test/foo.txt')
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(command='ls -l test')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert 'foo.txt' in obs.content
|
||||
obs = _run_cmd_action(runtime, 'ls -l test')
|
||||
assert obs.exit_code == 0
|
||||
assert 'foo.txt' in obs.content
|
||||
|
||||
# clean up: this is needed, since CI will not be
|
||||
# run as root, and this test may leave a file
|
||||
# owned by root
|
||||
action = CmdRunAction(command='rm -rf test')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
# clean up: this is needed, since CI will not be
|
||||
# run as root, and this test may leave a file
|
||||
# owned by root
|
||||
_run_cmd_action(runtime, 'rm -rf test')
|
||||
assert obs.exit_code == 0
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_run_as_user_correct_home_dir(temp_dir, box_class, run_as_openhands):
|
||||
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
|
||||
|
||||
action = CmdRunAction(command='cd ~ && pwd')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
if run_as_openhands:
|
||||
assert '/home/openhands' in obs.content
|
||||
else:
|
||||
assert '/root' in obs.content
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
try:
|
||||
obs = _run_cmd_action(runtime, 'cd ~ && pwd')
|
||||
assert obs.exit_code == 0
|
||||
if run_as_openhands:
|
||||
assert '/home/openhands' in obs.content
|
||||
else:
|
||||
assert '/root' in obs.content
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_multi_cmd_run_in_single_line(temp_dir, box_class):
|
||||
runtime = _load_runtime(temp_dir, box_class)
|
||||
|
||||
action = CmdRunAction(command='pwd && ls -l')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert '/workspace' in obs.content
|
||||
assert 'total 0' in obs.content
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
try:
|
||||
obs = _run_cmd_action(runtime, 'pwd && ls -l')
|
||||
assert obs.exit_code == 0
|
||||
assert '/workspace' in obs.content
|
||||
assert 'total 0' in obs.content
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_stateful_cmd(temp_dir, box_class):
|
||||
runtime = _load_runtime(temp_dir, box_class)
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
try:
|
||||
obs = _run_cmd_action(runtime, 'mkdir -p test')
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
|
||||
action = CmdRunAction(command='mkdir test')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
obs = _run_cmd_action(runtime, 'cd test')
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
|
||||
action = CmdRunAction(command='cd test')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
|
||||
action = CmdRunAction(command='pwd')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert '/workspace/test' in obs.content
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
obs = _run_cmd_action(runtime, 'pwd')
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert f'{sandbox_dir}/test' in obs.content
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_failed_cmd(temp_dir, box_class):
|
||||
runtime = _load_runtime(temp_dir, box_class)
|
||||
|
||||
action = CmdRunAction(command='non_existing_command')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code != 0, 'The exit code should not be 0 for a failed command.'
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
try:
|
||||
obs = _run_cmd_action(runtime, 'non_existing_command')
|
||||
assert obs.exit_code != 0, 'The exit code should not be 0 for a failed command.'
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def _create_test_file(host_temp_dir):
|
||||
@ -344,154 +263,121 @@ def _create_test_file(host_temp_dir):
|
||||
|
||||
def test_copy_single_file(temp_dir, box_class):
|
||||
runtime = _load_runtime(temp_dir, box_class)
|
||||
try:
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
sandbox_file = os.path.join(sandbox_dir, 'test_file.txt')
|
||||
_create_test_file(temp_dir)
|
||||
runtime.copy_to(os.path.join(temp_dir, 'test_file.txt'), sandbox_dir)
|
||||
|
||||
with tempfile.TemporaryDirectory() as host_temp_dir:
|
||||
_create_test_file(host_temp_dir)
|
||||
runtime.copy_to(os.path.join(host_temp_dir, 'test_file.txt'), '/workspace')
|
||||
obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
|
||||
assert obs.exit_code == 0
|
||||
assert 'test_file.txt' in obs.content
|
||||
|
||||
action = CmdRunAction(command='ls -alh /workspace')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert 'test_file.txt' in obs.content
|
||||
|
||||
action = CmdRunAction(command='cat /workspace/test_file.txt')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert 'Hello, World!' in obs.content
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
obs = _run_cmd_action(runtime, f'cat {sandbox_file}')
|
||||
assert obs.exit_code == 0
|
||||
assert 'Hello, World!' in obs.content
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def _create_test_dir_with_files(host_temp_dir):
|
||||
os.mkdir(os.path.join(host_temp_dir, 'test_dir'))
|
||||
with open(os.path.join(host_temp_dir, 'test_dir', 'file1.txt'), 'w') as f:
|
||||
def _create_host_test_dir_with_files(test_dir):
|
||||
logger.debug(f'creating `{test_dir}`')
|
||||
if not os.path.isdir(test_dir):
|
||||
os.makedirs(test_dir, exist_ok=True)
|
||||
logger.debug('creating test files in `test_dir`')
|
||||
with open(os.path.join(test_dir, 'file1.txt'), 'w') as f:
|
||||
f.write('File 1 content')
|
||||
with open(os.path.join(host_temp_dir, 'test_dir', 'file2.txt'), 'w') as f:
|
||||
with open(os.path.join(test_dir, 'file2.txt'), 'w') as f:
|
||||
f.write('File 2 content')
|
||||
|
||||
|
||||
def test_copy_directory_recursively(temp_dir, box_class):
|
||||
runtime = _load_runtime(temp_dir, box_class)
|
||||
|
||||
with tempfile.TemporaryDirectory() as host_temp_dir:
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
try:
|
||||
temp_dir_copy = os.path.join(temp_dir, 'test_dir')
|
||||
# We need a separate directory, since temp_dir is mounted to /workspace
|
||||
_create_test_dir_with_files(host_temp_dir)
|
||||
runtime.copy_to(
|
||||
os.path.join(host_temp_dir, 'test_dir'), '/workspace', recursive=True
|
||||
)
|
||||
_create_host_test_dir_with_files(temp_dir_copy)
|
||||
|
||||
action = CmdRunAction(command='ls -alh /workspace')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert 'test_dir' in obs.content
|
||||
assert 'file1.txt' not in obs.content
|
||||
assert 'file2.txt' not in obs.content
|
||||
runtime.copy_to(temp_dir_copy, sandbox_dir, recursive=True)
|
||||
|
||||
action = CmdRunAction(command='ls -alh /workspace/test_dir')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert 'file1.txt' in obs.content
|
||||
assert 'file2.txt' in obs.content
|
||||
obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
|
||||
assert obs.exit_code == 0
|
||||
assert 'test_dir' in obs.content
|
||||
assert 'file1.txt' not in obs.content
|
||||
assert 'file2.txt' not in obs.content
|
||||
|
||||
action = CmdRunAction(command='cat /workspace/test_dir/file1.txt')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert 'File 1 content' in obs.content
|
||||
obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}/test_dir')
|
||||
assert obs.exit_code == 0
|
||||
assert 'file1.txt' in obs.content
|
||||
assert 'file2.txt' in obs.content
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/test_dir/file1.txt')
|
||||
assert obs.exit_code == 0
|
||||
assert 'File 1 content' in obs.content
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_copy_to_non_existent_directory(temp_dir, box_class):
|
||||
runtime = _load_runtime(temp_dir, box_class)
|
||||
|
||||
with tempfile.TemporaryDirectory() as host_temp_dir:
|
||||
_create_test_file(host_temp_dir)
|
||||
try:
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
_create_test_file(temp_dir)
|
||||
runtime.copy_to(
|
||||
os.path.join(host_temp_dir, 'test_file.txt'), '/workspace/new_dir'
|
||||
os.path.join(temp_dir, 'test_file.txt'), f'{sandbox_dir}/new_dir'
|
||||
)
|
||||
|
||||
action = CmdRunAction(command='cat /workspace/new_dir/test_file.txt')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert 'Hello, World!' in obs.content
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/new_dir/test_file.txt')
|
||||
assert obs.exit_code == 0
|
||||
assert 'Hello, World!' in obs.content
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_overwrite_existing_file(temp_dir, box_class):
|
||||
runtime = _load_runtime(temp_dir, box_class)
|
||||
try:
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
|
||||
# touch a file in /workspace
|
||||
action = CmdRunAction(command='touch /workspace/test_file.txt')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(command='cat /workspace/test_file.txt')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert 'Hello, World!' not in obs.content
|
||||
obs = _run_cmd_action(runtime, f'touch {sandbox_dir}/test_file.txt')
|
||||
assert obs.exit_code == 0
|
||||
|
||||
with tempfile.TemporaryDirectory() as host_temp_dir:
|
||||
_create_test_file(host_temp_dir)
|
||||
runtime.copy_to(os.path.join(host_temp_dir, 'test_file.txt'), '/workspace')
|
||||
obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(command='cat /workspace/test_file.txt')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert 'Hello, World!' in obs.content
|
||||
obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/test_file.txt')
|
||||
assert obs.exit_code == 0
|
||||
assert 'Hello, World!' not in obs.content
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
_create_test_file(temp_dir)
|
||||
runtime.copy_to(os.path.join(temp_dir, 'test_file.txt'), sandbox_dir)
|
||||
|
||||
obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/test_file.txt')
|
||||
assert obs.exit_code == 0
|
||||
assert 'Hello, World!' in obs.content
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_copy_non_existent_file(temp_dir, box_class):
|
||||
runtime = _load_runtime(temp_dir, box_class)
|
||||
try:
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
with pytest.raises(FileNotFoundError):
|
||||
runtime.copy_to(
|
||||
os.path.join(sandbox_dir, 'non_existent_file.txt'),
|
||||
f'{sandbox_dir}/should_not_exist.txt',
|
||||
)
|
||||
|
||||
with pytest.raises(FileNotFoundError):
|
||||
runtime.copy_to(
|
||||
os.path.join(temp_dir, 'non_existent_file.txt'),
|
||||
'/workspace/should_not_exist.txt',
|
||||
)
|
||||
|
||||
action = CmdRunAction(command='ls /workspace/should_not_exist.txt')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code != 0 # File should not exist
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
obs = _run_cmd_action(runtime, f'ls {sandbox_dir}/should_not_exist.txt')
|
||||
assert obs.exit_code != 0 # File should not exist
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_keep_prompt(box_class, temp_dir):
|
||||
@ -500,27 +386,26 @@ def test_keep_prompt(box_class, temp_dir):
|
||||
box_class=box_class,
|
||||
run_as_openhands=False,
|
||||
)
|
||||
try:
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
|
||||
action = CmdRunAction(command='touch /workspace/test_file.txt')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert 'root@' in obs.content
|
||||
obs = _run_cmd_action(runtime, f'touch {sandbox_dir}/test_file.txt')
|
||||
assert obs.exit_code == 0
|
||||
assert 'root@' in obs.content
|
||||
|
||||
action = CmdRunAction(command='cat /workspace/test_file.txt', keep_prompt=False)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert 'root@' not in obs.content
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
obs = _run_cmd_action(
|
||||
runtime, f'cat {sandbox_dir}/test_file.txt', keep_prompt=False
|
||||
)
|
||||
assert obs.exit_code == 0
|
||||
assert 'root@' not in obs.content
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
TEST_IN_CI != 'True',
|
||||
reason='This test is not working in WSL (file ownership)',
|
||||
)
|
||||
def test_git_operation(box_class):
|
||||
# do not mount workspace, since workspace mount by tests will be owned by root
|
||||
# while the user_id we get via os.getuid() is different from root
|
||||
@ -531,69 +416,43 @@ def test_git_operation(box_class):
|
||||
# Need to use non-root user to expose issues
|
||||
run_as_openhands=True,
|
||||
)
|
||||
|
||||
# this will happen if permission of runtime is not properly configured
|
||||
# fatal: detected dubious ownership in repository at '/workspace'
|
||||
try:
|
||||
# check the ownership of the current directory
|
||||
obs = _run_cmd_action(runtime, 'ls -alh .')
|
||||
assert obs.exit_code == 0
|
||||
# drwx--S--- 2 openhands root 64 Aug 7 23:32 .
|
||||
# drwxr-xr-x 1 root root 4.0K Aug 7 23:33 ..
|
||||
for line in obs.content.split('\r\n'):
|
||||
if ' ..' in line:
|
||||
# parent directory should be owned by root
|
||||
assert 'root' in line
|
||||
assert 'openhands' not in line
|
||||
elif ' .' in line:
|
||||
# current directory should be owned by openhands
|
||||
# and its group should be root
|
||||
assert 'openhands' in line
|
||||
assert 'root' in line
|
||||
|
||||
# check the ownership of the current directory
|
||||
action = CmdRunAction(command='ls -alh .')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
# drwx--S--- 2 openhands root 64 Aug 7 23:32 .
|
||||
# drwxr-xr-x 1 root root 4.0K Aug 7 23:33 ..
|
||||
for line in obs.content.split('\r\n'):
|
||||
if ' ..' in line:
|
||||
# parent directory should be owned by root
|
||||
assert 'root' in line
|
||||
assert 'openhands' not in line
|
||||
elif ' .' in line:
|
||||
# current directory should be owned by openhands
|
||||
# and its group should be root
|
||||
assert 'openhands' in line
|
||||
assert 'root' in line
|
||||
# make sure all git operations are allowed
|
||||
obs = _run_cmd_action(runtime, 'git init')
|
||||
assert obs.exit_code == 0
|
||||
|
||||
# make sure all git operations are allowed
|
||||
action = CmdRunAction(command='git init')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
# create a file
|
||||
obs = _run_cmd_action(runtime, 'echo "hello" > test_file.txt')
|
||||
assert obs.exit_code == 0
|
||||
|
||||
# create a file
|
||||
action = CmdRunAction(command='echo "hello" > test_file.txt')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
# git add
|
||||
obs = _run_cmd_action(runtime, 'git add test_file.txt')
|
||||
assert obs.exit_code == 0
|
||||
|
||||
# git add
|
||||
action = CmdRunAction(command='git add test_file.txt')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
# git diff
|
||||
obs = _run_cmd_action(runtime, 'git diff')
|
||||
assert obs.exit_code == 0
|
||||
|
||||
# git diff
|
||||
action = CmdRunAction(command='git diff')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
# git commit
|
||||
action = CmdRunAction(command='git commit -m "test commit"')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
# git commit
|
||||
obs = _run_cmd_action(runtime, 'git commit -m "test commit"')
|
||||
assert obs.exit_code == 0
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
@ -1,9 +1,8 @@
|
||||
"""Browsing-related tests for the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
|
||||
|
||||
import json
|
||||
import time
|
||||
|
||||
from conftest import _load_runtime
|
||||
from conftest import _close_test_runtime, _load_runtime
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action import (
|
||||
@ -66,8 +65,7 @@ def test_simple_browse(temp_dir, box_class, run_as_openhands):
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_browsergym_eval_env(box_class, temp_dir):
|
||||
@ -111,5 +109,4 @@ def test_browsergym_eval_env(box_class, temp_dir):
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert json.loads(obs.content) == [0.0]
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
@ -1,10 +1,9 @@
|
||||
"""Env vars related tests for the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
|
||||
|
||||
import os
|
||||
import time
|
||||
from unittest.mock import patch
|
||||
|
||||
from conftest import _load_runtime
|
||||
from conftest import _close_test_runtime, _load_runtime
|
||||
|
||||
from openhands.events.action import CmdRunAction
|
||||
from openhands.events.observation import CmdOutputObservation
|
||||
@ -30,8 +29,7 @@ def test_env_vars_os_environ(temp_dir, box_class, run_as_openhands):
|
||||
obs.content.strip().split('\n\r')[0].strip() == 'BAZ'
|
||||
), f'Output: [{obs.content}] for {box_class}'
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_env_vars_runtime_operations(temp_dir, box_class):
|
||||
@ -66,5 +64,4 @@ def test_env_vars_runtime_operations(temp_dir, box_class):
|
||||
and obs.content.strip().split('\r\n')[0].strip() == 'new_value'
|
||||
)
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
@ -1,9 +1,7 @@
|
||||
"""Image-related tests for the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
|
||||
|
||||
import time
|
||||
|
||||
import pytest
|
||||
from conftest import _load_runtime
|
||||
from conftest import _close_test_runtime, _load_runtime
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action import CmdRunAction
|
||||
@ -17,7 +15,6 @@ def test_bash_python_version(temp_dir, box_class, base_container_image):
|
||||
"""Make sure Python is available in bash."""
|
||||
if base_container_image not in [
|
||||
'python:3.11-bookworm',
|
||||
'nikolaik/python-nodejs:python3.11-nodejs22',
|
||||
]:
|
||||
pytest.skip('This test is only for python-related images')
|
||||
|
||||
@ -45,15 +42,13 @@ def test_bash_python_version(temp_dir, box_class, base_container_image):
|
||||
assert obs.exit_code == 0
|
||||
assert 'pip' in obs.content # Check that pip is available
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_nodejs_22_version(temp_dir, box_class, base_container_image):
|
||||
"""Make sure Node.js is available in bash."""
|
||||
if base_container_image not in [
|
||||
'node:22-bookworm',
|
||||
'nikolaik/python-nodejs:python3.11-nodejs22',
|
||||
]:
|
||||
pytest.skip('This test is only for nodejs-related images')
|
||||
|
||||
@ -68,8 +63,7 @@ def test_nodejs_22_version(temp_dir, box_class, base_container_image):
|
||||
assert obs.exit_code == 0
|
||||
assert 'v22' in obs.content # Check for specific version
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_go_version(temp_dir, box_class, base_container_image):
|
||||
@ -90,5 +84,4 @@ def test_go_version(temp_dir, box_class, base_container_image):
|
||||
assert obs.exit_code == 0
|
||||
assert 'go1.23' in obs.content # Check for specific version
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
@ -1,8 +1,12 @@
|
||||
"""Test the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
|
||||
|
||||
import time
|
||||
|
||||
from conftest import _load_runtime
|
||||
import pytest
|
||||
from conftest import (
|
||||
TEST_IN_CI,
|
||||
_close_test_runtime,
|
||||
_get_sandbox_folder,
|
||||
_load_runtime,
|
||||
)
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action import (
|
||||
@ -28,6 +32,8 @@ from openhands.runtime.client.runtime import EventStreamRuntime
|
||||
def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands):
|
||||
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
|
||||
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
|
||||
# Test run command
|
||||
action_cmd = CmdRunAction(command='ls -l')
|
||||
logger.info(action_cmd, extra={'msg_type': 'ACTION'})
|
||||
@ -48,7 +54,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands):
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.content.strip() == (
|
||||
'Hello, `World`!\n'
|
||||
'[Jupyter current working directory: /workspace]\n'
|
||||
f'[Jupyter current working directory: {sandbox_dir}]\n'
|
||||
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
|
||||
)
|
||||
|
||||
@ -69,7 +75,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands):
|
||||
|
||||
assert obs.content == ''
|
||||
# event stream runtime will always use absolute path
|
||||
assert obs.path == '/workspace/hello.sh'
|
||||
assert obs.path == f'{sandbox_dir}/hello.sh'
|
||||
|
||||
# Test read file (file should exist)
|
||||
action_read = FileReadAction(path='hello.sh')
|
||||
@ -81,7 +87,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands):
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
assert obs.content == 'echo "Hello, World!"\n'
|
||||
assert obs.path == '/workspace/hello.sh'
|
||||
assert obs.path == f'{sandbox_dir}/hello.sh'
|
||||
|
||||
# clean up
|
||||
action = CmdRunAction(command='rm -rf hello.sh')
|
||||
@ -90,10 +96,13 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands):
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
TEST_IN_CI != 'True',
|
||||
reason='This test is not working in WSL (file ownership)',
|
||||
)
|
||||
def test_ipython_multi_user(temp_dir, box_class, run_as_openhands):
|
||||
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
|
||||
|
||||
@ -111,7 +120,7 @@ def test_ipython_multi_user(temp_dir, box_class, run_as_openhands):
|
||||
else:
|
||||
assert 'root' in obs.content
|
||||
|
||||
# print pwd
|
||||
# print the current working directory
|
||||
test_code = 'import os; print(os.getcwd())'
|
||||
action_ipython = IPythonRunCellAction(code=test_code)
|
||||
logger.info(action_ipython, extra={'msg_type': 'ACTION'})
|
||||
@ -152,7 +161,6 @@ def test_ipython_multi_user(temp_dir, box_class, run_as_openhands):
|
||||
if run_as_openhands:
|
||||
# -rw-r--r-- 1 openhands root 13 Jul 28 03:53 test.txt
|
||||
assert 'openhands' in obs.content.split('\r\n')[0]
|
||||
assert 'root' in obs.content.split('\r\n')[0]
|
||||
else:
|
||||
# -rw-r--r-- 1 root root 13 Jul 28 03:53 test.txt
|
||||
assert 'root' in obs.content.split('\r\n')[0]
|
||||
@ -164,12 +172,12 @@ def test_ipython_multi_user(temp_dir, box_class, run_as_openhands):
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_ipython_simple(temp_dir, box_class):
|
||||
runtime = _load_runtime(temp_dir, box_class)
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
|
||||
# Test run ipython
|
||||
# get username
|
||||
@ -183,20 +191,20 @@ def test_ipython_simple(temp_dir, box_class):
|
||||
obs.content.strip()
|
||||
== (
|
||||
'1\n'
|
||||
'[Jupyter current working directory: /workspace]\n'
|
||||
f'[Jupyter current working directory: {sandbox_dir}]\n'
|
||||
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
|
||||
).strip()
|
||||
)
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def _test_ipython_agentskills_fileop_pwd_impl(
|
||||
runtime: EventStreamRuntime, enable_auto_lint: bool
|
||||
):
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
# remove everything in /workspace
|
||||
action = CmdRunAction(command='rm -rf /workspace/*')
|
||||
action = CmdRunAction(command=f'rm -rf {sandbox_dir}/*')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -215,12 +223,12 @@ def _test_ipython_agentskills_fileop_pwd_impl(
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, IPythonRunCellObservation)
|
||||
assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
|
||||
'[File: /workspace/hello.py (1 lines total)]\n'
|
||||
f'[File: {sandbox_dir}/hello.py (1 lines total)]\n'
|
||||
'(this is the beginning of the file)\n'
|
||||
'1|\n'
|
||||
'(this is the end of the file)\n'
|
||||
'[File hello.py created.]\n'
|
||||
'[Jupyter current working directory: /workspace]\n'
|
||||
f'[Jupyter current working directory: {sandbox_dir}]\n'
|
||||
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
|
||||
).strip().split('\n')
|
||||
|
||||
@ -239,12 +247,12 @@ def _test_ipython_agentskills_fileop_pwd_impl(
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, IPythonRunCellObservation)
|
||||
assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
|
||||
'[File: /workspace/test/hello.py (1 lines total)]\n'
|
||||
f'[File: {sandbox_dir}/test/hello.py (1 lines total)]\n'
|
||||
'(this is the beginning of the file)\n'
|
||||
'1|\n'
|
||||
'(this is the end of the file)\n'
|
||||
'[File hello.py created.]\n'
|
||||
'[Jupyter current working directory: /workspace/test]\n'
|
||||
f'[Jupyter current working directory: {sandbox_dir}/test]\n'
|
||||
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
|
||||
).strip().split('\n')
|
||||
|
||||
@ -258,10 +266,10 @@ def _test_ipython_agentskills_fileop_pwd_impl(
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, IPythonRunCellObservation)
|
||||
assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
|
||||
"""
|
||||
f"""
|
||||
[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]
|
||||
ERRORS:
|
||||
/workspace/test/hello.py:1:3: E999 IndentationError: unexpected indent
|
||||
{sandbox_dir}/test/hello.py:1:3: E999 IndentationError: unexpected indent
|
||||
[This is how your edit would have looked if applied]
|
||||
-------------------------------------------------
|
||||
(this is the beginning of the file)
|
||||
@ -278,7 +286,7 @@ ERRORS:
|
||||
Your changes have NOT been applied. Please fix your edit command and try again.
|
||||
You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.
|
||||
DO NOT re-run the same failed edit command. Running it again will lead to the same error.
|
||||
[Jupyter current working directory: /workspace/test]
|
||||
[Jupyter current working directory: {sandbox_dir}/test]
|
||||
[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
|
||||
"""
|
||||
).strip().split('\n')
|
||||
@ -292,39 +300,44 @@ DO NOT re-run the same failed edit command. Running it again will lead to the sa
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, IPythonRunCellObservation)
|
||||
assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
|
||||
"""
|
||||
[File: /workspace/test/hello.py (1 lines total after edit)]
|
||||
f"""
|
||||
[File: {sandbox_dir}/test/hello.py (1 lines total after edit)]
|
||||
(this is the beginning of the file)
|
||||
1|print("hello world")
|
||||
(this is the end of the file)
|
||||
[File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
|
||||
[Jupyter current working directory: /workspace/test]
|
||||
[Jupyter current working directory: {sandbox_dir}/test]
|
||||
[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
|
||||
"""
|
||||
).strip().split('\n')
|
||||
|
||||
action = CmdRunAction(command='rm -rf /workspace/*')
|
||||
action = CmdRunAction(command=f'rm -rf {sandbox_dir}/*')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
def test_ipython_agentskills_fileop_pwd(
|
||||
temp_dir, box_class, run_as_openhands, enable_auto_lint
|
||||
def test_ipython_agentskills_fileop_pwd_with_lint(
|
||||
temp_dir, box_class, run_as_openhands
|
||||
):
|
||||
"""Make sure that cd in bash also update the current working directory in ipython."""
|
||||
|
||||
runtime = _load_runtime(
|
||||
temp_dir, box_class, run_as_openhands, enable_auto_lint=enable_auto_lint
|
||||
temp_dir, box_class, run_as_openhands, enable_auto_lint=True
|
||||
)
|
||||
_test_ipython_agentskills_fileop_pwd_impl(runtime, enable_auto_lint)
|
||||
_test_ipython_agentskills_fileop_pwd_impl(runtime, True)
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_ipython_agentskills_fileop_pwd_without_lint(
|
||||
temp_dir, box_class, run_as_openhands
|
||||
):
|
||||
runtime = _load_runtime(
|
||||
temp_dir, box_class, run_as_openhands, enable_auto_lint=False
|
||||
)
|
||||
_test_ipython_agentskills_fileop_pwd_impl(runtime, False)
|
||||
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_ipython_agentskills_fileop_pwd_with_userdir(temp_dir, box_class):
|
||||
@ -392,13 +405,13 @@ def test_ipython_agentskills_fileop_pwd_with_userdir(temp_dir, box_class):
|
||||
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
|
||||
).strip().split('\n')
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_ipython_package_install(temp_dir, box_class, run_as_openhands):
|
||||
"""Make sure that cd in bash also update the current working directory in ipython."""
|
||||
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
|
||||
# It should error out since pymsgbox is not installed
|
||||
action = IPythonRunCellAction(code='import pymsgbox')
|
||||
@ -424,9 +437,8 @@ def test_ipython_package_install(temp_dir, box_class, run_as_openhands):
|
||||
# import should not error out
|
||||
assert obs.content.strip() == (
|
||||
'[Code executed successfully with no output]\n'
|
||||
'[Jupyter current working directory: /workspace]\n'
|
||||
f'[Jupyter current working directory: {sandbox_dir}]\n'
|
||||
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
|
||||
)
|
||||
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user