mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
fix: incorrect soft-timeout implementation & fix hard-timeout follow-up command (#6280)
This commit is contained in:
parent
da1a6035ac
commit
0bed17758f
@ -171,7 +171,7 @@ def initialize_runtime(
|
||||
action = CmdRunAction(
|
||||
command=f'git clone -b commit0_combined https://github.com/{instance["repo"]}.git'
|
||||
)
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -181,7 +181,7 @@ def initialize_runtime(
|
||||
)
|
||||
|
||||
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -191,7 +191,7 @@ def initialize_runtime(
|
||||
)
|
||||
|
||||
action = CmdRunAction(command='git checkout -b openhands')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -201,7 +201,7 @@ def initialize_runtime(
|
||||
|
||||
# Install commit0
|
||||
action = CmdRunAction(command='/root/.cargo/bin/uv pip install commit0')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
# logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -231,7 +231,7 @@ def complete_runtime(
|
||||
workspace_dir_name = _get_commit0_workspace_dir_name(instance)
|
||||
|
||||
action = CmdRunAction(command='git add .')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -241,7 +241,7 @@ def complete_runtime(
|
||||
)
|
||||
|
||||
action = CmdRunAction(command='git commit -m "openhands edits"')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -258,7 +258,7 @@ def complete_runtime(
|
||||
action = CmdRunAction(
|
||||
command=f"git diff {instance['base_commit']} HEAD -- . ':(exclude)spec.pdf.bz2'"
|
||||
)
|
||||
action.timeout = 600 + 100 * n_retries
|
||||
action.set_hard_timeout(600 + 100 * n_retries)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
# logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -282,7 +282,7 @@ def complete_runtime(
|
||||
action = CmdRunAction(
|
||||
command=f"{instance['test']['test_cmd']} --json-report --json-report-file=report.json --continue-on-collection-errors {test_dir} > test_output.txt 2>&1"
|
||||
)
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -292,7 +292,7 @@ def complete_runtime(
|
||||
)
|
||||
# Read test output
|
||||
action = CmdRunAction(command='cat test_output.txt')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
# logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -305,7 +305,7 @@ def complete_runtime(
|
||||
|
||||
# Save pytest exit code
|
||||
action = CmdRunAction(command='echo $?')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
# logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -318,7 +318,7 @@ def complete_runtime(
|
||||
|
||||
# Read the test report
|
||||
action = CmdRunAction(command='cat report.json')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
# logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -330,7 +330,7 @@ def complete_runtime(
|
||||
repo_name = instance['repo'].split('/')[1]
|
||||
repo_name = repo_name.replace('.', '-')
|
||||
action = CmdRunAction(command=f'commit0 get-tests {repo_name}')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
# logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
@ -174,7 +174,7 @@ def process_instance(
|
||||
|
||||
# Set +x
|
||||
action = CmdRunAction(command='chmod +x /tmp/eval.sh')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -189,7 +189,7 @@ def process_instance(
|
||||
"echo 'APPLY_PATCH_FAIL')))"
|
||||
)
|
||||
action = CmdRunAction(command=exec_command)
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
obs = runtime.run_action(action)
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
apply_patch_output = obs.content
|
||||
@ -212,7 +212,7 @@ def process_instance(
|
||||
# Run eval script in background and save output to log file
|
||||
log_file = '/tmp/eval_output.log'
|
||||
action = CmdRunAction(command=f'/tmp/eval.sh > {log_file} 2>&1 & echo $!')
|
||||
action.timeout = 60 # Short timeout just to get the process ID
|
||||
action.set_hard_timeout(60) # Short timeout just to get the process ID
|
||||
obs = runtime.run_action(action)
|
||||
|
||||
if isinstance(obs, CmdOutputObservation) and obs.exit_code == 0:
|
||||
@ -235,7 +235,7 @@ def process_instance(
|
||||
check_action = CmdRunAction(
|
||||
command=f'ps -p {pid} > /dev/null; echo $?'
|
||||
)
|
||||
check_action.timeout = 60
|
||||
check_action.set_hard_timeout(60)
|
||||
check_obs = runtime.run_action(check_action)
|
||||
if (
|
||||
isinstance(check_obs, CmdOutputObservation)
|
||||
@ -252,7 +252,7 @@ def process_instance(
|
||||
|
||||
# Read the log file
|
||||
cat_action = CmdRunAction(command=f'cat {log_file}')
|
||||
cat_action.timeout = 300
|
||||
cat_action.set_hard_timeout(300)
|
||||
cat_obs = runtime.run_action(cat_action)
|
||||
|
||||
# Grade answer
|
||||
|
||||
@ -173,7 +173,7 @@ def initialize_runtime(
|
||||
action = CmdRunAction(
|
||||
command=f"""echo 'export SWE_INSTANCE_ID={instance['instance_id']}' >> ~/.bashrc && echo 'export PIP_CACHE_DIR=~/.cache/pip' >> ~/.bashrc && echo "alias git='git --no-pager'" >> ~/.bashrc"""
|
||||
)
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -182,7 +182,7 @@ def initialize_runtime(
|
||||
)
|
||||
|
||||
action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """)
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -194,7 +194,7 @@ def initialize_runtime(
|
||||
|
||||
# inject the instance info
|
||||
action = CmdRunAction(command='mkdir -p /swe_util/eval_data/instances')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -223,14 +223,14 @@ def initialize_runtime(
|
||||
'/swe_util/',
|
||||
)
|
||||
action = CmdRunAction(command='cat ~/.bashrc')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to cat ~/.bashrc: {str(obs)}')
|
||||
|
||||
action = CmdRunAction(command='source ~/.bashrc')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -239,7 +239,7 @@ def initialize_runtime(
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to source ~/.bashrc: {str(obs)}')
|
||||
|
||||
action = CmdRunAction(command='source /swe_util/instance_swe_entry.sh')
|
||||
action.timeout = 3600
|
||||
action.set_hard_timeout(3600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -249,7 +249,7 @@ def initialize_runtime(
|
||||
)
|
||||
else:
|
||||
action = CmdRunAction(command='source /swe_util/swe_entry.sh')
|
||||
action.timeout = 1800
|
||||
action.set_hard_timeout(1800)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -259,7 +259,7 @@ def initialize_runtime(
|
||||
)
|
||||
|
||||
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -269,7 +269,7 @@ def initialize_runtime(
|
||||
)
|
||||
|
||||
action = CmdRunAction(command='git reset --hard')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -278,14 +278,14 @@ def initialize_runtime(
|
||||
action = CmdRunAction(
|
||||
command='for remote_name in $(git remote); do git remote remove "${remote_name}"; done'
|
||||
)
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}')
|
||||
|
||||
action = CmdRunAction(command='which python')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -316,7 +316,7 @@ def complete_runtime(
|
||||
workspace_dir_name = _get_swebench_workspace_dir_name(instance)
|
||||
|
||||
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -326,7 +326,7 @@ def complete_runtime(
|
||||
)
|
||||
|
||||
action = CmdRunAction(command='git config --global core.pager ""')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -336,7 +336,7 @@ def complete_runtime(
|
||||
)
|
||||
|
||||
action = CmdRunAction(command='git add -A')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -351,7 +351,7 @@ def complete_runtime(
|
||||
action = CmdRunAction(
|
||||
command=f'git diff --no-color --cached {instance["base_commit"]}'
|
||||
)
|
||||
action.timeout = 600 + 100 * n_retries
|
||||
action.set_hard_timeout(600 + 100 * n_retries)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
@ -262,7 +262,7 @@ def pre_login(
|
||||
instruction = action.to_instruction()
|
||||
|
||||
browser_action = BrowseInteractiveAction(browser_actions=instruction)
|
||||
browser_action.timeout = 10000
|
||||
browser_action.set_hard_timeout(10000)
|
||||
logger.info(browser_action, extra={'msg_type': 'ACTION'})
|
||||
obs: BrowserOutputObservation = runtime.run_action(browser_action)
|
||||
logger.debug(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
@ -86,7 +86,7 @@ def init_task_env(runtime: Runtime, hostname: str, env_llm_config: LLMConfig):
|
||||
'bash /utils/init.sh'
|
||||
)
|
||||
action = CmdRunAction(command=command)
|
||||
action.timeout = 900
|
||||
action.set_hard_timeout(900)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -172,7 +172,7 @@ def run_evaluator(
|
||||
f'python_default /utils/eval.py --trajectory_path {trajectory_path} --result_path {result_path}'
|
||||
)
|
||||
action = CmdRunAction(command=command)
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
@ -64,8 +64,12 @@ class Event:
|
||||
return self._timeout # type: ignore[attr-defined]
|
||||
return None
|
||||
|
||||
@timeout.setter
|
||||
def timeout(self, value: int | None) -> None:
|
||||
def set_hard_timeout(self, value: int | None, blocking: bool = True) -> None:
|
||||
"""Set the timeout for the event.
|
||||
|
||||
NOTE, this is a hard timeout, meaning that the event will be blocked
|
||||
until the timeout is reached.
|
||||
"""
|
||||
self._timeout = value
|
||||
if value is not None and value > 600:
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
@ -78,7 +82,7 @@ class Event:
|
||||
# Check if .blocking is an attribute of the event
|
||||
if hasattr(self, 'blocking'):
|
||||
# .blocking needs to be set to True if .timeout is set
|
||||
self.blocking = True
|
||||
self.blocking = blocking
|
||||
|
||||
# optional metadata, LLM call cost of the edit
|
||||
@property
|
||||
|
||||
@ -74,7 +74,8 @@ def action_from_dict(action: dict) -> Action:
|
||||
try:
|
||||
decoded_action = action_class(**args)
|
||||
if 'timeout' in action:
|
||||
decoded_action.timeout = action['timeout']
|
||||
blocking = args.get('blocking', False)
|
||||
decoded_action.set_hard_timeout(action['timeout'], blocking=blocking)
|
||||
|
||||
# Set timestamp if it was provided
|
||||
if timestamp:
|
||||
|
||||
@ -118,7 +118,7 @@ async def complete_runtime(
|
||||
git_patch = None
|
||||
while n_retries < 5:
|
||||
action = CmdRunAction(command=f'git diff --no-color --cached {base_commit}')
|
||||
action.timeout = 600 + 100 * n_retries
|
||||
action.set_hard_timeout(600 + 100 * n_retries)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
@ -120,6 +120,9 @@ class ActionExecutor:
|
||||
self.bash_session = BashSession(
|
||||
work_dir=self._initial_cwd,
|
||||
username=self.username,
|
||||
no_change_timeout_seconds=int(
|
||||
os.environ.get('NO_CHANGE_TIMEOUT_SECONDS', 30)
|
||||
),
|
||||
)
|
||||
self.bash_session.initialize()
|
||||
await wait_all(
|
||||
@ -163,7 +166,7 @@ class ActionExecutor:
|
||||
logger.debug(f'Initializing by running {len(INIT_COMMANDS)} bash commands...')
|
||||
for command in INIT_COMMANDS:
|
||||
action = CmdRunAction(command=command)
|
||||
action.timeout = 300
|
||||
action.set_hard_timeout(300)
|
||||
logger.debug(f'Executing init command: {command}')
|
||||
obs = await self.run(action)
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
|
||||
@ -182,7 +182,8 @@ class Runtime(FileEditRuntimeMixin):
|
||||
|
||||
async def _handle_action(self, event: Action) -> None:
|
||||
if event.timeout is None:
|
||||
event.timeout = self.config.sandbox.timeout
|
||||
# We don't block the command if this is a default timeout action
|
||||
event.set_hard_timeout(self.config.sandbox.timeout, blocking=False)
|
||||
assert event.timeout is not None
|
||||
try:
|
||||
observation: Observation = await call_sync_from_async(
|
||||
|
||||
@ -216,7 +216,8 @@ class ActionExecutionClient(Runtime):
|
||||
|
||||
# set timeout to default if not set
|
||||
if action.timeout is None:
|
||||
action.timeout = self.config.sandbox.timeout
|
||||
# We don't block the command if this is a default timeout action
|
||||
action.set_hard_timeout(self.config.sandbox.timeout, blocking=False)
|
||||
|
||||
with self.action_semaphore:
|
||||
if not action.runnable:
|
||||
|
||||
@ -228,6 +228,8 @@ class DockerRuntime(ActionExecutionClient):
|
||||
}
|
||||
if self.config.debug or DEBUG:
|
||||
environment['DEBUG'] = 'true'
|
||||
# also update with runtime_startup_env_vars
|
||||
environment.update(self.config.sandbox.runtime_startup_env_vars)
|
||||
|
||||
self.log('debug', f'Workspace Base: {self.config.workspace_base}')
|
||||
if (
|
||||
|
||||
@ -174,7 +174,7 @@ class BashSession:
|
||||
self,
|
||||
work_dir: str,
|
||||
username: str | None = None,
|
||||
no_change_timeout_seconds: float = 30.0,
|
||||
no_change_timeout_seconds: int = 30,
|
||||
):
|
||||
self.NO_CHANGE_TIMEOUT_SECONDS = no_change_timeout_seconds
|
||||
self.work_dir = work_dir
|
||||
@ -369,7 +369,7 @@ class BashSession:
|
||||
command,
|
||||
raw_command_output,
|
||||
metadata,
|
||||
continue_prefix='[Command output continued from previous command]\n',
|
||||
continue_prefix='[Below is the output of the previous command.]\n',
|
||||
)
|
||||
return CmdOutputObservation(
|
||||
content=command_output,
|
||||
@ -404,7 +404,7 @@ class BashSession:
|
||||
command,
|
||||
raw_command_output,
|
||||
metadata,
|
||||
continue_prefix='[Command output continued from previous command]\n',
|
||||
continue_prefix='[Below is the output of the previous command.]\n',
|
||||
)
|
||||
|
||||
return CmdOutputObservation(
|
||||
@ -441,6 +441,8 @@ class BashSession:
|
||||
else:
|
||||
# The command output is the content after the last PS1 prompt
|
||||
return pane_content[ps1_matches[0].end() + 1 :]
|
||||
elif len(ps1_matches) == 0:
|
||||
return pane_content
|
||||
combined_output = ''
|
||||
for i in range(len(ps1_matches) - 1):
|
||||
# Extract content between current and next PS1 prompt
|
||||
@ -459,6 +461,9 @@ class BashSession:
|
||||
# Strip the command of any leading/trailing whitespace
|
||||
logger.debug(f'RECEIVED ACTION: {action}')
|
||||
command = action.command.strip()
|
||||
is_special_key = self._is_special_key(command)
|
||||
|
||||
# Handle when prev command is hard timeout
|
||||
|
||||
if command == '' and self.prev_status not in {
|
||||
BashCommandStatus.CONTINUE,
|
||||
@ -486,13 +491,45 @@ class BashSession:
|
||||
last_change_time = start_time
|
||||
last_pane_output = self._get_pane_content()
|
||||
|
||||
if command != '':
|
||||
# Do not check hard timeout if the command is a special key
|
||||
if command != '' and is_special_key:
|
||||
logger.debug(f'SENDING SPECIAL KEY: {command!r}')
|
||||
self.pane.send_keys(command, enter=False)
|
||||
# When prev command is hard timeout, and we are trying to execute new command
|
||||
elif self.prev_status == BashCommandStatus.HARD_TIMEOUT and command != '':
|
||||
if not last_pane_output.endswith(CMD_OUTPUT_PS1_END):
|
||||
_ps1_matches = CmdOutputMetadata.matches_ps1_metadata(last_pane_output)
|
||||
raw_command_output = self._combine_outputs_between_matches(
|
||||
last_pane_output, _ps1_matches
|
||||
)
|
||||
metadata = CmdOutputMetadata() # No metadata available
|
||||
metadata.suffix = (
|
||||
f'\n[Your command "{command}" is NOT executed. '
|
||||
f'The previous command was timed out but still running. Above is the output of the previous command. '
|
||||
"You may wait longer to see additional output of the previous command by sending empty command '', "
|
||||
'send other commands to interact with the current process, '
|
||||
'or send keys ("C-c", "C-z", "C-d") to interrupt/kill the previous command before sending your new command.]'
|
||||
)
|
||||
command_output = self._get_command_output(
|
||||
command,
|
||||
raw_command_output,
|
||||
metadata,
|
||||
continue_prefix='[Below is the output of the previous command.]\n',
|
||||
)
|
||||
return CmdOutputObservation(
|
||||
command=command,
|
||||
content=command_output,
|
||||
metadata=metadata,
|
||||
)
|
||||
# Only send the command to the pane if it's not a special key and it's not empty
|
||||
# AND previous hard timeout command is resolved
|
||||
elif command != '' and not is_special_key:
|
||||
# convert command to raw string
|
||||
command = escape_bash_special_chars(command)
|
||||
logger.debug(f'SENDING COMMAND: {command!r}')
|
||||
self.pane.send_keys(
|
||||
command,
|
||||
enter=not self._is_special_key(command),
|
||||
enter=True,
|
||||
)
|
||||
|
||||
# Loop until the command completes or times out
|
||||
@ -525,7 +562,7 @@ class BashSession:
|
||||
# We ignore this if the command is *blocking
|
||||
time_since_last_change = time.time() - last_change_time
|
||||
logger.debug(
|
||||
f'CHECKING NO CHANGE TIMEOUT ({self.NO_CHANGE_TIMEOUT_SECONDS}s): elapsed {time_since_last_change}'
|
||||
f'CHECKING NO CHANGE TIMEOUT ({self.NO_CHANGE_TIMEOUT_SECONDS}s): elapsed {time_since_last_change}. Action blocking: {action.blocking}'
|
||||
)
|
||||
if (
|
||||
not action.blocking
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
"""Bash-related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox."""
|
||||
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
@ -45,7 +46,7 @@ def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
|
||||
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
|
||||
try:
|
||||
action = CmdRunAction(command='python3 -m http.server 8080')
|
||||
action.timeout = 1
|
||||
action.set_hard_timeout(1)
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
@ -57,7 +58,7 @@ def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
|
||||
)
|
||||
|
||||
action = CmdRunAction(command='C-c')
|
||||
action.timeout = 30
|
||||
action.set_hard_timeout(30)
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
@ -66,7 +67,7 @@ def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
|
||||
assert '/workspace' in obs.metadata.working_dir
|
||||
|
||||
action = CmdRunAction(command='ls')
|
||||
action.timeout = 1
|
||||
action.set_hard_timeout(1)
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
@ -76,7 +77,7 @@ def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
|
||||
|
||||
# run it again!
|
||||
action = CmdRunAction(command='python3 -m http.server 8080')
|
||||
action.timeout = 1
|
||||
action.set_hard_timeout(1)
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
@ -555,15 +556,20 @@ def test_basic_command(temp_dir, runtime_cls, run_as_openhands):
|
||||
|
||||
|
||||
def test_interactive_command(temp_dir, runtime_cls, run_as_openhands):
|
||||
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
|
||||
runtime = _load_runtime(
|
||||
temp_dir,
|
||||
runtime_cls,
|
||||
run_as_openhands,
|
||||
runtime_startup_env_vars={'NO_CHANGE_TIMEOUT_SECONDS': '1'},
|
||||
)
|
||||
try:
|
||||
# Test interactive command
|
||||
action = CmdRunAction('read -p "Enter name: " name && echo "Hello $name"')
|
||||
action.timeout = 1
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
# assert 'Enter name:' in obs.content # FIXME: this is not working
|
||||
assert '[The command timed out after 1 seconds.' in obs.metadata.suffix
|
||||
# This should trigger SOFT timeout, so no need to set hard timeout
|
||||
assert 'Enter name:' in obs.content
|
||||
assert '[The command has no new output after 1 seconds.' in obs.metadata.suffix
|
||||
|
||||
action = CmdRunAction('John')
|
||||
obs = runtime.run_action(action)
|
||||
@ -590,7 +596,7 @@ def test_long_output(temp_dir, runtime_cls, run_as_openhands):
|
||||
try:
|
||||
# Generate a long output
|
||||
action = CmdRunAction('for i in $(seq 1 5000); do echo "Line $i"; done')
|
||||
action.timeout = 10
|
||||
action.set_hard_timeout(10)
|
||||
obs = runtime.run_action(action)
|
||||
assert obs.exit_code == 0
|
||||
assert 'Line 1' in obs.content
|
||||
@ -604,7 +610,7 @@ def test_long_output_exceed_history_limit(temp_dir, runtime_cls, run_as_openhand
|
||||
try:
|
||||
# Generate a long output
|
||||
action = CmdRunAction('for i in $(seq 1 50000); do echo "Line $i"; done')
|
||||
action.timeout = 30
|
||||
action.set_hard_timeout(30)
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
@ -621,13 +627,13 @@ def test_long_output_from_nested_directories(temp_dir, runtime_cls, run_as_openh
|
||||
# Create nested directories with many files
|
||||
setup_cmd = 'mkdir -p /tmp/test_dir && cd /tmp/test_dir && for i in $(seq 1 100); do mkdir -p "folder_$i"; for j in $(seq 1 100); do touch "folder_$i/file_$j.txt"; done; done'
|
||||
setup_action = CmdRunAction(setup_cmd.strip())
|
||||
setup_action.timeout = 60
|
||||
setup_action.set_hard_timeout(60)
|
||||
obs = runtime.run_action(setup_action)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
# List the directory structure recursively
|
||||
action = CmdRunAction('ls -R /tmp/test_dir')
|
||||
action.timeout = 60
|
||||
action.set_hard_timeout(60)
|
||||
obs = runtime.run_action(action)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
@ -672,7 +678,7 @@ def test_command_output_continuation(temp_dir, runtime_cls, run_as_openhands):
|
||||
try:
|
||||
# Start a command that produces output slowly
|
||||
action = CmdRunAction('for i in {1..5}; do echo $i; sleep 3; done')
|
||||
action.timeout = 2.5 # Set timeout to 2.5 seconds
|
||||
action.set_hard_timeout(2.5)
|
||||
obs = runtime.run_action(action)
|
||||
assert obs.content.strip() == '1'
|
||||
assert obs.metadata.prefix == ''
|
||||
@ -680,20 +686,19 @@ def test_command_output_continuation(temp_dir, runtime_cls, run_as_openhands):
|
||||
|
||||
# Continue watching output
|
||||
action = CmdRunAction('')
|
||||
action.timeout = 2.5
|
||||
action.set_hard_timeout(2.5)
|
||||
obs = runtime.run_action(action)
|
||||
assert '[Command output continued from previous command]' in obs.metadata.prefix
|
||||
assert '[Below is the output of the previous command.]' in obs.metadata.prefix
|
||||
assert obs.content.strip() == '2'
|
||||
assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
|
||||
|
||||
# Continue until completion
|
||||
for expected in ['3', '4', '5']:
|
||||
action = CmdRunAction('')
|
||||
action.timeout = 2.5
|
||||
action.set_hard_timeout(2.5)
|
||||
obs = runtime.run_action(action)
|
||||
assert (
|
||||
'[Command output continued from previous command]'
|
||||
in obs.metadata.prefix
|
||||
'[Below is the output of the previous command.]' in obs.metadata.prefix
|
||||
)
|
||||
assert obs.content.strip() == expected
|
||||
assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
|
||||
@ -713,8 +718,7 @@ def test_long_running_command_follow_by_execute(
|
||||
try:
|
||||
# Test command that produces output slowly
|
||||
action = CmdRunAction('for i in {1..3}; do echo $i; sleep 3; done')
|
||||
action.timeout = 2.5
|
||||
action.blocking = False
|
||||
action.set_hard_timeout(2.5)
|
||||
obs = runtime.run_action(action)
|
||||
assert '1' in obs.content # First number should appear before timeout
|
||||
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
|
||||
@ -723,25 +727,32 @@ def test_long_running_command_follow_by_execute(
|
||||
|
||||
# Continue watching output
|
||||
action = CmdRunAction('')
|
||||
action.timeout = 2.5
|
||||
action.set_hard_timeout(2.5)
|
||||
obs = runtime.run_action(action)
|
||||
assert '2' in obs.content
|
||||
assert (
|
||||
obs.metadata.prefix == '[Command output continued from previous command]\n'
|
||||
)
|
||||
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
|
||||
assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
|
||||
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
|
||||
|
||||
# Test command that produces no output
|
||||
action = CmdRunAction('sleep 15')
|
||||
action.timeout = 2.5
|
||||
action.set_hard_timeout(2.5)
|
||||
obs = runtime.run_action(action)
|
||||
assert '3' in obs.content
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '3' not in obs.content
|
||||
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
|
||||
assert (
|
||||
obs.metadata.prefix == '[Command output continued from previous command]\n'
|
||||
'The previous command was timed out but still running.'
|
||||
in obs.metadata.suffix
|
||||
)
|
||||
assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
|
||||
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
|
||||
|
||||
# Finally continue again
|
||||
action = CmdRunAction('')
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '3' in obs.content
|
||||
assert '[The command completed with exit code 0.]' in obs.metadata.suffix
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
@ -783,3 +794,96 @@ def test_python_interactive_input(temp_dir, runtime_cls, run_as_openhands):
|
||||
assert '[The command completed with exit code 0.]' in obs.metadata.suffix
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_stress_long_output_with_soft_and_hard_timeout(
|
||||
temp_dir, runtime_cls, run_as_openhands
|
||||
):
|
||||
runtime = _load_runtime(
|
||||
temp_dir,
|
||||
runtime_cls,
|
||||
run_as_openhands,
|
||||
runtime_startup_env_vars={'NO_CHANGE_TIMEOUT_SECONDS': '1'},
|
||||
docker_runtime_kwargs={
|
||||
'cpu_period': 100000, # 100ms
|
||||
'cpu_quota': 100000, # Can use 100ms out of each 100ms period (1 CPU)
|
||||
'mem_limit': '4G', # 4 GB of memory
|
||||
},
|
||||
)
|
||||
try:
|
||||
# Run a command that generates long output multiple times
|
||||
for i in range(10):
|
||||
start_time = time.time()
|
||||
|
||||
# Check tmux memory usage (in KB)
|
||||
mem_action = CmdRunAction(
|
||||
'ps aux | awk \'{printf "%8.1f KB %s\\n", $6, $0}\' | sort -nr | grep "/usr/bin/tmux" | grep -v grep | awk \'{print $1}\''
|
||||
)
|
||||
mem_obs = runtime.run_action(mem_action)
|
||||
assert mem_obs.exit_code == 0
|
||||
logger.info(
|
||||
f'Tmux memory usage (iteration {i}): {mem_obs.content.strip()} KB'
|
||||
)
|
||||
|
||||
# Check action_execution_server mem
|
||||
mem_action = CmdRunAction(
|
||||
'ps aux | awk \'{printf "%8.1f KB %s\\n", $6, $0}\' | sort -nr | grep "action_execution_server" | grep "/openhands/poetry" | grep -v grep | awk \'{print $1}\''
|
||||
)
|
||||
mem_obs = runtime.run_action(mem_action)
|
||||
assert mem_obs.exit_code == 0
|
||||
logger.info(
|
||||
f'Action execution server memory usage (iteration {i}): {mem_obs.content.strip()} KB'
|
||||
)
|
||||
|
||||
# Test soft timeout
|
||||
action = CmdRunAction(
|
||||
'read -p "Do you want to continue? [Y/n] " answer; if [[ $answer == "Y" ]]; then echo "Proceeding with operation..."; echo "Operation completed successfully!"; else echo "Operation cancelled."; exit 1; fi'
|
||||
)
|
||||
obs = runtime.run_action(action)
|
||||
assert 'Do you want to continue?' in obs.content
|
||||
assert obs.exit_code == -1 # Command is still running, waiting for input
|
||||
|
||||
# Send the confirmation
|
||||
action = CmdRunAction('Y')
|
||||
obs = runtime.run_action(action)
|
||||
assert 'Proceeding with operation...' in obs.content
|
||||
assert 'Operation completed successfully!' in obs.content
|
||||
assert obs.exit_code == 0
|
||||
assert '[The command completed with exit code 0.]' in obs.metadata.suffix
|
||||
|
||||
# Test hard timeout w/ long output
|
||||
# Generate long output with 1000 asterisks per line
|
||||
action = CmdRunAction(
|
||||
f'export i={i}; for j in $(seq 1 100); do echo "Line $j - Iteration $i - $(printf \'%1000s\' | tr " " "*")"; sleep 1; done'
|
||||
)
|
||||
action.set_hard_timeout(2)
|
||||
obs = runtime.run_action(action)
|
||||
|
||||
# Verify the output
|
||||
assert obs.exit_code == -1
|
||||
assert f'Line 1 - Iteration {i}' in obs.content
|
||||
# assert f'Line 1000 - Iteration {i}' in obs.content
|
||||
# assert '[The command completed with exit code 0.]' in obs.metadata.suffix
|
||||
|
||||
# Because hard-timeout is triggered, the terminal will in a weird state
|
||||
# where it will not accept any new commands.
|
||||
obs = runtime.run_action(CmdRunAction('ls'))
|
||||
assert obs.exit_code == -1
|
||||
assert (
|
||||
'The previous command was timed out but still running.'
|
||||
in obs.metadata.suffix
|
||||
)
|
||||
|
||||
# We need to send a Ctrl+C to reset the terminal.
|
||||
obs = runtime.run_action(CmdRunAction('C-c'))
|
||||
assert obs.exit_code == 130
|
||||
|
||||
# Now make sure the terminal is in a good state
|
||||
obs = runtime.run_action(CmdRunAction('ls'))
|
||||
assert obs.exit_code == 0
|
||||
|
||||
duration = time.time() - start_time
|
||||
logger.info(f'Completed iteration {i} in {duration:.2f} seconds')
|
||||
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
@ -28,7 +28,7 @@ def test_stress_docker_runtime(temp_dir, runtime_cls, repeat=1):
|
||||
for _ in range(repeat):
|
||||
# run stress-ng stress tests for 1 minute
|
||||
action = CmdRunAction(command='stress-ng --all 1 -t 1m')
|
||||
action.timeout = 120
|
||||
action.set_hard_timeout(120)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
@ -92,14 +92,14 @@ def initialize_runtime(
|
||||
obs: CmdOutputObservation
|
||||
|
||||
action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """)
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to export USER: {str(obs)}')
|
||||
|
||||
action = CmdRunAction(command='mkdir -p /dummy_dir')
|
||||
action.timeout = 600
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
@ -94,7 +94,7 @@ def test_long_running_command_follow_by_execute():
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '2' in obs.content
|
||||
assert obs.metadata.prefix == '[Command output continued from previous command]\n'
|
||||
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
|
||||
assert obs.metadata.suffix == (
|
||||
'\n[The command has no new output after 2 seconds. '
|
||||
"You may wait longer to see additional output by sending empty command '', "
|
||||
@ -108,7 +108,7 @@ def test_long_running_command_follow_by_execute():
|
||||
obs = session.execute(CmdRunAction('sleep 15'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '3' in obs.content
|
||||
assert obs.metadata.prefix == '[Command output continued from previous command]\n'
|
||||
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
|
||||
assert obs.metadata.suffix == (
|
||||
'\n[The command has no new output after 2 seconds. '
|
||||
"You may wait longer to see additional output by sending empty command '', "
|
||||
@ -175,7 +175,7 @@ def test_interactive_command():
|
||||
'send other commands to interact with the current process, '
|
||||
'or send keys to interrupt/kill the command.]'
|
||||
)
|
||||
assert obs.metadata.prefix == '[Command output continued from previous command]\n'
|
||||
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
|
||||
|
||||
obs = session.execute(CmdRunAction('line 2'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -187,7 +187,7 @@ def test_interactive_command():
|
||||
'send other commands to interact with the current process, '
|
||||
'or send keys to interrupt/kill the command.]'
|
||||
)
|
||||
assert obs.metadata.prefix == '[Command output continued from previous command]\n'
|
||||
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
|
||||
|
||||
obs = session.execute(CmdRunAction('EOF'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -266,14 +266,14 @@ def test_command_output_continuation():
|
||||
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '[Command output continued from previous command]' in obs.metadata.prefix
|
||||
assert '[Below is the output of the previous command.]' in obs.metadata.prefix
|
||||
assert obs.content.strip() == '2'
|
||||
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '[Command output continued from previous command]' in obs.metadata.prefix
|
||||
assert '[Below is the output of the previous command.]' in obs.metadata.prefix
|
||||
assert obs.content.strip() == '3'
|
||||
|
||||
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
|
||||
@ -281,14 +281,14 @@ def test_command_output_continuation():
|
||||
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '[Command output continued from previous command]' in obs.metadata.prefix
|
||||
assert '[Below is the output of the previous command.]' in obs.metadata.prefix
|
||||
assert obs.content.strip() == '4'
|
||||
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '[Command output continued from previous command]' in obs.metadata.prefix
|
||||
assert '[Below is the output of the previous command.]' in obs.metadata.prefix
|
||||
assert obs.content.strip() == '5'
|
||||
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
@ -27,7 +27,7 @@ def runtime(mock_session):
|
||||
def test_runtime_timeout_error(runtime, mock_session):
|
||||
# Create a command action
|
||||
action = CmdRunAction(command='test command')
|
||||
action.timeout = 120
|
||||
action.set_hard_timeout(120)
|
||||
|
||||
# Mock the runtime to raise a timeout error
|
||||
runtime.send_action_for_execution.side_effect = AgentRuntimeTimeoutError(
|
||||
@ -78,7 +78,7 @@ def test_runtime_disconnected_error(
|
||||
|
||||
# Create a command action
|
||||
action = CmdRunAction(command='test command')
|
||||
action.timeout = 120
|
||||
action.set_hard_timeout(120)
|
||||
|
||||
# Verify that the error message is correct
|
||||
with pytest.raises(AgentRuntimeDisconnectedError) as exc_info:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user