fix(bash): also show timeout reminder when no_change_timeout is triggered (#6318)

Co-authored-by: Robert Brennan <accounts@rbren.io>
This commit is contained in:
Xingyao Wang 2025-01-17 14:31:23 -05:00 committed by GitHub
parent c5d7caf01f
commit 899c1f8360
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 324 additions and 116 deletions

View File

@ -359,7 +359,7 @@ def complete_runtime(
action = CmdRunAction(
command=f'git diff --no-color --cached {instance["base_commit"]}'
)
action.timeout = max(300 + 100 * n_retries, 600)
action.set_hard_timeout(max(300 + 100 * n_retries, 600))
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})

View File

@ -0,0 +1,73 @@
import hashlib
from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
from openhands.events.action import (
AgentFinishAction,
FileWriteAction,
MessageAction,
)
from openhands.events.event import Event
from openhands.events.observation import AgentDelegateObservation
from openhands.runtime.base import Runtime
class Test(BaseIntegrationTest):
INSTRUCTION = 'Execute the python script /workspace/python_script.py with input "John" and "25" and tell me the secret number.'
SECRET_NUMBER = int(hashlib.sha256(str(25).encode()).hexdigest()[:8], 16) % 1000
@classmethod
def initialize_runtime(cls, runtime: Runtime) -> None:
from openhands.core.logger import openhands_logger as logger
action = FileWriteAction(
path='/workspace/python_script.py',
content=(
'name = input("Enter your name: "); age = input("Enter your age: "); '
'import hashlib; secret = int(hashlib.sha256(str(age).encode()).hexdigest()[:8], 16) % 1000; '
'print(f"Hello {name}, you are {age} years old. Tell you a secret number: {secret}")'
),
)
logger.info(action, extra={'msg_type': 'ACTION'})
observation = runtime.run_action(action)
logger.info(observation, extra={'msg_type': 'OBSERVATION'})
@classmethod
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
from openhands.core.logger import openhands_logger as logger
# check if the license information is in any message
message_actions = [
event
for event in histories
if isinstance(
event, (MessageAction, AgentFinishAction, AgentDelegateObservation)
)
]
logger.info(f'Total message-like events: {len(message_actions)}')
for event in message_actions:
try:
if isinstance(event, AgentDelegateObservation):
content = event.content
elif isinstance(event, AgentFinishAction):
content = event.outputs.get('content', '')
if event.thought:
content += f'\n\n{event.thought}'
elif isinstance(event, MessageAction):
content = event.content
else:
logger.warning(f'Unexpected event type: {type(event)}')
continue
if str(cls.SECRET_NUMBER) in content:
return TestResult(success=True)
except Exception as e:
logger.error(f'Error processing event: {e}')
logger.debug(
f'Total messages: {len(message_actions)}. Messages: {message_actions}'
)
return TestResult(
success=False,
reason=f'The answer is not found in any message. Total messages: {len(message_actions)}.',
)

View File

@ -371,7 +371,6 @@ def _process_instance_wrapper(
error = str(e)
stacktrace = traceback.format_exc()
if attempt == max_retries:
logger.exception(e)
msg = (
'-' * 10
+ '\n'
@ -395,19 +394,13 @@ def _process_instance_wrapper(
+ '-' * 10
+ '\n'
)
if isinstance(
e,
(
AgentRuntimeDisconnectedError,
AgentRuntimeUnavailableError,
AgentRuntimeNotFoundError,
),
):
# e is likely an EvalException, so we can't directly infer it from type
# but rather check if it's a fatal error
if is_fatal_runtime_error(str(e)):
runtime_failure_count += 1
msg += f'Runtime disconnected error detected for instance {instance.instance_id}, runtime failure count: {runtime_failure_count}'
msg += '\n' + '-' * 10 + '\n'
logger.error(msg)
if use_mp:
print(msg) # use print to directly print to console
time.sleep(5)
@ -564,6 +557,7 @@ def is_fatal_evaluation_error(error: str | None) -> bool:
AgentRuntimeNotReadyError,
AgentRuntimeDisconnectedError,
AgentRuntimeNotFoundError,
ConnectionError,
]
if any(exception.__name__ in error for exception in FATAL_EXCEPTIONS):
@ -573,6 +567,23 @@ def is_fatal_evaluation_error(error: str | None) -> bool:
return False
def is_fatal_runtime_error(error: str | None) -> bool:
if not error:
return False
FATAL_RUNTIME_ERRORS = [
AgentRuntimeUnavailableError,
AgentRuntimeDisconnectedError,
AgentRuntimeNotFoundError,
]
if any(exception.__name__ in error for exception in FATAL_RUNTIME_ERRORS):
logger.error(f'Fatal runtime error detected: {error}')
return True
return False
def get_metrics(state: State) -> dict[str, Any]:
"""Extract metrics from the state."""
metrics = state.metrics.get() if state.metrics else {}

View File

@ -31,7 +31,7 @@ from openhands.events.tool import ToolCallMetadata
_BASH_DESCRIPTION = """Execute a bash command in the terminal.
* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
* Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command like `C-c` (Ctrl+C) to interrupt the process.
* Interact with running process: If a bash command returns exit code `-1`, this means the process is not yet finished. By setting `is_input` to `true`, the assistant can interact with the running process and send empty `command` to retrieve any additional logs, or send additional text (set `command` to the text) to STDIN of the running process, or send command like `C-c` (Ctrl+C), `C-d` (Ctrl+D), `C-z` (Ctrl+Z) to interrupt the process.
"""
CmdRunTool = ChatCompletionToolParam(
@ -46,6 +46,11 @@ CmdRunTool = ChatCompletionToolParam(
'type': 'string',
'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process.',
},
'is_input': {
'type': 'string',
'description': 'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. Default is False.',
'enum': ['true', 'false'],
},
},
'required': ['command'],
},
@ -488,6 +493,12 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
f'Failed to parse tool call arguments: {tool_call.function.arguments}'
) from e
if tool_call.function.name == 'execute_bash':
# this is an LLM error: add empty command to avoid breaking the tool call
if 'command' not in arguments:
arguments['command'] = ''
# convert is_input to boolean
if 'is_input' in arguments:
arguments['is_input'] = arguments['is_input'] == 'true'
action = CmdRunAction(**arguments)
elif tool_call.function.name == 'execute_ipython_cell':
action = IPythonRunCellAction(**arguments)

View File

@ -11,8 +11,10 @@ from openhands.events.action.action import (
@dataclass
class CmdRunAction(Action):
command: str
# When `command` is empty, it will be used to print the current tmux window
command: (
str # When `command` is empty, it will be used to print the current tmux window
)
is_input: bool = False # if True, the command is an input to the running process
thought: str = ''
blocking: bool = False
# If blocking is True, the command will be run in a blocking manner.
@ -28,7 +30,7 @@ class CmdRunAction(Action):
return f'Running command: {self.command}'
def __str__(self) -> str:
ret = f'**CmdRunAction (source={self.source})**\n'
ret = f'**CmdRunAction (source={self.source}, is_input={self.is_input})**\n'
if self.thought:
ret += f'THOUGHT: {self.thought}\n'
ret += f'COMMAND:\n{self.command}'

View File

@ -197,9 +197,10 @@ class Runtime(FileEditRuntimeMixin):
e, AgentRuntimeDisconnectedError
):
err_id = 'STATUS$ERROR_RUNTIME_DISCONNECTED'
self.log('error', f'Unexpected error while running action: {str(e)}')
error_message = f'{type(e).__name__}: {str(e)}'
self.log('error', f'Unexpected error while running action: {error_message}')
self.log('error', f'Problematic action: {str(event)}')
self.send_error_message(err_id, str(e))
self.send_error_message(err_id, error_message)
self.close()
return

View File

@ -59,6 +59,7 @@ class ActionExecutionClient(Runtime):
self.session = HttpSession()
self.action_semaphore = threading.Semaphore(1) # Ensure one action at a time
self._runtime_initialized: bool = False
self._runtime_closed: bool = False
self._vscode_token: str | None = None # initial dummy value
super().__init__(
config,
@ -283,4 +284,9 @@ class ActionExecutionClient(Runtime):
return self.send_action_for_execution(action)
def close(self) -> None:
# Make sure we don't close the session multiple times
# Can happen in evaluation
if self._runtime_closed:
return
self._runtime_closed = True
self.session.close()

View File

@ -13,6 +13,7 @@ from openhands.core.exceptions import (
AgentRuntimeNotReadyError,
AgentRuntimeUnavailableError,
)
from openhands.core.logger import openhands_logger as logger
from openhands.events import EventStream
from openhands.runtime.builder.remote import RemoteRuntimeBuilder
from openhands.runtime.impl.action_execution.action_execution_client import (
@ -75,6 +76,10 @@ class RemoteRuntime(ActionExecutionClient):
self.available_hosts: dict[str, int] = {}
self._runtime_initialized: bool = False
def log(self, level: str, message: str) -> None:
message = f'[runtime session_id={self.sid} runtime_id={self.runtime_id or "unknown"}] {message}'
getattr(logger, level)(message, stacklevel=2)
def _get_action_execution_server_host(self):
return self.runtime_url
@ -350,20 +355,33 @@ class RemoteRuntime(ActionExecutionClient):
super().close()
return
try:
with self._send_runtime_api_request(
'POST',
f'{self.config.sandbox.remote_runtime_api_url}/stop',
json={'runtime_id': self.runtime_id},
):
self.log('debug', 'Runtime stopped.')
if not self._runtime_closed:
with self._send_runtime_api_request(
'POST',
f'{self.config.sandbox.remote_runtime_api_url}/stop',
json={'runtime_id': self.runtime_id},
):
self.log('debug', 'Runtime stopped.')
except Exception as e:
raise e
finally:
super().close()
def _send_runtime_api_request(self, method, url, **kwargs):
return send_request(self.session, method, url, **kwargs)
try:
return send_request(self.session, method, url, **kwargs)
except requests.Timeout:
self.log(
'error',
f'No response received within the timeout period for url: {url}',
)
raise
@tenacity.retry(
retry=tenacity.retry_if_exception_type(ConnectionError),
stop=tenacity.stop_after_attempt(3) | stop_if_should_exit(),
wait=tenacity.wait_exponential(multiplier=1, min=4, max=60),
)
def _send_action_server_request(self, method, url, **kwargs):
try:
return super()._send_action_server_request(method, url, **kwargs)
@ -375,14 +393,14 @@ class RemoteRuntime(ActionExecutionClient):
raise
except requests.HTTPError as e:
if e.response.status_code in (404, 502):
if e.response.status_code in (404, 502, 504):
if e.response.status_code == 404:
raise AgentRuntimeDisconnectedError(
'Runtime is not responding. This may be temporary, please try again.'
f'Runtime is not responding. This may be temporary, please try again. Original error: {e}'
) from e
else: # 502
else: # 502, 504
raise AgentRuntimeDisconnectedError(
'Runtime is temporarily unavailable. This may be due to a restart or network issue, please try again.'
f'Runtime is temporarily unavailable. This may be due to a restart or network issue, please try again. Original error: {e}'
) from e
elif e.response.status_code == 503:
self.log('warning', 'Runtime appears to be paused. Resuming...')

View File

@ -461,22 +461,28 @@ class BashSession:
# Strip the command of any leading/trailing whitespace
logger.debug(f'RECEIVED ACTION: {action}')
command = action.command.strip()
is_special_key = self._is_special_key(command)
is_input: bool = action.is_input
# Handle when prev command is hard timeout
if command == '' and self.prev_status not in {
# If the previous command is not completed, we need to check if the command is empty
if self.prev_status not in {
BashCommandStatus.CONTINUE,
BashCommandStatus.NO_CHANGE_TIMEOUT,
BashCommandStatus.HARD_TIMEOUT,
}:
return CmdOutputObservation(
content='ERROR: No previous command to continue from. '
+ 'Previous command has to be timeout to be continued.',
command='',
metadata=CmdOutputMetadata(),
)
if command == '':
return CmdOutputObservation(
content='ERROR: No previous running command to retrieve logs from.',
command='',
metadata=CmdOutputMetadata(),
)
if is_input:
return CmdOutputObservation(
content='ERROR: No previous running command to interact with.',
command='',
metadata=CmdOutputMetadata(),
)
# Check if the command is a single command or multiple commands
splited_commands = split_bash_commands(command)
if len(splited_commands) > 1:
return ErrorObservation(
@ -491,46 +497,62 @@ class BashSession:
last_change_time = start_time
last_pane_output = self._get_pane_content()
# Do not check hard timeout if the command is a special key
if command != '' and is_special_key:
logger.debug(f'SENDING SPECIAL KEY: {command!r}')
self.pane.send_keys(command, enter=False)
# When prev command is hard timeout, and we are trying to execute new command
elif self.prev_status == BashCommandStatus.HARD_TIMEOUT and command != '':
if not last_pane_output.endswith(CMD_OUTPUT_PS1_END):
_ps1_matches = CmdOutputMetadata.matches_ps1_metadata(last_pane_output)
raw_command_output = self._combine_outputs_between_matches(
last_pane_output, _ps1_matches
)
metadata = CmdOutputMetadata() # No metadata available
metadata.suffix = (
f'\n[Your command "{command}" is NOT executed. '
f'The previous command was timed out but still running. Above is the output of the previous command. '
"You may wait longer to see additional output of the previous command by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys ("C-c", "C-z", "C-d") to interrupt/kill the previous command before sending your new command.]'
)
command_output = self._get_command_output(
command,
raw_command_output,
metadata,
continue_prefix='[Below is the output of the previous command.]\n',
)
return CmdOutputObservation(
command=command,
content=command_output,
metadata=metadata,
)
# Only send the command to the pane if it's not a special key and it's not empty
# AND previous hard timeout command is resolved
elif command != '' and not is_special_key:
# convert command to raw string
command = escape_bash_special_chars(command)
logger.debug(f'SENDING COMMAND: {command!r}')
self.pane.send_keys(
command,
enter=True,
# When prev command is still running, and we are trying to send a new command
if (
self.prev_status
in {
BashCommandStatus.HARD_TIMEOUT,
BashCommandStatus.NO_CHANGE_TIMEOUT,
}
and not last_pane_output.endswith(
CMD_OUTPUT_PS1_END
) # prev command is not completed
and not is_input
and command != '' # not input and not empty command
):
_ps1_matches = CmdOutputMetadata.matches_ps1_metadata(last_pane_output)
raw_command_output = self._combine_outputs_between_matches(
last_pane_output, _ps1_matches
)
metadata = CmdOutputMetadata() # No metadata available
metadata.suffix = (
f'\n[Your command "{command}" is NOT executed. '
f'The previous command is still running - You CANNOT send new commands until the previous command is completed. '
'By setting `is_input` to `true`, you can interact with the current process: '
"You may wait longer to see additional output of the previous command by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys ("C-c", "C-z", "C-d") to interrupt/kill the previous command before sending your new command.]'
)
logger.debug(f'PREVIOUS COMMAND OUTPUT: {raw_command_output}')
command_output = self._get_command_output(
command,
raw_command_output,
metadata,
continue_prefix='[Below is the output of the previous command.]\n',
)
return CmdOutputObservation(
command=command,
content=command_output,
metadata=metadata,
)
# Send actual command/inputs to the pane
if command != '':
is_special_key = self._is_special_key(command)
if is_input:
logger.debug(f'SENDING INPUT TO RUNNING PROCESS: {command!r}')
self.pane.send_keys(
command,
enter=not is_special_key,
)
else:
# convert command to raw string
command = escape_bash_special_chars(command)
logger.debug(f'SENDING COMMAND: {command!r}')
self.pane.send_keys(
command,
enter=not is_special_key,
)
# Loop until the command completes or times out
while should_continue():

View File

@ -57,7 +57,7 @@ def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
in obs.metadata.suffix
)
action = CmdRunAction(command='C-c')
action = CmdRunAction(command='C-c', is_input=True)
action.set_hard_timeout(30)
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
@ -571,7 +571,7 @@ def test_interactive_command(temp_dir, runtime_cls, run_as_openhands):
assert 'Enter name:' in obs.content
assert '[The command has no new output after 1 seconds.' in obs.metadata.suffix
action = CmdRunAction('John')
action = CmdRunAction('John', is_input=True)
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Hello John' in obs.content
@ -741,10 +741,7 @@ def test_long_running_command_follow_by_execute(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '3' not in obs.content
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
assert (
'The previous command was timed out but still running.'
in obs.metadata.suffix
)
assert 'The previous command is still running' in obs.metadata.suffix
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
# Finally continue again
@ -763,7 +760,9 @@ def test_empty_command_errors(temp_dir, runtime_cls, run_as_openhands):
# Test empty command without previous command
obs = runtime.run_action(CmdRunAction(''))
assert isinstance(obs, CmdOutputObservation)
assert 'ERROR: No previous command to continue from' in obs.content
assert (
'ERROR: No previous running command to retrieve logs from.' in obs.content
)
finally:
_close_test_runtime(runtime)
@ -781,13 +780,52 @@ def test_python_interactive_input(temp_dir, runtime_cls, run_as_openhands):
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
# Send first input (name)
obs = runtime.run_action(CmdRunAction('Alice'))
obs = runtime.run_action(CmdRunAction('Alice', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Enter your age:' in obs.content
assert obs.metadata.exit_code == -1
# Send second input (age)
obs = runtime.run_action(CmdRunAction('25'))
obs = runtime.run_action(CmdRunAction('25', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Hello Alice, you are 25 years old' in obs.content
assert obs.metadata.exit_code == 0
assert '[The command completed with exit code 0.]' in obs.metadata.suffix
finally:
_close_test_runtime(runtime)
def test_python_interactive_input_without_set_input(
temp_dir, runtime_cls, run_as_openhands
):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# Test Python program that asks for input - properly escaped for bash
python_script = """name = input('Enter your name: '); age = input('Enter your age: '); print(f'Hello {name}, you are {age} years old')"""
# Start Python with the interactive script
obs = runtime.run_action(CmdRunAction(f'python3 -c "{python_script}"'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Enter your name:' in obs.content
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
# Send first input (name)
obs = runtime.run_action(CmdRunAction('Alice', is_input=False))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Enter your age:' not in obs.content
assert (
'Your command "Alice" is NOT executed. The previous command is still running'
in obs.metadata.suffix
)
assert obs.metadata.exit_code == -1
# Try again now with input
obs = runtime.run_action(CmdRunAction('Alice', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Enter your age:' in obs.content
assert obs.metadata.exit_code == -1
obs = runtime.run_action(CmdRunAction('25', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Hello Alice, you are 25 years old' in obs.content
assert obs.metadata.exit_code == 0
@ -844,7 +882,7 @@ def test_stress_long_output_with_soft_and_hard_timeout(
assert obs.exit_code == -1 # Command is still running, waiting for input
# Send the confirmation
action = CmdRunAction('Y')
action = CmdRunAction('Y', is_input=True)
obs = runtime.run_action(action)
assert 'Proceeding with operation...' in obs.content
assert 'Operation completed successfully!' in obs.content
@ -869,13 +907,10 @@ def test_stress_long_output_with_soft_and_hard_timeout(
# where it will not accept any new commands.
obs = runtime.run_action(CmdRunAction('ls'))
assert obs.exit_code == -1
assert (
'The previous command was timed out but still running.'
in obs.metadata.suffix
)
assert 'The previous command is still running' in obs.metadata.suffix
# We need to send a Ctrl+C to reset the terminal.
obs = runtime.run_action(CmdRunAction('C-c'))
obs = runtime.run_action(CmdRunAction('C-c', is_input=True))
assert obs.exit_code == 130
# Now make sure the terminal is in a good state
@ -887,3 +922,25 @@ def test_stress_long_output_with_soft_and_hard_timeout(
finally:
_close_test_runtime(runtime)
def test_bash_remove_prefix(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# create a git repo
action = CmdRunAction(
'git init && git remote add origin https://github.com/All-Hands-AI/OpenHands'
)
obs = runtime.run_action(action)
# logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == 0
# Start Python with the interactive script
obs = runtime.run_action(CmdRunAction('git remote -v'))
# logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == 0
assert 'https://github.com/All-Hands-AI/OpenHands' in obs.content
assert 'git remote -v' not in obs.content
finally:
_close_test_runtime(runtime)

View File

@ -97,6 +97,7 @@ def test_cmd_run_action_serialization_deserialization():
'args': {
'blocking': False,
'command': 'echo "Hello world"',
'is_input': False,
'thought': '',
'hidden': False,
'confirmation_state': ActionConfirmationStatus.CONFIRMED,
@ -181,3 +182,4 @@ def test_legacy_serialization():
assert event_dict['args']['blocking'] is False
assert event_dict['args']['command'] == 'echo "Hello world"'
assert event_dict['args']['thought'] == ''
assert event_dict['args']['is_input'] is False

View File

@ -1,5 +1,6 @@
import os
import tempfile
import time
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import CmdRunAction
@ -91,7 +92,7 @@ def test_long_running_command_follow_by_execute():
assert obs.metadata.prefix == ''
# Continue watching output
obs = session.execute(CmdRunAction(''))
obs = session.execute(CmdRunAction('', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '2' in obs.content
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
@ -107,14 +108,20 @@ def test_long_running_command_follow_by_execute():
# Test command that produces no output
obs = session.execute(CmdRunAction('sleep 15'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '3' not in obs.content
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
assert 'The previous command is still running' in obs.metadata.suffix
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
time.sleep(3)
# Run it again, this time it should produce output
obs = session.execute(CmdRunAction('sleep 15'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '3' in obs.content
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
assert obs.metadata.suffix == (
'\n[The command has no new output after 2 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert 'The previous command is still running' in obs.metadata.suffix
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
@ -144,7 +151,7 @@ def test_interactive_command():
assert obs.metadata.prefix == ''
# Send input
obs = session.execute(CmdRunAction('John'))
obs = session.execute(CmdRunAction('John', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Hello John' in obs.content
assert obs.metadata.exit_code == 0
@ -165,7 +172,7 @@ def test_interactive_command():
)
assert obs.metadata.prefix == ''
obs = session.execute(CmdRunAction('line 1'))
obs = session.execute(CmdRunAction('line 1', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == -1
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
@ -177,7 +184,7 @@ def test_interactive_command():
)
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
obs = session.execute(CmdRunAction('line 2'))
obs = session.execute(CmdRunAction('line 2', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == -1
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
@ -189,7 +196,7 @@ def test_interactive_command():
)
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
obs = session.execute(CmdRunAction('EOF'))
obs = session.execute(CmdRunAction('EOF', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'line 1' in obs.content and 'line 2' in obs.content
assert obs.metadata.exit_code == 0
@ -220,7 +227,7 @@ def test_ctrl_c():
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
# Send Ctrl+C
obs = session.execute(CmdRunAction('C-c'))
obs = session.execute(CmdRunAction('C-c', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == 130 # Standard exit code for Ctrl+C
assert (
@ -240,10 +247,7 @@ def test_empty_command_errors():
# Test empty command without previous command
obs = session.execute(CmdRunAction(''))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert (
obs.content
== 'ERROR: No previous command to continue from. Previous command has to be timeout to be continued.'
)
assert obs.content == 'ERROR: No previous running command to retrieve logs from.'
assert obs.metadata.exit_code == -1
assert obs.metadata.prefix == ''
assert obs.metadata.suffix == ''
@ -264,14 +268,14 @@ def test_command_output_continuation():
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
obs = session.execute(CmdRunAction(''))
obs = session.execute(CmdRunAction('', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '[Below is the output of the previous command.]' in obs.metadata.prefix
assert obs.content.strip() == '2'
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
obs = session.execute(CmdRunAction(''))
obs = session.execute(CmdRunAction('', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '[Below is the output of the previous command.]' in obs.metadata.prefix
assert obs.content.strip() == '3'
@ -279,21 +283,21 @@ def test_command_output_continuation():
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
obs = session.execute(CmdRunAction(''))
obs = session.execute(CmdRunAction('', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '[Below is the output of the previous command.]' in obs.metadata.prefix
assert obs.content.strip() == '4'
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
obs = session.execute(CmdRunAction(''))
obs = session.execute(CmdRunAction('', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '[Below is the output of the previous command.]' in obs.metadata.prefix
assert obs.content.strip() == '5'
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
obs = session.execute(CmdRunAction(''))
obs = session.execute(CmdRunAction('', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '[The command completed with exit code 0.]' in obs.metadata.suffix
assert session.prev_status == BashCommandStatus.COMPLETED
@ -367,14 +371,14 @@ def test_python_interactive_input():
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
# Send first input (name)
obs = session.execute(CmdRunAction('Alice'))
obs = session.execute(CmdRunAction('Alice', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Enter your age:' in obs.content
assert obs.metadata.exit_code == -1
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
# Send second input (age)
obs = session.execute(CmdRunAction('25'))
obs = session.execute(CmdRunAction('25', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Hello Alice, you are 25 years old' in obs.content
assert obs.metadata.exit_code == 0

View File

@ -367,6 +367,7 @@ async def test_unsafe_bash_command(temp_dir: str):
arguments={
'blocking': False,
'command': 'ls',
'is_input': False,
'hidden': False,
'confirmation_state': ActionConfirmationStatus.CONFIRMED,
},