mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
fix(bash): also show timeout reminder when no_change_timeout is triggered (#6318)
Co-authored-by: Robert Brennan <accounts@rbren.io>
This commit is contained in:
parent
c5d7caf01f
commit
899c1f8360
@ -359,7 +359,7 @@ def complete_runtime(
|
||||
action = CmdRunAction(
|
||||
command=f'git diff --no-color --cached {instance["base_commit"]}'
|
||||
)
|
||||
action.timeout = max(300 + 100 * n_retries, 600)
|
||||
action.set_hard_timeout(max(300 + 100 * n_retries, 600))
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
@ -0,0 +1,73 @@
|
||||
import hashlib
|
||||
|
||||
from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
|
||||
from openhands.events.action import (
|
||||
AgentFinishAction,
|
||||
FileWriteAction,
|
||||
MessageAction,
|
||||
)
|
||||
from openhands.events.event import Event
|
||||
from openhands.events.observation import AgentDelegateObservation
|
||||
from openhands.runtime.base import Runtime
|
||||
|
||||
|
||||
class Test(BaseIntegrationTest):
|
||||
INSTRUCTION = 'Execute the python script /workspace/python_script.py with input "John" and "25" and tell me the secret number.'
|
||||
SECRET_NUMBER = int(hashlib.sha256(str(25).encode()).hexdigest()[:8], 16) % 1000
|
||||
|
||||
@classmethod
|
||||
def initialize_runtime(cls, runtime: Runtime) -> None:
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
action = FileWriteAction(
|
||||
path='/workspace/python_script.py',
|
||||
content=(
|
||||
'name = input("Enter your name: "); age = input("Enter your age: "); '
|
||||
'import hashlib; secret = int(hashlib.sha256(str(age).encode()).hexdigest()[:8], 16) % 1000; '
|
||||
'print(f"Hello {name}, you are {age} years old. Tell you a secret number: {secret}")'
|
||||
),
|
||||
)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
observation = runtime.run_action(action)
|
||||
logger.info(observation, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
@classmethod
|
||||
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
# check if the license information is in any message
|
||||
message_actions = [
|
||||
event
|
||||
for event in histories
|
||||
if isinstance(
|
||||
event, (MessageAction, AgentFinishAction, AgentDelegateObservation)
|
||||
)
|
||||
]
|
||||
logger.info(f'Total message-like events: {len(message_actions)}')
|
||||
|
||||
for event in message_actions:
|
||||
try:
|
||||
if isinstance(event, AgentDelegateObservation):
|
||||
content = event.content
|
||||
elif isinstance(event, AgentFinishAction):
|
||||
content = event.outputs.get('content', '')
|
||||
if event.thought:
|
||||
content += f'\n\n{event.thought}'
|
||||
elif isinstance(event, MessageAction):
|
||||
content = event.content
|
||||
else:
|
||||
logger.warning(f'Unexpected event type: {type(event)}')
|
||||
continue
|
||||
|
||||
if str(cls.SECRET_NUMBER) in content:
|
||||
return TestResult(success=True)
|
||||
except Exception as e:
|
||||
logger.error(f'Error processing event: {e}')
|
||||
|
||||
logger.debug(
|
||||
f'Total messages: {len(message_actions)}. Messages: {message_actions}'
|
||||
)
|
||||
return TestResult(
|
||||
success=False,
|
||||
reason=f'The answer is not found in any message. Total messages: {len(message_actions)}.',
|
||||
)
|
||||
@ -371,7 +371,6 @@ def _process_instance_wrapper(
|
||||
error = str(e)
|
||||
stacktrace = traceback.format_exc()
|
||||
if attempt == max_retries:
|
||||
logger.exception(e)
|
||||
msg = (
|
||||
'-' * 10
|
||||
+ '\n'
|
||||
@ -395,19 +394,13 @@ def _process_instance_wrapper(
|
||||
+ '-' * 10
|
||||
+ '\n'
|
||||
)
|
||||
if isinstance(
|
||||
e,
|
||||
(
|
||||
AgentRuntimeDisconnectedError,
|
||||
AgentRuntimeUnavailableError,
|
||||
AgentRuntimeNotFoundError,
|
||||
),
|
||||
):
|
||||
# e is likely an EvalException, so we can't directly infer it from type
|
||||
# but rather check if it's a fatal error
|
||||
if is_fatal_runtime_error(str(e)):
|
||||
runtime_failure_count += 1
|
||||
msg += f'Runtime disconnected error detected for instance {instance.instance_id}, runtime failure count: {runtime_failure_count}'
|
||||
msg += '\n' + '-' * 10 + '\n'
|
||||
logger.error(msg)
|
||||
if use_mp:
|
||||
print(msg) # use print to directly print to console
|
||||
time.sleep(5)
|
||||
|
||||
|
||||
@ -564,6 +557,7 @@ def is_fatal_evaluation_error(error: str | None) -> bool:
|
||||
AgentRuntimeNotReadyError,
|
||||
AgentRuntimeDisconnectedError,
|
||||
AgentRuntimeNotFoundError,
|
||||
ConnectionError,
|
||||
]
|
||||
|
||||
if any(exception.__name__ in error for exception in FATAL_EXCEPTIONS):
|
||||
@ -573,6 +567,23 @@ def is_fatal_evaluation_error(error: str | None) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def is_fatal_runtime_error(error: str | None) -> bool:
|
||||
if not error:
|
||||
return False
|
||||
|
||||
FATAL_RUNTIME_ERRORS = [
|
||||
AgentRuntimeUnavailableError,
|
||||
AgentRuntimeDisconnectedError,
|
||||
AgentRuntimeNotFoundError,
|
||||
]
|
||||
|
||||
if any(exception.__name__ in error for exception in FATAL_RUNTIME_ERRORS):
|
||||
logger.error(f'Fatal runtime error detected: {error}')
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def get_metrics(state: State) -> dict[str, Any]:
|
||||
"""Extract metrics from the state."""
|
||||
metrics = state.metrics.get() if state.metrics else {}
|
||||
|
||||
@ -31,7 +31,7 @@ from openhands.events.tool import ToolCallMetadata
|
||||
|
||||
_BASH_DESCRIPTION = """Execute a bash command in the terminal.
|
||||
* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
|
||||
* Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command like `C-c` (Ctrl+C) to interrupt the process.
|
||||
* Interact with running process: If a bash command returns exit code `-1`, this means the process is not yet finished. By setting `is_input` to `true`, the assistant can interact with the running process and send empty `command` to retrieve any additional logs, or send additional text (set `command` to the text) to STDIN of the running process, or send command like `C-c` (Ctrl+C), `C-d` (Ctrl+D), `C-z` (Ctrl+Z) to interrupt the process.
|
||||
"""
|
||||
|
||||
CmdRunTool = ChatCompletionToolParam(
|
||||
@ -46,6 +46,11 @@ CmdRunTool = ChatCompletionToolParam(
|
||||
'type': 'string',
|
||||
'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process.',
|
||||
},
|
||||
'is_input': {
|
||||
'type': 'string',
|
||||
'description': 'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. Default is False.',
|
||||
'enum': ['true', 'false'],
|
||||
},
|
||||
},
|
||||
'required': ['command'],
|
||||
},
|
||||
@ -488,6 +493,12 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
|
||||
f'Failed to parse tool call arguments: {tool_call.function.arguments}'
|
||||
) from e
|
||||
if tool_call.function.name == 'execute_bash':
|
||||
# this is an LLM error: add empty command to avoid breaking the tool call
|
||||
if 'command' not in arguments:
|
||||
arguments['command'] = ''
|
||||
# convert is_input to boolean
|
||||
if 'is_input' in arguments:
|
||||
arguments['is_input'] = arguments['is_input'] == 'true'
|
||||
action = CmdRunAction(**arguments)
|
||||
elif tool_call.function.name == 'execute_ipython_cell':
|
||||
action = IPythonRunCellAction(**arguments)
|
||||
|
||||
@ -11,8 +11,10 @@ from openhands.events.action.action import (
|
||||
|
||||
@dataclass
|
||||
class CmdRunAction(Action):
|
||||
command: str
|
||||
# When `command` is empty, it will be used to print the current tmux window
|
||||
command: (
|
||||
str # When `command` is empty, it will be used to print the current tmux window
|
||||
)
|
||||
is_input: bool = False # if True, the command is an input to the running process
|
||||
thought: str = ''
|
||||
blocking: bool = False
|
||||
# If blocking is True, the command will be run in a blocking manner.
|
||||
@ -28,7 +30,7 @@ class CmdRunAction(Action):
|
||||
return f'Running command: {self.command}'
|
||||
|
||||
def __str__(self) -> str:
|
||||
ret = f'**CmdRunAction (source={self.source})**\n'
|
||||
ret = f'**CmdRunAction (source={self.source}, is_input={self.is_input})**\n'
|
||||
if self.thought:
|
||||
ret += f'THOUGHT: {self.thought}\n'
|
||||
ret += f'COMMAND:\n{self.command}'
|
||||
|
||||
@ -197,9 +197,10 @@ class Runtime(FileEditRuntimeMixin):
|
||||
e, AgentRuntimeDisconnectedError
|
||||
):
|
||||
err_id = 'STATUS$ERROR_RUNTIME_DISCONNECTED'
|
||||
self.log('error', f'Unexpected error while running action: {str(e)}')
|
||||
error_message = f'{type(e).__name__}: {str(e)}'
|
||||
self.log('error', f'Unexpected error while running action: {error_message}')
|
||||
self.log('error', f'Problematic action: {str(event)}')
|
||||
self.send_error_message(err_id, str(e))
|
||||
self.send_error_message(err_id, error_message)
|
||||
self.close()
|
||||
return
|
||||
|
||||
|
||||
@ -59,6 +59,7 @@ class ActionExecutionClient(Runtime):
|
||||
self.session = HttpSession()
|
||||
self.action_semaphore = threading.Semaphore(1) # Ensure one action at a time
|
||||
self._runtime_initialized: bool = False
|
||||
self._runtime_closed: bool = False
|
||||
self._vscode_token: str | None = None # initial dummy value
|
||||
super().__init__(
|
||||
config,
|
||||
@ -283,4 +284,9 @@ class ActionExecutionClient(Runtime):
|
||||
return self.send_action_for_execution(action)
|
||||
|
||||
def close(self) -> None:
|
||||
# Make sure we don't close the session multiple times
|
||||
# Can happen in evaluation
|
||||
if self._runtime_closed:
|
||||
return
|
||||
self._runtime_closed = True
|
||||
self.session.close()
|
||||
|
||||
@ -13,6 +13,7 @@ from openhands.core.exceptions import (
|
||||
AgentRuntimeNotReadyError,
|
||||
AgentRuntimeUnavailableError,
|
||||
)
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events import EventStream
|
||||
from openhands.runtime.builder.remote import RemoteRuntimeBuilder
|
||||
from openhands.runtime.impl.action_execution.action_execution_client import (
|
||||
@ -75,6 +76,10 @@ class RemoteRuntime(ActionExecutionClient):
|
||||
self.available_hosts: dict[str, int] = {}
|
||||
self._runtime_initialized: bool = False
|
||||
|
||||
def log(self, level: str, message: str) -> None:
|
||||
message = f'[runtime session_id={self.sid} runtime_id={self.runtime_id or "unknown"}] {message}'
|
||||
getattr(logger, level)(message, stacklevel=2)
|
||||
|
||||
def _get_action_execution_server_host(self):
|
||||
return self.runtime_url
|
||||
|
||||
@ -350,20 +355,33 @@ class RemoteRuntime(ActionExecutionClient):
|
||||
super().close()
|
||||
return
|
||||
try:
|
||||
with self._send_runtime_api_request(
|
||||
'POST',
|
||||
f'{self.config.sandbox.remote_runtime_api_url}/stop',
|
||||
json={'runtime_id': self.runtime_id},
|
||||
):
|
||||
self.log('debug', 'Runtime stopped.')
|
||||
if not self._runtime_closed:
|
||||
with self._send_runtime_api_request(
|
||||
'POST',
|
||||
f'{self.config.sandbox.remote_runtime_api_url}/stop',
|
||||
json={'runtime_id': self.runtime_id},
|
||||
):
|
||||
self.log('debug', 'Runtime stopped.')
|
||||
except Exception as e:
|
||||
raise e
|
||||
finally:
|
||||
super().close()
|
||||
|
||||
def _send_runtime_api_request(self, method, url, **kwargs):
|
||||
return send_request(self.session, method, url, **kwargs)
|
||||
try:
|
||||
return send_request(self.session, method, url, **kwargs)
|
||||
except requests.Timeout:
|
||||
self.log(
|
||||
'error',
|
||||
f'No response received within the timeout period for url: {url}',
|
||||
)
|
||||
raise
|
||||
|
||||
@tenacity.retry(
|
||||
retry=tenacity.retry_if_exception_type(ConnectionError),
|
||||
stop=tenacity.stop_after_attempt(3) | stop_if_should_exit(),
|
||||
wait=tenacity.wait_exponential(multiplier=1, min=4, max=60),
|
||||
)
|
||||
def _send_action_server_request(self, method, url, **kwargs):
|
||||
try:
|
||||
return super()._send_action_server_request(method, url, **kwargs)
|
||||
@ -375,14 +393,14 @@ class RemoteRuntime(ActionExecutionClient):
|
||||
raise
|
||||
|
||||
except requests.HTTPError as e:
|
||||
if e.response.status_code in (404, 502):
|
||||
if e.response.status_code in (404, 502, 504):
|
||||
if e.response.status_code == 404:
|
||||
raise AgentRuntimeDisconnectedError(
|
||||
'Runtime is not responding. This may be temporary, please try again.'
|
||||
f'Runtime is not responding. This may be temporary, please try again. Original error: {e}'
|
||||
) from e
|
||||
else: # 502
|
||||
else: # 502, 504
|
||||
raise AgentRuntimeDisconnectedError(
|
||||
'Runtime is temporarily unavailable. This may be due to a restart or network issue, please try again.'
|
||||
f'Runtime is temporarily unavailable. This may be due to a restart or network issue, please try again. Original error: {e}'
|
||||
) from e
|
||||
elif e.response.status_code == 503:
|
||||
self.log('warning', 'Runtime appears to be paused. Resuming...')
|
||||
|
||||
@ -461,22 +461,28 @@ class BashSession:
|
||||
# Strip the command of any leading/trailing whitespace
|
||||
logger.debug(f'RECEIVED ACTION: {action}')
|
||||
command = action.command.strip()
|
||||
is_special_key = self._is_special_key(command)
|
||||
is_input: bool = action.is_input
|
||||
|
||||
# Handle when prev command is hard timeout
|
||||
|
||||
if command == '' and self.prev_status not in {
|
||||
# If the previous command is not completed, we need to check if the command is empty
|
||||
if self.prev_status not in {
|
||||
BashCommandStatus.CONTINUE,
|
||||
BashCommandStatus.NO_CHANGE_TIMEOUT,
|
||||
BashCommandStatus.HARD_TIMEOUT,
|
||||
}:
|
||||
return CmdOutputObservation(
|
||||
content='ERROR: No previous command to continue from. '
|
||||
+ 'Previous command has to be timeout to be continued.',
|
||||
command='',
|
||||
metadata=CmdOutputMetadata(),
|
||||
)
|
||||
if command == '':
|
||||
return CmdOutputObservation(
|
||||
content='ERROR: No previous running command to retrieve logs from.',
|
||||
command='',
|
||||
metadata=CmdOutputMetadata(),
|
||||
)
|
||||
if is_input:
|
||||
return CmdOutputObservation(
|
||||
content='ERROR: No previous running command to interact with.',
|
||||
command='',
|
||||
metadata=CmdOutputMetadata(),
|
||||
)
|
||||
|
||||
# Check if the command is a single command or multiple commands
|
||||
splited_commands = split_bash_commands(command)
|
||||
if len(splited_commands) > 1:
|
||||
return ErrorObservation(
|
||||
@ -491,46 +497,62 @@ class BashSession:
|
||||
last_change_time = start_time
|
||||
last_pane_output = self._get_pane_content()
|
||||
|
||||
# Do not check hard timeout if the command is a special key
|
||||
if command != '' and is_special_key:
|
||||
logger.debug(f'SENDING SPECIAL KEY: {command!r}')
|
||||
self.pane.send_keys(command, enter=False)
|
||||
# When prev command is hard timeout, and we are trying to execute new command
|
||||
elif self.prev_status == BashCommandStatus.HARD_TIMEOUT and command != '':
|
||||
if not last_pane_output.endswith(CMD_OUTPUT_PS1_END):
|
||||
_ps1_matches = CmdOutputMetadata.matches_ps1_metadata(last_pane_output)
|
||||
raw_command_output = self._combine_outputs_between_matches(
|
||||
last_pane_output, _ps1_matches
|
||||
)
|
||||
metadata = CmdOutputMetadata() # No metadata available
|
||||
metadata.suffix = (
|
||||
f'\n[Your command "{command}" is NOT executed. '
|
||||
f'The previous command was timed out but still running. Above is the output of the previous command. '
|
||||
"You may wait longer to see additional output of the previous command by sending empty command '', "
|
||||
'send other commands to interact with the current process, '
|
||||
'or send keys ("C-c", "C-z", "C-d") to interrupt/kill the previous command before sending your new command.]'
|
||||
)
|
||||
command_output = self._get_command_output(
|
||||
command,
|
||||
raw_command_output,
|
||||
metadata,
|
||||
continue_prefix='[Below is the output of the previous command.]\n',
|
||||
)
|
||||
return CmdOutputObservation(
|
||||
command=command,
|
||||
content=command_output,
|
||||
metadata=metadata,
|
||||
)
|
||||
# Only send the command to the pane if it's not a special key and it's not empty
|
||||
# AND previous hard timeout command is resolved
|
||||
elif command != '' and not is_special_key:
|
||||
# convert command to raw string
|
||||
command = escape_bash_special_chars(command)
|
||||
logger.debug(f'SENDING COMMAND: {command!r}')
|
||||
self.pane.send_keys(
|
||||
command,
|
||||
enter=True,
|
||||
# When prev command is still running, and we are trying to send a new command
|
||||
if (
|
||||
self.prev_status
|
||||
in {
|
||||
BashCommandStatus.HARD_TIMEOUT,
|
||||
BashCommandStatus.NO_CHANGE_TIMEOUT,
|
||||
}
|
||||
and not last_pane_output.endswith(
|
||||
CMD_OUTPUT_PS1_END
|
||||
) # prev command is not completed
|
||||
and not is_input
|
||||
and command != '' # not input and not empty command
|
||||
):
|
||||
_ps1_matches = CmdOutputMetadata.matches_ps1_metadata(last_pane_output)
|
||||
raw_command_output = self._combine_outputs_between_matches(
|
||||
last_pane_output, _ps1_matches
|
||||
)
|
||||
metadata = CmdOutputMetadata() # No metadata available
|
||||
metadata.suffix = (
|
||||
f'\n[Your command "{command}" is NOT executed. '
|
||||
f'The previous command is still running - You CANNOT send new commands until the previous command is completed. '
|
||||
'By setting `is_input` to `true`, you can interact with the current process: '
|
||||
"You may wait longer to see additional output of the previous command by sending empty command '', "
|
||||
'send other commands to interact with the current process, '
|
||||
'or send keys ("C-c", "C-z", "C-d") to interrupt/kill the previous command before sending your new command.]'
|
||||
)
|
||||
logger.debug(f'PREVIOUS COMMAND OUTPUT: {raw_command_output}')
|
||||
command_output = self._get_command_output(
|
||||
command,
|
||||
raw_command_output,
|
||||
metadata,
|
||||
continue_prefix='[Below is the output of the previous command.]\n',
|
||||
)
|
||||
return CmdOutputObservation(
|
||||
command=command,
|
||||
content=command_output,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
# Send actual command/inputs to the pane
|
||||
if command != '':
|
||||
is_special_key = self._is_special_key(command)
|
||||
if is_input:
|
||||
logger.debug(f'SENDING INPUT TO RUNNING PROCESS: {command!r}')
|
||||
self.pane.send_keys(
|
||||
command,
|
||||
enter=not is_special_key,
|
||||
)
|
||||
else:
|
||||
# convert command to raw string
|
||||
command = escape_bash_special_chars(command)
|
||||
logger.debug(f'SENDING COMMAND: {command!r}')
|
||||
self.pane.send_keys(
|
||||
command,
|
||||
enter=not is_special_key,
|
||||
)
|
||||
|
||||
# Loop until the command completes or times out
|
||||
while should_continue():
|
||||
|
||||
@ -57,7 +57,7 @@ def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
|
||||
in obs.metadata.suffix
|
||||
)
|
||||
|
||||
action = CmdRunAction(command='C-c')
|
||||
action = CmdRunAction(command='C-c', is_input=True)
|
||||
action.set_hard_timeout(30)
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -571,7 +571,7 @@ def test_interactive_command(temp_dir, runtime_cls, run_as_openhands):
|
||||
assert 'Enter name:' in obs.content
|
||||
assert '[The command has no new output after 1 seconds.' in obs.metadata.suffix
|
||||
|
||||
action = CmdRunAction('John')
|
||||
action = CmdRunAction('John', is_input=True)
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'Hello John' in obs.content
|
||||
@ -741,10 +741,7 @@ def test_long_running_command_follow_by_execute(
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '3' not in obs.content
|
||||
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
|
||||
assert (
|
||||
'The previous command was timed out but still running.'
|
||||
in obs.metadata.suffix
|
||||
)
|
||||
assert 'The previous command is still running' in obs.metadata.suffix
|
||||
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
|
||||
|
||||
# Finally continue again
|
||||
@ -763,7 +760,9 @@ def test_empty_command_errors(temp_dir, runtime_cls, run_as_openhands):
|
||||
# Test empty command without previous command
|
||||
obs = runtime.run_action(CmdRunAction(''))
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert 'ERROR: No previous command to continue from' in obs.content
|
||||
assert (
|
||||
'ERROR: No previous running command to retrieve logs from.' in obs.content
|
||||
)
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
@ -781,13 +780,52 @@ def test_python_interactive_input(temp_dir, runtime_cls, run_as_openhands):
|
||||
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
|
||||
|
||||
# Send first input (name)
|
||||
obs = runtime.run_action(CmdRunAction('Alice'))
|
||||
obs = runtime.run_action(CmdRunAction('Alice', is_input=True))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'Enter your age:' in obs.content
|
||||
assert obs.metadata.exit_code == -1
|
||||
|
||||
# Send second input (age)
|
||||
obs = runtime.run_action(CmdRunAction('25'))
|
||||
obs = runtime.run_action(CmdRunAction('25', is_input=True))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'Hello Alice, you are 25 years old' in obs.content
|
||||
assert obs.metadata.exit_code == 0
|
||||
assert '[The command completed with exit code 0.]' in obs.metadata.suffix
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_python_interactive_input_without_set_input(
|
||||
temp_dir, runtime_cls, run_as_openhands
|
||||
):
|
||||
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
|
||||
try:
|
||||
# Test Python program that asks for input - properly escaped for bash
|
||||
python_script = """name = input('Enter your name: '); age = input('Enter your age: '); print(f'Hello {name}, you are {age} years old')"""
|
||||
|
||||
# Start Python with the interactive script
|
||||
obs = runtime.run_action(CmdRunAction(f'python3 -c "{python_script}"'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'Enter your name:' in obs.content
|
||||
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
|
||||
|
||||
# Send first input (name)
|
||||
obs = runtime.run_action(CmdRunAction('Alice', is_input=False))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'Enter your age:' not in obs.content
|
||||
assert (
|
||||
'Your command "Alice" is NOT executed. The previous command is still running'
|
||||
in obs.metadata.suffix
|
||||
)
|
||||
assert obs.metadata.exit_code == -1
|
||||
|
||||
# Try again now with input
|
||||
obs = runtime.run_action(CmdRunAction('Alice', is_input=True))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'Enter your age:' in obs.content
|
||||
assert obs.metadata.exit_code == -1
|
||||
|
||||
obs = runtime.run_action(CmdRunAction('25', is_input=True))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'Hello Alice, you are 25 years old' in obs.content
|
||||
assert obs.metadata.exit_code == 0
|
||||
@ -844,7 +882,7 @@ def test_stress_long_output_with_soft_and_hard_timeout(
|
||||
assert obs.exit_code == -1 # Command is still running, waiting for input
|
||||
|
||||
# Send the confirmation
|
||||
action = CmdRunAction('Y')
|
||||
action = CmdRunAction('Y', is_input=True)
|
||||
obs = runtime.run_action(action)
|
||||
assert 'Proceeding with operation...' in obs.content
|
||||
assert 'Operation completed successfully!' in obs.content
|
||||
@ -869,13 +907,10 @@ def test_stress_long_output_with_soft_and_hard_timeout(
|
||||
# where it will not accept any new commands.
|
||||
obs = runtime.run_action(CmdRunAction('ls'))
|
||||
assert obs.exit_code == -1
|
||||
assert (
|
||||
'The previous command was timed out but still running.'
|
||||
in obs.metadata.suffix
|
||||
)
|
||||
assert 'The previous command is still running' in obs.metadata.suffix
|
||||
|
||||
# We need to send a Ctrl+C to reset the terminal.
|
||||
obs = runtime.run_action(CmdRunAction('C-c'))
|
||||
obs = runtime.run_action(CmdRunAction('C-c', is_input=True))
|
||||
assert obs.exit_code == 130
|
||||
|
||||
# Now make sure the terminal is in a good state
|
||||
@ -887,3 +922,25 @@ def test_stress_long_output_with_soft_and_hard_timeout(
|
||||
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_bash_remove_prefix(temp_dir, runtime_cls, run_as_openhands):
|
||||
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
|
||||
try:
|
||||
# create a git repo
|
||||
action = CmdRunAction(
|
||||
'git init && git remote add origin https://github.com/All-Hands-AI/OpenHands'
|
||||
)
|
||||
obs = runtime.run_action(action)
|
||||
# logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.metadata.exit_code == 0
|
||||
|
||||
# Start Python with the interactive script
|
||||
obs = runtime.run_action(CmdRunAction('git remote -v'))
|
||||
# logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.metadata.exit_code == 0
|
||||
assert 'https://github.com/All-Hands-AI/OpenHands' in obs.content
|
||||
assert 'git remote -v' not in obs.content
|
||||
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
@ -97,6 +97,7 @@ def test_cmd_run_action_serialization_deserialization():
|
||||
'args': {
|
||||
'blocking': False,
|
||||
'command': 'echo "Hello world"',
|
||||
'is_input': False,
|
||||
'thought': '',
|
||||
'hidden': False,
|
||||
'confirmation_state': ActionConfirmationStatus.CONFIRMED,
|
||||
@ -181,3 +182,4 @@ def test_legacy_serialization():
|
||||
assert event_dict['args']['blocking'] is False
|
||||
assert event_dict['args']['command'] == 'echo "Hello world"'
|
||||
assert event_dict['args']['thought'] == ''
|
||||
assert event_dict['args']['is_input'] is False
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
import os
|
||||
import tempfile
|
||||
import time
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action import CmdRunAction
|
||||
@ -91,7 +92,7 @@ def test_long_running_command_follow_by_execute():
|
||||
assert obs.metadata.prefix == ''
|
||||
|
||||
# Continue watching output
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
obs = session.execute(CmdRunAction('', is_input=True))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '2' in obs.content
|
||||
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
|
||||
@ -107,14 +108,20 @@ def test_long_running_command_follow_by_execute():
|
||||
# Test command that produces no output
|
||||
obs = session.execute(CmdRunAction('sleep 15'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '3' not in obs.content
|
||||
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
|
||||
assert 'The previous command is still running' in obs.metadata.suffix
|
||||
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
time.sleep(3)
|
||||
|
||||
# Run it again, this time it should produce output
|
||||
obs = session.execute(CmdRunAction('sleep 15'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '3' in obs.content
|
||||
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
|
||||
assert obs.metadata.suffix == (
|
||||
'\n[The command has no new output after 2 seconds. '
|
||||
"You may wait longer to see additional output by sending empty command '', "
|
||||
'send other commands to interact with the current process, '
|
||||
'or send keys to interrupt/kill the command.]'
|
||||
)
|
||||
assert 'The previous command is still running' in obs.metadata.suffix
|
||||
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
@ -144,7 +151,7 @@ def test_interactive_command():
|
||||
assert obs.metadata.prefix == ''
|
||||
|
||||
# Send input
|
||||
obs = session.execute(CmdRunAction('John'))
|
||||
obs = session.execute(CmdRunAction('John', is_input=True))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'Hello John' in obs.content
|
||||
assert obs.metadata.exit_code == 0
|
||||
@ -165,7 +172,7 @@ def test_interactive_command():
|
||||
)
|
||||
assert obs.metadata.prefix == ''
|
||||
|
||||
obs = session.execute(CmdRunAction('line 1'))
|
||||
obs = session.execute(CmdRunAction('line 1', is_input=True))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.metadata.exit_code == -1
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
@ -177,7 +184,7 @@ def test_interactive_command():
|
||||
)
|
||||
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
|
||||
|
||||
obs = session.execute(CmdRunAction('line 2'))
|
||||
obs = session.execute(CmdRunAction('line 2', is_input=True))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.metadata.exit_code == -1
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
@ -189,7 +196,7 @@ def test_interactive_command():
|
||||
)
|
||||
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
|
||||
|
||||
obs = session.execute(CmdRunAction('EOF'))
|
||||
obs = session.execute(CmdRunAction('EOF', is_input=True))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'line 1' in obs.content and 'line 2' in obs.content
|
||||
assert obs.metadata.exit_code == 0
|
||||
@ -220,7 +227,7 @@ def test_ctrl_c():
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
# Send Ctrl+C
|
||||
obs = session.execute(CmdRunAction('C-c'))
|
||||
obs = session.execute(CmdRunAction('C-c', is_input=True))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.metadata.exit_code == 130 # Standard exit code for Ctrl+C
|
||||
assert (
|
||||
@ -240,10 +247,7 @@ def test_empty_command_errors():
|
||||
# Test empty command without previous command
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert (
|
||||
obs.content
|
||||
== 'ERROR: No previous command to continue from. Previous command has to be timeout to be continued.'
|
||||
)
|
||||
assert obs.content == 'ERROR: No previous running command to retrieve logs from.'
|
||||
assert obs.metadata.exit_code == -1
|
||||
assert obs.metadata.prefix == ''
|
||||
assert obs.metadata.suffix == ''
|
||||
@ -264,14 +268,14 @@ def test_command_output_continuation():
|
||||
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
obs = session.execute(CmdRunAction('', is_input=True))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '[Below is the output of the previous command.]' in obs.metadata.prefix
|
||||
assert obs.content.strip() == '2'
|
||||
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
obs = session.execute(CmdRunAction('', is_input=True))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '[Below is the output of the previous command.]' in obs.metadata.prefix
|
||||
assert obs.content.strip() == '3'
|
||||
@ -279,21 +283,21 @@ def test_command_output_continuation():
|
||||
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
obs = session.execute(CmdRunAction('', is_input=True))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '[Below is the output of the previous command.]' in obs.metadata.prefix
|
||||
assert obs.content.strip() == '4'
|
||||
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
obs = session.execute(CmdRunAction('', is_input=True))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '[Below is the output of the previous command.]' in obs.metadata.prefix
|
||||
assert obs.content.strip() == '5'
|
||||
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
obs = session.execute(CmdRunAction('', is_input=True))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '[The command completed with exit code 0.]' in obs.metadata.suffix
|
||||
assert session.prev_status == BashCommandStatus.COMPLETED
|
||||
@ -367,14 +371,14 @@ def test_python_interactive_input():
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
# Send first input (name)
|
||||
obs = session.execute(CmdRunAction('Alice'))
|
||||
obs = session.execute(CmdRunAction('Alice', is_input=True))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'Enter your age:' in obs.content
|
||||
assert obs.metadata.exit_code == -1
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
# Send second input (age)
|
||||
obs = session.execute(CmdRunAction('25'))
|
||||
obs = session.execute(CmdRunAction('25', is_input=True))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'Hello Alice, you are 25 years old' in obs.content
|
||||
assert obs.metadata.exit_code == 0
|
||||
|
||||
@ -367,6 +367,7 @@ async def test_unsafe_bash_command(temp_dir: str):
|
||||
arguments={
|
||||
'blocking': False,
|
||||
'command': 'ls',
|
||||
'is_input': False,
|
||||
'hidden': False,
|
||||
'confirmation_state': ActionConfirmationStatus.CONFIRMED,
|
||||
},
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user