feat(agent): remind the agent that it can use timeout to increase the amount of time the command is running (#8932)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Xingyao Wang 2025-06-05 23:57:33 -04:00 committed by GitHub
parent 4df3ee9d2e
commit 59f5f0dc9b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 52 additions and 63 deletions

View File

@ -17,6 +17,7 @@ from openhands.events.observation.commands import (
CmdOutputMetadata,
CmdOutputObservation,
)
from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE
from openhands.utils.shutdown_listener import should_continue
@ -379,9 +380,7 @@ class BashSession:
metadata = CmdOutputMetadata() # No metadata available
metadata.suffix = (
f'\n[The command has no new output after {self.NO_CHANGE_TIMEOUT_SECONDS} seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
)
command_output = self._get_command_output(
command,
@ -414,9 +413,7 @@ class BashSession:
metadata = CmdOutputMetadata() # No metadata available
metadata.suffix = (
f'\n[The command timed out after {timeout} seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
)
command_output = self._get_command_output(
command,

View File

@ -0,0 +1,7 @@
# Common timeout message that can be used across different timeout scenarios
TIMEOUT_MESSAGE_TEMPLATE = (
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'send keys to interrupt/kill the command, '
'or use the timeout parameter in execute_bash for future commands.'
)

View File

@ -20,6 +20,7 @@ from openhands.events.observation.commands import (
CmdOutputMetadata,
CmdOutputObservation,
)
from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE
from openhands.utils.shutdown_listener import should_continue
pythonnet.load('coreclr')
@ -559,9 +560,7 @@ class WindowsPowershellSession:
else:
metadata.suffix = (
f'\n[The command timed out after {timeout_seconds} seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
)
return CmdOutputObservation(
@ -1331,9 +1330,7 @@ class WindowsPowershellSession:
# Align suffix with bash.py timeout message
suffix = (
f'\n[The command timed out after {timeout_seconds} seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
)
elif shutdown_requested:
# Align suffix with bash.py equivalent (though bash.py might not have specific shutdown message)

View File

@ -16,6 +16,16 @@ from openhands.events.action import CmdRunAction
from openhands.events.observation import CmdOutputObservation, ErrorObservation
from openhands.runtime.impl.cli.cli_runtime import CLIRuntime
from openhands.runtime.impl.local.local_runtime import LocalRuntime
from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE
def get_timeout_suffix(timeout_seconds):
"""Helper function to generate the expected timeout suffix."""
return (
f'[The command timed out after {timeout_seconds} seconds. '
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
)
# ============================================================================================================================
# Bash-specific tests
@ -56,10 +66,7 @@ def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
if runtime_cls == CLIRuntime:
assert '[The command timed out after 1.0 seconds.]' in obs.metadata.suffix
else:
assert (
"[The command timed out after 1.0 seconds. You may wait longer to see additional output by sending empty command '', send other commands to interact with the current process, or send keys to interrupt/kill the command.]"
in obs.metadata.suffix
)
assert get_timeout_suffix(1.0) in obs.metadata.suffix
action = CmdRunAction(command='C-c', is_input=True)
action.set_hard_timeout(30)

View File

@ -589,7 +589,7 @@
"working_dir": null,
"py_interpreter_path": null,
"prefix": "",
"suffix": "\n[The command has no new output after 30 seconds. You may wait longer to see additional output by sending empty command '', send other commands to interact with the current process, or send keys to interrupt/kill the command.]"
"suffix": "\n[The command has no new output after 30 seconds. You may wait longer to see additional output by sending empty command '', send other commands to interact with the current process, send keys to interrupt/kill the command, or use the timeout parameter in execute_bash for future commands.]"
},
"hidden": false
},

View File

@ -5,6 +5,15 @@ import time
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import CmdRunAction
from openhands.runtime.utils.bash import BashCommandStatus, BashSession
from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE
def get_no_change_timeout_suffix(timeout_seconds):
"""Helper function to generate the expected no-change timeout suffix."""
return (
f'\n[The command has no new output after {timeout_seconds} seconds. '
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
)
def test_session_initialization():
@ -83,12 +92,7 @@ def test_long_running_command_follow_by_execute():
assert '1' in obs.content # First number should appear before timeout
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
assert obs.metadata.suffix == (
'\n[The command has no new output after 2 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.suffix == get_no_change_timeout_suffix(2)
assert obs.metadata.prefix == ''
# Continue watching output
@ -96,12 +100,7 @@ def test_long_running_command_follow_by_execute():
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '2' in obs.content
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
assert obs.metadata.suffix == (
'\n[The command has no new output after 2 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.suffix == get_no_change_timeout_suffix(2)
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
@ -142,12 +141,7 @@ def test_interactive_command():
assert 'Enter name:' in obs.content
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
assert obs.metadata.suffix == (
'\n[The command has no new output after 3 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.suffix == get_no_change_timeout_suffix(3)
assert obs.metadata.prefix == ''
# Send input
@ -164,36 +158,21 @@ def test_interactive_command():
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == -1
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
assert obs.metadata.suffix == (
'\n[The command has no new output after 3 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.suffix == get_no_change_timeout_suffix(3)
assert obs.metadata.prefix == ''
obs = session.execute(CmdRunAction('line 1', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == -1
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
assert obs.metadata.suffix == (
'\n[The command has no new output after 3 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.suffix == get_no_change_timeout_suffix(3)
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
obs = session.execute(CmdRunAction('line 2', is_input=True))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == -1
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
assert obs.metadata.suffix == (
'\n[The command has no new output after 3 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.suffix == get_no_change_timeout_suffix(3)
assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
obs = session.execute(CmdRunAction('EOF', is_input=True))
@ -216,12 +195,7 @@ def test_ctrl_c():
)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'looping' in obs.content
assert obs.metadata.suffix == (
'\n[The command has no new output after 2 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.suffix == get_no_change_timeout_suffix(2)
assert obs.metadata.prefix == ''
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT

View File

@ -12,6 +12,16 @@ from openhands.events.observation import ErrorObservation
from openhands.events.observation.commands import (
CmdOutputObservation,
)
from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE
def get_timeout_suffix(timeout_seconds):
"""Helper function to generate the expected timeout suffix."""
return (
f'[The command timed out after {timeout_seconds} seconds. '
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
)
# Skip all tests in this module if not running on Windows
pytestmark = pytest.mark.skipif(
@ -168,10 +178,7 @@ def test_long_running_command(windows_bash_session):
# Verify the initial output was captured
assert 'Serving HTTP on' in result.content
# Check for timeout specific metadata
assert (
"[The command timed out after 1.0 seconds. You may wait longer to see additional output by sending empty command '', send other commands to interact with the current process, or send keys to interrupt/kill the command.]"
in result.metadata.suffix
)
assert get_timeout_suffix(1.0) in result.metadata.suffix
assert result.exit_code == -1
# The action timed out, but the command should be still running