Tim O'Farrell f292f3a84d
V1 Integration (#11183)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-10-14 02:16:44 +00:00

682 lines
26 KiB
Python

import os
import re
import time
import uuid
from enum import Enum
from typing import Any
import bashlex
import libtmux
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import CmdRunAction
from openhands.events.observation import ErrorObservation
from openhands.events.observation.commands import (
CMD_OUTPUT_PS1_END,
CmdOutputMetadata,
CmdOutputObservation,
)
from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE
from openhands.utils.shutdown_listener import should_continue
RUNTIME_USERNAME = os.getenv('RUNTIME_USERNAME')
SU_TO_USER = os.getenv('SU_TO_USER', 'true').lower() in (
'1',
'true',
't',
'yes',
'y',
'on',
)
def split_bash_commands(commands: str) -> list[str]:
if not commands.strip():
return ['']
try:
parsed = bashlex.parse(commands)
except (
bashlex.errors.ParsingError,
NotImplementedError,
TypeError,
AttributeError,
):
# Added AttributeError to catch 'str' object has no attribute 'kind' error (issue #8369)
logger.debug(
f'Failed to parse bash commands\n'
f'[input]: {commands}\n'
f'The original command will be returned as is.',
exc_info=True,
)
# If parsing fails, return the original commands
return [commands]
result: list[str] = []
last_end = 0
for node in parsed:
start, end = node.pos
# Include any text between the last command and this one
if start > last_end:
between = commands[last_end:start]
logger.debug(f'BASH PARSING between: {between}')
if result:
result[-1] += between.rstrip()
elif between.strip():
# THIS SHOULD NOT HAPPEN
result.append(between.rstrip())
# Extract the command, preserving original formatting
command = commands[start:end].rstrip()
logger.debug(f'BASH PARSING command: {command}')
result.append(command)
last_end = end
# Add any remaining text after the last command to the last command
remaining = commands[last_end:].rstrip()
logger.debug(f'BASH PARSING remaining: {remaining}')
if last_end < len(commands) and result:
result[-1] += remaining
logger.debug(f'BASH PARSING result[-1] += remaining: {result[-1]}')
elif last_end < len(commands):
if remaining:
result.append(remaining)
logger.debug(f'BASH PARSING result.append(remaining): {result[-1]}')
return result
def escape_bash_special_chars(command: str) -> str:
r"""Escapes characters that have different interpretations in bash vs python.
Specifically handles escape sequences like \;, \|, \&, etc.
"""
if command.strip() == '':
return ''
try:
parts = []
last_pos = 0
def visit_node(node: Any) -> None:
nonlocal last_pos
if (
node.kind == 'redirect'
and hasattr(node, 'heredoc')
and node.heredoc is not None
):
# We're entering a heredoc - preserve everything as-is until we see EOF
# Store the heredoc end marker (usually 'EOF' but could be different)
between = command[last_pos : node.pos[0]]
parts.append(between)
# Add the heredoc start marker
parts.append(command[node.pos[0] : node.heredoc.pos[0]])
# Add the heredoc content as-is
parts.append(command[node.heredoc.pos[0] : node.heredoc.pos[1]])
last_pos = node.pos[1]
return
if node.kind == 'word':
# Get the raw text between the last position and current word
between = command[last_pos : node.pos[0]]
word_text = command[node.pos[0] : node.pos[1]]
# Add the between text, escaping special characters
between = re.sub(r'\\([;&|><])', r'\\\\\1', between)
parts.append(between)
# Check if word_text is a quoted string or command substitution
if (
(word_text.startswith('"') and word_text.endswith('"'))
or (word_text.startswith("'") and word_text.endswith("'"))
or (word_text.startswith('$(') and word_text.endswith(')'))
or (word_text.startswith('`') and word_text.endswith('`'))
):
# Preserve quoted strings, command substitutions, and heredoc content as-is
parts.append(word_text)
else:
# Escape special chars in unquoted text
word_text = re.sub(r'\\([;&|><])', r'\\\\\1', word_text)
parts.append(word_text)
last_pos = node.pos[1]
return
# Visit child nodes
if hasattr(node, 'parts'):
for part in node.parts:
visit_node(part)
# Process all nodes in the AST
nodes = list(bashlex.parse(command))
for node in nodes:
between = command[last_pos : node.pos[0]]
between = re.sub(r'\\([;&|><])', r'\\\\\1', between)
parts.append(between)
last_pos = node.pos[0]
visit_node(node)
# Handle any remaining text after the last word
remaining = command[last_pos:]
parts.append(remaining)
return ''.join(parts)
except (bashlex.errors.ParsingError, NotImplementedError, TypeError):
logger.debug(
f'Failed to parse bash commands for special characters escape\n'
f'[input]: {command}\n'
f'The original command will be returned as is.',
exc_info=True,
)
return command
class BashCommandStatus(Enum):
CONTINUE = 'continue'
COMPLETED = 'completed'
NO_CHANGE_TIMEOUT = 'no_change_timeout'
HARD_TIMEOUT = 'hard_timeout'
def _remove_command_prefix(command_output: str, command: str) -> str:
return command_output.lstrip().removeprefix(command.lstrip()).lstrip()
class BashSession:
POLL_INTERVAL = 0.5
HISTORY_LIMIT = 10_000
PS1 = CmdOutputMetadata.to_ps1_prompt()
def __init__(
self,
work_dir: str,
username: str | None = None,
no_change_timeout_seconds: int = 30,
max_memory_mb: int | None = None,
):
self.NO_CHANGE_TIMEOUT_SECONDS = no_change_timeout_seconds
self.work_dir = work_dir
self.username = username
self._initialized = False
self.max_memory_mb = max_memory_mb
def initialize(self) -> None:
self.server = libtmux.Server()
_shell_command = '/bin/bash'
if SU_TO_USER and self.username in list(
filter(None, [RUNTIME_USERNAME, 'root', 'openhands'])
):
# This starts a non-login (new) shell for the given user
_shell_command = f'su {self.username} -'
# FIXME: we will introduce memory limit using sysbox-runc in coming PR
# # otherwise, we are running as the CURRENT USER (e.g., when running LocalRuntime)
# if self.max_memory_mb is not None:
# window_command = (
# f'prlimit --as={self.max_memory_mb * 1024 * 1024} {_shell_command}'
# )
# else:
window_command = _shell_command
logger.debug(
f'Initializing bash session in {self.work_dir} with command: {window_command}'
)
session_name = f'openhands-{self.username}-{uuid.uuid4()}'
self.session = self.server.new_session(
session_name=session_name,
start_directory=self.work_dir, # This parameter is supported by libtmux
kill_session=True,
x=1000,
y=1000,
)
# Set history limit to a large number to avoid losing history
# https://unix.stackexchange.com/questions/43414/unlimited-history-in-tmux
self.session.set_option('history-limit', str(self.HISTORY_LIMIT), global_=True)
self.session.history_limit = self.HISTORY_LIMIT
# We need to create a new pane because the initial pane's history limit is (default) 2000
_initial_window = self.session.active_window
self.window = self.session.new_window(
window_name='bash',
window_shell=window_command,
start_directory=self.work_dir, # This parameter is supported by libtmux
)
self.pane = self.window.active_pane
logger.debug(f'pane: {self.pane}; history_limit: {self.session.history_limit}')
_initial_window.kill()
# Configure bash to use simple PS1 and disable PS2
self.pane.send_keys(
f'export PROMPT_COMMAND=\'export PS1="{self.PS1}"\'; export PS2=""'
)
time.sleep(0.1) # Wait for command to take effect
self._clear_screen()
# Store the last command for interactive input handling
self.prev_status: BashCommandStatus | None = None
self.prev_output: str = ''
self._closed: bool = False
logger.debug(f'Bash session initialized with work dir: {self.work_dir}')
# Maintain the current working directory
self._cwd = os.path.abspath(self.work_dir)
self._initialized = True
def __del__(self) -> None:
"""Ensure the session is closed when the object is destroyed."""
self.close()
def _get_pane_content(self) -> str:
"""Capture the current pane content and update the buffer."""
content = '\n'.join(
map(
# avoid double newlines
lambda line: line.rstrip(),
self.pane.cmd('capture-pane', '-J', '-pS', '-').stdout,
)
)
return content
def close(self) -> None:
"""Clean up the session."""
if self._closed:
return
self.session.kill()
self._closed = True
@property
def cwd(self) -> str:
return self._cwd
def _is_special_key(self, command: str) -> bool:
"""Check if the command is a special key."""
# Special keys are of the form C-<key>
_command = command.strip()
return _command.startswith('C-') and len(_command) == 3
def _clear_screen(self) -> None:
"""Clear the tmux pane screen and history."""
self.pane.send_keys('C-l', enter=False)
time.sleep(0.1)
self.pane.cmd('clear-history')
def _get_command_output(
self,
command: str,
raw_command_output: str,
metadata: CmdOutputMetadata,
continue_prefix: str = '',
) -> str:
"""Get the command output with the previous command output removed.
Args:
command: The command that was executed.
raw_command_output: The raw output from the command.
metadata: The metadata object to store prefix/suffix in.
continue_prefix: The prefix to add to the command output if it's a continuation of the previous command.
"""
# remove the previous command output from the new output if any
if self.prev_output:
command_output = raw_command_output.removeprefix(self.prev_output)
metadata.prefix = continue_prefix
else:
command_output = raw_command_output
self.prev_output = raw_command_output # update current command output anyway
command_output = _remove_command_prefix(command_output, command)
return command_output.rstrip()
def _handle_completed_command(
self,
command: str,
pane_content: str,
ps1_matches: list[re.Match],
hidden: bool,
) -> CmdOutputObservation:
is_special_key = self._is_special_key(command)
assert len(ps1_matches) >= 1, (
f'Expected at least one PS1 metadata block, but got {len(ps1_matches)}.\n'
f'---FULL OUTPUT---\n{pane_content!r}\n---END OF OUTPUT---'
)
metadata = CmdOutputMetadata.from_ps1_match(ps1_matches[-1])
# Special case where the previous command output is truncated due to history limit
# We should get the content BEFORE the last PS1 prompt
get_content_before_last_match = bool(len(ps1_matches) == 1)
# Update the current working directory if it has changed
if metadata.working_dir != self._cwd and metadata.working_dir:
logger.debug(
f'directory_changed: {self._cwd}; {metadata.working_dir}; {command}'
)
self._cwd = metadata.working_dir
logger.debug(f'COMMAND OUTPUT: {pane_content}')
# Extract the command output between the two PS1 prompts
raw_command_output = self._combine_outputs_between_matches(
pane_content,
ps1_matches,
get_content_before_last_match=get_content_before_last_match,
)
if get_content_before_last_match:
# Count the number of lines in the truncated output
num_lines = len(raw_command_output.splitlines())
metadata.prefix = f'[Previous command outputs are truncated. Showing the last {num_lines} lines of the output below.]\n'
metadata.suffix = (
f'\n[The command completed with exit code {metadata.exit_code}.]'
if not is_special_key
else f'\n[The command completed with exit code {metadata.exit_code}. CTRL+{command[-1].upper()} was sent.]'
)
command_output = self._get_command_output(
command,
raw_command_output,
metadata,
)
self.prev_status = BashCommandStatus.COMPLETED
self.prev_output = '' # Reset previous command output
self._ready_for_next_command()
return CmdOutputObservation(
content=command_output,
command=command,
metadata=metadata,
hidden=hidden,
)
def _handle_nochange_timeout_command(
self,
command: str,
pane_content: str,
ps1_matches: list[re.Match],
) -> CmdOutputObservation:
self.prev_status = BashCommandStatus.NO_CHANGE_TIMEOUT
if len(ps1_matches) != 1:
logger.warning(
'Expected exactly one PS1 metadata block BEFORE the execution of a command, '
f'but got {len(ps1_matches)} PS1 metadata blocks:\n---\n{pane_content!r}\n---'
)
raw_command_output = self._combine_outputs_between_matches(
pane_content, ps1_matches
)
metadata = CmdOutputMetadata() # No metadata available
metadata.suffix = (
f'\n[The command has no new output after {self.NO_CHANGE_TIMEOUT_SECONDS} seconds. '
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
)
command_output = self._get_command_output(
command,
raw_command_output,
metadata,
continue_prefix='[Below is the output of the previous command.]\n',
)
return CmdOutputObservation(
content=command_output,
command=command,
metadata=metadata,
)
def _handle_hard_timeout_command(
self,
command: str,
pane_content: str,
ps1_matches: list[re.Match],
timeout: float,
) -> CmdOutputObservation:
self.prev_status = BashCommandStatus.HARD_TIMEOUT
if len(ps1_matches) != 1:
logger.warning(
'Expected exactly one PS1 metadata block BEFORE the execution of a command, '
f'but got {len(ps1_matches)} PS1 metadata blocks:\n---\n{pane_content!r}\n---'
)
raw_command_output = self._combine_outputs_between_matches(
pane_content, ps1_matches
)
metadata = CmdOutputMetadata() # No metadata available
metadata.suffix = (
f'\n[The command timed out after {timeout} seconds. '
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
)
command_output = self._get_command_output(
command,
raw_command_output,
metadata,
continue_prefix='[Below is the output of the previous command.]\n',
)
return CmdOutputObservation(
command=command,
content=command_output,
metadata=metadata,
)
def _ready_for_next_command(self) -> None:
"""Reset the content buffer for a new command."""
# Clear the current content
self._clear_screen()
def _combine_outputs_between_matches(
self,
pane_content: str,
ps1_matches: list[re.Match],
get_content_before_last_match: bool = False,
) -> str:
"""Combine all outputs between PS1 matches.
Args:
pane_content: The full pane content containing PS1 prompts and command outputs
ps1_matches: List of regex matches for PS1 prompts
get_content_before_last_match: when there's only one PS1 match, whether to get
the content before the last PS1 prompt (True) or after the last PS1 prompt (False)
Returns:
Combined string of all outputs between matches
"""
if len(ps1_matches) == 1:
if get_content_before_last_match:
# The command output is the content before the last PS1 prompt
return pane_content[: ps1_matches[0].start()]
else:
# The command output is the content after the last PS1 prompt
return pane_content[ps1_matches[0].end() + 1 :]
elif len(ps1_matches) == 0:
return pane_content
combined_output = ''
for i in range(len(ps1_matches) - 1):
# Extract content between current and next PS1 prompt
output_segment = pane_content[
ps1_matches[i].end() + 1 : ps1_matches[i + 1].start()
]
combined_output += output_segment + '\n'
# Add the content after the last PS1 prompt
combined_output += pane_content[ps1_matches[-1].end() + 1 :]
logger.debug(f'COMBINED OUTPUT: {combined_output}')
return combined_output
def execute(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservation:
"""Execute a command in the bash session."""
if not self._initialized:
raise RuntimeError('Bash session is not initialized')
# Strip the command of any leading/trailing whitespace
logger.debug(f'RECEIVED ACTION: {action}')
command = action.command.strip()
is_input: bool = action.is_input
# If the previous command is not completed, we need to check if the command is empty
if self.prev_status not in {
BashCommandStatus.CONTINUE,
BashCommandStatus.NO_CHANGE_TIMEOUT,
BashCommandStatus.HARD_TIMEOUT,
}:
if command == '':
return CmdOutputObservation(
content='ERROR: No previous running command to retrieve logs from.',
command='',
metadata=CmdOutputMetadata(),
)
if is_input:
return CmdOutputObservation(
content='ERROR: No previous running command to interact with.',
command='',
metadata=CmdOutputMetadata(),
)
# Check if the command is a single command or multiple commands
splited_commands = split_bash_commands(command)
if len(splited_commands) > 1:
return ErrorObservation(
content=(
f'ERROR: Cannot execute multiple commands at once.\n'
f'Please run each command separately OR chain them into a single command via && or ;\n'
f'Provided commands:\n{"\n".join(f"({i + 1}) {cmd}" for i, cmd in enumerate(splited_commands))}'
)
)
# Get initial state before sending command
initial_pane_output = self._get_pane_content()
initial_ps1_matches = CmdOutputMetadata.matches_ps1_metadata(
initial_pane_output
)
initial_ps1_count = len(initial_ps1_matches)
logger.debug(f'Initial PS1 count: {initial_ps1_count}')
start_time = time.time()
last_change_time = start_time
last_pane_output = (
initial_pane_output # Use initial output as the starting point
)
# When prev command is still running, and we are trying to send a new command
if (
self.prev_status
in {
BashCommandStatus.HARD_TIMEOUT,
BashCommandStatus.NO_CHANGE_TIMEOUT,
}
and not last_pane_output.rstrip().endswith(
CMD_OUTPUT_PS1_END.rstrip()
) # prev command is not completed
and not is_input
and command != '' # not input and not empty command
):
_ps1_matches = CmdOutputMetadata.matches_ps1_metadata(last_pane_output)
# Use initial_ps1_matches if _ps1_matches is empty, otherwise use _ps1_matches
# This handles the case where the prompt might be scrolled off screen but existed before
current_matches_for_output = (
_ps1_matches if _ps1_matches else initial_ps1_matches
)
raw_command_output = self._combine_outputs_between_matches(
last_pane_output, current_matches_for_output
)
metadata = CmdOutputMetadata() # No metadata available
metadata.suffix = (
f'\n[Your command "{command}" is NOT executed. '
'The previous command is still running - You CANNOT send new commands until the previous command is completed. '
'By setting `is_input` to `true`, you can interact with the current process: '
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
)
logger.debug(f'PREVIOUS COMMAND OUTPUT: {raw_command_output}')
command_output = self._get_command_output(
command,
raw_command_output,
metadata,
continue_prefix='[Below is the output of the previous command.]\n',
)
return CmdOutputObservation(
command=command,
content=command_output,
metadata=metadata,
hidden=getattr(action, 'hidden', False),
)
# Send actual command/inputs to the pane
if command != '':
is_special_key = self._is_special_key(command)
if is_input:
logger.debug(f'SENDING INPUT TO RUNNING PROCESS: {command!r}')
self.pane.send_keys(
command,
enter=not is_special_key,
)
else:
# convert command to raw string
command = escape_bash_special_chars(command)
logger.debug(f'SENDING COMMAND: {command!r}')
self.pane.send_keys(
command,
enter=not is_special_key,
)
# Loop until the command completes or times out
while should_continue():
_start_time = time.time()
logger.debug(f'GETTING PANE CONTENT at {_start_time}')
cur_pane_output = self._get_pane_content()
logger.debug(
f'PANE CONTENT GOT after {time.time() - _start_time:.2f} seconds'
)
cur_pane_lines = cur_pane_output.split('\n')
if len(cur_pane_lines) <= 20:
logger.debug('PANE_CONTENT: {cur_pane_output}')
else:
logger.debug(f'BEGIN OF PANE CONTENT: {cur_pane_lines[:10]}')
logger.debug(f'END OF PANE CONTENT: {cur_pane_lines[-10:]}')
ps1_matches = CmdOutputMetadata.matches_ps1_metadata(cur_pane_output)
current_ps1_count = len(ps1_matches)
if cur_pane_output != last_pane_output:
last_pane_output = cur_pane_output
last_change_time = time.time()
logger.debug(f'CONTENT UPDATED DETECTED at {last_change_time}')
# 1) Execution completed:
# Condition 1: A new prompt has appeared since the command started.
# Condition 2: The prompt count hasn't increased (potentially because the initial one scrolled off),
# BUT the *current* visible pane ends with a prompt, indicating completion.
if (
current_ps1_count > initial_ps1_count
or cur_pane_output.rstrip().endswith(CMD_OUTPUT_PS1_END.rstrip())
):
return self._handle_completed_command(
command,
pane_content=cur_pane_output,
ps1_matches=ps1_matches,
hidden=getattr(action, 'hidden', False),
)
# Timeout checks should only trigger if a new prompt hasn't appeared yet.
# 2) Execution timed out since there's no change in output
# for a while (self.NO_CHANGE_TIMEOUT_SECONDS)
# We ignore this if the command is *blocking*
time_since_last_change = time.time() - last_change_time
logger.debug(
f'CHECKING NO CHANGE TIMEOUT ({self.NO_CHANGE_TIMEOUT_SECONDS}s): elapsed {time_since_last_change}. Action blocking: {action.blocking}'
)
if (
not action.blocking
and time_since_last_change >= self.NO_CHANGE_TIMEOUT_SECONDS
):
return self._handle_nochange_timeout_command(
command,
pane_content=cur_pane_output,
ps1_matches=ps1_matches,
)
# 3) Execution timed out due to hard timeout
elapsed_time = time.time() - start_time
logger.debug(
f'CHECKING HARD TIMEOUT ({action.timeout}s): elapsed {elapsed_time:.2f}'
)
if action.timeout and elapsed_time >= action.timeout:
logger.debug('Hard timeout triggered.')
return self._handle_hard_timeout_command(
command,
pane_content=cur_pane_output,
ps1_matches=ps1_matches,
timeout=action.timeout,
)
logger.debug(f'SLEEPING for {self.POLL_INTERVAL} seconds for next poll')
time.sleep(self.POLL_INTERVAL)
raise RuntimeError('Bash session was likely interrupted...')