refactor: Replace pexpect with libtmux in BashSession (#4881)

Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: Robert Brennan <accounts@rbren.io>
This commit is contained in:
Xingyao Wang 2025-01-03 16:22:13 -05:00 committed by GitHub
parent 761a574b09
commit ec70af9412
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
66 changed files with 2345 additions and 760 deletions

View File

@ -36,6 +36,8 @@ jobs:
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v3
- name: Install tmux
run: sudo apt-get update && sudo apt-get install -y tmux
- name: Install poetry via pipx
run: pipx install poetry
- name: Set up Python

View File

@ -29,6 +29,8 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install tmux
run: sudo apt-get update && sudo apt-get install -y tmux
- name: Install poetry via pipx
run: pipx install poetry

View File

@ -31,6 +31,8 @@ jobs:
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
restore-keys: |
${{ runner.os }}-poetry-
- name: Install tmux
run: brew install tmux
- name: Install poetry via pipx
run: pipx install poetry
- name: Install Python dependencies using Poetry

View File

@ -30,6 +30,8 @@ jobs:
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v3
- name: Install tmux
run: sudo apt-get update && sudo apt-get install -y tmux
- name: Install poetry via pipx
run: pipx install poetry
- name: Set up Python

View File

@ -123,7 +123,6 @@ class openhands.state.State {
updated_info: List[Tuple[Action, Observation]]
}
class openhands.observation.CmdOutputObservation {
command_id: int
command: str
exit_code: int
observation: str

View File

@ -137,7 +137,6 @@ def complete_runtime(
action = CmdRunAction(
command=f'chmod +x ./{script_name} && ./{script_name}',
keep_prompt=False,
)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
@ -164,8 +163,7 @@ def complete_runtime(
logger.info(f'Running get ground truth cmd: {script_name}')
action = CmdRunAction(
command=f'chmod +x ./{script_name} && ./{script_name}',
keep_prompt=False,
command=f'chmod +x ./{script_name} && ./{script_name}'
)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)

View File

@ -145,10 +145,7 @@ def complete_runtime(
)
logger.info(f'Running test file: {script_name}')
action = CmdRunAction(
command=f'python3 -m unittest {script_name}',
keep_prompt=False,
)
action = CmdRunAction(command=f'python3 -m unittest {script_name}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})

View File

@ -199,7 +199,7 @@ def complete_runtime(
if obs.exit_code == 0:
test_result['metadata']['1_copy_change_success'] = True
action = CmdRunAction(command=f'cat {generated_path}', keep_prompt=False)
action = CmdRunAction(command=f'cat {generated_path}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
@ -223,9 +223,7 @@ def complete_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
action = CmdRunAction(
command='cat /testing_files/results_biocoder.json', keep_prompt=False
)
action = CmdRunAction(command='cat /testing_files/results_biocoder.json')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
if obs.exit_code == 0:

View File

@ -127,7 +127,6 @@ For each problem, OpenHands is given a set number of iterations to fix the faili
"observation": "run",
"content": "california_schools/california_schools.sqlite\r\n[(1.0,)]",
"extras": {
"command_id": -1,
"command": "python3 0.py",
"exit_code": 0
}

View File

@ -268,10 +268,7 @@ def initialize_runtime(
runtime.copy_to(db_file, '/workspace')
# Check the database is copied
action = CmdRunAction(
command='cd /workspace && ls -l',
keep_prompt=False,
)
action = CmdRunAction(command='cd /workspace && ls -l')
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
@ -300,10 +297,7 @@ def complete_runtime(
instance_id = instance.instance_id.replace('/', '__')
path = os.path.join('/workspace', f'{instance_id}.py')
action = CmdRunAction(
command=f'cat {path}',
keep_prompt=False,
)
action = CmdRunAction(command=f'cat {path}')
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})

View File

@ -71,7 +71,6 @@ For each problem, OpenHands is given a set number of iterations to fix the faili
"observation": "run",
"content": "[File: /workspace/Python__2.py (14 lines total)]\r\n1:def truncate_number(number: float) -> float:\r\n2: return number % 1.0 + 1.0\r\n3:\r\n4:\r\n5:\r\n6:\r\n7:\r\n8:\r\n9:def check(truncate_number):\r\n10: assert truncate_number(3.5) == 0.5\r\n11: assert abs(truncate_number(1.33) - 0.33) < 1e-6\r\n12: assert abs(truncate_number(123.456) - 0.456) < 1e-6\r\n13:\r\n14:check(truncate_number)",
"extras": {
"command_id": -1,
"command": "open Python__2.py",
"exit_code": 0
}
@ -98,7 +97,6 @@ For each problem, OpenHands is given a set number of iterations to fix the faili
"observation": "run",
"content": "> > [File: /workspace/Python__2.py (14 lines total)]\r\n1:def truncate_number(number: float) -> float:\r\n2: return number % 1.0\r\n3:\r\n4:\r\n5:\r\n6:\r\n7:\r\n8:\r\n9:def check(truncate_number):\r\n10: assert truncate_number(3.5) == 0.5\r\n11: assert abs(truncate_number(1.33) - 0.33) < 1e-6\r\n12: assert abs(truncate_number(123.456) - 0.456) < 1e-6\r\n13:\r\n14:check(truncate_number)\r\nFile updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.",
"extras": {
"command_id": -1,
"command": "edit 2:2 <<EOF\n return number % 1.0\nEOF",
"exit_code": 0
}
@ -125,7 +123,6 @@ For each problem, OpenHands is given a set number of iterations to fix the faili
"observation": "run",
"content": "",
"extras": {
"command_id": -1,
"command": "python3 Python__2.py",
"exit_code": 0
}

View File

@ -171,9 +171,7 @@ def complete_runtime(
num_workers = LANGUAGE_TO_NUM_WORKERS[language]
python_imports = '\n'.join(IMPORT_HELPER[language])
action = CmdRunAction(
command=f'cat /workspace/{_get_instance_id(instance)}.py', keep_prompt=False
)
action = CmdRunAction(command=f'cat /workspace/{_get_instance_id(instance)}.py')
obs = runtime.run_action(action)
assert obs.exit_code == 0

View File

@ -163,7 +163,7 @@ def complete_runtime(
eval_script = os.path.join(task_path, 'run.sh')
logger.info(f'Running evaluation script: {eval_script}')
action = CmdRunAction(command=f'cat {eval_script}', keep_prompt=False)
action = CmdRunAction(command=f'cat {eval_script}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
if obs.exit_code == 0:

View File

@ -121,10 +121,7 @@ def initialize_runtime(
runtime.copy_to(dataset_dir, '/workspace/benchmark/datasets', recursive=True)
# Check the dataset exists
action = CmdRunAction(
command='cd /workspace/benchmark/datasets && ls',
keep_prompt=False,
)
action = CmdRunAction(command='cd /workspace/benchmark/datasets && ls')
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
@ -154,10 +151,7 @@ def complete_runtime(
assert obs.exit_code == 0
action = CmdRunAction(
command=f'cat pred_programs/{instance.pred_program_name}',
keep_prompt=False,
)
action = CmdRunAction(command=f'cat pred_programs/{instance.pred_program_name}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)

View File

@ -98,6 +98,7 @@ def process_instance(
metadata: EvalMetadata,
reset_logger: bool = True,
log_dir: str | None = None,
runtime_failure_count: int = 0,
) -> EvalOutput:
"""
Evaluate agent performance on a SWE-bench problem instance.
@ -146,6 +147,16 @@ def process_instance(
metadata=metadata,
)
# Increase resource_factor with increasing attempt_id
if runtime_failure_count > 0:
config.sandbox.remote_runtime_resource_factor = min(
config.sandbox.remote_runtime_resource_factor * (2**runtime_failure_count),
4, # hardcode maximum resource factor to 4
)
logger.warning(
f'This is the second attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
)
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
# Get patch and save it to /tmp/patch.diff
@ -177,7 +188,7 @@ def process_instance(
"(patch --batch --fuzz=5 -p1 -i /tmp/patch.diff && echo 'APPLY_PATCH_PASS' || "
"echo 'APPLY_PATCH_FAIL')))"
)
action = CmdRunAction(command=exec_command, keep_prompt=False)
action = CmdRunAction(command=exec_command)
action.timeout = 600
obs = runtime.run_action(action)
assert isinstance(obs, CmdOutputObservation)
@ -200,9 +211,7 @@ def process_instance(
# Run eval script in background and save output to log file
log_file = '/tmp/eval_output.log'
action = CmdRunAction(
command=f'/tmp/eval.sh > {log_file} 2>&1 & echo $!', keep_prompt=False
)
action = CmdRunAction(command=f'/tmp/eval.sh > {log_file} 2>&1 & echo $!')
action.timeout = 60 # Short timeout just to get the process ID
obs = runtime.run_action(action)
@ -224,7 +233,7 @@ def process_instance(
instance['test_result']['report']['test_timeout'] = True
break
check_action = CmdRunAction(
command=f'ps -p {pid} > /dev/null; echo $?', keep_prompt=False
command=f'ps -p {pid} > /dev/null; echo $?'
)
check_action.timeout = 60
check_obs = runtime.run_action(check_action)
@ -242,7 +251,7 @@ def process_instance(
time.sleep(30) # Wait for 30 seconds before checking again
# Read the log file
cat_action = CmdRunAction(command=f'cat {log_file}', keep_prompt=False)
cat_action = CmdRunAction(command=f'cat {log_file}')
cat_action.timeout = 300
cat_obs = runtime.run_action(cat_action)

View File

@ -282,6 +282,16 @@ def initialize_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}')
action = CmdRunAction(command='which python')
action.timeout = 600
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0 and 'testbed' in obs.content,
f'Expected to find python interpreter from testbed, but got: {str(obs)}',
)
logger.info('-' * 30)
logger.info('END Runtime Initialization Fn')
logger.info('-' * 30)
@ -337,8 +347,7 @@ def complete_runtime(
git_patch = None
while n_retries < 5:
action = CmdRunAction(
command=f'git diff --no-color --cached {instance["base_commit"]}',
keep_prompt=False,
command=f'git diff --no-color --cached {instance["base_commit"]}'
)
action.timeout = 600 + 100 * n_retries
logger.info(action, extra={'msg_type': 'ACTION'})
@ -385,7 +394,7 @@ def process_instance(
if runtime_failure_count > 0:
config.sandbox.remote_runtime_resource_factor = min(
config.sandbox.remote_runtime_resource_factor * (2**runtime_failure_count),
2, # hardcode maximum resource factor to 2
8,
)
logger.warning(
f'This is the second attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
@ -535,4 +544,5 @@ if __name__ == '__main__':
args.eval_num_workers,
process_instance,
timeout_seconds=120 * 60, # 2 hour PER instance should be more than enough
max_retries=5,
)

View File

@ -104,9 +104,9 @@ for repo, diff in repo_diffs:
# Determine if this repo has a significant diff
is_significant = diff >= threshold
repo_color = 'red' if is_significant else 'yellow'
print(colored(f'Difference: {diff} instances!', repo_color, attrs=['bold']))
print(f"\n{colored(repo, repo_color, attrs=['bold'])}:")
print(colored(f'Difference: {diff} instances!', repo_color, attrs=['bold']))
print(colored(f'X resolved but Y failed: ({len(x_instances)} instances)', 'green'))
if x_instances:
print(' ' + str(x_instances))

View File

@ -20,6 +20,13 @@ output_md_folder = args.oh_output_file.replace('.jsonl', '.viz')
print(f'Converting {args.oh_output_file} to markdown files in {output_md_folder}')
oh_format = pd.read_json(args.oh_output_file, orient='records', lines=True)
swebench_eval_file = args.oh_output_file.replace('.jsonl', '.swebench_eval.jsonl')
if os.path.exists(swebench_eval_file):
eval_output_df = pd.read_json(swebench_eval_file, orient='records', lines=True)
else:
eval_output_df = None
# model name is the folder name of oh_output_file
model_name = os.path.basename(os.path.dirname(args.oh_output_file))
@ -50,7 +57,7 @@ def convert_history_to_str(history):
return ret
def write_row_to_md_file(row):
def write_row_to_md_file(row, instance_id_to_test_result):
if 'git_patch' in row:
model_patch = row['git_patch']
elif 'test_result' in row and 'git_patch' in row['test_result']:
@ -58,8 +65,21 @@ def write_row_to_md_file(row):
else:
raise ValueError(f'Row {row} does not have a git_patch')
if 'report' in row:
resolved = row['report'].get('resolved', False)
test_output = None
if row['instance_id'] in instance_id_to_test_result:
report = instance_id_to_test_result[row['instance_id']].get('report', {})
resolved = report.get('resolved', False)
test_output = instance_id_to_test_result[row['instance_id']].get(
'test_output', None
)
elif 'report' in row and row['report'] is not None:
if not isinstance(row['report'], dict):
resolved = None
print(
f'ERROR: Report is not a dict, but a {type(row["report"])}. Row: {row}'
)
else:
resolved = row['report'].get('resolved', False)
else:
resolved = None
@ -84,5 +104,18 @@ def write_row_to_md_file(row):
f.write('## Model Patch\n')
f.write(f'{process_git_patch(model_patch)}\n')
f.write('## Test Output\n')
f.write(str(test_output))
oh_format.progress_apply(write_row_to_md_file, axis=1)
instance_id_to_test_result = {}
if eval_output_df is not None:
instance_id_to_test_result = (
eval_output_df[['instance_id', 'test_result']]
.set_index('instance_id')['test_result']
.to_dict()
)
oh_format.progress_apply(
write_row_to_md_file, axis=1, instance_id_to_test_result=instance_id_to_test_result
)

View File

@ -111,6 +111,11 @@ elif os.path.exists(openhands_remote_report_jsonl):
instance_id_to_status[row['instance_id']] = row['test_result']['report']
df['report'] = df.apply(apply_report, axis=1)
report_is_dict = df['report'].apply(lambda x: isinstance(x, dict))
if not report_is_dict.all():
print(df[~report_is_dict])
raise ValueError(f'Report is not a dict, but a {type(row["report"])}')
_n_instances = len(df)
_n_resolved = len(df[df['report'].apply(lambda x: x.get('resolved', False))])
_n_unresolved = _n_instances - _n_resolved

View File

@ -24,7 +24,7 @@ class Test(BaseIntegrationTest):
@classmethod
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
# check if the file /workspace/bad.txt has been fixed
action = CmdRunAction(command='cat /workspace/bad.txt', keep_prompt=False)
action = CmdRunAction(command='cat /workspace/bad.txt')
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(

View File

@ -10,14 +10,14 @@ class Test(BaseIntegrationTest):
@classmethod
def initialize_runtime(cls, runtime: Runtime) -> None:
action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
action = CmdRunAction(command='mkdir -p /workspace')
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
@classmethod
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
# check if the file /workspace/hello.sh exists
action = CmdRunAction(command='cat /workspace/hello.sh', keep_prompt=False)
action = CmdRunAction(command='cat /workspace/hello.sh')
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(
@ -26,7 +26,7 @@ class Test(BaseIntegrationTest):
)
# execute the script
action = CmdRunAction(command='bash /workspace/hello.sh', keep_prompt=False)
action = CmdRunAction(command='bash /workspace/hello.sh')
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(

View File

@ -10,14 +10,14 @@ class Test(BaseIntegrationTest):
@classmethod
def initialize_runtime(cls, runtime: Runtime) -> None:
action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
action = CmdRunAction(command='mkdir -p /workspace')
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
@classmethod
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
# check if the file /workspace/hello.sh exists
action = CmdRunAction(command='cat /workspace/test.txt', keep_prompt=False)
action = CmdRunAction(command='cat /workspace/test.txt')
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(
@ -26,7 +26,7 @@ class Test(BaseIntegrationTest):
)
# execute the script
action = CmdRunAction(command='cat /workspace/test.txt', keep_prompt=False)
action = CmdRunAction(command='cat /workspace/test.txt')
obs = runtime.run_action(action)
if obs.exit_code != 0:

View File

@ -10,31 +10,29 @@ class Test(BaseIntegrationTest):
@classmethod
def initialize_runtime(cls, runtime: Runtime) -> None:
action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
action = CmdRunAction(command='mkdir -p /workspace')
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
# git init
action = CmdRunAction(command='git init', keep_prompt=False)
action = CmdRunAction(command='git init')
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
# create README.md
action = CmdRunAction(
command='echo \'print("hello world")\' > hello.py', keep_prompt=False
)
action = CmdRunAction(command='echo \'print("hello world")\' > hello.py')
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
# git add README.md
action = CmdRunAction(command='git add hello.py', keep_prompt=False)
action = CmdRunAction(command='git add hello.py')
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
@classmethod
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
# check if the file /workspace/hello.py exists
action = CmdRunAction(command='cat /workspace/hello.py', keep_prompt=False)
action = CmdRunAction(command='cat /workspace/hello.py')
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(
@ -43,7 +41,7 @@ class Test(BaseIntegrationTest):
)
# check if the staging area is empty
action = CmdRunAction(command='git status', keep_prompt=False)
action = CmdRunAction(command='git status')
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(

View File

@ -83,11 +83,11 @@ class Test(BaseIntegrationTest):
@classmethod
def initialize_runtime(cls, runtime: Runtime) -> None:
action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
action = CmdRunAction(command='mkdir -p /workspace')
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
action = CmdRunAction(command='mkdir -p /tmp/server', keep_prompt=False)
action = CmdRunAction(command='mkdir -p /tmp/server')
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
@ -101,8 +101,7 @@ class Test(BaseIntegrationTest):
# create README.md
action = CmdRunAction(
command='cd /tmp/server && nohup python3 -m http.server 8000 &',
keep_prompt=False,
command='cd /tmp/server && nohup python3 -m http.server 8000 &'
)
obs = runtime.run_action(action)

View File

@ -16,6 +16,7 @@ import {
StatusMessage,
} from "#/types/message";
import { handleObservationMessage } from "./observations";
import { appendInput } from "#/state/command-slice";
const messageActions = {
[ActionType.BROWSE]: (message: ActionMessage) => {
@ -62,6 +63,10 @@ export function handleActionMessage(message: ActionMessage) {
return;
}
if (message.action === ActionType.RUN) {
store.dispatch(appendInput(message.args.command));
}
if ("args" in message && "security_risk" in message.args) {
store.dispatch(appendSecurityAnalyzerInput(message));
}

View File

@ -80,8 +80,7 @@ export function handleObservationMessage(message: ObservationMessage) {
observation: "run" as const,
extras: {
command: String(message.extras.command || ""),
command_id: Number(message.extras.command_id || 0),
exit_code: Number(message.extras.exit_code || 0),
metadata: message.extras.metadata,
hidden: Boolean(message.extras.hidden),
},
}),

View File

@ -93,7 +93,7 @@ export const chatSlice = createSlice({
const translationID = `ACTION_MESSAGE$${actionID.toUpperCase()}`;
let text = "";
if (actionID === "run") {
text = `\`${action.payload.args.command}\``;
text = `Command:\n\`${action.payload.args.command}\``;
} else if (actionID === "run_ipython") {
text = `\`\`\`\n${action.payload.args.code}\n\`\`\``;
} else if (actionID === "write") {
@ -144,7 +144,7 @@ export const chatSlice = createSlice({
// Set success property based on observation type
if (observationID === "run") {
const commandObs = observation.payload as CommandObservation;
causeMessage.success = commandObs.extras.exit_code === 0;
causeMessage.success = commandObs.extras.metadata.exit_code === 0;
} else if (observationID === "run_ipython") {
// For IPython, we consider it successful if there's no error message
const ipythonObs = observation.payload as IPythonObservation;
@ -158,7 +158,9 @@ export const chatSlice = createSlice({
if (content.length > MAX_CONTENT_LENGTH) {
content = `${content.slice(0, MAX_CONTENT_LENGTH)}...`;
}
content = `\`\`\`\n${content}\n\`\`\``;
content = `${
causeMessage.content
}\n\nOutput:\n\`\`\`\n${content.trim() || "[Command finished execution with no output]"}\n\`\`\``;
causeMessage.content = content; // Observation content includes the action
} else if (observationID === "read" || observationID === "edit") {
const { content } = observation.payload;

View File

@ -13,9 +13,8 @@ export interface CommandObservation extends OpenHandsObservationEvent<"run"> {
source: "agent";
extras: {
command: string;
command_id: number;
exit_code: number;
hidden?: boolean;
metadata: Record<string, unknown>;
};
}

View File

@ -27,8 +27,11 @@ export interface ObservationMessage {
// The observed data
content: string;
// Additional structured data
extras: Record<string, string>;
extras: {
metadata: Record<string, unknown>;
error_id: string;
[key: string]: string | Record<string, unknown>;
};
// A friendly message that can be put in the chat log
message: string;

View File

@ -277,7 +277,9 @@ class CodeActAgent(Agent):
)
else:
text = truncate_content(
obs.content + obs.interpreter_details, max_message_chars
obs.content
+ f'\n[Python Interpreter: {obs.metadata.py_interpreter_path}]',
max_message_chars,
)
text += f'\n[Command finished with exit code {obs.exit_code}]'
message = Message(role='user', content=[TextContent(text=text)])

View File

@ -31,8 +31,7 @@ from openhands.events.tool import ToolCallMetadata
_BASH_DESCRIPTION = """Execute a bash command in the terminal.
* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
* Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command=`ctrl+c` to interrupt the process.
* Timeout: If a command execution result says "Command timed out. Sending SIGINT to the process", the assistant should retry running the command in the background.
* Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command like `C-c` (Ctrl+C) to interrupt the process.
"""
CmdRunTool = ChatCompletionToolParam(
@ -45,7 +44,7 @@ CmdRunTool = ChatCompletionToolParam(
'properties': {
'command': {
'type': 'string',
'description': 'The bash command to execute. Can be empty to view additional logs when previous exit code is `-1`. Can be `ctrl+c` to interrupt the currently running process.',
'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process.',
},
},
'required': ['command'],

View File

@ -18,6 +18,7 @@ from openhands.events.action import (
from openhands.events.observation import (
AgentStateChangedObservation,
BrowserOutputObservation,
CmdOutputMetadata,
CmdOutputObservation,
FileReadObservation,
FileWriteObservation,
@ -54,11 +55,7 @@ class DummyAgent(Agent):
},
{
'action': CmdRunAction(command='echo "foo"'),
'observations': [
CmdOutputObservation(
'foo', command_id=-1, command='echo "foo"', exit_code=0
)
],
'observations': [CmdOutputObservation('foo', command='echo "foo"')],
},
{
'action': FileWriteAction(
@ -81,9 +78,8 @@ class DummyAgent(Agent):
'observations': [
CmdOutputObservation(
'bash: hello.sh: No such file or directory',
command_id=-1,
command='bash workspace/hello.sh',
exit_code=127,
metadata=CmdOutputMetadata(exit_code=127),
)
],
},
@ -152,8 +148,6 @@ class DummyAgent(Agent):
obs.pop('timestamp', None)
obs.pop('cause', None)
obs.pop('source', None)
if 'extras' in obs:
obs['extras'].pop('command_id', None)
if hist_obs != expected_obs:
print(

View File

@ -5,7 +5,7 @@ from openhands.events.action.commands import IPythonRunCellAction
from openhands.events.action.empty import NullAction
from openhands.events.action.message import MessageAction
from openhands.events.event import Event, EventSource
from openhands.events.observation.commands import (
from openhands.events.observation import (
CmdOutputObservation,
IPythonRunCellObservation,
)

View File

@ -6,6 +6,7 @@ from litellm.types.utils import ModelResponse
from openhands.core.exceptions import LLMResponseError
from openhands.events.event import Event
from openhands.events.observation import CmdOutputMetadata
from openhands.events.serialization import event_to_dict
from openhands.llm.metrics import Metrics
@ -20,6 +21,8 @@ def my_default_encoder(obj):
return obj.get()
if isinstance(obj, ModelResponse):
return obj.model_dump()
if isinstance(obj, CmdOutputMetadata):
return obj.model_dump()
return json.JSONEncoder().default(obj)

View File

@ -12,19 +12,11 @@ from openhands.events.action.action import (
@dataclass
class CmdRunAction(Action):
command: str
# When `command` is empty, it will be used to print the current tmux window
thought: str = ''
blocking: bool = False
# If False, the command will be run in a non-blocking / interactive way
# The partial command outputs will be returned as output observation.
# If True, the command will be run for max .timeout seconds.
keep_prompt: bool = True
# if True, the command prompt will be kept in the command output observation
# Example of command output:
# root@sandbox:~# ls
# file1.txt
# file2.txt
# root@sandbox:~# <-- this is the command prompt
# If blocking is True, the command will be run in a blocking manner.
# e.g., it will NOT return early due to soft timeout.
hidden: bool = False
action: str = ActionType.RUN
runnable: ClassVar[bool] = True

View File

@ -48,6 +48,15 @@ class FileWriteAction(Action):
def message(self) -> str:
return f'Writing file: {self.path}'
def __repr__(self) -> str:
return (
f'**FileWriteAction**\n'
f'Path: {self.path}\n'
f'Range: [L{self.start}:L{self.end}]\n'
f'Thought: {self.thought}\n'
f'Content:\n```\n{self.content}\n```\n'
)
@dataclass
class FileEditAction(Action):

View File

@ -1,6 +1,7 @@
from openhands.events.observation.agent import AgentStateChangedObservation
from openhands.events.observation.browse import BrowserOutputObservation
from openhands.events.observation.commands import (
CmdOutputMetadata,
CmdOutputObservation,
IPythonRunCellObservation,
)
@ -20,6 +21,7 @@ __all__ = [
'Observation',
'NullObservation',
'CmdOutputObservation',
'CmdOutputMetadata',
'IPythonRunCellObservation',
'BrowserOutputObservation',
'FileReadObservation',

View File

@ -1,19 +1,136 @@
from dataclasses import dataclass
import json
import re
import traceback
from dataclasses import dataclass, field
from typing import Self
from pydantic import BaseModel
from openhands.core.logger import openhands_logger as logger
from openhands.core.schema import ObservationType
from openhands.events.observation.observation import Observation
CMD_OUTPUT_PS1_BEGIN = '\n###PS1JSON###\n'
CMD_OUTPUT_PS1_END = '\n###PS1END###'
CMD_OUTPUT_METADATA_PS1_REGEX = re.compile(
f'^{CMD_OUTPUT_PS1_BEGIN.strip()}(.*?){CMD_OUTPUT_PS1_END.strip()}',
re.DOTALL | re.MULTILINE,
)
class CmdOutputMetadata(BaseModel):
"""Additional metadata captured from PS1"""
exit_code: int = -1
pid: int = -1
username: str | None = None
hostname: str | None = None
working_dir: str | None = None
py_interpreter_path: str | None = None
prefix: str = '' # Prefix to add to command output
suffix: str = '' # Suffix to add to command output
@classmethod
def to_ps1_prompt(cls) -> str:
"""Convert the required metadata into a PS1 prompt."""
prompt = CMD_OUTPUT_PS1_BEGIN
json_str = json.dumps(
{
'pid': '$!',
'exit_code': '$?',
'username': r'\u',
'hostname': r'\h',
'working_dir': r'$(pwd)',
'py_interpreter_path': r'$(which python 2>/dev/null || echo "")',
},
indent=2,
)
# Make sure we escape double quotes in the JSON string
# So that PS1 will keep them as part of the output
prompt += json_str.replace('"', r'\"')
prompt += CMD_OUTPUT_PS1_END + '\n' # Ensure there's a newline at the end
return prompt
@classmethod
def matches_ps1_metadata(cls, string: str) -> list[re.Match[str]]:
matches = []
for match in CMD_OUTPUT_METADATA_PS1_REGEX.finditer(string):
try:
json.loads(match.group(1).strip()) # Try to parse as JSON
matches.append(match)
except json.JSONDecodeError:
logger.warning(
f'Failed to parse PS1 metadata: {match.group(1)}. Skipping.'
+ traceback.format_exc()
)
continue # Skip if not valid JSON
return matches
@classmethod
def from_ps1_match(cls, match: re.Match[str]) -> Self:
"""Extract the required metadata from a PS1 prompt."""
metadata = json.loads(match.group(1))
# Create a copy of metadata to avoid modifying the original
processed = metadata.copy()
# Convert numeric fields
if 'pid' in metadata:
try:
processed['pid'] = int(float(str(metadata['pid'])))
except (ValueError, TypeError):
processed['pid'] = -1
if 'exit_code' in metadata:
try:
processed['exit_code'] = int(float(str(metadata['exit_code'])))
except (ValueError, TypeError):
logger.warning(
f'Failed to parse exit code: {metadata["exit_code"]}. Setting to -1.'
)
processed['exit_code'] = -1
return cls(**processed)
@dataclass
class CmdOutputObservation(Observation):
"""This data class represents the output of a command."""
command_id: int
command: str
exit_code: int = 0
hidden: bool = False
observation: str = ObservationType.RUN
interpreter_details: str = ''
# Additional metadata captured from PS1
metadata: CmdOutputMetadata = field(default_factory=CmdOutputMetadata)
# Whether the command output should be hidden from the user
hidden: bool = False
def __init__(
self,
content: str,
command: str,
observation: str = ObservationType.RUN,
metadata: dict | CmdOutputMetadata | None = None,
hidden: bool = False,
**kwargs,
):
super().__init__(content)
self.command = command
self.observation = observation
self.hidden = hidden
if isinstance(metadata, dict):
self.metadata = CmdOutputMetadata(**metadata)
else:
self.metadata = metadata or CmdOutputMetadata()
# Handle legacy attribute
if 'exit_code' in kwargs:
self.metadata.exit_code = kwargs['exit_code']
if 'command_id' in kwargs:
self.metadata.pid = kwargs['command_id']
@property
def command_id(self) -> int:
return self.metadata.pid
@property
def exit_code(self) -> int:
return self.metadata.exit_code
@property
def error(self) -> bool:
@ -28,7 +145,21 @@ class CmdOutputObservation(Observation):
return not self.error
def __str__(self) -> str:
return f'**CmdOutputObservation (source={self.source}, exit code={self.exit_code})**\n{self.content}'
return (
f'**CmdOutputObservation (source={self.source}, exit code={self.exit_code}, '
f'metadata={json.dumps(self.metadata.model_dump(), indent=2)})**\n'
'--BEGIN AGENT OBSERVATION--\n'
f'{self._to_agent_observation()}\n'
'--END AGENT OBSERVATION--'
)
def _to_agent_observation(self) -> str:
ret = f'{self.metadata.prefix}{self.content}{self.metadata.suffix}'
if self.metadata.working_dir:
ret += f'\n[Current working directory: {self.metadata.working_dir}]'
if self.metadata.py_interpreter_path:
ret += f'\n[Python interpreter: {self.metadata.py_interpreter_path}]'
return ret
@dataclass

View File

@ -18,6 +18,9 @@ class FileReadObservation(Observation):
def message(self) -> str:
return f'I read the file {self.path}.'
def __str__(self) -> str:
return f'[Read from {self.path} is successful.]\n' f'{self.content}'
@dataclass
class FileWriteObservation(Observation):
@ -30,6 +33,9 @@ class FileWriteObservation(Observation):
def message(self) -> str:
return f'I wrote to the file {self.path}.'
def __str__(self) -> str:
return f'[Write to {self.path} is successful.]\n' f'{self.content}'
@dataclass
class FileEditObservation(Observation):

View File

@ -67,6 +67,10 @@ def action_from_dict(action: dict) -> Action:
if 'images_urls' in args:
args['image_urls'] = args.pop('images_urls')
# keep_prompt has been deprecated in https://github.com/All-Hands-AI/OpenHands/pull/4881
if 'keep_prompt' in args:
args.pop('keep_prompt')
try:
decoded_action = action_class(**args)
if 'timeout' in action:

View File

@ -1,6 +1,8 @@
from dataclasses import asdict
from datetime import datetime
from pydantic import BaseModel
from openhands.events import Event, EventSource
from openhands.events.observation.observation import Observation
from openhands.events.serialization.action import action_from_dict
@ -56,6 +58,12 @@ def event_from_dict(data) -> 'Event':
return evt
def _convert_pydantic_to_dict(obj: BaseModel | dict) -> dict:
if isinstance(obj, BaseModel):
return obj.model_dump()
return obj
def event_to_dict(event: 'Event') -> dict:
props = asdict(event)
d = {}
@ -82,7 +90,11 @@ def event_to_dict(event: 'Event') -> dict:
d['timeout'] = event.timeout
elif 'observation' in d:
d['content'] = props.pop('content', '')
d['extras'] = props
# props is a dict whose values can include a complex object like an instance of a BaseModel subclass
# such as CmdOutputMetadata
# we serialize it along with the rest
d['extras'] = {k: _convert_pydantic_to_dict(v) for k, v in props.items()}
# Include success field for CmdOutputObservation
if hasattr(event, 'success'):
d['success'] = event.success
@ -109,7 +121,6 @@ def event_to_memory(event: 'Event', max_message_chars: int) -> dict:
# runnable actions have some extra fields used in the BE/FE, which should not be sent to the LLM
if 'args' in d:
d['args'].pop('blocking', None)
d['args'].pop('keep_prompt', None)
d['args'].pop('confirmation_state', None)
if 'extras' in d:

View File

@ -1,6 +1,9 @@
import copy
from openhands.events.observation.agent import AgentStateChangedObservation
from openhands.events.observation.browse import BrowserOutputObservation
from openhands.events.observation.commands import (
CmdOutputMetadata,
CmdOutputObservation,
IPythonRunCellObservation,
)
@ -37,6 +40,26 @@ OBSERVATION_TYPE_TO_CLASS = {
}
def _update_cmd_output_metadata(
metadata: dict | CmdOutputMetadata | None, **kwargs
) -> dict | CmdOutputMetadata:
"""Update the metadata of a CmdOutputObservation.
If metadata is None, create a new CmdOutputMetadata instance.
If metadata is a dict, update the dict.
If metadata is a CmdOutputMetadata instance, update the instance.
"""
if metadata is None:
return CmdOutputMetadata(**kwargs)
if isinstance(metadata, dict):
metadata.update(**kwargs)
elif isinstance(metadata, CmdOutputMetadata):
for key, value in kwargs.items():
setattr(metadata, key, value)
return metadata
def observation_from_dict(observation: dict) -> Observation:
observation = observation.copy()
if 'observation' not in observation:
@ -49,6 +72,24 @@ def observation_from_dict(observation: dict) -> Observation:
observation.pop('observation')
observation.pop('message', None)
content = observation.pop('content', '')
extras = observation.pop('extras', {})
extras = copy.deepcopy(observation.pop('extras', {}))
# Handle legacy attributes for CmdOutputObservation
if 'exit_code' in extras:
extras['metadata'] = _update_cmd_output_metadata(
extras.get('metadata', None), exit_code=extras.pop('exit_code')
)
if 'command_id' in extras:
extras['metadata'] = _update_cmd_output_metadata(
extras.get('metadata', None), pid=extras.pop('command_id')
)
# convert metadata to CmdOutputMetadata if it is a dict
if observation_class is CmdOutputObservation:
if 'metadata' in extras and isinstance(extras['metadata'], dict):
extras['metadata'] = CmdOutputMetadata(**extras['metadata'])
elif 'metadata' in extras and isinstance(extras['metadata'], CmdOutputMetadata):
pass
else:
extras['metadata'] = CmdOutputMetadata()
return observation_class(content=content, **extras)

View File

@ -2,9 +2,11 @@ from openhands.core.logger import openhands_logger as logger
from openhands.events.action.action import Action
from openhands.events.action.empty import NullAction
from openhands.events.event import Event
from openhands.events.observation.commands import CmdOutputObservation
from openhands.events.observation.empty import NullObservation
from openhands.events.observation.observation import Observation
from openhands.events.observation import (
CmdOutputObservation,
NullObservation,
Observation,
)
def get_pairs_from_events(events: list[Event]) -> list[tuple[Action, Observation]]:

View File

@ -122,10 +122,7 @@ async def complete_runtime(
n_retries = 0
git_patch = None
while n_retries < 5:
action = CmdRunAction(
command=f'git diff --no-color --cached {base_commit}',
keep_prompt=False,
)
action = CmdRunAction(command=f'git diff --no-color --cached {base_commit}')
action.timeout = 600 + 100 * n_retries
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)

View File

@ -95,31 +95,34 @@ class ActionExecutor:
browsergym_eval_env: str | None,
) -> None:
self.plugins_to_load = plugins_to_load
self._initial_pwd = work_dir
self._initial_cwd = work_dir
self.username = username
self.user_id = user_id
_updated_user_id = init_user_and_working_directory(
username=username, user_id=self.user_id, initial_pwd=work_dir
username=username, user_id=self.user_id, initial_cwd=work_dir
)
if _updated_user_id is not None:
self.user_id = _updated_user_id
self.bash_session = BashSession(
work_dir=work_dir,
username=username,
)
self.bash_session: BashSession | None = None
self.lock = asyncio.Lock()
self.plugins: dict[str, Plugin] = {}
self.browser = BrowserEnv(browsergym_eval_env)
self.start_time = time.time()
self.last_execution_time = self.start_time
self._initialized = False
@property
def initial_pwd(self):
return self._initial_pwd
def initial_cwd(self):
return self._initial_cwd
async def ainit(self):
# bash needs to be initialized first
self.bash_session = BashSession(
work_dir=self._initial_cwd,
username=self.username,
)
self.bash_session.initialize()
await wait_all(
(self._init_plugin(plugin) for plugin in self.plugins_to_load),
timeout=30,
@ -138,8 +141,14 @@ class ActionExecutor:
await self._init_bash_commands()
logger.debug('Runtime client initialized.')
self._initialized = True
@property
def initialized(self) -> bool:
return self._initialized
async def _init_plugin(self, plugin: Plugin):
assert self.bash_session is not None
await plugin.initialize(self.username)
self.plugins[plugin.name] = plugin
logger.debug(f'Initializing plugin: {plugin.name}')
@ -147,7 +156,7 @@ class ActionExecutor:
if isinstance(plugin, JupyterPlugin):
await self.run_ipython(
IPythonRunCellAction(
code=f'import os; os.chdir("{self.bash_session.pwd}")'
code=f'import os; os.chdir("{self.bash_session.cwd}")'
)
)
@ -177,30 +186,32 @@ class ActionExecutor:
async def run(
self, action: CmdRunAction
) -> CmdOutputObservation | ErrorObservation:
obs = await call_sync_from_async(self.bash_session.run, action)
assert self.bash_session is not None
obs = await call_sync_from_async(self.bash_session.execute, action)
return obs
async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
assert self.bash_session is not None
if 'jupyter' in self.plugins:
_jupyter_plugin: JupyterPlugin = self.plugins['jupyter'] # type: ignore
# This is used to make AgentSkills in Jupyter aware of the
# current working directory in Bash
jupyter_pwd = getattr(self, '_jupyter_pwd', None)
if self.bash_session.pwd != jupyter_pwd:
jupyter_cwd = getattr(self, '_jupyter_cwd', None)
if self.bash_session.cwd != jupyter_cwd:
logger.debug(
f'{self.bash_session.pwd} != {jupyter_pwd} -> reset Jupyter PWD'
f'{self.bash_session.cwd} != {jupyter_cwd} -> reset Jupyter PWD'
)
reset_jupyter_pwd_code = (
f'import os; os.chdir("{self.bash_session.pwd}")'
reset_jupyter_cwd_code = (
f'import os; os.chdir("{self.bash_session.cwd}")'
)
_aux_action = IPythonRunCellAction(code=reset_jupyter_pwd_code)
_aux_action = IPythonRunCellAction(code=reset_jupyter_cwd_code)
_reset_obs: IPythonRunCellObservation = await _jupyter_plugin.run(
_aux_action
)
logger.debug(
f'Changed working directory in IPython to: {self.bash_session.pwd}. Output: {_reset_obs}'
f'Changed working directory in IPython to: {self.bash_session.cwd}. Output: {_reset_obs}'
)
self._jupyter_pwd = self.bash_session.pwd
self._jupyter_cwd = self.bash_session.cwd
obs: IPythonRunCellObservation = await _jupyter_plugin.run(action)
obs.content = obs.content.rstrip()
@ -266,7 +277,7 @@ class ActionExecutor:
if action.include_extra:
obs.content += (
f'\n[Jupyter current working directory: {self.bash_session.pwd}]'
f'\n[Jupyter current working directory: {self.bash_session.cwd}]'
)
obs.content += f'\n[Jupyter Python interpreter: {_jupyter_plugin.python_interpreter_path}]'
return obs
@ -282,6 +293,7 @@ class ActionExecutor:
return str(filepath)
async def read(self, action: FileReadAction) -> Observation:
assert self.bash_session is not None
if action.impl_source == FileReadSource.OH_ACI:
return await self.run_ipython(
IPythonRunCellAction(
@ -292,7 +304,7 @@ class ActionExecutor:
# NOTE: the client code is running inside the sandbox,
# so there's no need to check permission
working_dir = self.bash_session.workdir
working_dir = self.bash_session.cwd
filepath = self._resolve_path(action.path, working_dir)
try:
if filepath.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
@ -339,7 +351,8 @@ class ActionExecutor:
return FileReadObservation(path=filepath, content=code_view)
async def write(self, action: FileWriteAction) -> Observation:
working_dir = self.bash_session.workdir
assert self.bash_session is not None
working_dir = self.bash_session.cwd
filepath = self._resolve_path(action.path, working_dir)
insert = action.content.split('\n')
@ -400,7 +413,8 @@ class ActionExecutor:
return await browse(action, self.browser)
def close(self):
self.bash_session.close()
if self.bash_session is not None:
self.bash_session.close()
self.browser.close()
@ -609,6 +623,8 @@ if __name__ == '__main__':
@app.get('/alive')
async def alive():
if client is None or not client.initialized:
return {'status': 'not initialized'}
return {'status': 'ok'}
# ================================
@ -658,11 +674,11 @@ if __name__ == '__main__':
# Get the full path of the requested directory
if path is None:
full_path = client.initial_pwd
full_path = client.initial_cwd
elif os.path.isabs(path):
full_path = path
else:
full_path = os.path.join(client.initial_pwd, path)
full_path = os.path.join(client.initial_cwd, path)
if not os.path.exists(full_path):
# if user just removed a folder, prevent server error 500 in UI

View File

@ -264,7 +264,6 @@ class ActionExecutionClient(Runtime):
raise AgentRuntimeTimeoutError(
f'Runtime failed to return execute_action before the requested timeout of {action.timeout}s'
)
return obs
def run(self, action: CmdRunAction) -> Observation:

View File

@ -219,7 +219,9 @@ class RemoteRuntime(ActionExecutionClient):
'image': self.container_image,
'command': command,
'working_dir': '/openhands/code/',
'environment': {'DEBUG': 'true'} if self.config.debug else {},
'environment': {'DEBUG': 'true'}
if self.config.debug or os.environ.get('DEBUG', 'false').lower() == 'true'
else {},
'session_id': self.sid,
'resource_factor': self.config.sandbox.remote_runtime_resource_factor,
}
@ -364,7 +366,10 @@ class RemoteRuntime(ActionExecutionClient):
try:
return super()._send_action_server_request(method, url, **kwargs)
except requests.Timeout:
self.log('error', 'No response received within the timeout period.')
self.log(
'error',
f'No response received within the timeout period for url: {url}',
)
raise
except requests.HTTPError as e:
if e.response.status_code == 404:

View File

@ -1,18 +1,21 @@
import os
import re
import time
import uuid
from enum import Enum
import bashlex
import pexpect
import libtmux
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import CmdRunAction
from openhands.events.event import EventSource
from openhands.events.observation import (
from openhands.events.observation import ErrorObservation
from openhands.events.observation.commands import (
CMD_OUTPUT_PS1_END,
CmdOutputMetadata,
CmdOutputObservation,
ErrorObservation,
)
SOFT_TIMEOUT_SECONDS = 5
from openhands.utils.shutdown_listener import should_continue
def split_bash_commands(commands):
@ -66,269 +69,493 @@ def split_bash_commands(commands):
return result
def escape_bash_special_chars(command: str) -> str:
r"""
Escapes characters that have different interpretations in bash vs python.
Specifically handles escape sequences like \;, \|, \&, etc.
"""
if command.strip() == '':
return ''
try:
parts = []
last_pos = 0
def visit_node(node):
nonlocal last_pos
if (
node.kind == 'redirect'
and hasattr(node, 'heredoc')
and node.heredoc is not None
):
# We're entering a heredoc - preserve everything as-is until we see EOF
# Store the heredoc end marker (usually 'EOF' but could be different)
between = command[last_pos : node.pos[0]]
parts.append(between)
# Add the heredoc start marker
parts.append(command[node.pos[0] : node.heredoc.pos[0]])
# Add the heredoc content as-is
parts.append(command[node.heredoc.pos[0] : node.heredoc.pos[1]])
last_pos = node.pos[1]
return
if node.kind == 'word':
# Get the raw text between the last position and current word
between = command[last_pos : node.pos[0]]
word_text = command[node.pos[0] : node.pos[1]]
# Add the between text, escaping special characters
between = re.sub(r'\\([;&|><])', r'\\\\\1', between)
parts.append(between)
# Check if word_text is a quoted string or command substitution
if (
(word_text.startswith('"') and word_text.endswith('"'))
or (word_text.startswith("'") and word_text.endswith("'"))
or (word_text.startswith('$(') and word_text.endswith(')'))
or (word_text.startswith('`') and word_text.endswith('`'))
):
# Preserve quoted strings, command substitutions, and heredoc content as-is
parts.append(word_text)
else:
# Escape special chars in unquoted text
word_text = re.sub(r'\\([;&|><])', r'\\\\\1', word_text)
parts.append(word_text)
last_pos = node.pos[1]
return
# Visit child nodes
if hasattr(node, 'parts'):
for part in node.parts:
visit_node(part)
# Process all nodes in the AST
nodes = list(bashlex.parse(command))
for node in nodes:
between = command[last_pos : node.pos[0]]
between = re.sub(r'\\([;&|><])', r'\\\\\1', between)
parts.append(between)
last_pos = node.pos[0]
visit_node(node)
# Handle any remaining text after the last word
remaining = command[last_pos:]
parts.append(remaining)
return ''.join(parts)
except bashlex.errors.ParsingError:
# Fallback if parsing fails
logger.warning(f'Failed to parse command: {command}')
return command
class BashCommandStatus(Enum):
CONTINUE = 'continue'
COMPLETED = 'completed'
NO_CHANGE_TIMEOUT = 'no_change_timeout'
HARD_TIMEOUT = 'hard_timeout'
def _remove_command_prefix(command_output: str, command: str) -> str:
return command_output.lstrip().removeprefix(command.lstrip()).lstrip()
class BashSession:
"""A class that maintains a pexpect process and provides a simple interface for running commands and interacting with the shell."""
POLL_INTERVAL = 0.5
HISTORY_LIMIT = 10_000
PS1 = CmdOutputMetadata.to_ps1_prompt()
def __init__(self, work_dir: str, username: str):
self._pwd = work_dir
def __init__(
self,
work_dir: str,
username: str | None = None,
no_change_timeout_seconds: float = 30.0,
):
self.NO_CHANGE_TIMEOUT_SECONDS = no_change_timeout_seconds
self.work_dir = work_dir
self.username = username
self._initialized = False
self.shell = pexpect.spawn(
f'su {username}',
encoding='utf-8',
codec_errors='replace',
echo=False,
def initialize(self):
self.server = libtmux.Server()
window_command = '/bin/bash'
if self.username:
# This starts a non-login (new) shell for the given user
window_command = f'su {self.username} -'
session_name = f'openhands-{self.username}-{uuid.uuid4()}'
self.session = self.server.new_session(
session_name=session_name,
window_name='bash',
window_command=window_command,
start_directory=self.work_dir,
kill_session=True,
x=1000,
y=1000,
)
self._init_bash_shell(work_dir)
# Set history limit to a large number to avoid losing history
# https://unix.stackexchange.com/questions/43414/unlimited-history-in-tmux
self.session.set_option('history-limit', str(self.HISTORY_LIMIT), _global=True)
self.session.history_limit = self.HISTORY_LIMIT
# We need to create a new pane because the initial pane's history limit is (default) 2000
_initial_window = self.session.attached_window
self.window = self.session.new_window(
window_shell=window_command,
start_directory=self.work_dir,
)
self.pane = self.window.attached_pane
logger.debug(f'pane: {self.pane}; history_limit: {self.session.history_limit}')
_initial_window.kill_window()
# Configure bash to use simple PS1 and disable PS2
self.pane.send_keys(
f'export PROMPT_COMMAND=\'export PS1="{self.PS1}"\'; export PS2=""'
)
time.sleep(0.1) # Wait for command to take effect
self._clear_screen()
# Store the last command for interactive input handling
self.prev_status: BashCommandStatus | None = None
self.prev_output: str = ''
self._closed: bool = False
logger.debug(f'Bash session initialized with work dir: {self.work_dir}')
# Maintain the current working directory
self._cwd = os.path.abspath(self.work_dir)
self._initialized = True
def __del__(self):
"""Ensure the session is closed when the object is destroyed."""
self.close()
def _get_pane_content(self) -> str:
"""Capture the current pane content and update the buffer."""
content = '\n'.join(
map(
# avoid double newlines
lambda line: line.rstrip(),
self.pane.cmd('capture-pane', '-J', '-pS', '-').stdout,
)
)
return content
def close(self):
self.shell.close()
"""Clean up the session."""
if self._closed:
return
self.session.kill_session()
self._closed = True
@property
def pwd(self):
return self._pwd
def cwd(self):
return self._cwd
@property
def workdir(self):
return self._get_working_directory()
def _is_special_key(self, command: str) -> bool:
"""Check if the command is a special key."""
# Special keys are of the form C-<key>
_command = command.strip()
return _command.startswith('C-') and len(_command) == 3
def _get_working_directory(self):
# NOTE: this is part of initialization, so we hard code the timeout
result, exit_code = self._execute_bash('pwd', timeout=60, keep_prompt=False)
if exit_code != 0:
raise RuntimeError(
f'Failed to get working directory (exit code: {exit_code}): {result}'
)
return result.strip()
def _clear_screen(self):
"""Clear the tmux pane screen and history."""
self.pane.send_keys('C-l', enter=False)
time.sleep(0.1)
self.pane.cmd('clear-history')
def _init_bash_shell(self, work_dir: str):
self.__bash_PS1 = (
r'[PEXPECT_BEGIN]\n'
r'$(which python >/dev/null 2>&1 && echo "[Python Interpreter: $(which python)]\n")'
r'\u@\h:\w\n'
r'[PEXPECT_END]'
)
# This should NOT match "PS1=\u@\h:\w [PEXPECT]$" when `env` is executed
self.__bash_expect_regex = r'\[PEXPECT_BEGIN\]\s*(.*?)\s*([a-z0-9_-]*)@([a-zA-Z0-9.-]*):(.+)\s*\[PEXPECT_END\]'
# Set umask to allow group write permissions
self.shell.sendline(f'umask 002; export PS1="{self.__bash_PS1}"; export PS2=""')
self.shell.expect(self.__bash_expect_regex)
self.shell.sendline(
f'if [ ! -d "{work_dir}" ]; then mkdir -p "{work_dir}"; fi && cd "{work_dir}"'
)
self.shell.expect(self.__bash_expect_regex)
logger.debug(
f'Bash initialized. Working directory: {work_dir}. Output: [{self.shell.before}]'
)
# Ensure the group has write permissions on the working directory
self.shell.sendline(f'chmod g+rw "{work_dir}"')
self.shell.expect(self.__bash_expect_regex)
def _get_bash_prompt_and_update_pwd(self):
ps1 = self.shell.after
if ps1 == pexpect.EOF:
logger.error(f'Bash shell EOF! {self.shell.after=}, {self.shell.before=}')
raise RuntimeError('Bash shell EOF')
if ps1 == pexpect.TIMEOUT:
logger.warning('Bash shell timeout')
return ''
# begin at the last occurrence of '[PEXPECT_BEGIN]'.
# In multi-line bash commands, the prompt will be repeated
# and the matched regex captures all of them
# - we only want the last one (newest prompt)
_begin_pos = ps1.rfind('[PEXPECT_BEGIN]')
if _begin_pos != -1:
ps1 = ps1[_begin_pos:]
# parse the ps1 to get username, hostname, and working directory
matched = re.match(self.__bash_expect_regex, ps1)
assert (
matched is not None
), f'Failed to parse bash prompt: {ps1}. This should not happen.'
other_info, username, hostname, working_dir = matched.groups()
working_dir = working_dir.rstrip()
self._pwd = os.path.expanduser(working_dir)
# re-assemble the prompt
# ignore the hostname AND use 'openhands-workspace'
prompt = f'{other_info.strip()}\n{username}@openhands-workspace:{working_dir} '
if username == 'root':
prompt += '#'
else:
prompt += '$'
return prompt + ' '
def _execute_bash(
def _get_command_output(
self,
command: str,
timeout: int,
keep_prompt: bool = True,
kill_on_timeout: bool = True,
) -> tuple[str, int]:
logger.debug(f'Executing command: {command}')
self.shell.sendline(command)
return self._continue_bash(
timeout=timeout, keep_prompt=keep_prompt, kill_on_timeout=kill_on_timeout
raw_command_output: str,
metadata: CmdOutputMetadata,
continue_prefix: str = '',
) -> str:
"""Get the command output with the previous command output removed.
Args:
command: The command that was executed.
raw_command_output: The raw output from the command.
metadata: The metadata object to store prefix/suffix in.
continue_prefix: The prefix to add to the command output if it's a continuation of the previous command.
"""
# remove the previous command output from the new output if any
if self.prev_output:
command_output = raw_command_output.removeprefix(self.prev_output)
metadata.prefix = continue_prefix
else:
command_output = raw_command_output
self.prev_output = raw_command_output # update current command output anyway
command_output = _remove_command_prefix(command_output, command)
return command_output.rstrip()
def _handle_completed_command(
self, command: str, pane_content: str, ps1_matches: list[re.Match]
) -> CmdOutputObservation:
is_special_key = self._is_special_key(command)
assert len(ps1_matches) >= 1, (
f'Expected at least one PS1 metadata block, but got {len(ps1_matches)}.\n'
f'---FULL OUTPUT---\n{pane_content!r}\n---END OF OUTPUT---'
)
metadata = CmdOutputMetadata.from_ps1_match(ps1_matches[-1])
# Special case where the previous command output is truncated due to history limit
# We should get the content BEFORE the last PS1 prompt
get_content_before_last_match = bool(len(ps1_matches) == 1)
# Update the current working directory if it has changed
if metadata.working_dir != self._cwd and metadata.working_dir:
self._cwd = metadata.working_dir
logger.debug(f'COMMAND OUTPUT: {pane_content}')
# Extract the command output between the two PS1 prompts
raw_command_output = self._combine_outputs_between_matches(
pane_content,
ps1_matches,
get_content_before_last_match=get_content_before_last_match,
)
def _interrupt_bash(
if get_content_before_last_match:
# Count the number of lines in the truncated output
num_lines = len(raw_command_output.splitlines())
metadata.prefix = f'[Previous command outputs are truncated. Showing the last {num_lines} lines of the output below.]\n'
metadata.suffix = (
f'\n[The command completed with exit code {metadata.exit_code}.]'
if not is_special_key
else f'\n[The command completed with exit code {metadata.exit_code}. CTRL+{command[-1].upper()} was sent.]'
)
command_output = self._get_command_output(
command,
raw_command_output,
metadata,
)
self.prev_status = BashCommandStatus.COMPLETED
self.prev_output = '' # Reset previous command output
self._ready_for_next_command()
return CmdOutputObservation(
content=command_output,
command=command,
metadata=metadata,
)
def _handle_nochange_timeout_command(
self,
action_timeout: int | None,
interrupt_timeout: int | None = None,
max_retries: int = 2,
) -> tuple[str, int]:
interrupt_timeout = interrupt_timeout or 1 # default timeout for SIGINT
# try to interrupt the bash shell use SIGINT
while max_retries > 0:
self.shell.sendintr() # send SIGINT to the shell
logger.debug('Sent SIGINT to bash. Waiting for output...')
try:
self.shell.expect(self.__bash_expect_regex, timeout=interrupt_timeout)
output = self.shell.before
logger.debug(f'Received output after SIGINT: {output}')
exit_code = 130 # SIGINT
_additional_msg = ''
if action_timeout is not None:
_additional_msg = (
f'Command timed out after {action_timeout} seconds. '
)
output += (
'\r\n\r\n'
+ f'[{_additional_msg}SIGINT was sent to interrupt the command.]'
)
return output, exit_code
except pexpect.TIMEOUT as e:
logger.warning(f'Bash pexpect.TIMEOUT while waiting for SIGINT: {e}')
max_retries -= 1
# fall back to send control-z
logger.error(
'Failed to get output after SIGINT. Max retries reached. Sending control-z...'
command: str,
pane_content: str,
ps1_matches: list[re.Match],
) -> CmdOutputObservation:
self.prev_status = BashCommandStatus.NO_CHANGE_TIMEOUT
if len(ps1_matches) != 1:
logger.warning(
'Expected exactly one PS1 metadata block BEFORE the execution of a command, '
f'but got {len(ps1_matches)} PS1 metadata blocks:\n---\n{pane_content!r}\n---'
)
raw_command_output = self._combine_outputs_between_matches(
pane_content, ps1_matches
)
self.shell.sendcontrol('z')
self.shell.expect(self.__bash_expect_regex)
output = self.shell.before
logger.debug(f'Received output after control-z: {output}')
# Try to kill the job
self.shell.sendline('kill -9 %1')
self.shell.expect(self.__bash_expect_regex)
logger.debug(f'Received output after killing job %1: {self.shell.before}')
output += self.shell.before
_additional_msg = ''
if action_timeout is not None:
_additional_msg = f'Command timed out after {action_timeout} seconds. '
output += (
'\r\n\r\n'
+ f'[{_additional_msg}SIGINT was sent to interrupt the command, but failed. The command was killed.]'
metadata = CmdOutputMetadata() # No metadata available
metadata.suffix = (
f'\n[The command has no new output after {self.NO_CHANGE_TIMEOUT_SECONDS} seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
command_output = self._get_command_output(
command,
raw_command_output,
metadata,
continue_prefix='[Command output continued from previous command]\n',
)
return CmdOutputObservation(
content=command_output,
command=command,
metadata=metadata,
)
# Try to get the exit code again
self.shell.sendline('echo $?')
self.shell.expect(self.__bash_expect_regex)
_exit_code_output = self.shell.before
exit_code = self._parse_exit_code(_exit_code_output)
return output, exit_code
def _parse_exit_code(self, output: str) -> int:
try:
exit_code = int(output.strip().split()[0])
except Exception:
logger.error('Error getting exit code from bash script')
# If we try to run an invalid shell script the output sometimes includes error text
# rather than the error code - we assume this is an error
exit_code = 2
return exit_code
def _continue_bash(
def _handle_hard_timeout_command(
self,
timeout: int,
keep_prompt: bool = True,
kill_on_timeout: bool = True,
) -> tuple[str, int]:
logger.debug(f'Continuing bash with timeout={timeout}')
try:
self.shell.expect(self.__bash_expect_regex, timeout=timeout)
command: str,
pane_content: str,
ps1_matches: list[re.Match],
timeout: float,
) -> CmdOutputObservation:
self.prev_status = BashCommandStatus.HARD_TIMEOUT
if len(ps1_matches) != 1:
logger.warning(
'Expected exactly one PS1 metadata block BEFORE the execution of a command, '
f'but got {len(ps1_matches)} PS1 metadata blocks:\n---\n{pane_content!r}\n---'
)
raw_command_output = self._combine_outputs_between_matches(
pane_content, ps1_matches
)
metadata = CmdOutputMetadata() # No metadata available
metadata.suffix = (
f'\n[The command timed out after {timeout} seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
command_output = self._get_command_output(
command,
raw_command_output,
metadata,
continue_prefix='[Command output continued from previous command]\n',
)
output = self.shell.before
return CmdOutputObservation(
command=command,
content=command_output,
metadata=metadata,
)
# Get exit code
self.shell.sendline('echo $?')
logger.debug('Requesting exit code...')
self.shell.expect(self.__bash_expect_regex, timeout=timeout)
_exit_code_output = self.shell.before
exit_code = self._parse_exit_code(_exit_code_output)
except pexpect.TIMEOUT as e:
logger.warning(f'Bash pexpect.TIMEOUT while executing bash command: {e}')
if kill_on_timeout:
output, exit_code = self._interrupt_bash(action_timeout=timeout)
def _ready_for_next_command(self):
"""Reset the content buffer for a new command."""
# Clear the current content
self._clear_screen()
def _combine_outputs_between_matches(
self,
pane_content: str,
ps1_matches: list[re.Match],
get_content_before_last_match: bool = False,
) -> str:
"""Combine all outputs between PS1 matches.
Args:
pane_content: The full pane content containing PS1 prompts and command outputs
ps1_matches: List of regex matches for PS1 prompts
get_content_before_last_match: when there's only one PS1 match, whether to get
the content before the last PS1 prompt (True) or after the last PS1 prompt (False)
Returns:
Combined string of all outputs between matches
"""
if len(ps1_matches) == 1:
if get_content_before_last_match:
# The command output is the content before the last PS1 prompt
return pane_content[: ps1_matches[0].start()]
else:
output = self.shell.before or ''
exit_code = -1
finally:
bash_prompt = self._get_bash_prompt_and_update_pwd()
if keep_prompt:
output += '\r\n' + bash_prompt
return output, exit_code
# The command output is the content after the last PS1 prompt
return pane_content[ps1_matches[0].end() + 1 :]
combined_output = ''
for i in range(len(ps1_matches) - 1):
# Extract content between current and next PS1 prompt
output_segment = pane_content[
ps1_matches[i].end() + 1 : ps1_matches[i + 1].start()
]
combined_output += output_segment + '\n'
logger.debug(f'COMBINED OUTPUT: {combined_output}')
return combined_output
def run(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservation:
try:
assert (
action.timeout is not None
), f'Timeout argument is required for CmdRunAction: {action}'
commands = split_bash_commands(action.command)
all_output = ''
python_interpreter = ''
for command in commands:
if command == '':
output, exit_code = self._continue_bash(
timeout=SOFT_TIMEOUT_SECONDS,
keep_prompt=action.keep_prompt,
kill_on_timeout=False,
)
elif command.lower() == 'ctrl+c':
output, exit_code = self._interrupt_bash(
action_timeout=None, # intentionally None
)
else:
output, exit_code = self._execute_bash(
command,
timeout=SOFT_TIMEOUT_SECONDS
if not action.blocking
else action.timeout,
keep_prompt=action.keep_prompt,
kill_on_timeout=False if not action.blocking else True,
)
# Get rid of the python interpreter string from each line of the output.
# We need it only once at the end.
parts = output.rsplit('[Python Interpreter: ', 1)
output = parts[0]
if len(parts) == 2:
python_interpreter = '[Python Interpreter: ' + parts[1]
if all_output:
# previous output already exists so we add a newline
all_output += '\r\n'
def execute(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservation:
"""Execute a command in the bash session."""
if not self._initialized:
raise RuntimeError('Bash session is not initialized')
# If the command originated with the agent, append the command that was run...
if action.source == EventSource.AGENT:
all_output += command + '\r\n'
# Strip the command of any leading/trailing whitespace
logger.debug(f'RECEIVED ACTION: {action}')
command = action.command.strip()
all_output += str(output)
if exit_code != 0:
break
if command == '' and self.prev_status not in {
BashCommandStatus.CONTINUE,
BashCommandStatus.NO_CHANGE_TIMEOUT,
BashCommandStatus.HARD_TIMEOUT,
}:
return CmdOutputObservation(
command_id=-1,
content=all_output.rstrip('\r\n'),
command=action.command,
hidden=action.hidden,
exit_code=exit_code,
interpreter_details=python_interpreter,
content='ERROR: No previous command to continue from. '
+ 'Previous command has to be timeout to be continued.',
command='',
metadata=CmdOutputMetadata(),
)
except UnicodeDecodeError as e:
splited_commands = split_bash_commands(command)
if len(splited_commands) > 1:
return ErrorObservation(
f'Runtime bash execution failed: Command output could not be decoded as utf-8. {str(e)}',
content=(
f'ERROR: Cannot execute multiple commands at once.\n'
f'Please run each command separately OR chain them into a single command via && or ;\n'
f'Provided commands:\n{"\n".join(f"({i+1}) {cmd}" for i, cmd in enumerate(splited_commands))}'
)
)
start_time = time.time()
last_change_time = start_time
last_pane_output = self._get_pane_content()
_ps1_matches = CmdOutputMetadata.matches_ps1_metadata(last_pane_output)
assert len(_ps1_matches) >= 1, (
'Expected at least one PS1 metadata block BEFORE the execution of a command, '
f'but got {len(_ps1_matches)} PS1 metadata blocks:\n---\n{last_pane_output!r}\n---'
)
if len(_ps1_matches) > 1:
logger.warning(
'Found multiple PS1 metadata blocks BEFORE the execution of a command. '
'Only the last one will be used.'
)
_ps1_matches = [_ps1_matches[-1]]
if command != '':
# convert command to raw string
command = escape_bash_special_chars(command)
logger.debug(f'SENDING COMMAND: {command!r}')
self.pane.send_keys(
command,
enter=not self._is_special_key(command),
)
# Loop until the command completes or times out
while should_continue():
_start_time = time.time()
logger.debug(f'GETTING PANE CONTENT at {_start_time}')
cur_pane_output = self._get_pane_content()
logger.debug(
f'PANE CONTENT GOT after {time.time() - _start_time:.2f} seconds'
)
logger.debug(f'BEGIN OF PANE CONTENT: {cur_pane_output.split("\n")[:10]}')
logger.debug(f'END OF PANE CONTENT: {cur_pane_output.split("\n")[-10:]}')
ps1_matches = CmdOutputMetadata.matches_ps1_metadata(cur_pane_output)
if cur_pane_output != last_pane_output:
last_pane_output = cur_pane_output
last_change_time = time.time()
logger.debug(f'CONTENT UPDATED DETECTED at {last_change_time}')
# 1) Execution completed
# if the last command output contains the end marker
if cur_pane_output.rstrip().endswith(CMD_OUTPUT_PS1_END.rstrip()):
return self._handle_completed_command(
command,
pane_content=cur_pane_output,
ps1_matches=ps1_matches,
)
# 2) Execution timed out since there's no change in output
# for a while (self.NO_CHANGE_TIMEOUT_SECONDS)
# We ignore this if the command is *blocking
time_since_last_change = time.time() - last_change_time
logger.debug(
f'CHECKING NO CHANGE TIMEOUT ({self.NO_CHANGE_TIMEOUT_SECONDS}s): elapsed {time_since_last_change}'
)
if (
not action.blocking
and time_since_last_change >= self.NO_CHANGE_TIMEOUT_SECONDS
):
return self._handle_nochange_timeout_command(
command,
pane_content=cur_pane_output,
ps1_matches=ps1_matches,
)
# 3) Execution timed out due to hard timeout
logger.debug(
f'CHECKING HARD TIMEOUT ({action.timeout}s): elapsed {time.time() - start_time}'
)
if action.timeout and time.time() - start_time >= action.timeout:
return self._handle_hard_timeout_command(
command,
pane_content=cur_pane_output,
ps1_matches=ps1_matches,
timeout=action.timeout,
)
logger.debug(f'SLEEPING for {self.POLL_INTERVAL} seconds for next poll')
time.sleep(self.POLL_INTERVAL)
raise RuntimeError('Bash session was likely interrupted...')

View File

@ -4,7 +4,7 @@ from openhands.core.logger import openhands_logger as logger
def init_user_and_working_directory(
username: str, user_id: int, initial_pwd: str
username: str, user_id: int, initial_cwd: str
) -> int | None:
"""Create working directory and user if not exists.
It performs the following steps effectively:
@ -26,23 +26,23 @@ def init_user_and_working_directory(
Args:
username (str): The username to create.
user_id (int): The user ID to assign to the user.
initial_pwd (str): The initial working directory to create.
initial_cwd (str): The initial working directory to create.
Returns:
int | None: The user ID if it was updated, None otherwise.
"""
# First create the working directory, independent of the user
logger.debug(f'Client working directory: {initial_pwd}')
command = f'umask 002; mkdir -p {initial_pwd}'
logger.debug(f'Client working directory: {initial_cwd}')
command = f'umask 002; mkdir -p {initial_cwd}'
output = subprocess.run(command, shell=True, capture_output=True)
out_str = output.stdout.decode()
command = f'chown -R {username}:root {initial_pwd}'
command = f'chown -R {username}:root {initial_cwd}'
output = subprocess.run(command, shell=True, capture_output=True)
out_str += output.stdout.decode()
command = f'chmod g+rw {initial_pwd}'
command = f'chmod g+rw {initial_cwd}'
output = subprocess.run(command, shell=True, capture_output=True)
out_str += output.stdout.decode()
logger.debug(f'Created working directory. Output: [{out_str}]')

View File

@ -15,7 +15,7 @@ ENV POETRY_VIRTUALENVS_PATH=/openhands/poetry \
# Install base system dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends \
wget curl sudo apt-utils git jq \
wget curl sudo apt-utils git jq tmux \
{% if 'ubuntu' in base_image and (base_image.endswith(':latest') or base_image.endswith(':24.04')) %}
libgl1 \
{% else %}

34
poetry.lock generated
View File

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]]
name = "aiohappyeyeballs"
@ -3711,6 +3711,17 @@ websocket-client = ">=0.32.0,<0.40.0 || >0.40.0,<0.41.dev0 || >=0.43.dev0"
[package.extras]
adal = ["adal (>=1.0.2)"]
[[package]]
name = "libtmux"
version = "0.37.0"
description = "Typed library that provides an ORM wrapper for tmux, a terminal multiplexer."
optional = false
python-versions = "<4.0,>=3.8"
files = [
{file = "libtmux-0.37.0-py3-none-any.whl", hash = "sha256:7e8cbab30b033d132b6fca5dddb575bb7f6a1fd802328e7174f9b49023556376"},
{file = "libtmux-0.37.0.tar.gz", hash = "sha256:21955c5dce6332db41abad5e26ae8c4062ef2b9a89099bd57a36f52be1d5270f"},
]
[[package]]
name = "libwebarena"
version = "0.0.3"
@ -8298,6 +8309,25 @@ postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"]
pymysql = ["pymysql"]
sqlcipher = ["sqlcipher3_binary"]
[[package]]
name = "sse-starlette"
version = "2.1.3"
description = "SSE plugin for Starlette"
optional = false
python-versions = ">=3.8"
files = [
{file = "sse_starlette-2.1.3-py3-none-any.whl", hash = "sha256:8ec846438b4665b9e8c560fcdea6bc8081a3abf7942faa95e5a744999d219772"},
{file = "sse_starlette-2.1.3.tar.gz", hash = "sha256:9cd27eb35319e1414e3d2558ee7414487f9529ce3b3cf9b21434fd110e017169"},
]
[package.dependencies]
anyio = "*"
starlette = "*"
uvicorn = "*"
[package.extras]
examples = ["fastapi"]
[[package]]
name = "stack-data"
version = "0.6.3"
@ -10054,4 +10084,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"]
[metadata]
lock-version = "2.0"
python-versions = "^3.12"
content-hash = "691bdd0f64e3476858eb34ce6ed6d0b0e7d97458cfd69fd366cd9c1c4f4ec897"
content-hash = "db887f071f7dbb712cfba5d9b4de8938afbedee22fd166b4527f4aec40e37cfd"

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "openhands-ai"
version = "0.18.0"
version = "0.18.0a0"
description = "OpenHands: Code Less, Make More"
authors = ["OpenHands"]
license = "MIT"
@ -62,11 +62,13 @@ opentelemetry-api = "1.25.0"
opentelemetry-exporter-otlp-proto-grpc = "1.25.0"
modal = ">=0.66.26,<0.71.0"
runloop-api-client = "0.11.0"
libtmux = "^0.37.0"
pygithub = "^2.5.0"
joblib = "*"
openhands-aci = "0.1.5"
python-socketio = "^5.11.4"
redis = "^5.2.0"
sse-starlette = "^2.1.3"
[tool.poetry.group.llama-index.dependencies]
llama-index = "*"
@ -100,6 +102,7 @@ reportlab = "*"
[tool.coverage.run]
concurrency = ["gevent"]
[tool.poetry.group.runtime.dependencies]
jupyterlab = "*"
notebook = "*"
@ -129,6 +132,7 @@ ignore = ["D1"]
[tool.ruff.lint.pydocstyle]
convention = "google"
[tool.poetry.group.evaluation.dependencies]
streamlit = "*"
whatthepatch = "*"

View File

@ -69,7 +69,7 @@ def _close_test_runtime(runtime: Runtime) -> None:
time.sleep(1)
def _reset_pwd() -> None:
def _reset_cwd() -> None:
global project_dir
# Try to change back to project directory
try:
@ -152,16 +152,16 @@ def get_run_as_openhands() -> list[bool]:
@pytest.fixture(scope='module') # for xdist
def runtime_setup_module():
_reset_pwd()
_reset_cwd()
yield
_reset_pwd()
_reset_cwd()
@pytest.fixture(scope='session') # not for xdist
def runtime_setup_session():
_reset_pwd()
_reset_cwd()
yield
_reset_pwd()
_reset_cwd()
# This assures that all tests run together per runtime, not alternating between them,
@ -230,14 +230,14 @@ def _load_runtime(
global test_mount_path
if use_workspace:
test_mount_path = os.path.join(config.workspace_base, 'rt')
elif temp_dir is not None:
test_mount_path = os.path.join(temp_dir, sid)
else:
test_mount_path = os.path.join(
temp_dir, sid
) # need a subfolder to avoid conflicts
test_mount_path = None
config.workspace_mount_path = test_mount_path
# Mounting folder specific for this test inside the sandbox
config.workspace_mount_path_in_sandbox = f'{sandbox_test_folder}/{sid}'
config.workspace_mount_path_in_sandbox = f'{sandbox_test_folder}'
print('\nPaths used:')
print(f'use_host_network: {config.sandbox.use_host_network}')
print(f'workspace_base: {config.workspace_base}')

View File

@ -5,7 +5,6 @@ from pathlib import Path
import pytest
from conftest import (
TEST_IN_CI,
_close_test_runtime,
_get_sandbox_folder,
_load_runtime,
@ -13,7 +12,7 @@ from conftest import (
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import CmdRunAction
from openhands.events.observation import CmdOutputObservation
from openhands.events.observation import CmdOutputObservation, ErrorObservation
from openhands.runtime.base import Runtime
# ============================================================================================================================
@ -21,36 +20,19 @@ from openhands.runtime.base import Runtime
# ============================================================================================================================
def _run_cmd_action(runtime, custom_command: str, keep_prompt=True):
action = CmdRunAction(command=custom_command, keep_prompt=keep_prompt)
def _run_cmd_action(runtime, custom_command: str):
action = CmdRunAction(command=custom_command)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert isinstance(obs, CmdOutputObservation)
assert isinstance(obs, (CmdOutputObservation, ErrorObservation))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
return obs
def test_bash_command_pexcept(temp_dir, runtime_cls, run_as_openhands):
def test_bash_command_env(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# We set env var PS1="\u@\h:\w $"
# and construct the PEXCEPT prompt base on it.
# When run `env`, bad implementation of CmdRunAction will be pexcepted by this
# and failed to pexcept the right content, causing it fail to get error code.
obs = runtime.run_action(CmdRunAction(command='env'))
# For example:
# 02:16:13 - openhands:DEBUG: client.py:78 - Executing command: env
# 02:16:13 - openhands:DEBUG: client.py:82 - Command output: PYTHONUNBUFFERED=1
# CONDA_EXE=/openhands/miniforge3/bin/conda
# [...]
# LC_CTYPE=C.UTF-8
# PS1=\u@\h:\w $
# 02:16:13 - openhands:DEBUG: client.py:89 - Executing command for exit code: env
# 02:16:13 - openhands:DEBUG: client.py:92 - Exit code Output:
# CONDA_DEFAULT_ENV=base
# As long as the exit code is 0, the test will pass.
assert isinstance(
obs, CmdOutputObservation
), 'The observation should be a CmdOutputObservation.'
@ -59,52 +41,7 @@ def test_bash_command_pexcept(temp_dir, runtime_cls, run_as_openhands):
_close_test_runtime(runtime)
def test_bash_timeout_and_keyboard_interrupt(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
action = CmdRunAction(command='python -c "import time; time.sleep(10)"')
action.timeout = 1
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert (
'[Command timed out after 1 seconds. SIGINT was sent to interrupt the command.]'
in obs.content
)
assert 'KeyboardInterrupt' in obs.content
# follow up command should not be affected
action = CmdRunAction(command='ls')
action.timeout = 1
obs = runtime.run_action(action)
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
# run it again!
action = CmdRunAction(command='python -c "import time; time.sleep(10)"')
action.timeout = 1
obs = runtime.run_action(action)
assert isinstance(obs, CmdOutputObservation)
assert (
'[Command timed out after 1 seconds. SIGINT was sent to interrupt the command.]'
in obs.content
)
assert 'KeyboardInterrupt' in obs.content
# things should still work
action = CmdRunAction(command='ls')
action.timeout = 1
obs = runtime.run_action(action)
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert '/workspace' in obs.interpreter_details
finally:
_close_test_runtime(runtime)
def test_bash_pexcept_eof(temp_dir, runtime_cls, run_as_openhands):
def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
action = CmdRunAction(command='python3 -m http.server 8080')
@ -112,9 +49,21 @@ def test_bash_pexcept_eof(temp_dir, runtime_cls, run_as_openhands):
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 130 # script was killed by SIGINT
assert obs.exit_code == -1
assert 'Serving HTTP on 0.0.0.0 port 8080' in obs.content
assert (
"[The command timed out after 1 seconds. You may wait longer to see additional output by sending empty command '', send other commands to interact with the current process, or send keys to interrupt/kill the command.]"
in obs.metadata.suffix
)
action = CmdRunAction(command='C-c')
action.timeout = 30
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert 'Keyboard interrupt received, exiting.' in obs.content
assert '/workspace' in obs.metadata.working_dir
action = CmdRunAction(command='ls')
action.timeout = 1
@ -122,7 +71,8 @@ def test_bash_pexcept_eof(temp_dir, runtime_cls, run_as_openhands):
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert '/workspace' in obs.interpreter_details
assert 'Keyboard interrupt received, exiting.' not in obs.content
assert '/workspace' in obs.metadata.working_dir
# run it again!
action = CmdRunAction(command='python3 -m http.server 8080')
@ -130,122 +80,8 @@ def test_bash_pexcept_eof(temp_dir, runtime_cls, run_as_openhands):
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 130 # script was killed by SIGINT
assert obs.exit_code == -1
assert 'Serving HTTP on 0.0.0.0 port 8080' in obs.content
assert 'Keyboard interrupt received, exiting.' in obs.content
# things should still work
action = CmdRunAction(command='ls')
action.timeout = 1
obs = runtime.run_action(action)
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert '/workspace' in obs.interpreter_details
finally:
_close_test_runtime(runtime)
def test_process_resistant_to_one_sigint(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# Create a bash script that ignores SIGINT up to 1 times
script_content = """
#!/bin/bash
trap_count=0
trap 'echo "Caught SIGINT ($((++trap_count))/1), ignoring..."; [ $trap_count -ge 1 ] && trap - INT && exit' INT
while true; do
echo "Still running..."
sleep 1
done
""".strip()
with open(f'{temp_dir}/resistant_script.sh', 'w') as f:
f.write(script_content)
os.chmod(f'{temp_dir}/resistant_script.sh', 0o777)
runtime.copy_to(
os.path.join(temp_dir, 'resistant_script.sh'),
runtime.config.workspace_mount_path_in_sandbox,
)
# Run the resistant script
action = CmdRunAction(command='sudo bash ./resistant_script.sh')
action.timeout = 5
action.blocking = True
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 130 # script was killed by SIGINT
assert 'Still running...' in obs.content
assert 'Caught SIGINT (1/1), ignoring...' in obs.content
assert 'Stopped' not in obs.content
assert (
'[Command timed out after 5 seconds. SIGINT was sent to interrupt the command.]'
in obs.content
)
# Normal command should still work
action = CmdRunAction(command='ls')
action.timeout = 10
obs = runtime.run_action(action)
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert '/workspace' in obs.interpreter_details
assert 'resistant_script.sh' in obs.content
finally:
_close_test_runtime(runtime)
def test_process_resistant_to_multiple_sigint(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# Create a bash script that ignores SIGINT up to 2 times
script_content = """
#!/bin/bash
trap_count=0
trap 'echo "Caught SIGINT ($((++trap_count))/3), ignoring..."; [ $trap_count -ge 3 ] && trap - INT && exit' INT
while true; do
echo "Still running..."
sleep 1
done
""".strip()
with open(f'{temp_dir}/resistant_script.sh', 'w') as f:
f.write(script_content)
os.chmod(f'{temp_dir}/resistant_script.sh', 0o777)
runtime.copy_to(
os.path.join(temp_dir, 'resistant_script.sh'),
runtime.config.workspace_mount_path_in_sandbox,
)
# Run the resistant script
action = CmdRunAction(command='sudo bash ./resistant_script.sh')
action.timeout = 2
action.blocking = True
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert 'Still running...' in obs.content
assert 'Caught SIGINT (1/3), ignoring...' in obs.content
assert '[1]+' and 'Stopped' in obs.content
assert (
'[Command timed out after 2 seconds. SIGINT was sent to interrupt the command, but failed. The command was killed.]'
in obs.content
)
# Normal command should still work
action = CmdRunAction(command='ls')
action.timeout = 10
obs = runtime.run_action(action)
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert '/workspace' in obs.interpreter_details
assert 'resistant_script.sh' in obs.content
finally:
_close_test_runtime(runtime)
@ -262,12 +98,12 @@ def test_multiline_commands(temp_dir, runtime_cls):
# test multiline echo
obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"')
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'hello\r\nworld' in obs.content
assert 'hello\nworld' in obs.content
# test whitespace
obs = _run_cmd_action(runtime, 'echo -e "a\\n\\n\\nz"')
assert obs.exit_code == 0, 'The exit code should be 0.'
assert '\r\n\r\n\r\n' in obs.content
assert '\n\n\n' in obs.content
finally:
_close_test_runtime(runtime)
@ -276,43 +112,43 @@ def test_multiple_multiline_commands(temp_dir, runtime_cls, run_as_openhands):
cmds = [
'ls -l',
'echo -e "hello\nworld"',
"""
echo -e "hello it\\'s me"
""".strip(),
"""
echo \\
"""echo -e "hello it's me\"""",
"""echo \\
-e 'hello' \\
-v
""".strip(),
"""
echo -e 'hello\\nworld\\nare\\nyou\\nthere?'
""".strip(),
"""
echo -e 'hello
world
are
you\\n
there?'
""".strip(),
"""
echo -e 'hello
world "
'
""".strip(),
-v""",
"""echo -e 'hello\\nworld\\nare\\nyou\\nthere?'""",
"""echo -e 'hello\nworld\nare\nyou\n\nthere?'""",
"""echo -e 'hello\nworld "'""",
]
joined_cmds = '\n'.join(cmds)
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# First test that running multiple commands at once fails
obs = _run_cmd_action(runtime, joined_cmds)
assert obs.exit_code == 0, 'The exit code should be 0.'
assert isinstance(obs, ErrorObservation)
assert 'Cannot execute multiple commands at once' in obs.content
assert 'total 0' in obs.content
assert 'hello\r\nworld' in obs.content
assert "hello it\\'s me" in obs.content
assert 'hello -v' in obs.content
assert 'hello\r\nworld\r\nare\r\nyou\r\nthere?' in obs.content
assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content
# Now run each command individually and verify they work
results = []
for cmd in cmds:
obs = _run_cmd_action(runtime, cmd)
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
results.append(obs.content)
# Verify all expected outputs are present
assert 'total 0' in results[0] # ls -l
assert 'hello\nworld' in results[1] # echo -e "hello\nworld"
assert "hello it's me" in results[2] # echo -e "hello it\'s me"
assert 'hello -v' in results[3] # echo -e 'hello' -v
assert (
'hello\nworld\nare\nyou\nthere?' in results[4]
) # echo -e 'hello\nworld\nare\nyou\nthere?'
assert (
'hello\nworld\nare\nyou\n\nthere?' in results[5]
) # echo -e with literal newlines
assert 'hello\nworld "' in results[6] # echo -e with quote
finally:
_close_test_runtime(runtime)
@ -324,7 +160,7 @@ def test_no_ps2_in_output(temp_dir, runtime_cls, run_as_openhands):
obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"')
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'hello\r\nworld' in obs.content
assert 'hello\nworld' in obs.content
assert '>' not in obs.content
finally:
_close_test_runtime(runtime)
@ -332,21 +168,17 @@ def test_no_ps2_in_output(temp_dir, runtime_cls, run_as_openhands):
def test_multiline_command_loop(temp_dir, runtime_cls):
# https://github.com/All-Hands-AI/OpenHands/issues/3143
init_cmd = """
mkdir -p _modules && \
init_cmd = """mkdir -p _modules && \
for month in {01..04}; do
for day in {01..05}; do
touch "_modules/2024-${month}-${day}-sample.md"
done
done
echo "created files"
done && echo "created files"
"""
follow_up_cmd = """
for file in _modules/*.md; do
follow_up_cmd = """for file in _modules/*.md; do
new_date=$(echo $file | sed -E 's/2024-(01|02|03|04)-/2024-/;s/2024-01/2024-08/;s/2024-02/2024-09/;s/2024-03/2024-10/;s/2024-04/2024-11/')
mv "$file" "$new_date"
done
echo "success"
done && echo "success"
"""
runtime = _load_runtime(temp_dir, runtime_cls)
try:
@ -424,7 +256,6 @@ def test_multi_cmd_run_in_single_line(temp_dir, runtime_cls):
def test_stateful_cmd(temp_dir, runtime_cls):
runtime = _load_runtime(temp_dir, runtime_cls)
sandbox_dir = _get_sandbox_folder(runtime)
try:
obs = _run_cmd_action(runtime, 'mkdir -p test')
assert obs.exit_code == 0, 'The exit code should be 0.'
@ -434,7 +265,7 @@ def test_stateful_cmd(temp_dir, runtime_cls):
obs = _run_cmd_action(runtime, 'pwd')
assert obs.exit_code == 0, 'The exit code should be 0.'
assert f'{sandbox_dir}/test' in obs.content
assert '/workspace/test' in obs.content
finally:
_close_test_runtime(runtime)
@ -532,7 +363,7 @@ def test_copy_to_non_existent_directory(temp_dir, runtime_cls):
def test_overwrite_existing_file(temp_dir, runtime_cls):
runtime = _load_runtime(temp_dir, runtime_cls)
try:
sandbox_dir = _get_sandbox_folder(runtime)
sandbox_dir = '/openhands/workspace'
obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
assert obs.exit_code == 0
@ -595,38 +426,13 @@ def test_copy_from_directory(temp_dir, runtime_cls):
_close_test_runtime(runtime)
def test_keep_prompt(runtime_cls, temp_dir):
runtime = _load_runtime(
temp_dir,
runtime_cls=runtime_cls,
run_as_openhands=False,
)
try:
sandbox_dir = _get_sandbox_folder(runtime)
obs = _run_cmd_action(runtime, f'touch {sandbox_dir}/test_file.txt')
assert obs.exit_code == 0
assert 'root@' in obs.interpreter_details
obs = _run_cmd_action(
runtime, f'cat {sandbox_dir}/test_file.txt', keep_prompt=False
)
assert obs.exit_code == 0
assert 'root@' not in obs.interpreter_details
finally:
_close_test_runtime(runtime)
@pytest.mark.skipif(
TEST_IN_CI != 'True',
reason='This test is not working in WSL (file ownership)',
)
def test_git_operation(runtime_cls):
# do not mount workspace, since workspace mount by tests will be owned by root
# while the user_id we get via os.getuid() is different from root
# which causes permission issues
runtime = _load_runtime(
temp_dir=None,
use_workspace=False,
runtime_cls=runtime_cls,
# Need to use non-root user to expose issues
run_as_openhands=True,
@ -634,12 +440,15 @@ def test_git_operation(runtime_cls):
# this will happen if permission of runtime is not properly configured
# fatal: detected dubious ownership in repository at '/workspace'
try:
obs = _run_cmd_action(runtime, 'sudo chown -R openhands:root .')
assert obs.exit_code == 0
# check the ownership of the current directory
obs = _run_cmd_action(runtime, 'ls -alh .')
assert obs.exit_code == 0
# drwx--S--- 2 openhands root 64 Aug 7 23:32 .
# drwxr-xr-x 1 root root 4.0K Aug 7 23:33 ..
for line in obs.content.split('\r\n'):
for line in obs.content.split('\n'):
if ' ..' in line:
# parent directory should be owned by root
assert 'root' in line
@ -663,8 +472,10 @@ def test_git_operation(runtime_cls):
assert obs.exit_code == 0
# git diff
obs = _run_cmd_action(runtime, 'git diff')
obs = _run_cmd_action(runtime, 'git diff --no-color --cached')
assert obs.exit_code == 0
assert 'b/test_file.txt' in obs.content
assert '+hello' in obs.content
# git commit
obs = _run_cmd_action(runtime, 'git commit -m "test commit"')
@ -685,3 +496,276 @@ def test_python_version(temp_dir, runtime_cls, run_as_openhands):
assert 'Python 3' in obs.content, 'The output should contain "Python 3".'
finally:
_close_test_runtime(runtime)
def test_pwd_property(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# Create a subdirectory and verify pwd updates
obs = _run_cmd_action(runtime, 'mkdir -p random_dir')
assert obs.exit_code == 0
obs = _run_cmd_action(runtime, 'cd random_dir && pwd')
assert obs.exit_code == 0
assert 'random_dir' in obs.content
finally:
_close_test_runtime(runtime)
def test_basic_command(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# Test simple command
obs = _run_cmd_action(runtime, "echo 'hello world'")
assert 'hello world' in obs.content
assert obs.exit_code == 0
# Test command with error
obs = _run_cmd_action(runtime, 'nonexistent_command')
assert obs.exit_code == 127
assert 'nonexistent_command: command not found' in obs.content
# Test command with special characters
obs = _run_cmd_action(runtime, "echo 'hello world with\nspecial chars'")
assert 'hello world with\nspecial chars' in obs.content
assert obs.exit_code == 0
# Test multiple commands in sequence
obs = _run_cmd_action(runtime, 'echo "first" && echo "second" && echo "third"')
assert 'first' in obs.content
assert 'second' in obs.content
assert 'third' in obs.content
assert obs.exit_code == 0
finally:
_close_test_runtime(runtime)
def test_interactive_command(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# Test interactive command
action = CmdRunAction('read -p "Enter name: " name && echo "Hello $name"')
action.timeout = 1
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
# assert 'Enter name:' in obs.content # FIXME: this is not working
assert '[The command timed out after 1 seconds.' in obs.metadata.suffix
action = CmdRunAction('John')
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Hello John' in obs.content
assert '[The command completed with exit code 0.]' in obs.metadata.suffix
# Test multiline command input with here document
action = CmdRunAction("""cat << EOF
line 1
line 2
EOF""")
obs = runtime.run_action(action)
assert 'line 1\nline 2' in obs.content
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '[The command completed with exit code 0.]' in obs.metadata.suffix
assert obs.exit_code == 0
finally:
_close_test_runtime(runtime)
def test_long_output(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# Generate a long output
action = CmdRunAction('for i in $(seq 1 5000); do echo "Line $i"; done')
action.timeout = 10
obs = runtime.run_action(action)
assert obs.exit_code == 0
assert 'Line 1' in obs.content
assert 'Line 5000' in obs.content
finally:
_close_test_runtime(runtime)
def test_long_output_exceed_history_limit(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# Generate a long output
action = CmdRunAction('for i in $(seq 1 50000); do echo "Line $i"; done')
action.timeout = 30
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
assert 'Previous command outputs are truncated' in obs.metadata.prefix
assert 'Line 40000' in obs.content
assert 'Line 50000' in obs.content
finally:
_close_test_runtime(runtime)
def test_long_output_from_nested_directories(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# Create nested directories with many files
setup_cmd = 'mkdir -p /tmp/test_dir && cd /tmp/test_dir && for i in $(seq 1 100); do mkdir -p "folder_$i"; for j in $(seq 1 100); do touch "folder_$i/file_$j.txt"; done; done'
setup_action = CmdRunAction(setup_cmd.strip())
setup_action.timeout = 60
obs = runtime.run_action(setup_action)
assert obs.exit_code == 0
# List the directory structure recursively
action = CmdRunAction('ls -R /tmp/test_dir')
action.timeout = 60
obs = runtime.run_action(action)
assert obs.exit_code == 0
# Verify output contains expected files
assert 'folder_1' in obs.content
assert 'file_1.txt' in obs.content
assert 'folder_100' in obs.content
assert 'file_100.txt' in obs.content
finally:
_close_test_runtime(runtime)
def test_command_backslash(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# Create a file with the content "implemented_function"
action = CmdRunAction(
'mkdir -p /tmp/test_dir && echo "implemented_function" > /tmp/test_dir/file_1.txt'
)
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
# Reproduce an issue we ran into during evaluation
# find /workspace/sympy__sympy__1.0 -type f -exec grep -l "implemented_function" {} \;
# find: missing argument to `-exec'
# --> This is unexpected output due to incorrect escaping of \;
# This tests for correct escaping of \;
action = CmdRunAction(
'find /tmp/test_dir -type f -exec grep -l "implemented_function" {} \\;'
)
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
assert '/tmp/test_dir/file_1.txt' in obs.content
finally:
_close_test_runtime(runtime)
def test_command_output_continuation(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# Start a command that produces output slowly
action = CmdRunAction('for i in {1..5}; do echo $i; sleep 3; done')
action.timeout = 2.5 # Set timeout to 2.5 seconds
obs = runtime.run_action(action)
assert obs.content.strip() == '1'
assert obs.metadata.prefix == ''
assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
# Continue watching output
action = CmdRunAction('')
action.timeout = 2.5
obs = runtime.run_action(action)
assert '[Command output continued from previous command]' in obs.metadata.prefix
assert obs.content.strip() == '2'
assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
# Continue until completion
for expected in ['3', '4', '5']:
action = CmdRunAction('')
action.timeout = 2.5
obs = runtime.run_action(action)
assert (
'[Command output continued from previous command]'
in obs.metadata.prefix
)
assert obs.content.strip() == expected
assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
# Final empty command to complete
action = CmdRunAction('')
obs = runtime.run_action(action)
assert '[The command completed with exit code 0.]' in obs.metadata.suffix
finally:
_close_test_runtime(runtime)
def test_long_running_command_follow_by_execute(
temp_dir, runtime_cls, run_as_openhands
):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# Test command that produces output slowly
action = CmdRunAction('for i in {1..3}; do echo $i; sleep 3; done')
action.timeout = 2.5
action.blocking = False
obs = runtime.run_action(action)
assert '1' in obs.content # First number should appear before timeout
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
assert obs.metadata.prefix == ''
# Continue watching output
action = CmdRunAction('')
action.timeout = 2.5
obs = runtime.run_action(action)
assert '2' in obs.content
assert (
obs.metadata.prefix == '[Command output continued from previous command]\n'
)
assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
# Test command that produces no output
action = CmdRunAction('sleep 15')
action.timeout = 2.5
obs = runtime.run_action(action)
assert '3' in obs.content
assert (
obs.metadata.prefix == '[Command output continued from previous command]\n'
)
assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
finally:
_close_test_runtime(runtime)
def test_empty_command_errors(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# Test empty command without previous command
obs = runtime.run_action(CmdRunAction(''))
assert isinstance(obs, CmdOutputObservation)
assert 'ERROR: No previous command to continue from' in obs.content
finally:
_close_test_runtime(runtime)
def test_python_interactive_input(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# Test Python program that asks for input - properly escaped for bash
python_script = """name = input('Enter your name: '); age = input('Enter your age: '); print(f'Hello {name}, you are {age} years old')"""
# Start Python with the interactive script
obs = runtime.run_action(CmdRunAction(f'python3 -c "{python_script}"'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Enter your name:' in obs.content
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
# Send first input (name)
obs = runtime.run_action(CmdRunAction('Alice'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Enter your age:' in obs.content
assert obs.metadata.exit_code == -1
# Send second input (age)
obs = runtime.run_action(CmdRunAction('25'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Hello Alice, you are 25 years old' in obs.content
assert obs.metadata.exit_code == 0
assert '[The command completed with exit code 0.]' in obs.metadata.suffix
finally:
_close_test_runtime(runtime)

View File

@ -4,7 +4,6 @@ import pytest
from conftest import (
TEST_IN_CI,
_close_test_runtime,
_get_sandbox_folder,
_load_runtime,
)
@ -33,8 +32,6 @@ from openhands.events.observation import (
def test_simple_cmd_ipython_and_fileop(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
sandbox_dir = _get_sandbox_folder(runtime)
# Test run command
action_cmd = CmdRunAction(command='ls -l')
logger.info(action_cmd, extra={'msg_type': 'ACTION'})
@ -55,7 +52,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, runtime_cls, run_as_openhands):
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.content.strip() == (
'Hello, `World`!\n'
f'[Jupyter current working directory: {sandbox_dir}]\n'
'[Jupyter current working directory: /openhands/workspace]\n'
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]'
)
@ -76,7 +73,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, runtime_cls, run_as_openhands):
assert obs.content == ''
# event stream runtime will always use absolute path
assert obs.path == f'{sandbox_dir}/hello.sh'
assert obs.path == '/openhands/workspace/hello.sh'
# Test read file (file should exist)
action_read = FileReadAction(path='hello.sh')
@ -88,7 +85,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, runtime_cls, run_as_openhands):
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.content == 'echo "Hello, World!"\n'
assert obs.path == f'{sandbox_dir}/hello.sh'
assert obs.path == '/openhands/workspace/hello.sh'
# clean up
action = CmdRunAction(command='rm -rf hello.sh')
@ -178,7 +175,6 @@ def test_ipython_multi_user(temp_dir, runtime_cls, run_as_openhands):
def test_ipython_simple(temp_dir, runtime_cls):
runtime = _load_runtime(temp_dir, runtime_cls)
sandbox_dir = _get_sandbox_folder(runtime)
# Test run ipython
# get username
@ -192,7 +188,7 @@ def test_ipython_simple(temp_dir, runtime_cls):
obs.content.strip()
== (
'1\n'
f'[Jupyter current working directory: {sandbox_dir}]\n'
'[Jupyter current working directory: /openhands/workspace]\n'
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]'
).strip()
)
@ -203,7 +199,6 @@ def test_ipython_simple(temp_dir, runtime_cls):
def test_ipython_package_install(temp_dir, runtime_cls, run_as_openhands):
"""Make sure that cd in bash also update the current working directory in ipython."""
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
sandbox_dir = _get_sandbox_folder(runtime)
# It should error out since pymsgbox is not installed
action = IPythonRunCellAction(code='import pymsgbox')
@ -229,7 +224,7 @@ def test_ipython_package_install(temp_dir, runtime_cls, run_as_openhands):
# import should not error out
assert obs.content.strip() == (
'[Code executed successfully with no output]\n'
f'[Jupyter current working directory: {sandbox_dir}]\n'
'[Jupyter current working directory: /openhands/workspace]\n'
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]'
)
@ -239,7 +234,6 @@ def test_ipython_package_install(temp_dir, runtime_cls, run_as_openhands):
def test_ipython_file_editor_permissions_as_openhands(temp_dir, runtime_cls):
"""Test file editor permission behavior when running as different users."""
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands=True)
sandbox_dir = _get_sandbox_folder(runtime)
# Create a file owned by root with restricted permissions
action = CmdRunAction(
@ -277,18 +271,18 @@ def test_ipython_file_editor_permissions_as_openhands(temp_dir, runtime_cls):
assert 'Permission denied' in obs.content
# Try to use file editor in openhands sandbox directory - should work
test_code = f"""
test_code = """
# Create file
print(file_editor(command='create', path='{sandbox_dir}/test.txt', file_text='Line 1\\nLine 2\\nLine 3'))
print(file_editor(command='create', path='/openhands/workspace/test.txt', file_text='Line 1\\nLine 2\\nLine 3'))
# View file
print(file_editor(command='view', path='{sandbox_dir}/test.txt'))
print(file_editor(command='view', path='/openhands/workspace/test.txt'))
# Edit file
print(file_editor(command='str_replace', path='{sandbox_dir}/test.txt', old_str='Line 2', new_str='New Line 2'))
print(file_editor(command='str_replace', path='/openhands/workspace/test.txt', old_str='Line 2', new_str='New Line 2'))
# Undo edit
print(file_editor(command='undo_edit', path='{sandbox_dir}/test.txt'))
print(file_editor(command='undo_edit', path='/openhands/workspace/test.txt'))
"""
action = IPythonRunCellAction(code=test_code)
logger.info(action, extra={'msg_type': 'ACTION'})
@ -303,7 +297,7 @@ print(file_editor(command='undo_edit', path='{sandbox_dir}/test.txt'))
assert 'undone successfully' in obs.content
# Clean up
action = CmdRunAction(command=f'rm -f {sandbox_dir}/test.txt')
action = CmdRunAction(command='rm -f /openhands/workspace/test.txt')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
@ -318,9 +312,9 @@ print(file_editor(command='undo_edit', path='{sandbox_dir}/test.txt'))
_close_test_runtime(runtime)
def test_file_read_and_edit_via_oh_aci(temp_dir, runtime_cls, run_as_openhands):
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
sandbox_dir = _get_sandbox_folder(runtime)
def test_file_read_and_edit_via_oh_aci(runtime_cls, run_as_openhands):
runtime = _load_runtime(None, runtime_cls, run_as_openhands)
sandbox_dir = '/openhands/workspace'
actions = [
{

View File

@ -6,7 +6,11 @@ import pytest
from openhands.core.config import LLMConfig
from openhands.events.action import CmdRunAction
from openhands.events.observation import CmdOutputObservation, NullObservation
from openhands.events.observation import (
CmdOutputMetadata,
CmdOutputObservation,
NullObservation,
)
from openhands.llm.llm import LLM
from openhands.resolver.github_issue import GithubIssue, ReviewThread
from openhands.resolver.issue_definitions import IssueHandler, PRHandler
@ -55,23 +59,20 @@ def mock_followup_prompt_template():
return 'Issue context: {{ issues }}\n\nReview comments: {{ review_comments }}\n\nReview threads: {{ review_threads }}\n\nFiles: {{ files }}\n\nThread comments: {{ thread_context }}\n\nPlease fix this issue.'
def create_cmd_output(exit_code: int, content: str, command_id: int, command: str):
def create_cmd_output(exit_code: int, content: str, command: str):
return CmdOutputObservation(
exit_code=exit_code, content=content, command_id=command_id, command=command
content=content,
command=command,
metadata=CmdOutputMetadata(exit_code=exit_code),
)
def test_initialize_runtime():
mock_runtime = MagicMock()
mock_runtime.run_action.side_effect = [
create_cmd_output(exit_code=0, content='', command='cd /workspace'),
create_cmd_output(
exit_code=0, content='', command_id=1, command='cd /workspace'
),
create_cmd_output(
exit_code=0,
content='',
command_id=2,
command='git config --global core.pager ""',
exit_code=0, content='', command='git config --global core.pager ""'
),
]
@ -291,30 +292,19 @@ def test_download_pr_from_github():
async def test_complete_runtime():
mock_runtime = MagicMock()
mock_runtime.run_action.side_effect = [
create_cmd_output(exit_code=0, content='', command='cd /workspace'),
create_cmd_output(
exit_code=0, content='', command_id=1, command='cd /workspace'
exit_code=0, content='', command='git config --global core.pager ""'
),
create_cmd_output(
exit_code=0,
content='',
command_id=2,
command='git config --global core.pager ""',
),
create_cmd_output(
exit_code=0,
content='',
command_id=3,
command='git config --global --add safe.directory /workspace',
),
create_cmd_output(
exit_code=0,
content='',
command_id=4,
command='git diff base_commit_hash fix',
),
create_cmd_output(
exit_code=0, content='git diff content', command_id=5, command='git apply'
exit_code=0, content='', command='git diff base_commit_hash fix'
),
create_cmd_output(exit_code=0, content='git diff content', command='git apply'),
]
result = await complete_runtime(mock_runtime, 'base_commit_hash')
@ -614,11 +604,7 @@ def test_guess_success():
title='Test Issue',
body='This is a test issue',
)
mock_history = [
create_cmd_output(
exit_code=0, content='', command_id=1, command='cd /workspace'
)
]
mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
mock_completion_response = MagicMock()
@ -758,11 +744,7 @@ def test_guess_success_negative_case():
title='Test Issue',
body='This is a test issue',
)
mock_history = [
create_cmd_output(
exit_code=0, content='', command_id=1, command='cd /workspace'
)
]
mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
mock_completion_response = MagicMock()
@ -795,11 +777,7 @@ def test_guess_success_invalid_output():
title='Test Issue',
body='This is a test issue',
)
mock_history = [
create_cmd_output(
exit_code=0, content='', command_id=1, command='cd /workspace'
)
]
mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
mock_completion_response = MagicMock()

View File

@ -41,11 +41,10 @@ def serialization_deserialization(
serialized_action_memory = event_to_memory(action_instance, max_message_chars)
original_memory_dict = original_action_dict.copy()
# we don't send backend properties like id or 'keep_prompt'
# we don't send backend properties like id
original_memory_dict.pop('id', None)
original_memory_dict.pop('timestamp', None)
if 'args' in original_memory_dict:
original_memory_dict['args'].pop('keep_prompt', None)
original_memory_dict['args'].pop('blocking', None)
original_memory_dict['args'].pop('confirmation_state', None)
@ -99,7 +98,6 @@ def test_cmd_run_action_serialization_deserialization():
'blocking': False,
'command': 'echo "Hello world"',
'thought': '',
'keep_prompt': True,
'hidden': False,
'confirmation_state': ActionConfirmationStatus.CONFIRMED,
},
@ -154,3 +152,32 @@ def test_file_write_action_serialization_deserialization():
},
}
serialization_deserialization(original_action_dict, FileWriteAction)
def test_legacy_serialization():
original_action_dict = {
'action': 'run',
'args': {
'blocking': False,
'command': 'echo "Hello world"',
'thought': '',
'hidden': False,
'confirmation_state': ActionConfirmationStatus.CONFIRMED,
'keep_prompt': False, # will be treated as no-op
},
}
event = event_from_dict(original_action_dict)
assert isinstance(event, Action)
assert isinstance(event, CmdRunAction)
assert event.command == 'echo "Hello world"'
assert event.hidden is False
assert not hasattr(event, 'keep_prompt')
event_dict = event_to_dict(event)
assert 'keep_prompt' not in event_dict['args']
assert (
event_dict['args']['confirmation_state'] == ActionConfirmationStatus.CONFIRMED
)
assert event_dict['args']['blocking'] is False
assert event_dict['args']['command'] == 'echo "Hello world"'
assert event_dict['args']['thought'] == ''

View File

@ -1,6 +1,6 @@
import pytest
from openhands.runtime.utils.bash import split_bash_commands
from openhands.runtime.utils.bash import escape_bash_special_chars, split_bash_commands
def test_split_commands_util():
@ -257,3 +257,172 @@ def test_split_commands_with_invalid_input():
for input_command in invalid_inputs:
# it will fall back to return the original input
assert split_bash_commands(input_command) == [input_command]
def test_escape_bash_special_chars():
test_cases = [
# Basic cases - use raw strings (r'') to avoid Python escape sequence warnings
('echo test \\; ls', 'echo test \\\\; ls'),
('grep pattern \\| sort', 'grep pattern \\\\| sort'),
('cmd1 \\&\\& cmd2', 'cmd1 \\\\&\\\\& cmd2'),
('cat file \\> output.txt', 'cat file \\\\> output.txt'),
('cat \\< input.txt', 'cat \\\\< input.txt'),
# Quoted strings should remain unchanged
('echo "test \\; unchanged"', 'echo "test \\; unchanged"'),
("echo 'test \\| unchanged'", "echo 'test \\| unchanged'"),
# Mixed quoted and unquoted
(
'echo "quoted \\;" \\; "more" \\| grep',
'echo "quoted \\;" \\\\; "more" \\\\| grep',
),
# Multiple escapes in sequence
('cmd1 \\;\\|\\& cmd2', 'cmd1 \\\\;\\\\|\\\\& cmd2'),
# Commands with other backslashes
('echo test\\ntest', 'echo test\\ntest'),
('echo "test\\ntest"', 'echo "test\\ntest"'),
# Edge cases
('', ''), # Empty string
('\\\\', '\\\\'), # Double backslash
('\\"', '\\"'), # Escaped quote
]
for input_cmd, expected in test_cases:
result = escape_bash_special_chars(input_cmd)
assert (
result == expected
), f'Failed on input "{input_cmd}"\nExpected: "{expected}"\nGot: "{result}"'
def test_escape_bash_special_chars_with_invalid_syntax():
invalid_inputs = [
'echo "unclosed quote',
"echo 'unclosed quote",
'cat <<EOF\nunclosed heredoc',
]
for input_cmd in invalid_inputs:
# Should return original input when parsing fails
result = escape_bash_special_chars(input_cmd)
assert result == input_cmd, f'Failed to handle invalid input: {input_cmd}'
def test_escape_bash_special_chars_with_heredoc():
input_cmd = r"""cat <<EOF
line1 \; not escaped
line2 \| not escaped
EOF"""
# Heredoc content should not be escaped
expected = input_cmd
result = escape_bash_special_chars(input_cmd)
assert (
result == expected
), f'Failed to handle heredoc correctly\nExpected: {expected}\nGot: {result}'
def test_escape_bash_special_chars_with_parameter_expansion():
test_cases = [
# Parameter expansion should be preserved
('echo $HOME', 'echo $HOME'),
('echo ${HOME}', 'echo ${HOME}'),
('echo ${HOME:-default}', 'echo ${HOME:-default}'),
# Mixed with special chars
('echo $HOME \\; ls', 'echo $HOME \\\\; ls'),
('echo ${PATH} \\| grep bin', 'echo ${PATH} \\\\| grep bin'),
# Quoted parameter expansion
('echo "$HOME"', 'echo "$HOME"'),
('echo "${HOME}"', 'echo "${HOME}"'),
# Complex parameter expansions
('echo ${var:=default} \\; ls', 'echo ${var:=default} \\\\; ls'),
('echo ${!prefix*} \\| sort', 'echo ${!prefix*} \\\\| sort'),
]
for input_cmd, expected in test_cases:
result = escape_bash_special_chars(input_cmd)
assert (
result == expected
), f'Failed on input "{input_cmd}"\nExpected: "{expected}"\nGot: "{result}"'
def test_escape_bash_special_chars_with_command_substitution():
test_cases = [
# Basic command substitution
('echo $(pwd)', 'echo $(pwd)'),
('echo `pwd`', 'echo `pwd`'),
# Mixed with special chars
('echo $(pwd) \\; ls', 'echo $(pwd) \\\\; ls'),
('echo `pwd` \\| grep home', 'echo `pwd` \\\\| grep home'),
# Nested command substitution
('echo $(echo `pwd`)', 'echo $(echo `pwd`)'),
# Complex command substitution
('echo $(find . -name "*.txt" \\; ls)', 'echo $(find . -name "*.txt" \\; ls)'),
# Mixed with quotes
('echo "$(pwd)"', 'echo "$(pwd)"'),
('echo "`pwd`"', 'echo "`pwd`"'),
]
for input_cmd, expected in test_cases:
result = escape_bash_special_chars(input_cmd)
assert (
result == expected
), f'Failed on input "{input_cmd}"\nExpected: "{expected}"\nGot: "{result}"'
def test_escape_bash_special_chars_mixed_nodes():
test_cases = [
# Mix of parameter expansion and command substitution
('echo $HOME/$(pwd)', 'echo $HOME/$(pwd)'),
# Mix with special chars
('echo $HOME/$(pwd) \\; ls', 'echo $HOME/$(pwd) \\\\; ls'),
# Complex mixed cases
(
'echo "${HOME}/$(basename `pwd`) \\; next"',
'echo "${HOME}/$(basename `pwd`) \\; next"',
),
(
'VAR=${HOME} \\; echo $(pwd)',
'VAR=${HOME} \\\\; echo $(pwd)',
),
# Real-world examples
(
'find . -name "*.txt" -exec grep "${PATTERN:-default}" {} \\;',
'find . -name "*.txt" -exec grep "${PATTERN:-default}" {} \\\\;',
),
(
'echo "Current path: ${PWD}/$(basename `pwd`)" \\| grep home',
'echo "Current path: ${PWD}/$(basename `pwd`)" \\\\| grep home',
),
]
for input_cmd, expected in test_cases:
result = escape_bash_special_chars(input_cmd)
assert (
result == expected
), f'Failed on input "{input_cmd}"\nExpected: "{expected}"\nGot: "{result}"'
def test_escape_bash_special_chars_with_chained_commands():
test_cases = [
# Basic chained commands
('ls && pwd', 'ls && pwd'),
('echo "hello" && ls', 'echo "hello" && ls'),
# Chained commands with special chars
('ls \\; pwd && echo test', 'ls \\\\; pwd && echo test'),
('echo test && grep pattern \\| sort', 'echo test && grep pattern \\\\| sort'),
# Complex chained cases
('echo ${HOME} && ls \\; pwd', 'echo ${HOME} && ls \\\\; pwd'),
(
'echo "$(pwd)" && cat file \\> out.txt',
'echo "$(pwd)" && cat file \\\\> out.txt',
),
# Multiple chains
('cmd1 && cmd2 && cmd3', 'cmd1 && cmd2 && cmd3'),
(
'cmd1 \\; ls && cmd2 \\| grep && cmd3',
'cmd1 \\\\; ls && cmd2 \\\\| grep && cmd3',
),
]
for input_cmd, expected in test_cases:
result = escape_bash_special_chars(input_cmd)
assert (
result == expected
), f'Failed on input "{input_cmd}"\nExpected: "{expected}"\nGot: "{result}"'

View File

@ -0,0 +1,339 @@
import json
from openhands.events.observation.commands import (
CMD_OUTPUT_METADATA_PS1_REGEX,
CMD_OUTPUT_PS1_BEGIN,
CMD_OUTPUT_PS1_END,
CmdOutputMetadata,
CmdOutputObservation,
)
def test_ps1_metadata_format():
"""Test that PS1 prompt has correct format markers"""
prompt = CmdOutputMetadata.to_ps1_prompt()
print(prompt)
assert prompt.startswith('\n###PS1JSON###\n')
assert prompt.endswith('\n###PS1END###\n')
assert r'\"exit_code\"' in prompt, 'PS1 prompt should contain escaped double quotes'
def test_ps1_metadata_json_structure():
"""Test that PS1 prompt contains valid JSON with expected fields"""
prompt = CmdOutputMetadata.to_ps1_prompt()
# Extract JSON content between markers
json_str = prompt.replace('###PS1JSON###\n', '').replace('\n###PS1END###\n', '')
# Remove escaping before parsing
json_str = json_str.replace(r'\"', '"')
# Remove any trailing content after the JSON
json_str = json_str.split('###PS1END###')[0].strip()
data = json.loads(json_str)
# Check required fields
expected_fields = {
'pid',
'exit_code',
'username',
'hostname',
'working_dir',
'py_interpreter_path',
}
assert set(data.keys()) == expected_fields
def test_ps1_metadata_parsing():
"""Test parsing PS1 output into CmdOutputMetadata"""
test_data = {
'exit_code': 0,
'username': 'testuser',
'hostname': 'localhost',
'working_dir': '/home/testuser',
'py_interpreter_path': '/usr/bin/python',
}
ps1_str = f"""###PS1JSON###
{json.dumps(test_data, indent=2)}
###PS1END###
"""
matches = CmdOutputMetadata.matches_ps1_metadata(ps1_str)
assert len(matches) == 1
metadata = CmdOutputMetadata.from_ps1_match(matches[0])
assert metadata.exit_code == test_data['exit_code']
assert metadata.username == test_data['username']
assert metadata.hostname == test_data['hostname']
assert metadata.working_dir == test_data['working_dir']
assert metadata.py_interpreter_path == test_data['py_interpreter_path']
def test_ps1_metadata_parsing_string():
"""Test parsing PS1 output into CmdOutputMetadata"""
ps1_str = r"""###PS1JSON###
{
"exit_code": "0",
"username": "myname",
"hostname": "myhostname",
"working_dir": "~/mydir",
"py_interpreter_path": "/my/python/path"
}
###PS1END###
"""
matches = CmdOutputMetadata.matches_ps1_metadata(ps1_str)
assert len(matches) == 1
metadata = CmdOutputMetadata.from_ps1_match(matches[0])
assert metadata.exit_code == 0
assert metadata.username == 'myname'
assert metadata.hostname == 'myhostname'
assert metadata.working_dir == '~/mydir'
assert metadata.py_interpreter_path == '/my/python/path'
def test_ps1_metadata_parsing_string_real_example():
"""Test parsing PS1 output into CmdOutputMetadata"""
ps1_str = r"""
###PS1JSON###
{
"pid": "",
"exit_code": "0",
"username": "runner",
"hostname": "fv-az1055-610",
"working_dir": "/home/runner/work/OpenHands/OpenHands",
"py_interpreter_path": "/home/runner/.cache/pypoetry/virtualenvs/openhands-ai-ULPBlkAi-py3.12/bin/python"
}
###PS1END###
"""
matches = CmdOutputMetadata.matches_ps1_metadata(ps1_str)
assert len(matches) == 1
metadata = CmdOutputMetadata.from_ps1_match(matches[0])
assert metadata.exit_code == 0
assert metadata.username == 'runner'
assert metadata.hostname == 'fv-az1055-610'
assert metadata.working_dir == '/home/runner/work/OpenHands/OpenHands'
assert (
metadata.py_interpreter_path
== '/home/runner/.cache/pypoetry/virtualenvs/openhands-ai-ULPBlkAi-py3.12/bin/python'
)
def test_ps1_metadata_parsing_additional_prefix():
"""Test parsing PS1 output into CmdOutputMetadata"""
test_data = {
'exit_code': 0,
'username': 'testuser',
'hostname': 'localhost',
'working_dir': '/home/testuser',
'py_interpreter_path': '/usr/bin/python',
}
ps1_str = f"""
This is something that not part of the PS1 prompt
###PS1JSON###
{json.dumps(test_data, indent=2)}
###PS1END###
"""
matches = CmdOutputMetadata.matches_ps1_metadata(ps1_str)
assert len(matches) == 1
metadata = CmdOutputMetadata.from_ps1_match(matches[0])
assert metadata.exit_code == test_data['exit_code']
assert metadata.username == test_data['username']
assert metadata.hostname == test_data['hostname']
assert metadata.working_dir == test_data['working_dir']
assert metadata.py_interpreter_path == test_data['py_interpreter_path']
def test_ps1_metadata_parsing_invalid():
"""Test parsing invalid PS1 output returns default metadata"""
# Test with invalid JSON
invalid_json = """###PS1JSON###
{invalid json}
###PS1END###
"""
matches = CmdOutputMetadata.matches_ps1_metadata(invalid_json)
assert len(matches) == 0 # No matches should be found for invalid JSON
# Test with missing markers
invalid_format = """NOT A VALID PS1 PROMPT"""
matches = CmdOutputMetadata.matches_ps1_metadata(invalid_format)
assert len(matches) == 0
# Test with empty PS1 metadata
empty_metadata = """###PS1JSON###
###PS1END###
"""
matches = CmdOutputMetadata.matches_ps1_metadata(empty_metadata)
assert len(matches) == 0 # No matches should be found for empty metadata
# Test with whitespace in PS1 metadata
whitespace_metadata = """###PS1JSON###
{
"exit_code": "0",
"pid": "123",
"username": "test",
"hostname": "localhost",
"working_dir": "/home/test",
"py_interpreter_path": "/usr/bin/python"
}
###PS1END###
"""
matches = CmdOutputMetadata.matches_ps1_metadata(whitespace_metadata)
assert len(matches) == 1
metadata = CmdOutputMetadata.from_ps1_match(matches[0])
assert metadata.exit_code == 0
assert metadata.pid == 123
def test_ps1_metadata_missing_fields():
"""Test handling of missing fields in PS1 metadata"""
# Test with only required fields
minimal_data = {'exit_code': 0, 'pid': 123}
ps1_str = f"""###PS1JSON###
{json.dumps(minimal_data)}
###PS1END###
"""
matches = CmdOutputMetadata.matches_ps1_metadata(ps1_str)
assert len(matches) == 1
metadata = CmdOutputMetadata.from_ps1_match(matches[0])
assert metadata.exit_code == 0
assert metadata.pid == 123
assert metadata.username is None
assert metadata.hostname is None
assert metadata.working_dir is None
assert metadata.py_interpreter_path is None
# Test with missing exit_code but valid pid
no_exit_code = {'pid': 123, 'username': 'test'}
ps1_str = f"""###PS1JSON###
{json.dumps(no_exit_code)}
###PS1END###
"""
matches = CmdOutputMetadata.matches_ps1_metadata(ps1_str)
assert len(matches) == 1
metadata = CmdOutputMetadata.from_ps1_match(matches[0])
assert metadata.exit_code == -1 # default value
assert metadata.pid == 123
assert metadata.username == 'test'
def test_ps1_metadata_multiple_blocks():
"""Test handling multiple PS1 metadata blocks"""
test_data = {
'exit_code': 0,
'username': 'testuser',
'hostname': 'localhost',
'working_dir': '/home/testuser',
'py_interpreter_path': '/usr/bin/python',
}
ps1_str = f"""###PS1JSON###
{json.dumps(test_data, indent=2)}
###PS1END###
Some other content
###PS1JSON###
{json.dumps(test_data, indent=2)}
###PS1END###
"""
matches = CmdOutputMetadata.matches_ps1_metadata(ps1_str)
assert len(matches) == 2 # Should find both blocks
# Both blocks should parse successfully
metadata1 = CmdOutputMetadata.from_ps1_match(matches[0])
metadata2 = CmdOutputMetadata.from_ps1_match(matches[1])
assert metadata1.exit_code == test_data['exit_code']
assert metadata2.exit_code == test_data['exit_code']
def test_ps1_metadata_regex_pattern():
"""Test the regex pattern used to extract PS1 metadata"""
# Test basic pattern matching
test_str = f'{CMD_OUTPUT_PS1_BEGIN}test\n{CMD_OUTPUT_PS1_END}'
matches = CMD_OUTPUT_METADATA_PS1_REGEX.finditer(test_str)
match = next(matches)
assert match.group(1).strip() == 'test'
# Test with content before and after
test_str = f'prefix\n{CMD_OUTPUT_PS1_BEGIN}test\n{CMD_OUTPUT_PS1_END}suffix'
matches = CMD_OUTPUT_METADATA_PS1_REGEX.finditer(test_str)
match = next(matches)
assert match.group(1).strip() == 'test'
# Test with multiline content
test_str = f'{CMD_OUTPUT_PS1_BEGIN}line1\nline2\nline3\n{CMD_OUTPUT_PS1_END}'
matches = CMD_OUTPUT_METADATA_PS1_REGEX.finditer(test_str)
match = next(matches)
assert match.group(1).strip() == 'line1\nline2\nline3'
def test_cmd_output_observation_properties():
"""Test CmdOutputObservation class properties"""
# Test with successful command
metadata = CmdOutputMetadata(exit_code=0, pid=123)
obs = CmdOutputObservation(command='ls', content='file1\nfile2', metadata=metadata)
assert obs.command_id == 123
assert obs.exit_code == 0
assert not obs.error
assert 'exit code 0' in obs.message
assert 'ls' in obs.message
assert 'file1' in str(obs)
assert 'file2' in str(obs)
assert 'metadata' in str(obs)
# Test with failed command
metadata = CmdOutputMetadata(exit_code=1, pid=456)
obs = CmdOutputObservation(command='invalid', content='error', metadata=metadata)
assert obs.command_id == 456
assert obs.exit_code == 1
assert obs.error
assert 'exit code 1' in obs.message
assert 'invalid' in obs.message
assert 'error' in str(obs)
def test_ps1_metadata_empty_fields():
"""Test handling of empty fields in PS1 metadata"""
# Test with empty strings
empty_data = {
'exit_code': 0,
'pid': 123,
'username': '',
'hostname': '',
'working_dir': '',
'py_interpreter_path': '',
}
ps1_str = f"""###PS1JSON###
{json.dumps(empty_data)}
###PS1END###
"""
matches = CmdOutputMetadata.matches_ps1_metadata(ps1_str)
assert len(matches) == 1
metadata = CmdOutputMetadata.from_ps1_match(matches[0])
assert metadata.exit_code == 0
assert metadata.pid == 123
assert metadata.username == ''
assert metadata.hostname == ''
assert metadata.working_dir == ''
assert metadata.py_interpreter_path == ''
# Test with malformed but valid JSON
malformed_json = """###PS1JSON###
{
"exit_code":0,
"pid" : 123,
"username": "test" ,
"hostname": "host",
"working_dir" :"dir",
"py_interpreter_path":"path"
}
###PS1END###
"""
matches = CmdOutputMetadata.matches_ps1_metadata(malformed_json)
assert len(matches) == 1
metadata = CmdOutputMetadata.from_ps1_match(matches[0])
assert metadata.exit_code == 0
assert metadata.pid == 123
assert metadata.username == 'test'
assert metadata.hostname == 'host'
assert metadata.working_dir == 'dir'
assert metadata.py_interpreter_path == 'path'

View File

@ -0,0 +1,384 @@
import os
import tempfile
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import CmdRunAction
from openhands.runtime.utils.bash import BashCommandStatus, BashSession
def test_session_initialization():
# Test with custom working directory
with tempfile.TemporaryDirectory() as temp_dir:
session = BashSession(work_dir=temp_dir)
session.initialize()
obs = session.execute(CmdRunAction('pwd'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert temp_dir in obs.content
assert '[The command completed with exit code 0.]' in obs.metadata.suffix
session.close()
# Test with custom username
session = BashSession(work_dir=os.getcwd(), username='nobody')
session.initialize()
assert 'openhands-nobody' in session.session.name
session.close()
def test_cwd_property(tmp_path):
session = BashSession(work_dir=tmp_path)
session.initialize()
# Change directory and verify pwd updates
random_dir = tmp_path / 'random'
random_dir.mkdir()
session.execute(CmdRunAction(f'cd {random_dir}'))
assert session.cwd == str(random_dir)
session.close()
def test_basic_command():
session = BashSession(work_dir=os.getcwd())
session.initialize()
# Test simple command
obs = session.execute(CmdRunAction("echo 'hello world'"))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'hello world' in obs.content
assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
assert obs.metadata.prefix == ''
assert obs.metadata.exit_code == 0
assert session.prev_status == BashCommandStatus.COMPLETED
# Test command with error
obs = session.execute(CmdRunAction('nonexistent_command'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == 127
assert 'nonexistent_command: command not found' in obs.content
assert obs.metadata.suffix == '\n[The command completed with exit code 127.]'
assert obs.metadata.prefix == ''
assert session.prev_status == BashCommandStatus.COMPLETED
# Test multiple commands in sequence
obs = session.execute(CmdRunAction('echo "first" && echo "second" && echo "third"'))
assert 'first' in obs.content
assert 'second' in obs.content
assert 'third' in obs.content
assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
assert obs.metadata.prefix == ''
assert obs.metadata.exit_code == 0
assert session.prev_status == BashCommandStatus.COMPLETED
session.close()
def test_long_running_command_follow_by_execute():
session = BashSession(work_dir=os.getcwd(), no_change_timeout_seconds=2)
session.initialize()
# Test command that produces output slowly
obs = session.execute(
CmdRunAction('for i in {1..3}; do echo $i; sleep 3; done', blocking=False)
)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '1' in obs.content # First number should appear before timeout
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
assert obs.metadata.suffix == (
'\n[The command has no new output after 2 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.prefix == ''
# Continue watching output
obs = session.execute(CmdRunAction(''))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '2' in obs.content
assert obs.metadata.prefix == '[Command output continued from previous command]\n'
assert obs.metadata.suffix == (
'\n[The command has no new output after 2 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
# Test command that produces no output
obs = session.execute(CmdRunAction('sleep 15'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '3' in obs.content
assert obs.metadata.prefix == '[Command output continued from previous command]\n'
assert obs.metadata.suffix == (
'\n[The command has no new output after 2 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
session.close()
def test_interactive_command():
session = BashSession(work_dir=os.getcwd(), no_change_timeout_seconds=3)
session.initialize()
# Test interactive command with blocking=True
obs = session.execute(
CmdRunAction(
'read -p \'Enter name: \' name && echo "Hello $name"',
)
)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Enter name:' in obs.content
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
assert obs.metadata.suffix == (
'\n[The command has no new output after 3 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.prefix == ''
# Send input
obs = session.execute(CmdRunAction('John'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Hello John' in obs.content
assert obs.metadata.exit_code == 0
assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
assert obs.metadata.prefix == ''
assert session.prev_status == BashCommandStatus.COMPLETED
# Test multiline command input
obs = session.execute(CmdRunAction('cat << EOF'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == -1
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
assert obs.metadata.suffix == (
'\n[The command has no new output after 3 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.prefix == ''
obs = session.execute(CmdRunAction('line 1'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == -1
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
assert obs.metadata.suffix == (
'\n[The command has no new output after 3 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.prefix == '[Command output continued from previous command]\n'
obs = session.execute(CmdRunAction('line 2'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == -1
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
assert obs.metadata.suffix == (
'\n[The command has no new output after 3 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.prefix == '[Command output continued from previous command]\n'
obs = session.execute(CmdRunAction('EOF'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'line 1' in obs.content and 'line 2' in obs.content
assert obs.metadata.exit_code == 0
assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
assert obs.metadata.prefix == ''
session.close()
def test_ctrl_c():
session = BashSession(work_dir=os.getcwd(), no_change_timeout_seconds=2)
session.initialize()
# Start infinite loop
obs = session.execute(
CmdRunAction("while true; do echo 'looping'; sleep 3; done"),
)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'looping' in obs.content
assert obs.metadata.suffix == (
'\n[The command has no new output after 2 seconds. '
"You may wait longer to see additional output by sending empty command '', "
'send other commands to interact with the current process, '
'or send keys to interrupt/kill the command.]'
)
assert obs.metadata.prefix == ''
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
# Send Ctrl+C
obs = session.execute(CmdRunAction('C-c'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == 130 # Standard exit code for Ctrl+C
assert (
obs.metadata.suffix
== '\n[The command completed with exit code 130. CTRL+C was sent.]'
)
assert obs.metadata.prefix == ''
assert session.prev_status == BashCommandStatus.COMPLETED
session.close()
def test_empty_command_errors():
session = BashSession(work_dir=os.getcwd())
session.initialize()
# Test empty command without previous command
obs = session.execute(CmdRunAction(''))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert (
obs.content
== 'ERROR: No previous command to continue from. Previous command has to be timeout to be continued.'
)
assert obs.metadata.exit_code == -1
assert obs.metadata.prefix == ''
assert obs.metadata.suffix == ''
assert session.prev_status is None
session.close()
def test_command_output_continuation():
session = BashSession(work_dir=os.getcwd(), no_change_timeout_seconds=2)
session.initialize()
# Start a command that produces output slowly
obs = session.execute(CmdRunAction('for i in {1..5}; do echo $i; sleep 3; done'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.content.strip() == '1'
assert obs.metadata.prefix == ''
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
obs = session.execute(CmdRunAction(''))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '[Command output continued from previous command]' in obs.metadata.prefix
assert obs.content.strip() == '2'
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
obs = session.execute(CmdRunAction(''))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '[Command output continued from previous command]' in obs.metadata.prefix
assert obs.content.strip() == '3'
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
obs = session.execute(CmdRunAction(''))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '[Command output continued from previous command]' in obs.metadata.prefix
assert obs.content.strip() == '4'
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
obs = session.execute(CmdRunAction(''))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '[Command output continued from previous command]' in obs.metadata.prefix
assert obs.content.strip() == '5'
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
obs = session.execute(CmdRunAction(''))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert '[The command completed with exit code 0.]' in obs.metadata.suffix
assert session.prev_status == BashCommandStatus.COMPLETED
session.close()
def test_long_output():
session = BashSession(work_dir=os.getcwd())
session.initialize()
# Generate a long output that may exceed buffer size
obs = session.execute(CmdRunAction('for i in {1..5000}; do echo "Line $i"; done'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Line 1' in obs.content
assert 'Line 5000' in obs.content
assert obs.metadata.exit_code == 0
assert obs.metadata.prefix == ''
assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
session.close()
def test_long_output_exceed_history_limit():
session = BashSession(work_dir=os.getcwd())
session.initialize()
# Generate a long output that may exceed buffer size
obs = session.execute(CmdRunAction('for i in {1..50000}; do echo "Line $i"; done'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Previous command outputs are truncated' in obs.metadata.prefix
assert 'Line 40000' in obs.content
assert 'Line 50000' in obs.content
assert obs.metadata.exit_code == 0
assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
session.close()
def test_multiline_command():
session = BashSession(work_dir=os.getcwd())
session.initialize()
# Test multiline command with PS2 prompt disabled
obs = session.execute(
CmdRunAction("""if true; then
echo "inside if"
fi""")
)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'inside if' in obs.content
assert obs.metadata.exit_code == 0
assert obs.metadata.prefix == ''
assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
session.close()
def test_python_interactive_input():
session = BashSession(work_dir=os.getcwd(), no_change_timeout_seconds=2)
session.initialize()
# Test Python program that asks for input - properly escaped for bash
python_script = """name = input('Enter your name: '); age = input('Enter your age: '); print(f'Hello {name}, you are {age} years old')"""
# Start Python with the interactive script
obs = session.execute(CmdRunAction(f'python3 -c "{python_script}"'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Enter your name:' in obs.content
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
# Send first input (name)
obs = session.execute(CmdRunAction('Alice'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Enter your age:' in obs.content
assert obs.metadata.exit_code == -1
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
# Send second input (age)
obs = session.execute(CmdRunAction('25'))
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'Hello Alice, you are 25 years old' in obs.content
assert obs.metadata.exit_code == 0
assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
assert session.prev_status == BashCommandStatus.COMPLETED
session.close()

View File

@ -26,6 +26,7 @@ from openhands.events.action import (
from openhands.events.event import EventSource, FileEditSource, FileReadSource
from openhands.events.observation.browse import BrowserOutputObservation
from openhands.events.observation.commands import (
CmdOutputMetadata,
CmdOutputObservation,
IPythonRunCellObservation,
)
@ -50,7 +51,11 @@ def agent() -> CodeActAgent:
def test_cmd_output_observation_message(agent: CodeActAgent):
agent.config.function_calling = False
obs = CmdOutputObservation(
command='echo hello', content='Command output', command_id=1, exit_code=0
command='echo hello',
content='Command output',
metadata=CmdOutputMetadata(
exit_code=0,
),
)
results = agent.get_observation_message(obs, tool_call_id_to_message={})
@ -62,7 +67,7 @@ def test_cmd_output_observation_message(agent: CodeActAgent):
assert len(result.content) == 1
assert isinstance(result.content[0], TextContent)
assert 'Command output' in result.content[0].text
assert 'Command finished with exit code 0' in result.content[0].text
assert '[Command finished with exit code 0]' in result.content[0].text
def test_ipython_run_cell_observation_message(agent: CodeActAgent):

View File

@ -1,4 +1,5 @@
from openhands.events.observation.commands import (
CmdOutputMetadata,
CmdOutputObservation,
IPythonRunCellObservation,
)
@ -7,14 +8,18 @@ from openhands.events.observation.commands import (
def test_cmd_output_success():
# Test successful command
obs = CmdOutputObservation(
command_id=1, command='ls', content='file1.txt\nfile2.txt', exit_code=0
command='ls',
content='file1.txt\nfile2.txt',
metadata=CmdOutputMetadata(exit_code=0),
)
assert obs.success is True
assert obs.error is False
# Test failed command
obs = CmdOutputObservation(
command_id=2, command='ls', content='No such file or directory', exit_code=1
command='ls',
content='No such file or directory',
metadata=CmdOutputMetadata(exit_code=1),
)
assert obs.success is False
assert obs.error is True

View File

@ -1,18 +1,22 @@
from openhands.events.observation import CmdOutputObservation
from openhands.events.observation import CmdOutputMetadata, CmdOutputObservation
from openhands.events.serialization import event_to_dict
def test_command_output_success_serialization():
# Test successful command
obs = CmdOutputObservation(
command_id=1, command='ls', content='file1.txt\nfile2.txt', exit_code=0
command='ls',
content='file1.txt\nfile2.txt',
metadata=CmdOutputMetadata(exit_code=0),
)
serialized = event_to_dict(obs)
assert serialized['success'] is True
# Test failed command
obs = CmdOutputObservation(
command_id=2, command='ls', content='No such file or directory', exit_code=1
command='ls',
content='No such file or directory',
metadata=CmdOutputMetadata(exit_code=1),
)
serialized = event_to_dict(obs)
assert serialized['success'] is False

View File

@ -107,7 +107,7 @@ class TestStuckDetector:
cmd_action = CmdRunAction(command='ls')
state.history.append(cmd_action)
cmd_observation = CmdOutputObservation(
command_id=1, command='ls', content='file1.txt\nfile2.txt'
command='ls', content='file1.txt\nfile2.txt'
)
# cmd_observation._cause = cmd_action._id
state.history.append(cmd_observation)
@ -188,7 +188,7 @@ class TestStuckDetector:
cmd_action_1 = CmdRunAction(command='ls')
cmd_action_1._id = 1
state.history.append(cmd_action_1)
cmd_observation_1 = CmdOutputObservation(content='', command='ls', command_id=1)
cmd_observation_1 = CmdOutputObservation(content='', command='ls')
cmd_observation_1._cause = cmd_action_1._id
state.history.append(cmd_observation_1)
# 4 events
@ -196,7 +196,7 @@ class TestStuckDetector:
cmd_action_2 = CmdRunAction(command='ls')
cmd_action_2._id = 2
state.history.append(cmd_action_2)
cmd_observation_2 = CmdOutputObservation(content='', command='ls', command_id=2)
cmd_observation_2 = CmdOutputObservation(content='', command='ls')
cmd_observation_2._cause = cmd_action_2._id
state.history.append(cmd_observation_2)
# 6 events
@ -212,7 +212,7 @@ class TestStuckDetector:
cmd_action_3 = CmdRunAction(command='ls')
cmd_action_3._id = 3
state.history.append(cmd_action_3)
cmd_observation_3 = CmdOutputObservation(content='', command='ls', command_id=3)
cmd_observation_3 = CmdOutputObservation(content='', command='ls')
cmd_observation_3._cause = cmd_action_3._id
state.history.append(cmd_observation_3)
# 10 events
@ -223,7 +223,7 @@ class TestStuckDetector:
cmd_action_4 = CmdRunAction(command='ls')
cmd_action_4._id = 4
state.history.append(cmd_action_4)
cmd_observation_4 = CmdOutputObservation(content='', command='ls', command_id=4)
cmd_observation_4 = CmdOutputObservation(content='', command='ls')
cmd_observation_4._cause = cmd_action_4._id
state.history.append(cmd_observation_4)
# 12 events
@ -436,7 +436,7 @@ class TestStuckDetector:
cmd_action_1 = CmdRunAction(command='ls')
state.history.append(cmd_action_1)
cmd_observation_1 = CmdOutputObservation(
command_id=1, command='ls', content='file1.txt\nfile2.txt'
command='ls', content='file1.txt\nfile2.txt'
)
# cmd_observation_1._cause = cmd_action_1._id
state.history.append(cmd_observation_1)
@ -452,7 +452,7 @@ class TestStuckDetector:
cmd_action_2 = CmdRunAction(command='ls')
state.history.append(cmd_action_2)
cmd_observation_2 = CmdOutputObservation(
command_id=2, command='ls', content='file1.txt\nfile2.txt'
command='ls', content='file1.txt\nfile2.txt'
)
# cmd_observation_2._cause = cmd_action_2._id
state.history.append(cmd_observation_2)
@ -475,7 +475,7 @@ class TestStuckDetector:
cmd_action_3 = CmdRunAction(command='ls')
state.history.append(cmd_action_3)
cmd_observation_3 = CmdOutputObservation(
command_id=3, command='ls', content='file1.txt\nfile2.txt'
command='ls', content='file1.txt\nfile2.txt'
)
# cmd_observation_3._cause = cmd_action_3._id
state.history.append(cmd_observation_3)
@ -506,7 +506,7 @@ class TestStuckDetector:
cmd_action_1 = CmdRunAction(command='ls')
state.history.append(cmd_action_1)
cmd_observation_1 = CmdOutputObservation(
command_id=cmd_action_1.id, command='ls', content='file1.txt\nfile2.txt'
command='ls', content='file1.txt\nfile2.txt'
)
# cmd_observation_1._cause = cmd_action_1._id
state.history.append(cmd_observation_1)
@ -521,9 +521,7 @@ class TestStuckDetector:
cmd_action_2 = CmdRunAction(command='pwd')
state.history.append(cmd_action_2)
cmd_observation_2 = CmdOutputObservation(
command_id=2, command='pwd', content='/home/user'
)
cmd_observation_2 = CmdOutputObservation(command='pwd', content='/home/user')
# cmd_observation_2._cause = cmd_action_2._id
state.history.append(cmd_observation_2)
@ -541,9 +539,7 @@ class TestStuckDetector:
cmd_action_3 = CmdRunAction(command='pwd')
state.history.append(cmd_action_3)
cmd_observation_3 = CmdOutputObservation(
command_id=cmd_action_3.id, command='pwd', content='/home/user'
)
cmd_observation_3 = CmdOutputObservation(command='pwd', content='/home/user')
# cmd_observation_3._cause = cmd_action_3._id
state.history.append(cmd_observation_3)
@ -590,7 +586,6 @@ class TestStuckDetector:
# Add an observation event between the repeated message actions
cmd_output_observation = CmdOutputObservation(
content='OK, I was stuck, but no more.',
command_id=42,
command='storybook',
exit_code=0,
)

View File

@ -1,4 +1,5 @@
from openhands.events.observation import (
CmdOutputMetadata,
CmdOutputObservation,
Observation,
)
@ -40,13 +41,65 @@ def serialization_deserialization(
# Additional tests for various observation subclasses can be included here
def test_observation_event_props_serialization_deserialization():
original_observation_dict = {
'id': 42,
'source': 'agent',
'timestamp': '2021-08-01T12:00:00',
'observation': 'run',
'message': 'Command `ls -l` executed with exit code 0.',
'extras': {
'command': 'ls -l',
'hidden': False,
'metadata': {
'exit_code': 0,
'hostname': None,
'pid': -1,
'prefix': '',
'py_interpreter_path': None,
'suffix': '',
'username': None,
'working_dir': None,
},
},
'content': 'foo.txt',
'success': True,
}
serialization_deserialization(original_observation_dict, CmdOutputObservation)
def test_command_output_observation_serialization_deserialization():
original_observation_dict = {
'observation': 'run',
'extras': {
'command': 'ls -l',
'hidden': False,
'metadata': {
'exit_code': 0,
'hostname': None,
'pid': -1,
'prefix': '',
'py_interpreter_path': None,
'suffix': '',
'username': None,
'working_dir': None,
},
},
'message': 'Command `ls -l` executed with exit code 0.',
'content': 'foo.txt',
'success': True,
}
serialization_deserialization(original_observation_dict, CmdOutputObservation)
def test_success_field_serialization():
# Test success=True
obs = CmdOutputObservation(
content='Command succeeded',
exit_code=0,
command='ls -l',
command_id=3,
metadata=CmdOutputMetadata(
exit_code=0,
),
)
serialized = event_to_dict(obs)
assert serialized['success'] is True
@ -54,9 +107,42 @@ def test_success_field_serialization():
# Test success=False
obs = CmdOutputObservation(
content='No such file or directory',
exit_code=1,
command='ls -l',
command_id=3,
metadata=CmdOutputMetadata(
exit_code=1,
),
)
serialized = event_to_dict(obs)
assert serialized['success'] is False
def test_legacy_serialization():
original_observation_dict = {
'id': 42,
'source': 'agent',
'timestamp': '2021-08-01T12:00:00',
'observation': 'run',
'message': 'Command `ls -l` executed with exit code 0.',
'extras': {
'command': 'ls -l',
'hidden': False,
'exit_code': 0,
'command_id': 3,
},
'content': 'foo.txt',
'success': True,
}
event = event_from_dict(original_observation_dict)
assert isinstance(event, Observation)
assert isinstance(event, CmdOutputObservation)
assert event.metadata.exit_code == 0
assert event.success is True
assert event.command == 'ls -l'
assert event.hidden is False
event_dict = event_to_dict(event)
assert event_dict['success'] is True
assert event_dict['extras']['metadata']['exit_code'] == 0
assert event_dict['extras']['metadata']['pid'] == 3
assert event_dict['extras']['command'] == 'ls -l'
assert event_dict['extras']['hidden'] is False

View File

@ -368,7 +368,6 @@ async def test_unsafe_bash_command(temp_dir: str):
'blocking': False,
'command': 'ls',
'hidden': False,
'keep_prompt': True,
'confirmation_state': ActionConfirmationStatus.CONFIRMED,
},
),
@ -495,9 +494,7 @@ def test_parse_action(action, expected_trace):
],
),
(
CmdOutputObservation(
content='cmd output content', command_id=1, command='ls'
),
CmdOutputObservation(content='cmd output content', command='ls'),
[
ToolOutput(
metadata={},