mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
refactor: Replace pexpect with libtmux in BashSession (#4881)
Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com> Co-authored-by: Robert Brennan <accounts@rbren.io>
This commit is contained in:
parent
761a574b09
commit
ec70af9412
2
.github/workflows/dummy-agent-test.yml
vendored
2
.github/workflows/dummy-agent-test.yml
vendored
@ -36,6 +36,8 @@ jobs:
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Install tmux
|
||||
run: sudo apt-get update && sudo apt-get install -y tmux
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Set up Python
|
||||
|
||||
2
.github/workflows/eval-runner.yml
vendored
2
.github/workflows/eval-runner.yml
vendored
@ -29,6 +29,8 @@ jobs:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install tmux
|
||||
run: sudo apt-get update && sudo apt-get install -y tmux
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
|
||||
|
||||
2
.github/workflows/py-unit-tests-mac.yml
vendored
2
.github/workflows/py-unit-tests-mac.yml
vendored
@ -31,6 +31,8 @@ jobs:
|
||||
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-poetry-
|
||||
- name: Install tmux
|
||||
run: brew install tmux
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Install Python dependencies using Poetry
|
||||
|
||||
2
.github/workflows/py-unit-tests.yml
vendored
2
.github/workflows/py-unit-tests.yml
vendored
@ -30,6 +30,8 @@ jobs:
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Install tmux
|
||||
run: sudo apt-get update && sudo apt-get install -y tmux
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Set up Python
|
||||
|
||||
1
docs/static/img/backend_architecture.puml
vendored
1
docs/static/img/backend_architecture.puml
vendored
@ -123,7 +123,6 @@ class openhands.state.State {
|
||||
updated_info: List[Tuple[Action, Observation]]
|
||||
}
|
||||
class openhands.observation.CmdOutputObservation {
|
||||
command_id: int
|
||||
command: str
|
||||
exit_code: int
|
||||
observation: str
|
||||
|
||||
@ -137,7 +137,6 @@ def complete_runtime(
|
||||
|
||||
action = CmdRunAction(
|
||||
command=f'chmod +x ./{script_name} && ./{script_name}',
|
||||
keep_prompt=False,
|
||||
)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
@ -164,8 +163,7 @@ def complete_runtime(
|
||||
logger.info(f'Running get ground truth cmd: {script_name}')
|
||||
|
||||
action = CmdRunAction(
|
||||
command=f'chmod +x ./{script_name} && ./{script_name}',
|
||||
keep_prompt=False,
|
||||
command=f'chmod +x ./{script_name} && ./{script_name}'
|
||||
)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
|
||||
@ -145,10 +145,7 @@ def complete_runtime(
|
||||
)
|
||||
logger.info(f'Running test file: {script_name}')
|
||||
|
||||
action = CmdRunAction(
|
||||
command=f'python3 -m unittest {script_name}',
|
||||
keep_prompt=False,
|
||||
)
|
||||
action = CmdRunAction(command=f'python3 -m unittest {script_name}')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
@ -199,7 +199,7 @@ def complete_runtime(
|
||||
if obs.exit_code == 0:
|
||||
test_result['metadata']['1_copy_change_success'] = True
|
||||
|
||||
action = CmdRunAction(command=f'cat {generated_path}', keep_prompt=False)
|
||||
action = CmdRunAction(command=f'cat {generated_path}')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
assert obs.exit_code == 0
|
||||
@ -223,9 +223,7 @@ def complete_runtime(
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(
|
||||
command='cat /testing_files/results_biocoder.json', keep_prompt=False
|
||||
)
|
||||
action = CmdRunAction(command='cat /testing_files/results_biocoder.json')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
if obs.exit_code == 0:
|
||||
|
||||
@ -127,7 +127,6 @@ For each problem, OpenHands is given a set number of iterations to fix the faili
|
||||
"observation": "run",
|
||||
"content": "california_schools/california_schools.sqlite\r\n[(1.0,)]",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "python3 0.py",
|
||||
"exit_code": 0
|
||||
}
|
||||
|
||||
@ -268,10 +268,7 @@ def initialize_runtime(
|
||||
runtime.copy_to(db_file, '/workspace')
|
||||
|
||||
# Check the database is copied
|
||||
action = CmdRunAction(
|
||||
command='cd /workspace && ls -l',
|
||||
keep_prompt=False,
|
||||
)
|
||||
action = CmdRunAction(command='cd /workspace && ls -l')
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
@ -300,10 +297,7 @@ def complete_runtime(
|
||||
instance_id = instance.instance_id.replace('/', '__')
|
||||
path = os.path.join('/workspace', f'{instance_id}.py')
|
||||
|
||||
action = CmdRunAction(
|
||||
command=f'cat {path}',
|
||||
keep_prompt=False,
|
||||
)
|
||||
action = CmdRunAction(command=f'cat {path}')
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
|
||||
@ -71,7 +71,6 @@ For each problem, OpenHands is given a set number of iterations to fix the faili
|
||||
"observation": "run",
|
||||
"content": "[File: /workspace/Python__2.py (14 lines total)]\r\n1:def truncate_number(number: float) -> float:\r\n2: return number % 1.0 + 1.0\r\n3:\r\n4:\r\n5:\r\n6:\r\n7:\r\n8:\r\n9:def check(truncate_number):\r\n10: assert truncate_number(3.5) == 0.5\r\n11: assert abs(truncate_number(1.33) - 0.33) < 1e-6\r\n12: assert abs(truncate_number(123.456) - 0.456) < 1e-6\r\n13:\r\n14:check(truncate_number)",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "open Python__2.py",
|
||||
"exit_code": 0
|
||||
}
|
||||
@ -98,7 +97,6 @@ For each problem, OpenHands is given a set number of iterations to fix the faili
|
||||
"observation": "run",
|
||||
"content": "> > [File: /workspace/Python__2.py (14 lines total)]\r\n1:def truncate_number(number: float) -> float:\r\n2: return number % 1.0\r\n3:\r\n4:\r\n5:\r\n6:\r\n7:\r\n8:\r\n9:def check(truncate_number):\r\n10: assert truncate_number(3.5) == 0.5\r\n11: assert abs(truncate_number(1.33) - 0.33) < 1e-6\r\n12: assert abs(truncate_number(123.456) - 0.456) < 1e-6\r\n13:\r\n14:check(truncate_number)\r\nFile updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "edit 2:2 <<EOF\n return number % 1.0\nEOF",
|
||||
"exit_code": 0
|
||||
}
|
||||
@ -125,7 +123,6 @@ For each problem, OpenHands is given a set number of iterations to fix the faili
|
||||
"observation": "run",
|
||||
"content": "",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "python3 Python__2.py",
|
||||
"exit_code": 0
|
||||
}
|
||||
|
||||
@ -171,9 +171,7 @@ def complete_runtime(
|
||||
num_workers = LANGUAGE_TO_NUM_WORKERS[language]
|
||||
python_imports = '\n'.join(IMPORT_HELPER[language])
|
||||
|
||||
action = CmdRunAction(
|
||||
command=f'cat /workspace/{_get_instance_id(instance)}.py', keep_prompt=False
|
||||
)
|
||||
action = CmdRunAction(command=f'cat /workspace/{_get_instance_id(instance)}.py')
|
||||
obs = runtime.run_action(action)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
|
||||
@ -163,7 +163,7 @@ def complete_runtime(
|
||||
eval_script = os.path.join(task_path, 'run.sh')
|
||||
logger.info(f'Running evaluation script: {eval_script}')
|
||||
|
||||
action = CmdRunAction(command=f'cat {eval_script}', keep_prompt=False)
|
||||
action = CmdRunAction(command=f'cat {eval_script}')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
if obs.exit_code == 0:
|
||||
|
||||
@ -121,10 +121,7 @@ def initialize_runtime(
|
||||
runtime.copy_to(dataset_dir, '/workspace/benchmark/datasets', recursive=True)
|
||||
|
||||
# Check the dataset exists
|
||||
action = CmdRunAction(
|
||||
command='cd /workspace/benchmark/datasets && ls',
|
||||
keep_prompt=False,
|
||||
)
|
||||
action = CmdRunAction(command='cd /workspace/benchmark/datasets && ls')
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
@ -154,10 +151,7 @@ def complete_runtime(
|
||||
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(
|
||||
command=f'cat pred_programs/{instance.pred_program_name}',
|
||||
keep_prompt=False,
|
||||
)
|
||||
action = CmdRunAction(command=f'cat pred_programs/{instance.pred_program_name}')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
|
||||
|
||||
@ -98,6 +98,7 @@ def process_instance(
|
||||
metadata: EvalMetadata,
|
||||
reset_logger: bool = True,
|
||||
log_dir: str | None = None,
|
||||
runtime_failure_count: int = 0,
|
||||
) -> EvalOutput:
|
||||
"""
|
||||
Evaluate agent performance on a SWE-bench problem instance.
|
||||
@ -146,6 +147,16 @@ def process_instance(
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
# Increase resource_factor with increasing attempt_id
|
||||
if runtime_failure_count > 0:
|
||||
config.sandbox.remote_runtime_resource_factor = min(
|
||||
config.sandbox.remote_runtime_resource_factor * (2**runtime_failure_count),
|
||||
4, # hardcode maximum resource factor to 4
|
||||
)
|
||||
logger.warning(
|
||||
f'This is the second attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
|
||||
)
|
||||
|
||||
runtime = create_runtime(config)
|
||||
call_async_from_sync(runtime.connect)
|
||||
# Get patch and save it to /tmp/patch.diff
|
||||
@ -177,7 +188,7 @@ def process_instance(
|
||||
"(patch --batch --fuzz=5 -p1 -i /tmp/patch.diff && echo 'APPLY_PATCH_PASS' || "
|
||||
"echo 'APPLY_PATCH_FAIL')))"
|
||||
)
|
||||
action = CmdRunAction(command=exec_command, keep_prompt=False)
|
||||
action = CmdRunAction(command=exec_command)
|
||||
action.timeout = 600
|
||||
obs = runtime.run_action(action)
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
@ -200,9 +211,7 @@ def process_instance(
|
||||
|
||||
# Run eval script in background and save output to log file
|
||||
log_file = '/tmp/eval_output.log'
|
||||
action = CmdRunAction(
|
||||
command=f'/tmp/eval.sh > {log_file} 2>&1 & echo $!', keep_prompt=False
|
||||
)
|
||||
action = CmdRunAction(command=f'/tmp/eval.sh > {log_file} 2>&1 & echo $!')
|
||||
action.timeout = 60 # Short timeout just to get the process ID
|
||||
obs = runtime.run_action(action)
|
||||
|
||||
@ -224,7 +233,7 @@ def process_instance(
|
||||
instance['test_result']['report']['test_timeout'] = True
|
||||
break
|
||||
check_action = CmdRunAction(
|
||||
command=f'ps -p {pid} > /dev/null; echo $?', keep_prompt=False
|
||||
command=f'ps -p {pid} > /dev/null; echo $?'
|
||||
)
|
||||
check_action.timeout = 60
|
||||
check_obs = runtime.run_action(check_action)
|
||||
@ -242,7 +251,7 @@ def process_instance(
|
||||
time.sleep(30) # Wait for 30 seconds before checking again
|
||||
|
||||
# Read the log file
|
||||
cat_action = CmdRunAction(command=f'cat {log_file}', keep_prompt=False)
|
||||
cat_action = CmdRunAction(command=f'cat {log_file}')
|
||||
cat_action.timeout = 300
|
||||
cat_obs = runtime.run_action(cat_action)
|
||||
|
||||
|
||||
@ -282,6 +282,16 @@ def initialize_runtime(
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}')
|
||||
|
||||
action = CmdRunAction(command='which python')
|
||||
action.timeout = 600
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(
|
||||
obs.exit_code == 0 and 'testbed' in obs.content,
|
||||
f'Expected to find python interpreter from testbed, but got: {str(obs)}',
|
||||
)
|
||||
|
||||
logger.info('-' * 30)
|
||||
logger.info('END Runtime Initialization Fn')
|
||||
logger.info('-' * 30)
|
||||
@ -337,8 +347,7 @@ def complete_runtime(
|
||||
git_patch = None
|
||||
while n_retries < 5:
|
||||
action = CmdRunAction(
|
||||
command=f'git diff --no-color --cached {instance["base_commit"]}',
|
||||
keep_prompt=False,
|
||||
command=f'git diff --no-color --cached {instance["base_commit"]}'
|
||||
)
|
||||
action.timeout = 600 + 100 * n_retries
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
@ -385,7 +394,7 @@ def process_instance(
|
||||
if runtime_failure_count > 0:
|
||||
config.sandbox.remote_runtime_resource_factor = min(
|
||||
config.sandbox.remote_runtime_resource_factor * (2**runtime_failure_count),
|
||||
2, # hardcode maximum resource factor to 2
|
||||
8,
|
||||
)
|
||||
logger.warning(
|
||||
f'This is the second attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
|
||||
@ -535,4 +544,5 @@ if __name__ == '__main__':
|
||||
args.eval_num_workers,
|
||||
process_instance,
|
||||
timeout_seconds=120 * 60, # 2 hour PER instance should be more than enough
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
@ -104,9 +104,9 @@ for repo, diff in repo_diffs:
|
||||
# Determine if this repo has a significant diff
|
||||
is_significant = diff >= threshold
|
||||
repo_color = 'red' if is_significant else 'yellow'
|
||||
print(colored(f'Difference: {diff} instances!', repo_color, attrs=['bold']))
|
||||
|
||||
print(f"\n{colored(repo, repo_color, attrs=['bold'])}:")
|
||||
print(colored(f'Difference: {diff} instances!', repo_color, attrs=['bold']))
|
||||
print(colored(f'X resolved but Y failed: ({len(x_instances)} instances)', 'green'))
|
||||
if x_instances:
|
||||
print(' ' + str(x_instances))
|
||||
|
||||
@ -20,6 +20,13 @@ output_md_folder = args.oh_output_file.replace('.jsonl', '.viz')
|
||||
print(f'Converting {args.oh_output_file} to markdown files in {output_md_folder}')
|
||||
|
||||
oh_format = pd.read_json(args.oh_output_file, orient='records', lines=True)
|
||||
|
||||
swebench_eval_file = args.oh_output_file.replace('.jsonl', '.swebench_eval.jsonl')
|
||||
if os.path.exists(swebench_eval_file):
|
||||
eval_output_df = pd.read_json(swebench_eval_file, orient='records', lines=True)
|
||||
else:
|
||||
eval_output_df = None
|
||||
|
||||
# model name is the folder name of oh_output_file
|
||||
model_name = os.path.basename(os.path.dirname(args.oh_output_file))
|
||||
|
||||
@ -50,7 +57,7 @@ def convert_history_to_str(history):
|
||||
return ret
|
||||
|
||||
|
||||
def write_row_to_md_file(row):
|
||||
def write_row_to_md_file(row, instance_id_to_test_result):
|
||||
if 'git_patch' in row:
|
||||
model_patch = row['git_patch']
|
||||
elif 'test_result' in row and 'git_patch' in row['test_result']:
|
||||
@ -58,8 +65,21 @@ def write_row_to_md_file(row):
|
||||
else:
|
||||
raise ValueError(f'Row {row} does not have a git_patch')
|
||||
|
||||
if 'report' in row:
|
||||
resolved = row['report'].get('resolved', False)
|
||||
test_output = None
|
||||
if row['instance_id'] in instance_id_to_test_result:
|
||||
report = instance_id_to_test_result[row['instance_id']].get('report', {})
|
||||
resolved = report.get('resolved', False)
|
||||
test_output = instance_id_to_test_result[row['instance_id']].get(
|
||||
'test_output', None
|
||||
)
|
||||
elif 'report' in row and row['report'] is not None:
|
||||
if not isinstance(row['report'], dict):
|
||||
resolved = None
|
||||
print(
|
||||
f'ERROR: Report is not a dict, but a {type(row["report"])}. Row: {row}'
|
||||
)
|
||||
else:
|
||||
resolved = row['report'].get('resolved', False)
|
||||
else:
|
||||
resolved = None
|
||||
|
||||
@ -84,5 +104,18 @@ def write_row_to_md_file(row):
|
||||
f.write('## Model Patch\n')
|
||||
f.write(f'{process_git_patch(model_patch)}\n')
|
||||
|
||||
f.write('## Test Output\n')
|
||||
f.write(str(test_output))
|
||||
|
||||
oh_format.progress_apply(write_row_to_md_file, axis=1)
|
||||
|
||||
instance_id_to_test_result = {}
|
||||
if eval_output_df is not None:
|
||||
instance_id_to_test_result = (
|
||||
eval_output_df[['instance_id', 'test_result']]
|
||||
.set_index('instance_id')['test_result']
|
||||
.to_dict()
|
||||
)
|
||||
|
||||
oh_format.progress_apply(
|
||||
write_row_to_md_file, axis=1, instance_id_to_test_result=instance_id_to_test_result
|
||||
)
|
||||
|
||||
@ -111,6 +111,11 @@ elif os.path.exists(openhands_remote_report_jsonl):
|
||||
instance_id_to_status[row['instance_id']] = row['test_result']['report']
|
||||
df['report'] = df.apply(apply_report, axis=1)
|
||||
|
||||
report_is_dict = df['report'].apply(lambda x: isinstance(x, dict))
|
||||
if not report_is_dict.all():
|
||||
print(df[~report_is_dict])
|
||||
raise ValueError(f'Report is not a dict, but a {type(row["report"])}')
|
||||
|
||||
_n_instances = len(df)
|
||||
_n_resolved = len(df[df['report'].apply(lambda x: x.get('resolved', False))])
|
||||
_n_unresolved = _n_instances - _n_resolved
|
||||
|
||||
@ -24,7 +24,7 @@ class Test(BaseIntegrationTest):
|
||||
@classmethod
|
||||
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
|
||||
# check if the file /workspace/bad.txt has been fixed
|
||||
action = CmdRunAction(command='cat /workspace/bad.txt', keep_prompt=False)
|
||||
action = CmdRunAction(command='cat /workspace/bad.txt')
|
||||
obs = runtime.run_action(action)
|
||||
if obs.exit_code != 0:
|
||||
return TestResult(
|
||||
|
||||
@ -10,14 +10,14 @@ class Test(BaseIntegrationTest):
|
||||
|
||||
@classmethod
|
||||
def initialize_runtime(cls, runtime: Runtime) -> None:
|
||||
action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
|
||||
action = CmdRunAction(command='mkdir -p /workspace')
|
||||
obs = runtime.run_action(action)
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
|
||||
|
||||
@classmethod
|
||||
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
|
||||
# check if the file /workspace/hello.sh exists
|
||||
action = CmdRunAction(command='cat /workspace/hello.sh', keep_prompt=False)
|
||||
action = CmdRunAction(command='cat /workspace/hello.sh')
|
||||
obs = runtime.run_action(action)
|
||||
if obs.exit_code != 0:
|
||||
return TestResult(
|
||||
@ -26,7 +26,7 @@ class Test(BaseIntegrationTest):
|
||||
)
|
||||
|
||||
# execute the script
|
||||
action = CmdRunAction(command='bash /workspace/hello.sh', keep_prompt=False)
|
||||
action = CmdRunAction(command='bash /workspace/hello.sh')
|
||||
obs = runtime.run_action(action)
|
||||
if obs.exit_code != 0:
|
||||
return TestResult(
|
||||
|
||||
@ -10,14 +10,14 @@ class Test(BaseIntegrationTest):
|
||||
|
||||
@classmethod
|
||||
def initialize_runtime(cls, runtime: Runtime) -> None:
|
||||
action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
|
||||
action = CmdRunAction(command='mkdir -p /workspace')
|
||||
obs = runtime.run_action(action)
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
|
||||
|
||||
@classmethod
|
||||
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
|
||||
# check if the file /workspace/hello.sh exists
|
||||
action = CmdRunAction(command='cat /workspace/test.txt', keep_prompt=False)
|
||||
action = CmdRunAction(command='cat /workspace/test.txt')
|
||||
obs = runtime.run_action(action)
|
||||
if obs.exit_code != 0:
|
||||
return TestResult(
|
||||
@ -26,7 +26,7 @@ class Test(BaseIntegrationTest):
|
||||
)
|
||||
|
||||
# execute the script
|
||||
action = CmdRunAction(command='cat /workspace/test.txt', keep_prompt=False)
|
||||
action = CmdRunAction(command='cat /workspace/test.txt')
|
||||
obs = runtime.run_action(action)
|
||||
|
||||
if obs.exit_code != 0:
|
||||
|
||||
@ -10,31 +10,29 @@ class Test(BaseIntegrationTest):
|
||||
|
||||
@classmethod
|
||||
def initialize_runtime(cls, runtime: Runtime) -> None:
|
||||
action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
|
||||
action = CmdRunAction(command='mkdir -p /workspace')
|
||||
obs = runtime.run_action(action)
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
|
||||
|
||||
# git init
|
||||
action = CmdRunAction(command='git init', keep_prompt=False)
|
||||
action = CmdRunAction(command='git init')
|
||||
obs = runtime.run_action(action)
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
|
||||
|
||||
# create README.md
|
||||
action = CmdRunAction(
|
||||
command='echo \'print("hello world")\' > hello.py', keep_prompt=False
|
||||
)
|
||||
action = CmdRunAction(command='echo \'print("hello world")\' > hello.py')
|
||||
obs = runtime.run_action(action)
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
|
||||
|
||||
# git add README.md
|
||||
action = CmdRunAction(command='git add hello.py', keep_prompt=False)
|
||||
action = CmdRunAction(command='git add hello.py')
|
||||
obs = runtime.run_action(action)
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
|
||||
|
||||
@classmethod
|
||||
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
|
||||
# check if the file /workspace/hello.py exists
|
||||
action = CmdRunAction(command='cat /workspace/hello.py', keep_prompt=False)
|
||||
action = CmdRunAction(command='cat /workspace/hello.py')
|
||||
obs = runtime.run_action(action)
|
||||
if obs.exit_code != 0:
|
||||
return TestResult(
|
||||
@ -43,7 +41,7 @@ class Test(BaseIntegrationTest):
|
||||
)
|
||||
|
||||
# check if the staging area is empty
|
||||
action = CmdRunAction(command='git status', keep_prompt=False)
|
||||
action = CmdRunAction(command='git status')
|
||||
obs = runtime.run_action(action)
|
||||
if obs.exit_code != 0:
|
||||
return TestResult(
|
||||
|
||||
@ -83,11 +83,11 @@ class Test(BaseIntegrationTest):
|
||||
|
||||
@classmethod
|
||||
def initialize_runtime(cls, runtime: Runtime) -> None:
|
||||
action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
|
||||
action = CmdRunAction(command='mkdir -p /workspace')
|
||||
obs = runtime.run_action(action)
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
|
||||
|
||||
action = CmdRunAction(command='mkdir -p /tmp/server', keep_prompt=False)
|
||||
action = CmdRunAction(command='mkdir -p /tmp/server')
|
||||
obs = runtime.run_action(action)
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
|
||||
|
||||
@ -101,8 +101,7 @@ class Test(BaseIntegrationTest):
|
||||
|
||||
# create README.md
|
||||
action = CmdRunAction(
|
||||
command='cd /tmp/server && nohup python3 -m http.server 8000 &',
|
||||
keep_prompt=False,
|
||||
command='cd /tmp/server && nohup python3 -m http.server 8000 &'
|
||||
)
|
||||
obs = runtime.run_action(action)
|
||||
|
||||
|
||||
@ -16,6 +16,7 @@ import {
|
||||
StatusMessage,
|
||||
} from "#/types/message";
|
||||
import { handleObservationMessage } from "./observations";
|
||||
import { appendInput } from "#/state/command-slice";
|
||||
|
||||
const messageActions = {
|
||||
[ActionType.BROWSE]: (message: ActionMessage) => {
|
||||
@ -62,6 +63,10 @@ export function handleActionMessage(message: ActionMessage) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (message.action === ActionType.RUN) {
|
||||
store.dispatch(appendInput(message.args.command));
|
||||
}
|
||||
|
||||
if ("args" in message && "security_risk" in message.args) {
|
||||
store.dispatch(appendSecurityAnalyzerInput(message));
|
||||
}
|
||||
|
||||
@ -80,8 +80,7 @@ export function handleObservationMessage(message: ObservationMessage) {
|
||||
observation: "run" as const,
|
||||
extras: {
|
||||
command: String(message.extras.command || ""),
|
||||
command_id: Number(message.extras.command_id || 0),
|
||||
exit_code: Number(message.extras.exit_code || 0),
|
||||
metadata: message.extras.metadata,
|
||||
hidden: Boolean(message.extras.hidden),
|
||||
},
|
||||
}),
|
||||
|
||||
@ -93,7 +93,7 @@ export const chatSlice = createSlice({
|
||||
const translationID = `ACTION_MESSAGE$${actionID.toUpperCase()}`;
|
||||
let text = "";
|
||||
if (actionID === "run") {
|
||||
text = `\`${action.payload.args.command}\``;
|
||||
text = `Command:\n\`${action.payload.args.command}\``;
|
||||
} else if (actionID === "run_ipython") {
|
||||
text = `\`\`\`\n${action.payload.args.code}\n\`\`\``;
|
||||
} else if (actionID === "write") {
|
||||
@ -144,7 +144,7 @@ export const chatSlice = createSlice({
|
||||
// Set success property based on observation type
|
||||
if (observationID === "run") {
|
||||
const commandObs = observation.payload as CommandObservation;
|
||||
causeMessage.success = commandObs.extras.exit_code === 0;
|
||||
causeMessage.success = commandObs.extras.metadata.exit_code === 0;
|
||||
} else if (observationID === "run_ipython") {
|
||||
// For IPython, we consider it successful if there's no error message
|
||||
const ipythonObs = observation.payload as IPythonObservation;
|
||||
@ -158,7 +158,9 @@ export const chatSlice = createSlice({
|
||||
if (content.length > MAX_CONTENT_LENGTH) {
|
||||
content = `${content.slice(0, MAX_CONTENT_LENGTH)}...`;
|
||||
}
|
||||
content = `\`\`\`\n${content}\n\`\`\``;
|
||||
content = `${
|
||||
causeMessage.content
|
||||
}\n\nOutput:\n\`\`\`\n${content.trim() || "[Command finished execution with no output]"}\n\`\`\``;
|
||||
causeMessage.content = content; // Observation content includes the action
|
||||
} else if (observationID === "read" || observationID === "edit") {
|
||||
const { content } = observation.payload;
|
||||
|
||||
@ -13,9 +13,8 @@ export interface CommandObservation extends OpenHandsObservationEvent<"run"> {
|
||||
source: "agent";
|
||||
extras: {
|
||||
command: string;
|
||||
command_id: number;
|
||||
exit_code: number;
|
||||
hidden?: boolean;
|
||||
metadata: Record<string, unknown>;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@ -27,8 +27,11 @@ export interface ObservationMessage {
|
||||
// The observed data
|
||||
content: string;
|
||||
|
||||
// Additional structured data
|
||||
extras: Record<string, string>;
|
||||
extras: {
|
||||
metadata: Record<string, unknown>;
|
||||
error_id: string;
|
||||
[key: string]: string | Record<string, unknown>;
|
||||
};
|
||||
|
||||
// A friendly message that can be put in the chat log
|
||||
message: string;
|
||||
|
||||
@ -277,7 +277,9 @@ class CodeActAgent(Agent):
|
||||
)
|
||||
else:
|
||||
text = truncate_content(
|
||||
obs.content + obs.interpreter_details, max_message_chars
|
||||
obs.content
|
||||
+ f'\n[Python Interpreter: {obs.metadata.py_interpreter_path}]',
|
||||
max_message_chars,
|
||||
)
|
||||
text += f'\n[Command finished with exit code {obs.exit_code}]'
|
||||
message = Message(role='user', content=[TextContent(text=text)])
|
||||
|
||||
@ -31,8 +31,7 @@ from openhands.events.tool import ToolCallMetadata
|
||||
|
||||
_BASH_DESCRIPTION = """Execute a bash command in the terminal.
|
||||
* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
|
||||
* Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command=`ctrl+c` to interrupt the process.
|
||||
* Timeout: If a command execution result says "Command timed out. Sending SIGINT to the process", the assistant should retry running the command in the background.
|
||||
* Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command like `C-c` (Ctrl+C) to interrupt the process.
|
||||
"""
|
||||
|
||||
CmdRunTool = ChatCompletionToolParam(
|
||||
@ -45,7 +44,7 @@ CmdRunTool = ChatCompletionToolParam(
|
||||
'properties': {
|
||||
'command': {
|
||||
'type': 'string',
|
||||
'description': 'The bash command to execute. Can be empty to view additional logs when previous exit code is `-1`. Can be `ctrl+c` to interrupt the currently running process.',
|
||||
'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process.',
|
||||
},
|
||||
},
|
||||
'required': ['command'],
|
||||
|
||||
@ -18,6 +18,7 @@ from openhands.events.action import (
|
||||
from openhands.events.observation import (
|
||||
AgentStateChangedObservation,
|
||||
BrowserOutputObservation,
|
||||
CmdOutputMetadata,
|
||||
CmdOutputObservation,
|
||||
FileReadObservation,
|
||||
FileWriteObservation,
|
||||
@ -54,11 +55,7 @@ class DummyAgent(Agent):
|
||||
},
|
||||
{
|
||||
'action': CmdRunAction(command='echo "foo"'),
|
||||
'observations': [
|
||||
CmdOutputObservation(
|
||||
'foo', command_id=-1, command='echo "foo"', exit_code=0
|
||||
)
|
||||
],
|
||||
'observations': [CmdOutputObservation('foo', command='echo "foo"')],
|
||||
},
|
||||
{
|
||||
'action': FileWriteAction(
|
||||
@ -81,9 +78,8 @@ class DummyAgent(Agent):
|
||||
'observations': [
|
||||
CmdOutputObservation(
|
||||
'bash: hello.sh: No such file or directory',
|
||||
command_id=-1,
|
||||
command='bash workspace/hello.sh',
|
||||
exit_code=127,
|
||||
metadata=CmdOutputMetadata(exit_code=127),
|
||||
)
|
||||
],
|
||||
},
|
||||
@ -152,8 +148,6 @@ class DummyAgent(Agent):
|
||||
obs.pop('timestamp', None)
|
||||
obs.pop('cause', None)
|
||||
obs.pop('source', None)
|
||||
if 'extras' in obs:
|
||||
obs['extras'].pop('command_id', None)
|
||||
|
||||
if hist_obs != expected_obs:
|
||||
print(
|
||||
|
||||
@ -5,7 +5,7 @@ from openhands.events.action.commands import IPythonRunCellAction
|
||||
from openhands.events.action.empty import NullAction
|
||||
from openhands.events.action.message import MessageAction
|
||||
from openhands.events.event import Event, EventSource
|
||||
from openhands.events.observation.commands import (
|
||||
from openhands.events.observation import (
|
||||
CmdOutputObservation,
|
||||
IPythonRunCellObservation,
|
||||
)
|
||||
|
||||
@ -6,6 +6,7 @@ from litellm.types.utils import ModelResponse
|
||||
|
||||
from openhands.core.exceptions import LLMResponseError
|
||||
from openhands.events.event import Event
|
||||
from openhands.events.observation import CmdOutputMetadata
|
||||
from openhands.events.serialization import event_to_dict
|
||||
from openhands.llm.metrics import Metrics
|
||||
|
||||
@ -20,6 +21,8 @@ def my_default_encoder(obj):
|
||||
return obj.get()
|
||||
if isinstance(obj, ModelResponse):
|
||||
return obj.model_dump()
|
||||
if isinstance(obj, CmdOutputMetadata):
|
||||
return obj.model_dump()
|
||||
return json.JSONEncoder().default(obj)
|
||||
|
||||
|
||||
|
||||
@ -12,19 +12,11 @@ from openhands.events.action.action import (
|
||||
@dataclass
|
||||
class CmdRunAction(Action):
|
||||
command: str
|
||||
# When `command` is empty, it will be used to print the current tmux window
|
||||
thought: str = ''
|
||||
blocking: bool = False
|
||||
# If False, the command will be run in a non-blocking / interactive way
|
||||
# The partial command outputs will be returned as output observation.
|
||||
# If True, the command will be run for max .timeout seconds.
|
||||
keep_prompt: bool = True
|
||||
# if True, the command prompt will be kept in the command output observation
|
||||
# Example of command output:
|
||||
# root@sandbox:~# ls
|
||||
# file1.txt
|
||||
# file2.txt
|
||||
# root@sandbox:~# <-- this is the command prompt
|
||||
|
||||
# If blocking is True, the command will be run in a blocking manner.
|
||||
# e.g., it will NOT return early due to soft timeout.
|
||||
hidden: bool = False
|
||||
action: str = ActionType.RUN
|
||||
runnable: ClassVar[bool] = True
|
||||
|
||||
@ -48,6 +48,15 @@ class FileWriteAction(Action):
|
||||
def message(self) -> str:
|
||||
return f'Writing file: {self.path}'
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return (
|
||||
f'**FileWriteAction**\n'
|
||||
f'Path: {self.path}\n'
|
||||
f'Range: [L{self.start}:L{self.end}]\n'
|
||||
f'Thought: {self.thought}\n'
|
||||
f'Content:\n```\n{self.content}\n```\n'
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class FileEditAction(Action):
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
from openhands.events.observation.agent import AgentStateChangedObservation
|
||||
from openhands.events.observation.browse import BrowserOutputObservation
|
||||
from openhands.events.observation.commands import (
|
||||
CmdOutputMetadata,
|
||||
CmdOutputObservation,
|
||||
IPythonRunCellObservation,
|
||||
)
|
||||
@ -20,6 +21,7 @@ __all__ = [
|
||||
'Observation',
|
||||
'NullObservation',
|
||||
'CmdOutputObservation',
|
||||
'CmdOutputMetadata',
|
||||
'IPythonRunCellObservation',
|
||||
'BrowserOutputObservation',
|
||||
'FileReadObservation',
|
||||
|
||||
@ -1,19 +1,136 @@
|
||||
from dataclasses import dataclass
|
||||
import json
|
||||
import re
|
||||
import traceback
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Self
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.schema import ObservationType
|
||||
from openhands.events.observation.observation import Observation
|
||||
|
||||
CMD_OUTPUT_PS1_BEGIN = '\n###PS1JSON###\n'
|
||||
CMD_OUTPUT_PS1_END = '\n###PS1END###'
|
||||
CMD_OUTPUT_METADATA_PS1_REGEX = re.compile(
|
||||
f'^{CMD_OUTPUT_PS1_BEGIN.strip()}(.*?){CMD_OUTPUT_PS1_END.strip()}',
|
||||
re.DOTALL | re.MULTILINE,
|
||||
)
|
||||
|
||||
|
||||
class CmdOutputMetadata(BaseModel):
    """Additional metadata captured from PS1.

    The prompt built by `to_ps1_prompt` prints a JSON object between the
    CMD_OUTPUT_PS1_BEGIN / CMD_OUTPUT_PS1_END markers; `matches_ps1_metadata`
    finds those blocks in captured text and `from_ps1_match` turns one of
    them into an instance of this model.
    """

    exit_code: int = -1
    pid: int = -1
    username: str | None = None
    hostname: str | None = None
    working_dir: str | None = None
    py_interpreter_path: str | None = None
    prefix: str = ''  # Prefix to add to command output
    suffix: str = ''  # Suffix to add to command output

    @classmethod
    def to_ps1_prompt(cls) -> str:
        """Convert the required metadata into a PS1 prompt.

        Returns:
            The begin marker, a JSON template whose values are shell/prompt
            expansions (with double quotes backslash-escaped), the end marker
            and a trailing newline.
        """
        prompt = CMD_OUTPUT_PS1_BEGIN
        json_str = json.dumps(
            {
                'pid': '$!',
                'exit_code': '$?',
                'username': r'\u',
                'hostname': r'\h',
                'working_dir': r'$(pwd)',
                'py_interpreter_path': r'$(which python 2>/dev/null || echo "")',
            },
            indent=2,
        )
        # Make sure we escape double quotes in the JSON string
        # So that PS1 will keep them as part of the output
        prompt += json_str.replace('"', r'\"')
        prompt += CMD_OUTPUT_PS1_END + '\n'  # Ensure there's a newline at the end
        return prompt

    @classmethod
    def matches_ps1_metadata(cls, string: str) -> list[re.Match[str]]:
        """Find every PS1 metadata block in `string` whose payload is valid JSON.

        Args:
            string: Text to scan for marker-delimited metadata blocks.

        Returns:
            The regex matches, in order of appearance, whose captured payload
            parses as JSON. Blocks that fail to parse are logged and skipped.
        """
        matches = []
        for match in CMD_OUTPUT_METADATA_PS1_REGEX.finditer(string):
            try:
                json.loads(match.group(1).strip())  # Try to parse as JSON
            except json.JSONDecodeError:
                logger.warning(
                    f'Failed to parse PS1 metadata: {match.group(1)}. Skipping.'
                    + traceback.format_exc()
                )
            else:
                matches.append(match)
        return matches

    @classmethod
    def from_ps1_match(cls, match: re.Match[str]) -> Self:
        """Extract the required metadata from a PS1 prompt.

        Args:
            match: A match produced by `matches_ps1_metadata`; group 1 holds
                the JSON payload.

        Returns:
            A new instance built from the payload. `pid` and `exit_code` are
            coerced to int and fall back to -1 when they cannot be converted.
        """
        metadata = json.loads(match.group(1))
        # Create a copy of metadata to avoid modifying the original
        processed = metadata.copy()
        # Convert numeric fields.
        # OverflowError is caught as well as ValueError/TypeError: JSON values
        # such as 1e999 or Infinity decode to float('inf'), and int(inf)
        # raises OverflowError rather than ValueError.
        if 'pid' in metadata:
            try:
                processed['pid'] = int(float(str(metadata['pid'])))
            except (ValueError, TypeError, OverflowError):
                processed['pid'] = -1
        if 'exit_code' in metadata:
            try:
                processed['exit_code'] = int(float(str(metadata['exit_code'])))
            except (ValueError, TypeError, OverflowError):
                logger.warning(
                    f'Failed to parse exit code: {metadata["exit_code"]}. Setting to -1.'
                )
                processed['exit_code'] = -1
        return cls(**processed)
|
||||
|
||||
|
||||
@dataclass
|
||||
class CmdOutputObservation(Observation):
|
||||
"""This data class represents the output of a command."""
|
||||
|
||||
command_id: int
|
||||
command: str
|
||||
exit_code: int = 0
|
||||
hidden: bool = False
|
||||
observation: str = ObservationType.RUN
|
||||
interpreter_details: str = ''
|
||||
# Additional metadata captured from PS1
|
||||
metadata: CmdOutputMetadata = field(default_factory=CmdOutputMetadata)
|
||||
# Whether the command output should be hidden from the user
|
||||
hidden: bool = False
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
content: str,
|
||||
command: str,
|
||||
observation: str = ObservationType.RUN,
|
||||
metadata: dict | CmdOutputMetadata | None = None,
|
||||
hidden: bool = False,
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(content)
|
||||
self.command = command
|
||||
self.observation = observation
|
||||
self.hidden = hidden
|
||||
if isinstance(metadata, dict):
|
||||
self.metadata = CmdOutputMetadata(**metadata)
|
||||
else:
|
||||
self.metadata = metadata or CmdOutputMetadata()
|
||||
|
||||
# Handle legacy attribute
|
||||
if 'exit_code' in kwargs:
|
||||
self.metadata.exit_code = kwargs['exit_code']
|
||||
if 'command_id' in kwargs:
|
||||
self.metadata.pid = kwargs['command_id']
|
||||
|
||||
@property
|
||||
def command_id(self) -> int:
|
||||
return self.metadata.pid
|
||||
|
||||
@property
|
||||
def exit_code(self) -> int:
|
||||
return self.metadata.exit_code
|
||||
|
||||
@property
|
||||
def error(self) -> bool:
|
||||
@ -28,7 +145,21 @@ class CmdOutputObservation(Observation):
|
||||
return not self.error
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f'**CmdOutputObservation (source={self.source}, exit code={self.exit_code})**\n{self.content}'
|
||||
return (
|
||||
f'**CmdOutputObservation (source={self.source}, exit code={self.exit_code}, '
|
||||
f'metadata={json.dumps(self.metadata.model_dump(), indent=2)})**\n'
|
||||
'--BEGIN AGENT OBSERVATION--\n'
|
||||
f'{self._to_agent_observation()}\n'
|
||||
'--END AGENT OBSERVATION--'
|
||||
)
|
||||
|
||||
def _to_agent_observation(self) -> str:
|
||||
ret = f'{self.metadata.prefix}{self.content}{self.metadata.suffix}'
|
||||
if self.metadata.working_dir:
|
||||
ret += f'\n[Current working directory: {self.metadata.working_dir}]'
|
||||
if self.metadata.py_interpreter_path:
|
||||
ret += f'\n[Python interpreter: {self.metadata.py_interpreter_path}]'
|
||||
return ret
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@ -18,6 +18,9 @@ class FileReadObservation(Observation):
|
||||
def message(self) -> str:
|
||||
return f'I read the file {self.path}.'
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f'[Read from {self.path} is successful.]\n' f'{self.content}'
|
||||
|
||||
|
||||
@dataclass
|
||||
class FileWriteObservation(Observation):
|
||||
@ -30,6 +33,9 @@ class FileWriteObservation(Observation):
|
||||
def message(self) -> str:
|
||||
return f'I wrote to the file {self.path}.'
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f'[Write to {self.path} is successful.]\n' f'{self.content}'
|
||||
|
||||
|
||||
@dataclass
|
||||
class FileEditObservation(Observation):
|
||||
|
||||
@ -67,6 +67,10 @@ def action_from_dict(action: dict) -> Action:
|
||||
if 'images_urls' in args:
|
||||
args['image_urls'] = args.pop('images_urls')
|
||||
|
||||
# keep_prompt has been deprecated in https://github.com/All-Hands-AI/OpenHands/pull/4881
|
||||
if 'keep_prompt' in args:
|
||||
args.pop('keep_prompt')
|
||||
|
||||
try:
|
||||
decoded_action = action_class(**args)
|
||||
if 'timeout' in action:
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
from dataclasses import asdict
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from openhands.events import Event, EventSource
|
||||
from openhands.events.observation.observation import Observation
|
||||
from openhands.events.serialization.action import action_from_dict
|
||||
@ -56,6 +58,12 @@ def event_from_dict(data) -> 'Event':
|
||||
return evt
|
||||
|
||||
|
||||
def _convert_pydantic_to_dict(obj: BaseModel | dict) -> dict:
    """Dump a pydantic model to a plain dict; return any other value unchanged."""
    return obj.model_dump() if isinstance(obj, BaseModel) else obj
|
||||
|
||||
|
||||
def event_to_dict(event: 'Event') -> dict:
|
||||
props = asdict(event)
|
||||
d = {}
|
||||
@ -82,7 +90,11 @@ def event_to_dict(event: 'Event') -> dict:
|
||||
d['timeout'] = event.timeout
|
||||
elif 'observation' in d:
|
||||
d['content'] = props.pop('content', '')
|
||||
d['extras'] = props
|
||||
|
||||
# props is a dict whose values can include a complex object like an instance of a BaseModel subclass
|
||||
# such as CmdOutputMetadata
|
||||
# we serialize it along with the rest
|
||||
d['extras'] = {k: _convert_pydantic_to_dict(v) for k, v in props.items()}
|
||||
# Include success field for CmdOutputObservation
|
||||
if hasattr(event, 'success'):
|
||||
d['success'] = event.success
|
||||
@ -109,7 +121,6 @@ def event_to_memory(event: 'Event', max_message_chars: int) -> dict:
|
||||
# runnable actions have some extra fields used in the BE/FE, which should not be sent to the LLM
|
||||
if 'args' in d:
|
||||
d['args'].pop('blocking', None)
|
||||
d['args'].pop('keep_prompt', None)
|
||||
d['args'].pop('confirmation_state', None)
|
||||
|
||||
if 'extras' in d:
|
||||
|
||||
@ -1,6 +1,9 @@
|
||||
import copy
|
||||
|
||||
from openhands.events.observation.agent import AgentStateChangedObservation
|
||||
from openhands.events.observation.browse import BrowserOutputObservation
|
||||
from openhands.events.observation.commands import (
|
||||
CmdOutputMetadata,
|
||||
CmdOutputObservation,
|
||||
IPythonRunCellObservation,
|
||||
)
|
||||
@ -37,6 +40,26 @@ OBSERVATION_TYPE_TO_CLASS = {
|
||||
}
|
||||
|
||||
|
||||
def _update_cmd_output_metadata(
    metadata: dict | CmdOutputMetadata | None, **kwargs
) -> dict | CmdOutputMetadata:
    """Apply keyword overrides to the metadata of a CmdOutputObservation.

    - None: a fresh CmdOutputMetadata is built from the overrides.
    - dict: the dict is updated in place and returned.
    - CmdOutputMetadata: each override is set as an attribute on the
      instance, which is then returned.
    Any other value is returned untouched.
    """
    if metadata is None:
        return CmdOutputMetadata(**kwargs)

    if isinstance(metadata, dict):
        metadata.update(**kwargs)
        return metadata

    if isinstance(metadata, CmdOutputMetadata):
        for field_name, new_value in kwargs.items():
            setattr(metadata, field_name, new_value)
    return metadata
|
||||
|
||||
|
||||
def observation_from_dict(observation: dict) -> Observation:
|
||||
observation = observation.copy()
|
||||
if 'observation' not in observation:
|
||||
@ -49,6 +72,24 @@ def observation_from_dict(observation: dict) -> Observation:
|
||||
observation.pop('observation')
|
||||
observation.pop('message', None)
|
||||
content = observation.pop('content', '')
|
||||
extras = observation.pop('extras', {})
|
||||
extras = copy.deepcopy(observation.pop('extras', {}))
|
||||
|
||||
# Handle legacy attributes for CmdOutputObservation
|
||||
if 'exit_code' in extras:
|
||||
extras['metadata'] = _update_cmd_output_metadata(
|
||||
extras.get('metadata', None), exit_code=extras.pop('exit_code')
|
||||
)
|
||||
if 'command_id' in extras:
|
||||
extras['metadata'] = _update_cmd_output_metadata(
|
||||
extras.get('metadata', None), pid=extras.pop('command_id')
|
||||
)
|
||||
# convert metadata to CmdOutputMetadata if it is a dict
|
||||
if observation_class is CmdOutputObservation:
|
||||
if 'metadata' in extras and isinstance(extras['metadata'], dict):
|
||||
extras['metadata'] = CmdOutputMetadata(**extras['metadata'])
|
||||
elif 'metadata' in extras and isinstance(extras['metadata'], CmdOutputMetadata):
|
||||
pass
|
||||
else:
|
||||
extras['metadata'] = CmdOutputMetadata()
|
||||
|
||||
return observation_class(content=content, **extras)
|
||||
|
||||
@ -2,9 +2,11 @@ from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action.action import Action
|
||||
from openhands.events.action.empty import NullAction
|
||||
from openhands.events.event import Event
|
||||
from openhands.events.observation.commands import CmdOutputObservation
|
||||
from openhands.events.observation.empty import NullObservation
|
||||
from openhands.events.observation.observation import Observation
|
||||
from openhands.events.observation import (
|
||||
CmdOutputObservation,
|
||||
NullObservation,
|
||||
Observation,
|
||||
)
|
||||
|
||||
|
||||
def get_pairs_from_events(events: list[Event]) -> list[tuple[Action, Observation]]:
|
||||
|
||||
@ -122,10 +122,7 @@ async def complete_runtime(
|
||||
n_retries = 0
|
||||
git_patch = None
|
||||
while n_retries < 5:
|
||||
action = CmdRunAction(
|
||||
command=f'git diff --no-color --cached {base_commit}',
|
||||
keep_prompt=False,
|
||||
)
|
||||
action = CmdRunAction(command=f'git diff --no-color --cached {base_commit}')
|
||||
action.timeout = 600 + 100 * n_retries
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
|
||||
@ -95,31 +95,34 @@ class ActionExecutor:
|
||||
browsergym_eval_env: str | None,
|
||||
) -> None:
|
||||
self.plugins_to_load = plugins_to_load
|
||||
self._initial_pwd = work_dir
|
||||
self._initial_cwd = work_dir
|
||||
self.username = username
|
||||
self.user_id = user_id
|
||||
_updated_user_id = init_user_and_working_directory(
|
||||
username=username, user_id=self.user_id, initial_pwd=work_dir
|
||||
username=username, user_id=self.user_id, initial_cwd=work_dir
|
||||
)
|
||||
if _updated_user_id is not None:
|
||||
self.user_id = _updated_user_id
|
||||
|
||||
self.bash_session = BashSession(
|
||||
work_dir=work_dir,
|
||||
username=username,
|
||||
)
|
||||
|
||||
self.bash_session: BashSession | None = None
|
||||
self.lock = asyncio.Lock()
|
||||
self.plugins: dict[str, Plugin] = {}
|
||||
self.browser = BrowserEnv(browsergym_eval_env)
|
||||
self.start_time = time.time()
|
||||
self.last_execution_time = self.start_time
|
||||
self._initialized = False
|
||||
|
||||
@property
|
||||
def initial_pwd(self):
|
||||
return self._initial_pwd
|
||||
def initial_cwd(self):
|
||||
return self._initial_cwd
|
||||
|
||||
async def ainit(self):
|
||||
# bash needs to be initialized first
|
||||
self.bash_session = BashSession(
|
||||
work_dir=self._initial_cwd,
|
||||
username=self.username,
|
||||
)
|
||||
self.bash_session.initialize()
|
||||
await wait_all(
|
||||
(self._init_plugin(plugin) for plugin in self.plugins_to_load),
|
||||
timeout=30,
|
||||
@ -138,8 +141,14 @@ class ActionExecutor:
|
||||
|
||||
await self._init_bash_commands()
|
||||
logger.debug('Runtime client initialized.')
|
||||
self._initialized = True
|
||||
|
||||
@property
|
||||
def initialized(self) -> bool:
|
||||
return self._initialized
|
||||
|
||||
async def _init_plugin(self, plugin: Plugin):
|
||||
assert self.bash_session is not None
|
||||
await plugin.initialize(self.username)
|
||||
self.plugins[plugin.name] = plugin
|
||||
logger.debug(f'Initializing plugin: {plugin.name}')
|
||||
@ -147,7 +156,7 @@ class ActionExecutor:
|
||||
if isinstance(plugin, JupyterPlugin):
|
||||
await self.run_ipython(
|
||||
IPythonRunCellAction(
|
||||
code=f'import os; os.chdir("{self.bash_session.pwd}")'
|
||||
code=f'import os; os.chdir("{self.bash_session.cwd}")'
|
||||
)
|
||||
)
|
||||
|
||||
@ -177,30 +186,32 @@ class ActionExecutor:
|
||||
async def run(
|
||||
self, action: CmdRunAction
|
||||
) -> CmdOutputObservation | ErrorObservation:
|
||||
obs = await call_sync_from_async(self.bash_session.run, action)
|
||||
assert self.bash_session is not None
|
||||
obs = await call_sync_from_async(self.bash_session.execute, action)
|
||||
return obs
|
||||
|
||||
async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
|
||||
assert self.bash_session is not None
|
||||
if 'jupyter' in self.plugins:
|
||||
_jupyter_plugin: JupyterPlugin = self.plugins['jupyter'] # type: ignore
|
||||
# This is used to make AgentSkills in Jupyter aware of the
|
||||
# current working directory in Bash
|
||||
jupyter_pwd = getattr(self, '_jupyter_pwd', None)
|
||||
if self.bash_session.pwd != jupyter_pwd:
|
||||
jupyter_cwd = getattr(self, '_jupyter_cwd', None)
|
||||
if self.bash_session.cwd != jupyter_cwd:
|
||||
logger.debug(
|
||||
f'{self.bash_session.pwd} != {jupyter_pwd} -> reset Jupyter PWD'
|
||||
f'{self.bash_session.cwd} != {jupyter_cwd} -> reset Jupyter PWD'
|
||||
)
|
||||
reset_jupyter_pwd_code = (
|
||||
f'import os; os.chdir("{self.bash_session.pwd}")'
|
||||
reset_jupyter_cwd_code = (
|
||||
f'import os; os.chdir("{self.bash_session.cwd}")'
|
||||
)
|
||||
_aux_action = IPythonRunCellAction(code=reset_jupyter_pwd_code)
|
||||
_aux_action = IPythonRunCellAction(code=reset_jupyter_cwd_code)
|
||||
_reset_obs: IPythonRunCellObservation = await _jupyter_plugin.run(
|
||||
_aux_action
|
||||
)
|
||||
logger.debug(
|
||||
f'Changed working directory in IPython to: {self.bash_session.pwd}. Output: {_reset_obs}'
|
||||
f'Changed working directory in IPython to: {self.bash_session.cwd}. Output: {_reset_obs}'
|
||||
)
|
||||
self._jupyter_pwd = self.bash_session.pwd
|
||||
self._jupyter_cwd = self.bash_session.cwd
|
||||
|
||||
obs: IPythonRunCellObservation = await _jupyter_plugin.run(action)
|
||||
obs.content = obs.content.rstrip()
|
||||
@ -266,7 +277,7 @@ class ActionExecutor:
|
||||
|
||||
if action.include_extra:
|
||||
obs.content += (
|
||||
f'\n[Jupyter current working directory: {self.bash_session.pwd}]'
|
||||
f'\n[Jupyter current working directory: {self.bash_session.cwd}]'
|
||||
)
|
||||
obs.content += f'\n[Jupyter Python interpreter: {_jupyter_plugin.python_interpreter_path}]'
|
||||
return obs
|
||||
@ -282,6 +293,7 @@ class ActionExecutor:
|
||||
return str(filepath)
|
||||
|
||||
async def read(self, action: FileReadAction) -> Observation:
|
||||
assert self.bash_session is not None
|
||||
if action.impl_source == FileReadSource.OH_ACI:
|
||||
return await self.run_ipython(
|
||||
IPythonRunCellAction(
|
||||
@ -292,7 +304,7 @@ class ActionExecutor:
|
||||
|
||||
# NOTE: the client code is running inside the sandbox,
|
||||
# so there's no need to check permission
|
||||
working_dir = self.bash_session.workdir
|
||||
working_dir = self.bash_session.cwd
|
||||
filepath = self._resolve_path(action.path, working_dir)
|
||||
try:
|
||||
if filepath.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
|
||||
@ -339,7 +351,8 @@ class ActionExecutor:
|
||||
return FileReadObservation(path=filepath, content=code_view)
|
||||
|
||||
async def write(self, action: FileWriteAction) -> Observation:
|
||||
working_dir = self.bash_session.workdir
|
||||
assert self.bash_session is not None
|
||||
working_dir = self.bash_session.cwd
|
||||
filepath = self._resolve_path(action.path, working_dir)
|
||||
|
||||
insert = action.content.split('\n')
|
||||
@ -400,7 +413,8 @@ class ActionExecutor:
|
||||
return await browse(action, self.browser)
|
||||
|
||||
def close(self):
|
||||
self.bash_session.close()
|
||||
if self.bash_session is not None:
|
||||
self.bash_session.close()
|
||||
self.browser.close()
|
||||
|
||||
|
||||
@ -609,6 +623,8 @@ if __name__ == '__main__':
|
||||
|
||||
@app.get('/alive')
async def alive():
    """Report 'ok' once the client exists and is initialized, else 'not initialized'."""
    is_ready = client is not None and client.initialized
    status = 'ok' if is_ready else 'not initialized'
    return {'status': status}
|
||||
|
||||
# ================================
|
||||
@ -658,11 +674,11 @@ if __name__ == '__main__':
|
||||
|
||||
# Get the full path of the requested directory
|
||||
if path is None:
|
||||
full_path = client.initial_pwd
|
||||
full_path = client.initial_cwd
|
||||
elif os.path.isabs(path):
|
||||
full_path = path
|
||||
else:
|
||||
full_path = os.path.join(client.initial_pwd, path)
|
||||
full_path = os.path.join(client.initial_cwd, path)
|
||||
|
||||
if not os.path.exists(full_path):
|
||||
# if user just removed a folder, prevent server error 500 in UI
|
||||
|
||||
@ -264,7 +264,6 @@ class ActionExecutionClient(Runtime):
|
||||
raise AgentRuntimeTimeoutError(
|
||||
f'Runtime failed to return execute_action before the requested timeout of {action.timeout}s'
|
||||
)
|
||||
|
||||
return obs
|
||||
|
||||
def run(self, action: CmdRunAction) -> Observation:
|
||||
|
||||
@ -219,7 +219,9 @@ class RemoteRuntime(ActionExecutionClient):
|
||||
'image': self.container_image,
|
||||
'command': command,
|
||||
'working_dir': '/openhands/code/',
|
||||
'environment': {'DEBUG': 'true'} if self.config.debug else {},
|
||||
'environment': {'DEBUG': 'true'}
|
||||
if self.config.debug or os.environ.get('DEBUG', 'false').lower() == 'true'
|
||||
else {},
|
||||
'session_id': self.sid,
|
||||
'resource_factor': self.config.sandbox.remote_runtime_resource_factor,
|
||||
}
|
||||
@ -364,7 +366,10 @@ class RemoteRuntime(ActionExecutionClient):
|
||||
try:
|
||||
return super()._send_action_server_request(method, url, **kwargs)
|
||||
except requests.Timeout:
|
||||
self.log('error', 'No response received within the timeout period.')
|
||||
self.log(
|
||||
'error',
|
||||
f'No response received within the timeout period for url: {url}',
|
||||
)
|
||||
raise
|
||||
except requests.HTTPError as e:
|
||||
if e.response.status_code == 404:
|
||||
|
||||
@ -1,18 +1,21 @@
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
from enum import Enum
|
||||
|
||||
import bashlex
|
||||
import pexpect
|
||||
import libtmux
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action import CmdRunAction
|
||||
from openhands.events.event import EventSource
|
||||
from openhands.events.observation import (
|
||||
from openhands.events.observation import ErrorObservation
|
||||
from openhands.events.observation.commands import (
|
||||
CMD_OUTPUT_PS1_END,
|
||||
CmdOutputMetadata,
|
||||
CmdOutputObservation,
|
||||
ErrorObservation,
|
||||
)
|
||||
|
||||
SOFT_TIMEOUT_SECONDS = 5
|
||||
from openhands.utils.shutdown_listener import should_continue
|
||||
|
||||
|
||||
def split_bash_commands(commands):
|
||||
@ -66,269 +69,493 @@ def split_bash_commands(commands):
|
||||
return result
|
||||
|
||||
|
||||
def escape_bash_special_chars(command: str) -> str:
    r"""Escape characters that have different interpretations in bash vs python.

    Backslash-escaped shell operators (``\;``, ``\&``, ``\|``, ``\>``, ``\<``)
    get their backslash doubled when they appear in unquoted words or in the
    text between words. Quoted strings, command substitutions and heredoc
    bodies are copied through unchanged.

    Args:
        command: The bash command line to process.

    Returns:
        The escaped command; '' when `command` is blank; `command` unchanged
        when bashlex cannot parse it.
    """
    if command.strip() == '':
        return ''

    def _escape_operators(text: str) -> str:
        # Double the backslash that precedes ; & | > <
        return re.sub(r'\\([;&|><])', r'\\\\\1', text)

    try:
        parts = []
        last_pos = 0

        def visit_node(node):
            nonlocal last_pos
            if (
                node.kind == 'redirect'
                and hasattr(node, 'heredoc')
                and node.heredoc is not None
            ):
                # We're entering a heredoc - preserve everything as-is,
                # including the text leading up to the redirect.
                between = command[last_pos : node.pos[0]]
                parts.append(between)
                # Add the heredoc start marker
                parts.append(command[node.pos[0] : node.heredoc.pos[0]])
                # Add the heredoc content as-is
                parts.append(command[node.heredoc.pos[0] : node.heredoc.pos[1]])
                last_pos = node.pos[1]
                return

            if node.kind == 'word':
                # Get the raw text between the last position and current word
                between = command[last_pos : node.pos[0]]
                word_text = command[node.pos[0] : node.pos[1]]

                # Add the between text, escaping special characters
                parts.append(_escape_operators(between))

                # Check if word_text is a quoted string or command substitution
                if (
                    (word_text.startswith('"') and word_text.endswith('"'))
                    or (word_text.startswith("'") and word_text.endswith("'"))
                    or (word_text.startswith('$(') and word_text.endswith(')'))
                    or (word_text.startswith('`') and word_text.endswith('`'))
                ):
                    # Preserve quoted strings and command substitutions as-is
                    parts.append(word_text)
                else:
                    # Escape special chars in unquoted text
                    parts.append(_escape_operators(word_text))

                last_pos = node.pos[1]
                return

            # Visit child nodes
            if hasattr(node, 'parts'):
                for part in node.parts:
                    visit_node(part)

        # Process all nodes in the AST
        nodes = list(bashlex.parse(command))
        for node in nodes:
            parts.append(_escape_operators(command[last_pos : node.pos[0]]))
            last_pos = node.pos[0]
            visit_node(node)

        # Handle any remaining text after the last word
        remaining = command[last_pos:]
        parts.append(remaining)
        return ''.join(parts)
    except (bashlex.errors.ParsingError, NotImplementedError):
        # Fallback if parsing fails. bashlex raises NotImplementedError (not
        # ParsingError) for shell constructs it does not support, so both are
        # treated as "cannot parse" and the command is returned untouched.
        logger.warning(f'Failed to parse command: {command}')
        return command
|
||||
|
||||
|
||||
class BashCommandStatus(Enum):
    """String-valued status codes for a command run through the bash session.

    NOTE(review): the per-member meanings below are inferred from the member
    names only; confirm against the code that assigns them.
    """

    # presumably: the command is still running / more output is expected
    CONTINUE = 'continue'
    # presumably: the command finished
    COMPLETED = 'completed'
    # presumably: output stopped changing for the no-change timeout window
    NO_CHANGE_TIMEOUT = 'no_change_timeout'
    # presumably: the overall (hard) timeout for the command was reached
    HARD_TIMEOUT = 'hard_timeout'
|
||||
|
||||
|
||||
def _remove_command_prefix(command_output: str, command: str) -> str:
|
||||
return command_output.lstrip().removeprefix(command.lstrip()).lstrip()
|
||||
|
||||
|
||||
class BashSession:
|
||||
"""A class that maintains a pexpect process and provides a simple interface for running commands and interacting with the shell."""
|
||||
POLL_INTERVAL = 0.5
|
||||
HISTORY_LIMIT = 10_000
|
||||
PS1 = CmdOutputMetadata.to_ps1_prompt()
|
||||
|
||||
def __init__(self, work_dir: str, username: str):
|
||||
self._pwd = work_dir
|
||||
def __init__(
|
||||
self,
|
||||
work_dir: str,
|
||||
username: str | None = None,
|
||||
no_change_timeout_seconds: float = 30.0,
|
||||
):
|
||||
self.NO_CHANGE_TIMEOUT_SECONDS = no_change_timeout_seconds
|
||||
self.work_dir = work_dir
|
||||
self.username = username
|
||||
self._initialized = False
|
||||
|
||||
self.shell = pexpect.spawn(
|
||||
f'su {username}',
|
||||
encoding='utf-8',
|
||||
codec_errors='replace',
|
||||
echo=False,
|
||||
def initialize(self):
|
||||
self.server = libtmux.Server()
|
||||
window_command = '/bin/bash'
|
||||
if self.username:
|
||||
# This starts a non-login (new) shell for the given user
|
||||
window_command = f'su {self.username} -'
|
||||
|
||||
session_name = f'openhands-{self.username}-{uuid.uuid4()}'
|
||||
self.session = self.server.new_session(
|
||||
session_name=session_name,
|
||||
window_name='bash',
|
||||
window_command=window_command,
|
||||
start_directory=self.work_dir,
|
||||
kill_session=True,
|
||||
x=1000,
|
||||
y=1000,
|
||||
)
|
||||
self._init_bash_shell(work_dir)
|
||||
|
||||
# Set history limit to a large number to avoid losing history
|
||||
# https://unix.stackexchange.com/questions/43414/unlimited-history-in-tmux
|
||||
self.session.set_option('history-limit', str(self.HISTORY_LIMIT), _global=True)
|
||||
self.session.history_limit = self.HISTORY_LIMIT
|
||||
# We need to create a new pane because the initial pane's history limit is (default) 2000
|
||||
_initial_window = self.session.attached_window
|
||||
self.window = self.session.new_window(
|
||||
window_shell=window_command,
|
||||
start_directory=self.work_dir,
|
||||
)
|
||||
self.pane = self.window.attached_pane
|
||||
logger.debug(f'pane: {self.pane}; history_limit: {self.session.history_limit}')
|
||||
_initial_window.kill_window()
|
||||
|
||||
# Configure bash to use simple PS1 and disable PS2
|
||||
self.pane.send_keys(
|
||||
f'export PROMPT_COMMAND=\'export PS1="{self.PS1}"\'; export PS2=""'
|
||||
)
|
||||
time.sleep(0.1) # Wait for command to take effect
|
||||
self._clear_screen()
|
||||
|
||||
# Store the last command for interactive input handling
|
||||
self.prev_status: BashCommandStatus | None = None
|
||||
self.prev_output: str = ''
|
||||
self._closed: bool = False
|
||||
logger.debug(f'Bash session initialized with work dir: {self.work_dir}')
|
||||
|
||||
# Maintain the current working directory
|
||||
self._cwd = os.path.abspath(self.work_dir)
|
||||
self._initialized = True
|
||||
|
||||
def __del__(self):
|
||||
"""Ensure the session is closed when the object is destroyed."""
|
||||
self.close()
|
||||
|
||||
def _get_pane_content(self) -> str:
|
||||
"""Capture the current pane content and update the buffer."""
|
||||
content = '\n'.join(
|
||||
map(
|
||||
# avoid double newlines
|
||||
lambda line: line.rstrip(),
|
||||
self.pane.cmd('capture-pane', '-J', '-pS', '-').stdout,
|
||||
)
|
||||
)
|
||||
return content
|
||||
|
||||
def close(self):
|
||||
self.shell.close()
|
||||
"""Clean up the session."""
|
||||
if self._closed:
|
||||
return
|
||||
self.session.kill_session()
|
||||
self._closed = True
|
||||
|
||||
@property
|
||||
def pwd(self):
|
||||
return self._pwd
|
||||
def cwd(self):
|
||||
return self._cwd
|
||||
|
||||
@property
|
||||
def workdir(self):
|
||||
return self._get_working_directory()
|
||||
def _is_special_key(self, command: str) -> bool:
|
||||
"""Check if the command is a special key."""
|
||||
# Special keys are of the form C-<key>
|
||||
_command = command.strip()
|
||||
return _command.startswith('C-') and len(_command) == 3
|
||||
|
||||
def _get_working_directory(self):
|
||||
# NOTE: this is part of initialization, so we hard code the timeout
|
||||
result, exit_code = self._execute_bash('pwd', timeout=60, keep_prompt=False)
|
||||
if exit_code != 0:
|
||||
raise RuntimeError(
|
||||
f'Failed to get working directory (exit code: {exit_code}): {result}'
|
||||
)
|
||||
return result.strip()
|
||||
def _clear_screen(self):
|
||||
"""Clear the tmux pane screen and history."""
|
||||
self.pane.send_keys('C-l', enter=False)
|
||||
time.sleep(0.1)
|
||||
self.pane.cmd('clear-history')
|
||||
|
||||
def _init_bash_shell(self, work_dir: str):
|
||||
self.__bash_PS1 = (
|
||||
r'[PEXPECT_BEGIN]\n'
|
||||
r'$(which python >/dev/null 2>&1 && echo "[Python Interpreter: $(which python)]\n")'
|
||||
r'\u@\h:\w\n'
|
||||
r'[PEXPECT_END]'
|
||||
)
|
||||
|
||||
# This should NOT match "PS1=\u@\h:\w [PEXPECT]$" when `env` is executed
|
||||
self.__bash_expect_regex = r'\[PEXPECT_BEGIN\]\s*(.*?)\s*([a-z0-9_-]*)@([a-zA-Z0-9.-]*):(.+)\s*\[PEXPECT_END\]'
|
||||
# Set umask to allow group write permissions
|
||||
self.shell.sendline(f'umask 002; export PS1="{self.__bash_PS1}"; export PS2=""')
|
||||
self.shell.expect(self.__bash_expect_regex)
|
||||
|
||||
self.shell.sendline(
|
||||
f'if [ ! -d "{work_dir}" ]; then mkdir -p "{work_dir}"; fi && cd "{work_dir}"'
|
||||
)
|
||||
self.shell.expect(self.__bash_expect_regex)
|
||||
logger.debug(
|
||||
f'Bash initialized. Working directory: {work_dir}. Output: [{self.shell.before}]'
|
||||
)
|
||||
# Ensure the group has write permissions on the working directory
|
||||
self.shell.sendline(f'chmod g+rw "{work_dir}"')
|
||||
self.shell.expect(self.__bash_expect_regex)
|
||||
|
||||
def _get_bash_prompt_and_update_pwd(self):
    """Parse the last matched prompt, record the working directory, rebuild it.

    Reads ``self.shell.after``, extracts the user name and working directory
    using the stored prompt regex, stores the user-expanded directory in
    ``self._pwd`` and returns a display prompt.

    Returns:
        An empty string if the shell timed out; otherwise the rebuilt prompt,
        in which the host name is replaced by ``openhands-workspace`` and the
        trailing sigil is ``#`` for ``root`` and ``$`` for everyone else.

    Raises:
        RuntimeError: If the shell reported EOF.
    """
    raw_prompt = self.shell.after
    if raw_prompt == pexpect.EOF:
        logger.error(f'Bash shell EOF! {self.shell.after=}, {self.shell.before=}')
        raise RuntimeError('Bash shell EOF')
    if raw_prompt == pexpect.TIMEOUT:
        logger.warning('Bash shell timeout')
        return ''

    # With multi-line bash commands the prompt is repeated and the regex match
    # captures every copy; keep only the text from the last '[PEXPECT_BEGIN]'
    # onwards, i.e. the newest prompt.
    _, marker, remainder = raw_prompt.rpartition('[PEXPECT_BEGIN]')
    if marker:
        raw_prompt = marker + remainder

    # Split the prompt into extra info, user name, host name and directory.
    parsed = re.match(self.__bash_expect_regex, raw_prompt)
    assert (
        parsed is not None
    ), f'Failed to parse bash prompt: {raw_prompt}. This should not happen.'
    extra_info, user, _host, cwd = parsed.groups()
    cwd = cwd.rstrip()
    self._pwd = os.path.expanduser(cwd)

    # Re-assemble the prompt: the real host name is dropped in favour of
    # 'openhands-workspace'.
    sigil = '#' if user == 'root' else '$'
    return f'{extra_info.strip()}\n{user}@openhands-workspace:{cwd} {sigil} '
|
||||
|
||||
def _execute_bash(
|
||||
def _get_command_output(
|
||||
self,
|
||||
command: str,
|
||||
timeout: int,
|
||||
keep_prompt: bool = True,
|
||||
kill_on_timeout: bool = True,
|
||||
) -> tuple[str, int]:
|
||||
logger.debug(f'Executing command: {command}')
|
||||
self.shell.sendline(command)
|
||||
return self._continue_bash(
|
||||
timeout=timeout, keep_prompt=keep_prompt, kill_on_timeout=kill_on_timeout
|
||||
raw_command_output: str,
|
||||
metadata: CmdOutputMetadata,
|
||||
continue_prefix: str = '',
|
||||
) -> str:
|
||||
"""Get the command output with the previous command output removed.
|
||||
|
||||
Args:
|
||||
command: The command that was executed.
|
||||
raw_command_output: The raw output from the command.
|
||||
metadata: The metadata object to store prefix/suffix in.
|
||||
continue_prefix: The prefix to add to the command output if it's a continuation of the previous command.
|
||||
"""
|
||||
# remove the previous command output from the new output if any
|
||||
if self.prev_output:
|
||||
command_output = raw_command_output.removeprefix(self.prev_output)
|
||||
metadata.prefix = continue_prefix
|
||||
else:
|
||||
command_output = raw_command_output
|
||||
self.prev_output = raw_command_output # update current command output anyway
|
||||
command_output = _remove_command_prefix(command_output, command)
|
||||
return command_output.rstrip()
|
||||
|
||||
def _handle_completed_command(
|
||||
self, command: str, pane_content: str, ps1_matches: list[re.Match]
|
||||
) -> CmdOutputObservation:
|
||||
is_special_key = self._is_special_key(command)
|
||||
assert len(ps1_matches) >= 1, (
|
||||
f'Expected at least one PS1 metadata block, but got {len(ps1_matches)}.\n'
|
||||
f'---FULL OUTPUT---\n{pane_content!r}\n---END OF OUTPUT---'
|
||||
)
|
||||
metadata = CmdOutputMetadata.from_ps1_match(ps1_matches[-1])
|
||||
|
||||
# Special case where the previous command output is truncated due to history limit
|
||||
# We should get the content BEFORE the last PS1 prompt
|
||||
get_content_before_last_match = bool(len(ps1_matches) == 1)
|
||||
|
||||
# Update the current working directory if it has changed
|
||||
if metadata.working_dir != self._cwd and metadata.working_dir:
|
||||
self._cwd = metadata.working_dir
|
||||
|
||||
logger.debug(f'COMMAND OUTPUT: {pane_content}')
|
||||
# Extract the command output between the two PS1 prompts
|
||||
raw_command_output = self._combine_outputs_between_matches(
|
||||
pane_content,
|
||||
ps1_matches,
|
||||
get_content_before_last_match=get_content_before_last_match,
|
||||
)
|
||||
|
||||
def _interrupt_bash(
|
||||
if get_content_before_last_match:
|
||||
# Count the number of lines in the truncated output
|
||||
num_lines = len(raw_command_output.splitlines())
|
||||
metadata.prefix = f'[Previous command outputs are truncated. Showing the last {num_lines} lines of the output below.]\n'
|
||||
|
||||
metadata.suffix = (
|
||||
f'\n[The command completed with exit code {metadata.exit_code}.]'
|
||||
if not is_special_key
|
||||
else f'\n[The command completed with exit code {metadata.exit_code}. CTRL+{command[-1].upper()} was sent.]'
|
||||
)
|
||||
command_output = self._get_command_output(
|
||||
command,
|
||||
raw_command_output,
|
||||
metadata,
|
||||
)
|
||||
self.prev_status = BashCommandStatus.COMPLETED
|
||||
self.prev_output = '' # Reset previous command output
|
||||
self._ready_for_next_command()
|
||||
return CmdOutputObservation(
|
||||
content=command_output,
|
||||
command=command,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
def _handle_nochange_timeout_command(
|
||||
self,
|
||||
action_timeout: int | None,
|
||||
interrupt_timeout: int | None = None,
|
||||
max_retries: int = 2,
|
||||
) -> tuple[str, int]:
|
||||
interrupt_timeout = interrupt_timeout or 1 # default timeout for SIGINT
|
||||
# try to interrupt the bash shell use SIGINT
|
||||
while max_retries > 0:
|
||||
self.shell.sendintr() # send SIGINT to the shell
|
||||
logger.debug('Sent SIGINT to bash. Waiting for output...')
|
||||
try:
|
||||
self.shell.expect(self.__bash_expect_regex, timeout=interrupt_timeout)
|
||||
output = self.shell.before
|
||||
logger.debug(f'Received output after SIGINT: {output}')
|
||||
exit_code = 130 # SIGINT
|
||||
|
||||
_additional_msg = ''
|
||||
if action_timeout is not None:
|
||||
_additional_msg = (
|
||||
f'Command timed out after {action_timeout} seconds. '
|
||||
)
|
||||
output += (
|
||||
'\r\n\r\n'
|
||||
+ f'[{_additional_msg}SIGINT was sent to interrupt the command.]'
|
||||
)
|
||||
return output, exit_code
|
||||
except pexpect.TIMEOUT as e:
|
||||
logger.warning(f'Bash pexpect.TIMEOUT while waiting for SIGINT: {e}')
|
||||
max_retries -= 1
|
||||
|
||||
# fall back to send control-z
|
||||
logger.error(
|
||||
'Failed to get output after SIGINT. Max retries reached. Sending control-z...'
|
||||
command: str,
|
||||
pane_content: str,
|
||||
ps1_matches: list[re.Match],
|
||||
) -> CmdOutputObservation:
|
||||
self.prev_status = BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
if len(ps1_matches) != 1:
|
||||
logger.warning(
|
||||
'Expected exactly one PS1 metadata block BEFORE the execution of a command, '
|
||||
f'but got {len(ps1_matches)} PS1 metadata blocks:\n---\n{pane_content!r}\n---'
|
||||
)
|
||||
raw_command_output = self._combine_outputs_between_matches(
|
||||
pane_content, ps1_matches
|
||||
)
|
||||
self.shell.sendcontrol('z')
|
||||
self.shell.expect(self.__bash_expect_regex)
|
||||
output = self.shell.before
|
||||
logger.debug(f'Received output after control-z: {output}')
|
||||
# Try to kill the job
|
||||
self.shell.sendline('kill -9 %1')
|
||||
self.shell.expect(self.__bash_expect_regex)
|
||||
logger.debug(f'Received output after killing job %1: {self.shell.before}')
|
||||
output += self.shell.before
|
||||
|
||||
_additional_msg = ''
|
||||
if action_timeout is not None:
|
||||
_additional_msg = f'Command timed out after {action_timeout} seconds. '
|
||||
output += (
|
||||
'\r\n\r\n'
|
||||
+ f'[{_additional_msg}SIGINT was sent to interrupt the command, but failed. The command was killed.]'
|
||||
metadata = CmdOutputMetadata() # No metadata available
|
||||
metadata.suffix = (
|
||||
f'\n[The command has no new output after {self.NO_CHANGE_TIMEOUT_SECONDS} seconds. '
|
||||
"You may wait longer to see additional output by sending empty command '', "
|
||||
'send other commands to interact with the current process, '
|
||||
'or send keys to interrupt/kill the command.]'
|
||||
)
|
||||
command_output = self._get_command_output(
|
||||
command,
|
||||
raw_command_output,
|
||||
metadata,
|
||||
continue_prefix='[Command output continued from previous command]\n',
|
||||
)
|
||||
return CmdOutputObservation(
|
||||
content=command_output,
|
||||
command=command,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
# Try to get the exit code again
|
||||
self.shell.sendline('echo $?')
|
||||
self.shell.expect(self.__bash_expect_regex)
|
||||
_exit_code_output = self.shell.before
|
||||
exit_code = self._parse_exit_code(_exit_code_output)
|
||||
|
||||
return output, exit_code
|
||||
|
||||
def _parse_exit_code(self, output: str) -> int:
|
||||
try:
|
||||
exit_code = int(output.strip().split()[0])
|
||||
except Exception:
|
||||
logger.error('Error getting exit code from bash script')
|
||||
# If we try to run an invalid shell script the output sometimes includes error text
|
||||
# rather than the error code - we assume this is an error
|
||||
exit_code = 2
|
||||
return exit_code
|
||||
|
||||
def _continue_bash(
|
||||
def _handle_hard_timeout_command(
|
||||
self,
|
||||
timeout: int,
|
||||
keep_prompt: bool = True,
|
||||
kill_on_timeout: bool = True,
|
||||
) -> tuple[str, int]:
|
||||
logger.debug(f'Continuing bash with timeout={timeout}')
|
||||
try:
|
||||
self.shell.expect(self.__bash_expect_regex, timeout=timeout)
|
||||
command: str,
|
||||
pane_content: str,
|
||||
ps1_matches: list[re.Match],
|
||||
timeout: float,
|
||||
) -> CmdOutputObservation:
|
||||
self.prev_status = BashCommandStatus.HARD_TIMEOUT
|
||||
if len(ps1_matches) != 1:
|
||||
logger.warning(
|
||||
'Expected exactly one PS1 metadata block BEFORE the execution of a command, '
|
||||
f'but got {len(ps1_matches)} PS1 metadata blocks:\n---\n{pane_content!r}\n---'
|
||||
)
|
||||
raw_command_output = self._combine_outputs_between_matches(
|
||||
pane_content, ps1_matches
|
||||
)
|
||||
metadata = CmdOutputMetadata() # No metadata available
|
||||
metadata.suffix = (
|
||||
f'\n[The command timed out after {timeout} seconds. '
|
||||
"You may wait longer to see additional output by sending empty command '', "
|
||||
'send other commands to interact with the current process, '
|
||||
'or send keys to interrupt/kill the command.]'
|
||||
)
|
||||
command_output = self._get_command_output(
|
||||
command,
|
||||
raw_command_output,
|
||||
metadata,
|
||||
continue_prefix='[Command output continued from previous command]\n',
|
||||
)
|
||||
|
||||
output = self.shell.before
|
||||
return CmdOutputObservation(
|
||||
command=command,
|
||||
content=command_output,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
# Get exit code
|
||||
self.shell.sendline('echo $?')
|
||||
logger.debug('Requesting exit code...')
|
||||
self.shell.expect(self.__bash_expect_regex, timeout=timeout)
|
||||
_exit_code_output = self.shell.before
|
||||
exit_code = self._parse_exit_code(_exit_code_output)
|
||||
except pexpect.TIMEOUT as e:
|
||||
logger.warning(f'Bash pexpect.TIMEOUT while executing bash command: {e}')
|
||||
if kill_on_timeout:
|
||||
output, exit_code = self._interrupt_bash(action_timeout=timeout)
|
||||
def _ready_for_next_command(self):
|
||||
"""Reset the content buffer for a new command."""
|
||||
# Clear the current content
|
||||
self._clear_screen()
|
||||
|
||||
def _combine_outputs_between_matches(
|
||||
self,
|
||||
pane_content: str,
|
||||
ps1_matches: list[re.Match],
|
||||
get_content_before_last_match: bool = False,
|
||||
) -> str:
|
||||
"""Combine all outputs between PS1 matches.
|
||||
|
||||
Args:
|
||||
pane_content: The full pane content containing PS1 prompts and command outputs
|
||||
ps1_matches: List of regex matches for PS1 prompts
|
||||
get_content_before_last_match: when there's only one PS1 match, whether to get
|
||||
the content before the last PS1 prompt (True) or after the last PS1 prompt (False)
|
||||
Returns:
|
||||
Combined string of all outputs between matches
|
||||
"""
|
||||
if len(ps1_matches) == 1:
|
||||
if get_content_before_last_match:
|
||||
# The command output is the content before the last PS1 prompt
|
||||
return pane_content[: ps1_matches[0].start()]
|
||||
else:
|
||||
output = self.shell.before or ''
|
||||
exit_code = -1
|
||||
finally:
|
||||
bash_prompt = self._get_bash_prompt_and_update_pwd()
|
||||
if keep_prompt:
|
||||
output += '\r\n' + bash_prompt
|
||||
return output, exit_code
|
||||
# The command output is the content after the last PS1 prompt
|
||||
return pane_content[ps1_matches[0].end() + 1 :]
|
||||
combined_output = ''
|
||||
for i in range(len(ps1_matches) - 1):
|
||||
# Extract content between current and next PS1 prompt
|
||||
output_segment = pane_content[
|
||||
ps1_matches[i].end() + 1 : ps1_matches[i + 1].start()
|
||||
]
|
||||
combined_output += output_segment + '\n'
|
||||
logger.debug(f'COMBINED OUTPUT: {combined_output}')
|
||||
return combined_output
|
||||
|
||||
def run(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservation:
|
||||
try:
|
||||
assert (
|
||||
action.timeout is not None
|
||||
), f'Timeout argument is required for CmdRunAction: {action}'
|
||||
commands = split_bash_commands(action.command)
|
||||
all_output = ''
|
||||
python_interpreter = ''
|
||||
for command in commands:
|
||||
if command == '':
|
||||
output, exit_code = self._continue_bash(
|
||||
timeout=SOFT_TIMEOUT_SECONDS,
|
||||
keep_prompt=action.keep_prompt,
|
||||
kill_on_timeout=False,
|
||||
)
|
||||
elif command.lower() == 'ctrl+c':
|
||||
output, exit_code = self._interrupt_bash(
|
||||
action_timeout=None, # intentionally None
|
||||
)
|
||||
else:
|
||||
output, exit_code = self._execute_bash(
|
||||
command,
|
||||
timeout=SOFT_TIMEOUT_SECONDS
|
||||
if not action.blocking
|
||||
else action.timeout,
|
||||
keep_prompt=action.keep_prompt,
|
||||
kill_on_timeout=False if not action.blocking else True,
|
||||
)
|
||||
# Get rid of the python interpreter string from each line of the output.
|
||||
# We need it only once at the end.
|
||||
parts = output.rsplit('[Python Interpreter: ', 1)
|
||||
output = parts[0]
|
||||
if len(parts) == 2:
|
||||
python_interpreter = '[Python Interpreter: ' + parts[1]
|
||||
if all_output:
|
||||
# previous output already exists so we add a newline
|
||||
all_output += '\r\n'
|
||||
def execute(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservation:
|
||||
"""Execute a command in the bash session."""
|
||||
if not self._initialized:
|
||||
raise RuntimeError('Bash session is not initialized')
|
||||
|
||||
# If the command originated with the agent, append the command that was run...
|
||||
if action.source == EventSource.AGENT:
|
||||
all_output += command + '\r\n'
|
||||
# Strip the command of any leading/trailing whitespace
|
||||
logger.debug(f'RECEIVED ACTION: {action}')
|
||||
command = action.command.strip()
|
||||
|
||||
all_output += str(output)
|
||||
if exit_code != 0:
|
||||
break
|
||||
if command == '' and self.prev_status not in {
|
||||
BashCommandStatus.CONTINUE,
|
||||
BashCommandStatus.NO_CHANGE_TIMEOUT,
|
||||
BashCommandStatus.HARD_TIMEOUT,
|
||||
}:
|
||||
return CmdOutputObservation(
|
||||
command_id=-1,
|
||||
content=all_output.rstrip('\r\n'),
|
||||
command=action.command,
|
||||
hidden=action.hidden,
|
||||
exit_code=exit_code,
|
||||
interpreter_details=python_interpreter,
|
||||
content='ERROR: No previous command to continue from. '
|
||||
+ 'Previous command has to be timeout to be continued.',
|
||||
command='',
|
||||
metadata=CmdOutputMetadata(),
|
||||
)
|
||||
except UnicodeDecodeError as e:
|
||||
|
||||
splited_commands = split_bash_commands(command)
|
||||
if len(splited_commands) > 1:
|
||||
return ErrorObservation(
|
||||
f'Runtime bash execution failed: Command output could not be decoded as utf-8. {str(e)}',
|
||||
content=(
|
||||
f'ERROR: Cannot execute multiple commands at once.\n'
|
||||
f'Please run each command separately OR chain them into a single command via && or ;\n'
|
||||
f'Provided commands:\n{"\n".join(f"({i+1}) {cmd}" for i, cmd in enumerate(splited_commands))}'
|
||||
)
|
||||
)
|
||||
|
||||
start_time = time.time()
|
||||
last_change_time = start_time
|
||||
last_pane_output = self._get_pane_content()
|
||||
|
||||
_ps1_matches = CmdOutputMetadata.matches_ps1_metadata(last_pane_output)
|
||||
assert len(_ps1_matches) >= 1, (
|
||||
'Expected at least one PS1 metadata block BEFORE the execution of a command, '
|
||||
f'but got {len(_ps1_matches)} PS1 metadata blocks:\n---\n{last_pane_output!r}\n---'
|
||||
)
|
||||
if len(_ps1_matches) > 1:
|
||||
logger.warning(
|
||||
'Found multiple PS1 metadata blocks BEFORE the execution of a command. '
|
||||
'Only the last one will be used.'
|
||||
)
|
||||
_ps1_matches = [_ps1_matches[-1]]
|
||||
|
||||
if command != '':
|
||||
# convert command to raw string
|
||||
command = escape_bash_special_chars(command)
|
||||
logger.debug(f'SENDING COMMAND: {command!r}')
|
||||
self.pane.send_keys(
|
||||
command,
|
||||
enter=not self._is_special_key(command),
|
||||
)
|
||||
|
||||
# Loop until the command completes or times out
|
||||
while should_continue():
|
||||
_start_time = time.time()
|
||||
logger.debug(f'GETTING PANE CONTENT at {_start_time}')
|
||||
cur_pane_output = self._get_pane_content()
|
||||
logger.debug(
|
||||
f'PANE CONTENT GOT after {time.time() - _start_time:.2f} seconds'
|
||||
)
|
||||
logger.debug(f'BEGIN OF PANE CONTENT: {cur_pane_output.split("\n")[:10]}')
|
||||
logger.debug(f'END OF PANE CONTENT: {cur_pane_output.split("\n")[-10:]}')
|
||||
ps1_matches = CmdOutputMetadata.matches_ps1_metadata(cur_pane_output)
|
||||
if cur_pane_output != last_pane_output:
|
||||
last_pane_output = cur_pane_output
|
||||
last_change_time = time.time()
|
||||
logger.debug(f'CONTENT UPDATED DETECTED at {last_change_time}')
|
||||
|
||||
# 1) Execution completed
|
||||
# if the last command output contains the end marker
|
||||
if cur_pane_output.rstrip().endswith(CMD_OUTPUT_PS1_END.rstrip()):
|
||||
return self._handle_completed_command(
|
||||
command,
|
||||
pane_content=cur_pane_output,
|
||||
ps1_matches=ps1_matches,
|
||||
)
|
||||
|
||||
# 2) Execution timed out since there's no change in output
|
||||
# for a while (self.NO_CHANGE_TIMEOUT_SECONDS)
|
||||
# We ignore this if the command is *blocking
|
||||
time_since_last_change = time.time() - last_change_time
|
||||
logger.debug(
|
||||
f'CHECKING NO CHANGE TIMEOUT ({self.NO_CHANGE_TIMEOUT_SECONDS}s): elapsed {time_since_last_change}'
|
||||
)
|
||||
if (
|
||||
not action.blocking
|
||||
and time_since_last_change >= self.NO_CHANGE_TIMEOUT_SECONDS
|
||||
):
|
||||
return self._handle_nochange_timeout_command(
|
||||
command,
|
||||
pane_content=cur_pane_output,
|
||||
ps1_matches=ps1_matches,
|
||||
)
|
||||
|
||||
# 3) Execution timed out due to hard timeout
|
||||
logger.debug(
|
||||
f'CHECKING HARD TIMEOUT ({action.timeout}s): elapsed {time.time() - start_time}'
|
||||
)
|
||||
if action.timeout and time.time() - start_time >= action.timeout:
|
||||
return self._handle_hard_timeout_command(
|
||||
command,
|
||||
pane_content=cur_pane_output,
|
||||
ps1_matches=ps1_matches,
|
||||
timeout=action.timeout,
|
||||
)
|
||||
|
||||
logger.debug(f'SLEEPING for {self.POLL_INTERVAL} seconds for next poll')
|
||||
time.sleep(self.POLL_INTERVAL)
|
||||
raise RuntimeError('Bash session was likely interrupted...')
|
||||
|
||||
@ -4,7 +4,7 @@ from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
|
||||
def init_user_and_working_directory(
|
||||
username: str, user_id: int, initial_pwd: str
|
||||
username: str, user_id: int, initial_cwd: str
|
||||
) -> int | None:
|
||||
"""Create working directory and user if not exists.
|
||||
It performs the following steps effectively:
|
||||
@ -26,23 +26,23 @@ def init_user_and_working_directory(
|
||||
Args:
|
||||
username (str): The username to create.
|
||||
user_id (int): The user ID to assign to the user.
|
||||
initial_pwd (str): The initial working directory to create.
|
||||
initial_cwd (str): The initial working directory to create.
|
||||
|
||||
Returns:
|
||||
int | None: The user ID if it was updated, None otherwise.
|
||||
"""
|
||||
|
||||
# First create the working directory, independent of the user
|
||||
logger.debug(f'Client working directory: {initial_pwd}')
|
||||
command = f'umask 002; mkdir -p {initial_pwd}'
|
||||
logger.debug(f'Client working directory: {initial_cwd}')
|
||||
command = f'umask 002; mkdir -p {initial_cwd}'
|
||||
output = subprocess.run(command, shell=True, capture_output=True)
|
||||
out_str = output.stdout.decode()
|
||||
|
||||
command = f'chown -R {username}:root {initial_pwd}'
|
||||
command = f'chown -R {username}:root {initial_cwd}'
|
||||
output = subprocess.run(command, shell=True, capture_output=True)
|
||||
out_str += output.stdout.decode()
|
||||
|
||||
command = f'chmod g+rw {initial_pwd}'
|
||||
command = f'chmod g+rw {initial_cwd}'
|
||||
output = subprocess.run(command, shell=True, capture_output=True)
|
||||
out_str += output.stdout.decode()
|
||||
logger.debug(f'Created working directory. Output: [{out_str}]')
|
||||
|
||||
@ -15,7 +15,7 @@ ENV POETRY_VIRTUALENVS_PATH=/openhands/poetry \
|
||||
# Install base system dependencies
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
wget curl sudo apt-utils git jq \
|
||||
wget curl sudo apt-utils git jq tmux \
|
||||
{% if 'ubuntu' in base_image and (base_image.endswith(':latest') or base_image.endswith(':24.04')) %}
|
||||
libgl1 \
|
||||
{% else %}
|
||||
|
||||
34
poetry.lock
generated
34
poetry.lock
generated
@ -1,4 +1,4 @@
|
||||
# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aiohappyeyeballs"
|
||||
@ -3711,6 +3711,17 @@ websocket-client = ">=0.32.0,<0.40.0 || >0.40.0,<0.41.dev0 || >=0.43.dev0"
|
||||
[package.extras]
|
||||
adal = ["adal (>=1.0.2)"]
|
||||
|
||||
[[package]]
|
||||
name = "libtmux"
|
||||
version = "0.37.0"
|
||||
description = "Typed library that provides an ORM wrapper for tmux, a terminal multiplexer."
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.8"
|
||||
files = [
|
||||
{file = "libtmux-0.37.0-py3-none-any.whl", hash = "sha256:7e8cbab30b033d132b6fca5dddb575bb7f6a1fd802328e7174f9b49023556376"},
|
||||
{file = "libtmux-0.37.0.tar.gz", hash = "sha256:21955c5dce6332db41abad5e26ae8c4062ef2b9a89099bd57a36f52be1d5270f"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libwebarena"
|
||||
version = "0.0.3"
|
||||
@ -8298,6 +8309,25 @@ postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"]
|
||||
pymysql = ["pymysql"]
|
||||
sqlcipher = ["sqlcipher3_binary"]
|
||||
|
||||
[[package]]
|
||||
name = "sse-starlette"
|
||||
version = "2.1.3"
|
||||
description = "SSE plugin for Starlette"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "sse_starlette-2.1.3-py3-none-any.whl", hash = "sha256:8ec846438b4665b9e8c560fcdea6bc8081a3abf7942faa95e5a744999d219772"},
|
||||
{file = "sse_starlette-2.1.3.tar.gz", hash = "sha256:9cd27eb35319e1414e3d2558ee7414487f9529ce3b3cf9b21434fd110e017169"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
anyio = "*"
|
||||
starlette = "*"
|
||||
uvicorn = "*"
|
||||
|
||||
[package.extras]
|
||||
examples = ["fastapi"]
|
||||
|
||||
[[package]]
|
||||
name = "stack-data"
|
||||
version = "0.6.3"
|
||||
@ -10054,4 +10084,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.12"
|
||||
content-hash = "691bdd0f64e3476858eb34ce6ed6d0b0e7d97458cfd69fd366cd9c1c4f4ec897"
|
||||
content-hash = "db887f071f7dbb712cfba5d9b4de8938afbedee22fd166b4527f4aec40e37cfd"
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "openhands-ai"
|
||||
version = "0.18.0"
|
||||
version = "0.18.0a0"
|
||||
description = "OpenHands: Code Less, Make More"
|
||||
authors = ["OpenHands"]
|
||||
license = "MIT"
|
||||
@ -62,11 +62,13 @@ opentelemetry-api = "1.25.0"
|
||||
opentelemetry-exporter-otlp-proto-grpc = "1.25.0"
|
||||
modal = ">=0.66.26,<0.71.0"
|
||||
runloop-api-client = "0.11.0"
|
||||
libtmux = "^0.37.0"
|
||||
pygithub = "^2.5.0"
|
||||
joblib = "*"
|
||||
openhands-aci = "0.1.5"
|
||||
python-socketio = "^5.11.4"
|
||||
redis = "^5.2.0"
|
||||
sse-starlette = "^2.1.3"
|
||||
|
||||
[tool.poetry.group.llama-index.dependencies]
|
||||
llama-index = "*"
|
||||
@ -100,6 +102,7 @@ reportlab = "*"
|
||||
[tool.coverage.run]
|
||||
concurrency = ["gevent"]
|
||||
|
||||
|
||||
[tool.poetry.group.runtime.dependencies]
|
||||
jupyterlab = "*"
|
||||
notebook = "*"
|
||||
@ -129,6 +132,7 @@ ignore = ["D1"]
|
||||
[tool.ruff.lint.pydocstyle]
|
||||
convention = "google"
|
||||
|
||||
|
||||
[tool.poetry.group.evaluation.dependencies]
|
||||
streamlit = "*"
|
||||
whatthepatch = "*"
|
||||
|
||||
@ -69,7 +69,7 @@ def _close_test_runtime(runtime: Runtime) -> None:
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
def _reset_pwd() -> None:
|
||||
def _reset_cwd() -> None:
|
||||
global project_dir
|
||||
# Try to change back to project directory
|
||||
try:
|
||||
@ -152,16 +152,16 @@ def get_run_as_openhands() -> list[bool]:
|
||||
|
||||
@pytest.fixture(scope='module') # for xdist
|
||||
def runtime_setup_module():
|
||||
_reset_pwd()
|
||||
_reset_cwd()
|
||||
yield
|
||||
_reset_pwd()
|
||||
_reset_cwd()
|
||||
|
||||
|
||||
@pytest.fixture(scope='session') # not for xdist
|
||||
def runtime_setup_session():
|
||||
_reset_pwd()
|
||||
_reset_cwd()
|
||||
yield
|
||||
_reset_pwd()
|
||||
_reset_cwd()
|
||||
|
||||
|
||||
# This assures that all tests run together per runtime, not alternating between them,
|
||||
@ -230,14 +230,14 @@ def _load_runtime(
|
||||
global test_mount_path
|
||||
if use_workspace:
|
||||
test_mount_path = os.path.join(config.workspace_base, 'rt')
|
||||
elif temp_dir is not None:
|
||||
test_mount_path = os.path.join(temp_dir, sid)
|
||||
else:
|
||||
test_mount_path = os.path.join(
|
||||
temp_dir, sid
|
||||
) # need a subfolder to avoid conflicts
|
||||
test_mount_path = None
|
||||
config.workspace_mount_path = test_mount_path
|
||||
|
||||
# Mounting folder specific for this test inside the sandbox
|
||||
config.workspace_mount_path_in_sandbox = f'{sandbox_test_folder}/{sid}'
|
||||
config.workspace_mount_path_in_sandbox = f'{sandbox_test_folder}'
|
||||
print('\nPaths used:')
|
||||
print(f'use_host_network: {config.sandbox.use_host_network}')
|
||||
print(f'workspace_base: {config.workspace_base}')
|
||||
|
||||
@ -5,7 +5,6 @@ from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from conftest import (
|
||||
TEST_IN_CI,
|
||||
_close_test_runtime,
|
||||
_get_sandbox_folder,
|
||||
_load_runtime,
|
||||
@ -13,7 +12,7 @@ from conftest import (
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action import CmdRunAction
|
||||
from openhands.events.observation import CmdOutputObservation
|
||||
from openhands.events.observation import CmdOutputObservation, ErrorObservation
|
||||
from openhands.runtime.base import Runtime
|
||||
|
||||
# ============================================================================================================================
|
||||
@ -21,36 +20,19 @@ from openhands.runtime.base import Runtime
|
||||
# ============================================================================================================================
|
||||
|
||||
|
||||
def _run_cmd_action(runtime, custom_command: str, keep_prompt=True):
|
||||
action = CmdRunAction(command=custom_command, keep_prompt=keep_prompt)
|
||||
def _run_cmd_action(runtime, custom_command: str):
|
||||
action = CmdRunAction(command=custom_command)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert isinstance(obs, (CmdOutputObservation, ErrorObservation))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
return obs
|
||||
|
||||
|
||||
def test_bash_command_pexcept(temp_dir, runtime_cls, run_as_openhands):
|
||||
def test_bash_command_env(temp_dir, runtime_cls, run_as_openhands):
|
||||
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
|
||||
try:
|
||||
# We set env var PS1="\u@\h:\w $"
|
||||
# and construct the PEXCEPT prompt base on it.
|
||||
# When run `env`, bad implementation of CmdRunAction will be pexcepted by this
|
||||
# and failed to pexcept the right content, causing it fail to get error code.
|
||||
obs = runtime.run_action(CmdRunAction(command='env'))
|
||||
|
||||
# For example:
|
||||
# 02:16:13 - openhands:DEBUG: client.py:78 - Executing command: env
|
||||
# 02:16:13 - openhands:DEBUG: client.py:82 - Command output: PYTHONUNBUFFERED=1
|
||||
# CONDA_EXE=/openhands/miniforge3/bin/conda
|
||||
# [...]
|
||||
# LC_CTYPE=C.UTF-8
|
||||
# PS1=\u@\h:\w $
|
||||
# 02:16:13 - openhands:DEBUG: client.py:89 - Executing command for exit code: env
|
||||
# 02:16:13 - openhands:DEBUG: client.py:92 - Exit code Output:
|
||||
# CONDA_DEFAULT_ENV=base
|
||||
|
||||
# As long as the exit code is 0, the test will pass.
|
||||
assert isinstance(
|
||||
obs, CmdOutputObservation
|
||||
), 'The observation should be a CmdOutputObservation.'
|
||||
@ -59,52 +41,7 @@ def test_bash_command_pexcept(temp_dir, runtime_cls, run_as_openhands):
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_bash_timeout_and_keyboard_interrupt(temp_dir, runtime_cls, run_as_openhands):
|
||||
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
|
||||
try:
|
||||
action = CmdRunAction(command='python -c "import time; time.sleep(10)"')
|
||||
action.timeout = 1
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert (
|
||||
'[Command timed out after 1 seconds. SIGINT was sent to interrupt the command.]'
|
||||
in obs.content
|
||||
)
|
||||
assert 'KeyboardInterrupt' in obs.content
|
||||
|
||||
# follow up command should not be affected
|
||||
action = CmdRunAction(command='ls')
|
||||
action.timeout = 1
|
||||
obs = runtime.run_action(action)
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
# run it again!
|
||||
action = CmdRunAction(command='python -c "import time; time.sleep(10)"')
|
||||
action.timeout = 1
|
||||
obs = runtime.run_action(action)
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert (
|
||||
'[Command timed out after 1 seconds. SIGINT was sent to interrupt the command.]'
|
||||
in obs.content
|
||||
)
|
||||
assert 'KeyboardInterrupt' in obs.content
|
||||
|
||||
# things should still work
|
||||
action = CmdRunAction(command='ls')
|
||||
action.timeout = 1
|
||||
obs = runtime.run_action(action)
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert '/workspace' in obs.interpreter_details
|
||||
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_bash_pexcept_eof(temp_dir, runtime_cls, run_as_openhands):
|
||||
def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
|
||||
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
|
||||
try:
|
||||
action = CmdRunAction(command='python3 -m http.server 8080')
|
||||
@ -112,9 +49,21 @@ def test_bash_pexcept_eof(temp_dir, runtime_cls, run_as_openhands):
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 130 # script was killed by SIGINT
|
||||
assert obs.exit_code == -1
|
||||
assert 'Serving HTTP on 0.0.0.0 port 8080' in obs.content
|
||||
assert (
|
||||
"[The command timed out after 1 seconds. You may wait longer to see additional output by sending empty command '', send other commands to interact with the current process, or send keys to interrupt/kill the command.]"
|
||||
in obs.metadata.suffix
|
||||
)
|
||||
|
||||
action = CmdRunAction(command='C-c')
|
||||
action.timeout = 30
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert 'Keyboard interrupt received, exiting.' in obs.content
|
||||
assert '/workspace' in obs.metadata.working_dir
|
||||
|
||||
action = CmdRunAction(command='ls')
|
||||
action.timeout = 1
|
||||
@ -122,7 +71,8 @@ def test_bash_pexcept_eof(temp_dir, runtime_cls, run_as_openhands):
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert '/workspace' in obs.interpreter_details
|
||||
assert 'Keyboard interrupt received, exiting.' not in obs.content
|
||||
assert '/workspace' in obs.metadata.working_dir
|
||||
|
||||
# run it again!
|
||||
action = CmdRunAction(command='python3 -m http.server 8080')
|
||||
@ -130,122 +80,8 @@ def test_bash_pexcept_eof(temp_dir, runtime_cls, run_as_openhands):
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 130 # script was killed by SIGINT
|
||||
assert obs.exit_code == -1
|
||||
assert 'Serving HTTP on 0.0.0.0 port 8080' in obs.content
|
||||
assert 'Keyboard interrupt received, exiting.' in obs.content
|
||||
|
||||
# things should still work
|
||||
action = CmdRunAction(command='ls')
|
||||
action.timeout = 1
|
||||
obs = runtime.run_action(action)
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert '/workspace' in obs.interpreter_details
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_process_resistant_to_one_sigint(temp_dir, runtime_cls, run_as_openhands):
    """Run a script that traps one SIGINT and check the timeout interrupt ends it.

    The script is written under ``temp_dir``, copied to the sandbox workspace
    path, and started with a 5 second blocking timeout. The observation is
    expected to report exit code 130, the script's trap message, and the
    SIGINT timeout notice. A follow-up ``ls`` must succeed and list the script.
    """
    # The INT trap prints a message, then restores the default handler and
    # exits once the counter reaches 1.
    resistant_script = """
#!/bin/bash
trap_count=0
trap 'echo "Caught SIGINT ($((++trap_count))/1), ignoring..."; [ $trap_count -ge 1 ] && trap - INT && exit' INT
while true; do
echo "Still running..."
sleep 1
done
""".strip()

    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        host_script = f'{temp_dir}/resistant_script.sh'
        with open(host_script, 'w') as script_file:
            script_file.write(resistant_script)
        os.chmod(host_script, 0o777)

        runtime.copy_to(
            os.path.join(temp_dir, 'resistant_script.sh'),
            runtime.config.workspace_mount_path_in_sandbox,
        )

        # Launch the script and let the 5 second timeout trigger the interrupt.
        run_script = CmdRunAction(command='sudo bash ./resistant_script.sh')
        run_script.timeout = 5
        run_script.blocking = True
        logger.info(run_script, extra={'msg_type': 'ACTION'})
        script_obs = runtime.run_action(run_script)
        logger.info(script_obs, extra={'msg_type': 'OBSERVATION'})
        assert isinstance(script_obs, CmdOutputObservation)
        assert script_obs.exit_code == 130  # script was killed by SIGINT
        output = script_obs.content
        assert 'Still running...' in output
        assert 'Caught SIGINT (1/1), ignoring...' in output
        assert 'Stopped' not in output
        assert (
            '[Command timed out after 5 seconds. SIGINT was sent to interrupt the command.]'
            in output
        )

        # An ordinary command must still work after the interrupt.
        list_files = CmdRunAction(command='ls')
        list_files.timeout = 10
        ls_obs = runtime.run_action(list_files)
        assert isinstance(ls_obs, CmdOutputObservation)
        assert ls_obs.exit_code == 0
        assert '/workspace' in ls_obs.interpreter_details
        assert 'resistant_script.sh' in ls_obs.content
    finally:
        _close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_process_resistant_to_multiple_sigint(temp_dir, runtime_cls, run_as_openhands):
    """Run a script that survives several SIGINTs and check it gets killed.

    The script's INT trap only exits after three signals. It is started with a
    2 second blocking timeout. The observation is expected to report exit code
    0, the first trap message, a stopped-job line (``[1]+`` and ``Stopped``),
    and the notice that SIGINT failed and the command was killed. A follow-up
    ``ls`` must succeed and list the script.
    """
    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Create a bash script that keeps running until it has caught 3 SIGINTs
        script_content = """
#!/bin/bash
trap_count=0
trap 'echo "Caught SIGINT ($((++trap_count))/3), ignoring..."; [ $trap_count -ge 3 ] && trap - INT && exit' INT
while true; do
echo "Still running..."
sleep 1
done
""".strip()

        with open(f'{temp_dir}/resistant_script.sh', 'w') as f:
            f.write(script_content)
        os.chmod(f'{temp_dir}/resistant_script.sh', 0o777)

        runtime.copy_to(
            os.path.join(temp_dir, 'resistant_script.sh'),
            runtime.config.workspace_mount_path_in_sandbox,
        )

        # Run the resistant script
        action = CmdRunAction(command='sudo bash ./resistant_script.sh')
        action.timeout = 2
        action.blocking = True
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert isinstance(obs, CmdOutputObservation)
        assert obs.exit_code == 0
        assert 'Still running...' in obs.content
        assert 'Caught SIGINT (1/3), ignoring...' in obs.content
        # Both markers must be checked explicitly: `'[1]+' and 'Stopped' in x`
        # only tests 'Stopped', because a non-empty literal is always truthy.
        assert '[1]+' in obs.content and 'Stopped' in obs.content
        assert (
            '[Command timed out after 2 seconds. SIGINT was sent to interrupt the command, but failed. The command was killed.]'
            in obs.content
        )

        # Normal command should still work
        action = CmdRunAction(command='ls')
        action.timeout = 10
        obs = runtime.run_action(action)
        assert isinstance(obs, CmdOutputObservation)
        assert obs.exit_code == 0
        assert '/workspace' in obs.interpreter_details
        assert 'resistant_script.sh' in obs.content

    finally:
        _close_test_runtime(runtime)
|
||||
@ -262,12 +98,12 @@ def test_multiline_commands(temp_dir, runtime_cls):
|
||||
# test multiline echo
|
||||
obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"')
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert 'hello\r\nworld' in obs.content
|
||||
assert 'hello\nworld' in obs.content
|
||||
|
||||
# test whitespace
|
||||
obs = _run_cmd_action(runtime, 'echo -e "a\\n\\n\\nz"')
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert '\r\n\r\n\r\n' in obs.content
|
||||
assert '\n\n\n' in obs.content
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
@ -276,43 +112,43 @@ def test_multiple_multiline_commands(temp_dir, runtime_cls, run_as_openhands):
|
||||
cmds = [
|
||||
'ls -l',
|
||||
'echo -e "hello\nworld"',
|
||||
"""
|
||||
echo -e "hello it\\'s me"
|
||||
""".strip(),
|
||||
"""
|
||||
echo \\
|
||||
"""echo -e "hello it's me\"""",
|
||||
"""echo \\
|
||||
-e 'hello' \\
|
||||
-v
|
||||
""".strip(),
|
||||
"""
|
||||
echo -e 'hello\\nworld\\nare\\nyou\\nthere?'
|
||||
""".strip(),
|
||||
"""
|
||||
echo -e 'hello
|
||||
world
|
||||
are
|
||||
you\\n
|
||||
there?'
|
||||
""".strip(),
|
||||
"""
|
||||
echo -e 'hello
|
||||
world "
|
||||
'
|
||||
""".strip(),
|
||||
-v""",
|
||||
"""echo -e 'hello\\nworld\\nare\\nyou\\nthere?'""",
|
||||
"""echo -e 'hello\nworld\nare\nyou\n\nthere?'""",
|
||||
"""echo -e 'hello\nworld "'""",
|
||||
]
|
||||
joined_cmds = '\n'.join(cmds)
|
||||
|
||||
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
|
||||
try:
|
||||
# First test that running multiple commands at once fails
|
||||
obs = _run_cmd_action(runtime, joined_cmds)
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert isinstance(obs, ErrorObservation)
|
||||
assert 'Cannot execute multiple commands at once' in obs.content
|
||||
|
||||
assert 'total 0' in obs.content
|
||||
assert 'hello\r\nworld' in obs.content
|
||||
assert "hello it\\'s me" in obs.content
|
||||
assert 'hello -v' in obs.content
|
||||
assert 'hello\r\nworld\r\nare\r\nyou\r\nthere?' in obs.content
|
||||
assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content
|
||||
# Now run each command individually and verify they work
|
||||
results = []
|
||||
for cmd in cmds:
|
||||
obs = _run_cmd_action(runtime, cmd)
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
results.append(obs.content)
|
||||
|
||||
# Verify all expected outputs are present
|
||||
assert 'total 0' in results[0] # ls -l
|
||||
assert 'hello\nworld' in results[1] # echo -e "hello\nworld"
|
||||
assert "hello it's me" in results[2] # echo -e "hello it\'s me"
|
||||
assert 'hello -v' in results[3] # echo -e 'hello' -v
|
||||
assert (
|
||||
'hello\nworld\nare\nyou\nthere?' in results[4]
|
||||
) # echo -e 'hello\nworld\nare\nyou\nthere?'
|
||||
assert (
|
||||
'hello\nworld\nare\nyou\n\nthere?' in results[5]
|
||||
) # echo -e with literal newlines
|
||||
assert 'hello\nworld "' in results[6] # echo -e with quote
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
@ -324,7 +160,7 @@ def test_no_ps2_in_output(temp_dir, runtime_cls, run_as_openhands):
|
||||
obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"')
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
|
||||
assert 'hello\r\nworld' in obs.content
|
||||
assert 'hello\nworld' in obs.content
|
||||
assert '>' not in obs.content
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
@ -332,21 +168,17 @@ def test_no_ps2_in_output(temp_dir, runtime_cls, run_as_openhands):
|
||||
|
||||
def test_multiline_command_loop(temp_dir, runtime_cls):
|
||||
# https://github.com/All-Hands-AI/OpenHands/issues/3143
|
||||
init_cmd = """
|
||||
mkdir -p _modules && \
|
||||
init_cmd = """mkdir -p _modules && \
|
||||
for month in {01..04}; do
|
||||
for day in {01..05}; do
|
||||
touch "_modules/2024-${month}-${day}-sample.md"
|
||||
done
|
||||
done
|
||||
echo "created files"
|
||||
done && echo "created files"
|
||||
"""
|
||||
follow_up_cmd = """
|
||||
for file in _modules/*.md; do
|
||||
follow_up_cmd = """for file in _modules/*.md; do
|
||||
new_date=$(echo $file | sed -E 's/2024-(01|02|03|04)-/2024-/;s/2024-01/2024-08/;s/2024-02/2024-09/;s/2024-03/2024-10/;s/2024-04/2024-11/')
|
||||
mv "$file" "$new_date"
|
||||
done
|
||||
echo "success"
|
||||
done && echo "success"
|
||||
"""
|
||||
runtime = _load_runtime(temp_dir, runtime_cls)
|
||||
try:
|
||||
@ -424,7 +256,6 @@ def test_multi_cmd_run_in_single_line(temp_dir, runtime_cls):
|
||||
|
||||
def test_stateful_cmd(temp_dir, runtime_cls):
|
||||
runtime = _load_runtime(temp_dir, runtime_cls)
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
try:
|
||||
obs = _run_cmd_action(runtime, 'mkdir -p test')
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
@ -434,7 +265,7 @@ def test_stateful_cmd(temp_dir, runtime_cls):
|
||||
|
||||
obs = _run_cmd_action(runtime, 'pwd')
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert f'{sandbox_dir}/test' in obs.content
|
||||
assert '/workspace/test' in obs.content
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
@ -532,7 +363,7 @@ def test_copy_to_non_existent_directory(temp_dir, runtime_cls):
|
||||
def test_overwrite_existing_file(temp_dir, runtime_cls):
|
||||
runtime = _load_runtime(temp_dir, runtime_cls)
|
||||
try:
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
sandbox_dir = '/openhands/workspace'
|
||||
|
||||
obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
|
||||
assert obs.exit_code == 0
|
||||
@ -595,38 +426,13 @@ def test_copy_from_directory(temp_dir, runtime_cls):
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_keep_prompt(runtime_cls, temp_dir):
    """Check that ``keep_prompt=False`` drops the prompt from interpreter details.

    Running as a non-openhands user, a default command must show ``root@`` in
    ``interpreter_details``. The same kind of command run with
    ``keep_prompt=False`` must not.
    """
    runtime = _load_runtime(temp_dir, runtime_cls=runtime_cls, run_as_openhands=False)
    try:
        target_file = f'{_get_sandbox_folder(runtime)}/test_file.txt'

        # Default behaviour: the prompt is part of the interpreter details.
        touch_obs = _run_cmd_action(runtime, f'touch {target_file}')
        assert touch_obs.exit_code == 0
        assert 'root@' in touch_obs.interpreter_details

        # With keep_prompt disabled the prompt must be absent.
        cat_obs = _run_cmd_action(runtime, f'cat {target_file}', keep_prompt=False)
        assert cat_obs.exit_code == 0
        assert 'root@' not in cat_obs.interpreter_details
    finally:
        _close_test_runtime(runtime)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
TEST_IN_CI != 'True',
|
||||
reason='This test is not working in WSL (file ownership)',
|
||||
)
|
||||
def test_git_operation(runtime_cls):
|
||||
# do not mount workspace, since workspace mount by tests will be owned by root
|
||||
# while the user_id we get via os.getuid() is different from root
|
||||
# which causes permission issues
|
||||
runtime = _load_runtime(
|
||||
temp_dir=None,
|
||||
use_workspace=False,
|
||||
runtime_cls=runtime_cls,
|
||||
# Need to use non-root user to expose issues
|
||||
run_as_openhands=True,
|
||||
@ -634,12 +440,15 @@ def test_git_operation(runtime_cls):
|
||||
# this will happen if permission of runtime is not properly configured
|
||||
# fatal: detected dubious ownership in repository at '/workspace'
|
||||
try:
|
||||
obs = _run_cmd_action(runtime, 'sudo chown -R openhands:root .')
|
||||
assert obs.exit_code == 0
|
||||
|
||||
# check the ownership of the current directory
|
||||
obs = _run_cmd_action(runtime, 'ls -alh .')
|
||||
assert obs.exit_code == 0
|
||||
# drwx--S--- 2 openhands root 64 Aug 7 23:32 .
|
||||
# drwxr-xr-x 1 root root 4.0K Aug 7 23:33 ..
|
||||
for line in obs.content.split('\r\n'):
|
||||
for line in obs.content.split('\n'):
|
||||
if ' ..' in line:
|
||||
# parent directory should be owned by root
|
||||
assert 'root' in line
|
||||
@ -663,8 +472,10 @@ def test_git_operation(runtime_cls):
|
||||
assert obs.exit_code == 0
|
||||
|
||||
# git diff
|
||||
obs = _run_cmd_action(runtime, 'git diff')
|
||||
obs = _run_cmd_action(runtime, 'git diff --no-color --cached')
|
||||
assert obs.exit_code == 0
|
||||
assert 'b/test_file.txt' in obs.content
|
||||
assert '+hello' in obs.content
|
||||
|
||||
# git commit
|
||||
obs = _run_cmd_action(runtime, 'git commit -m "test commit"')
|
||||
@ -685,3 +496,276 @@ def test_python_version(temp_dir, runtime_cls, run_as_openhands):
|
||||
assert 'Python 3' in obs.content, 'The output should contain "Python 3".'
|
||||
finally:
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_pwd_property(temp_dir, runtime_cls, run_as_openhands):
    """Create a directory, ``cd`` into it and check ``pwd`` reports it."""
    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # The subdirectory has to exist before we can change into it.
        mkdir_obs = _run_cmd_action(runtime, 'mkdir -p random_dir')
        assert mkdir_obs.exit_code == 0

        pwd_obs = _run_cmd_action(runtime, 'cd random_dir && pwd')
        assert pwd_obs.exit_code == 0
        assert 'random_dir' in pwd_obs.content
    finally:
        _close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_basic_command(temp_dir, runtime_cls, run_as_openhands):
    """Exercise plain commands: success, unknown command, newline text, chaining."""
    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # A simple echo succeeds and returns its text.
        echo_obs = _run_cmd_action(runtime, "echo 'hello world'")
        assert 'hello world' in echo_obs.content
        assert echo_obs.exit_code == 0

        # An unknown command is expected to give exit code 127 and a message.
        missing_obs = _run_cmd_action(runtime, 'nonexistent_command')
        assert missing_obs.exit_code == 127
        assert 'nonexistent_command: command not found' in missing_obs.content

        # A quoted argument that contains a newline is echoed back intact.
        newline_obs = _run_cmd_action(runtime, "echo 'hello world with\nspecial chars'")
        assert 'hello world with\nspecial chars' in newline_obs.content
        assert newline_obs.exit_code == 0

        # Commands chained with && all contribute to the output.
        chained_obs = _run_cmd_action(
            runtime, 'echo "first" && echo "second" && echo "third"'
        )
        for word in ('first', 'second', 'third'):
            assert word in chained_obs.content
        assert chained_obs.exit_code == 0
    finally:
        _close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_interactive_command(temp_dir, runtime_cls, run_as_openhands):
    """Feed input to a waiting ``read`` and run a here-document command.

    The ``read`` command is expected to time out after 1 second. Sending
    ``John`` as the next command should complete it with exit code 0. A
    ``cat << EOF`` here-document must echo both of its lines.
    """
    completed_suffix = '[The command completed with exit code 0.]'

    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Start a command that blocks waiting for user input.
        prompt_action = CmdRunAction('read -p "Enter name: " name && echo "Hello $name"')
        prompt_action.timeout = 1
        prompt_obs = runtime.run_action(prompt_action)
        logger.info(prompt_obs, extra={'msg_type': 'OBSERVATION'})
        # assert 'Enter name:' in prompt_obs.content  # FIXME: this is not working
        assert '[The command timed out after 1 seconds.' in prompt_obs.metadata.suffix

        # Answer the pending prompt.
        answer_obs = runtime.run_action(CmdRunAction('John'))
        logger.info(answer_obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Hello John' in answer_obs.content
        assert completed_suffix in answer_obs.metadata.suffix

        # Multiline command input using a here document.
        heredoc_obs = runtime.run_action(CmdRunAction('cat << EOF\nline 1\nline 2\nEOF'))
        assert 'line 1\nline 2' in heredoc_obs.content
        logger.info(heredoc_obs, extra={'msg_type': 'OBSERVATION'})
        assert completed_suffix in heredoc_obs.metadata.suffix
        assert heredoc_obs.exit_code == 0
    finally:
        _close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_long_output(temp_dir, runtime_cls, run_as_openhands):
    """Print 5000 lines and check the first and last appear in the output."""
    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Emit "Line 1" .. "Line 5000" from a shell loop.
        print_lines = CmdRunAction('for i in $(seq 1 5000); do echo "Line $i"; done')
        print_lines.timeout = 10
        result = runtime.run_action(print_lines)
        assert result.exit_code == 0
        for marker in ('Line 1', 'Line 5000'):
            assert marker in result.content
    finally:
        _close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_long_output_exceed_history_limit(temp_dir, runtime_cls, run_as_openhands):
    """Print 50000 lines and expect a truncation notice in the metadata prefix.

    Lines 40000 and 50000 must still be present in the observation content.
    """
    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Emit "Line 1" .. "Line 50000" from a shell loop.
        print_lines = CmdRunAction('for i in $(seq 1 50000); do echo "Line $i"; done')
        print_lines.timeout = 30
        result = runtime.run_action(print_lines)
        logger.info(result, extra={'msg_type': 'OBSERVATION'})
        assert result.exit_code == 0
        assert 'Previous command outputs are truncated' in result.metadata.prefix
        for marker in ('Line 40000', 'Line 50000'):
            assert marker in result.content
    finally:
        _close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_long_output_from_nested_directories(temp_dir, runtime_cls, run_as_openhands):
    """Build 100 folders of 100 files each and list them recursively.

    The ``ls -R`` output must mention the first and last folder and file names.
    """
    # One shell line: create the root, enter it, then nest two loops.
    setup_cmd = (
        'mkdir -p /tmp/test_dir && cd /tmp/test_dir && '
        'for i in $(seq 1 100); do mkdir -p "folder_$i"; '
        'for j in $(seq 1 100); do touch "folder_$i/file_$j.txt"; done; done'
    )

    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        create_tree = CmdRunAction(setup_cmd.strip())
        create_tree.timeout = 60
        setup_obs = runtime.run_action(create_tree)
        assert setup_obs.exit_code == 0

        # List the directory structure recursively.
        list_tree = CmdRunAction('ls -R /tmp/test_dir')
        list_tree.timeout = 60
        listing = runtime.run_action(list_tree)
        assert listing.exit_code == 0

        # Verify the output contains the expected entries.
        for name in ('folder_1', 'file_1.txt', 'folder_100', 'file_100.txt'):
            assert name in listing.content
    finally:
        _close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_command_backslash(temp_dir, runtime_cls, run_as_openhands):
    r"""Check that a trailing ``\;`` in ``find -exec`` reaches the shell intact.

    Reproduces an issue seen during evaluation, where
    ``find ... -exec grep -l "implemented_function" {} \;`` failed with
    ``find: missing argument to `-exec'`` because ``\;`` was escaped
    incorrectly.
    """
    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Create a file whose content is the text we will grep for.
        create_file = CmdRunAction(
            'mkdir -p /tmp/test_dir && echo "implemented_function" > /tmp/test_dir/file_1.txt'
        )
        create_obs = runtime.run_action(create_file)
        logger.info(create_obs, extra={'msg_type': 'OBSERVATION'})
        assert create_obs.exit_code == 0

        # The command only succeeds when the \; terminator is escaped correctly.
        find_files = CmdRunAction(
            'find /tmp/test_dir -type f -exec grep -l "implemented_function" {} \\;'
        )
        find_obs = runtime.run_action(find_files)
        logger.info(find_obs, extra={'msg_type': 'OBSERVATION'})
        assert find_obs.exit_code == 0
        assert '/tmp/test_dir/file_1.txt' in find_obs.content
    finally:
        _close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_command_output_continuation(temp_dir, runtime_cls, run_as_openhands):
    """Follow a slow command's output by sending empty commands.

    The loop prints 1..5 with 3 second pauses, and every step uses a 2.5
    second timeout. Each observation is expected to hold exactly one number
    and a timeout suffix. Follow-ups carry the continuation prefix, and a
    final empty command reports completion with exit code 0.
    """
    timed_out_suffix = '[The command timed out after 2.5 seconds.'
    continued_prefix = '[Command output continued from previous command]'

    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Start a command that produces output slowly.
        slow_loop = CmdRunAction('for i in {1..5}; do echo $i; sleep 3; done')
        slow_loop.timeout = 2.5  # Set timeout to 2.5 seconds
        first_obs = runtime.run_action(slow_loop)
        assert first_obs.content.strip() == '1'
        assert first_obs.metadata.prefix == ''
        assert timed_out_suffix in first_obs.metadata.suffix

        # Keep watching the output, one number per empty command.
        for expected in ['2', '3', '4', '5']:
            poll = CmdRunAction('')
            poll.timeout = 2.5
            poll_obs = runtime.run_action(poll)
            assert continued_prefix in poll_obs.metadata.prefix
            assert poll_obs.content.strip() == expected
            assert timed_out_suffix in poll_obs.metadata.suffix

        # Final empty command to complete.
        final_obs = runtime.run_action(CmdRunAction(''))
        assert '[The command completed with exit code 0.]' in final_obs.metadata.suffix
    finally:
        _close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_long_running_command_follow_by_execute(
    temp_dir, runtime_cls, run_as_openhands
):
    """Send follow-up commands while a slow command is still running.

    The first command prints 1..3 with 3 second pauses under a 2.5 second
    timeout. An empty command and then ``sleep 15`` are sent afterwards. Both
    are expected to return the next number with the continuation prefix, a
    timeout suffix, and exit code -1.
    """
    timed_out_suffix = '[The command timed out after 2.5 seconds.'

    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Test command that produces output slowly.
        slow_loop = CmdRunAction('for i in {1..3}; do echo $i; sleep 3; done')
        slow_loop.timeout = 2.5
        slow_loop.blocking = False
        first_obs = runtime.run_action(slow_loop)
        assert '1' in first_obs.content  # First number should appear before timeout
        assert first_obs.metadata.exit_code == -1  # -1 indicates command is still running
        assert timed_out_suffix in first_obs.metadata.suffix
        assert first_obs.metadata.prefix == ''

        # First an empty command to keep watching the output, then a command
        # that produces no output of its own.
        for follow_up_cmd, expected_number in (('', '2'), ('sleep 15', '3')):
            follow_up = CmdRunAction(follow_up_cmd)
            follow_up.timeout = 2.5
            follow_obs = runtime.run_action(follow_up)
            assert expected_number in follow_obs.content
            assert (
                follow_obs.metadata.prefix
                == '[Command output continued from previous command]\n'
            )
            assert timed_out_suffix in follow_obs.metadata.suffix
            assert follow_obs.metadata.exit_code == -1  # -1 indicates command is still running
    finally:
        _close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_empty_command_errors(temp_dir, runtime_cls, run_as_openhands):
    """An empty command with nothing to continue from must report an error."""
    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # No command has been run yet, so there is no output to continue.
        empty_action = CmdRunAction('')
        result = runtime.run_action(empty_action)
        assert isinstance(result, CmdOutputObservation)
        assert 'ERROR: No previous command to continue from' in result.content
    finally:
        _close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_python_interactive_input(temp_dir, runtime_cls, run_as_openhands):
    """Answer two ``input()`` prompts of a Python one-liner via follow-up commands.

    The script is expected to stay running (exit code -1) until both answers
    have been sent, then print the greeting and complete with exit code 0.
    """
    # Python program that asks for input - single quotes only, so it can be
    # wrapped in double quotes for bash.
    python_script = """name = input('Enter your name: '); age = input('Enter your age: '); print(f'Hello {name}, you are {age} years old')"""

    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Start Python with the interactive script.
        start_obs = runtime.run_action(CmdRunAction(f'python3 -c "{python_script}"'))
        logger.info(start_obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Enter your name:' in start_obs.content
        assert start_obs.metadata.exit_code == -1  # -1 indicates command is still running

        # Send first input (name).
        name_obs = runtime.run_action(CmdRunAction('Alice'))
        logger.info(name_obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Enter your age:' in name_obs.content
        assert name_obs.metadata.exit_code == -1

        # Send second input (age).
        age_obs = runtime.run_action(CmdRunAction('25'))
        logger.info(age_obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Hello Alice, you are 25 years old' in age_obs.content
        assert age_obs.metadata.exit_code == 0
        assert '[The command completed with exit code 0.]' in age_obs.metadata.suffix
    finally:
        _close_test_runtime(runtime)
|
||||
|
||||
@ -4,7 +4,6 @@ import pytest
|
||||
from conftest import (
|
||||
TEST_IN_CI,
|
||||
_close_test_runtime,
|
||||
_get_sandbox_folder,
|
||||
_load_runtime,
|
||||
)
|
||||
|
||||
@ -33,8 +32,6 @@ from openhands.events.observation import (
|
||||
def test_simple_cmd_ipython_and_fileop(temp_dir, runtime_cls, run_as_openhands):
|
||||
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
|
||||
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
|
||||
# Test run command
|
||||
action_cmd = CmdRunAction(command='ls -l')
|
||||
logger.info(action_cmd, extra={'msg_type': 'ACTION'})
|
||||
@ -55,7 +52,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, runtime_cls, run_as_openhands):
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.content.strip() == (
|
||||
'Hello, `World`!\n'
|
||||
f'[Jupyter current working directory: {sandbox_dir}]\n'
|
||||
'[Jupyter current working directory: /openhands/workspace]\n'
|
||||
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]'
|
||||
)
|
||||
|
||||
@ -76,7 +73,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, runtime_cls, run_as_openhands):
|
||||
|
||||
assert obs.content == ''
|
||||
# event stream runtime will always use absolute path
|
||||
assert obs.path == f'{sandbox_dir}/hello.sh'
|
||||
assert obs.path == '/openhands/workspace/hello.sh'
|
||||
|
||||
# Test read file (file should exist)
|
||||
action_read = FileReadAction(path='hello.sh')
|
||||
@ -88,7 +85,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, runtime_cls, run_as_openhands):
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
assert obs.content == 'echo "Hello, World!"\n'
|
||||
assert obs.path == f'{sandbox_dir}/hello.sh'
|
||||
assert obs.path == '/openhands/workspace/hello.sh'
|
||||
|
||||
# clean up
|
||||
action = CmdRunAction(command='rm -rf hello.sh')
|
||||
@ -178,7 +175,6 @@ def test_ipython_multi_user(temp_dir, runtime_cls, run_as_openhands):
|
||||
|
||||
def test_ipython_simple(temp_dir, runtime_cls):
|
||||
runtime = _load_runtime(temp_dir, runtime_cls)
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
|
||||
# Test run ipython
|
||||
# get username
|
||||
@ -192,7 +188,7 @@ def test_ipython_simple(temp_dir, runtime_cls):
|
||||
obs.content.strip()
|
||||
== (
|
||||
'1\n'
|
||||
f'[Jupyter current working directory: {sandbox_dir}]\n'
|
||||
'[Jupyter current working directory: /openhands/workspace]\n'
|
||||
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]'
|
||||
).strip()
|
||||
)
|
||||
@ -203,7 +199,6 @@ def test_ipython_simple(temp_dir, runtime_cls):
|
||||
def test_ipython_package_install(temp_dir, runtime_cls, run_as_openhands):
|
||||
"""Make sure that cd in bash also update the current working directory in ipython."""
|
||||
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
|
||||
# It should error out since pymsgbox is not installed
|
||||
action = IPythonRunCellAction(code='import pymsgbox')
|
||||
@ -229,7 +224,7 @@ def test_ipython_package_install(temp_dir, runtime_cls, run_as_openhands):
|
||||
# import should not error out
|
||||
assert obs.content.strip() == (
|
||||
'[Code executed successfully with no output]\n'
|
||||
f'[Jupyter current working directory: {sandbox_dir}]\n'
|
||||
'[Jupyter current working directory: /openhands/workspace]\n'
|
||||
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]'
|
||||
)
|
||||
|
||||
@ -239,7 +234,6 @@ def test_ipython_package_install(temp_dir, runtime_cls, run_as_openhands):
|
||||
def test_ipython_file_editor_permissions_as_openhands(temp_dir, runtime_cls):
|
||||
"""Test file editor permission behavior when running as different users."""
|
||||
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands=True)
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
|
||||
# Create a file owned by root with restricted permissions
|
||||
action = CmdRunAction(
|
||||
@ -277,18 +271,18 @@ def test_ipython_file_editor_permissions_as_openhands(temp_dir, runtime_cls):
|
||||
assert 'Permission denied' in obs.content
|
||||
|
||||
# Try to use file editor in openhands sandbox directory - should work
|
||||
test_code = f"""
|
||||
test_code = """
|
||||
# Create file
|
||||
print(file_editor(command='create', path='{sandbox_dir}/test.txt', file_text='Line 1\\nLine 2\\nLine 3'))
|
||||
print(file_editor(command='create', path='/openhands/workspace/test.txt', file_text='Line 1\\nLine 2\\nLine 3'))
|
||||
|
||||
# View file
|
||||
print(file_editor(command='view', path='{sandbox_dir}/test.txt'))
|
||||
print(file_editor(command='view', path='/openhands/workspace/test.txt'))
|
||||
|
||||
# Edit file
|
||||
print(file_editor(command='str_replace', path='{sandbox_dir}/test.txt', old_str='Line 2', new_str='New Line 2'))
|
||||
print(file_editor(command='str_replace', path='/openhands/workspace/test.txt', old_str='Line 2', new_str='New Line 2'))
|
||||
|
||||
# Undo edit
|
||||
print(file_editor(command='undo_edit', path='{sandbox_dir}/test.txt'))
|
||||
print(file_editor(command='undo_edit', path='/openhands/workspace/test.txt'))
|
||||
"""
|
||||
action = IPythonRunCellAction(code=test_code)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
@ -303,7 +297,7 @@ print(file_editor(command='undo_edit', path='{sandbox_dir}/test.txt'))
|
||||
assert 'undone successfully' in obs.content
|
||||
|
||||
# Clean up
|
||||
action = CmdRunAction(command=f'rm -f {sandbox_dir}/test.txt')
|
||||
action = CmdRunAction(command='rm -f /openhands/workspace/test.txt')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
@ -318,9 +312,9 @@ print(file_editor(command='undo_edit', path='{sandbox_dir}/test.txt'))
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_file_read_and_edit_via_oh_aci(temp_dir, runtime_cls, run_as_openhands):
|
||||
runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
|
||||
sandbox_dir = _get_sandbox_folder(runtime)
|
||||
def test_file_read_and_edit_via_oh_aci(runtime_cls, run_as_openhands):
|
||||
runtime = _load_runtime(None, runtime_cls, run_as_openhands)
|
||||
sandbox_dir = '/openhands/workspace'
|
||||
|
||||
actions = [
|
||||
{
|
||||
|
||||
@ -6,7 +6,11 @@ import pytest
|
||||
|
||||
from openhands.core.config import LLMConfig
|
||||
from openhands.events.action import CmdRunAction
|
||||
from openhands.events.observation import CmdOutputObservation, NullObservation
|
||||
from openhands.events.observation import (
|
||||
CmdOutputMetadata,
|
||||
CmdOutputObservation,
|
||||
NullObservation,
|
||||
)
|
||||
from openhands.llm.llm import LLM
|
||||
from openhands.resolver.github_issue import GithubIssue, ReviewThread
|
||||
from openhands.resolver.issue_definitions import IssueHandler, PRHandler
|
||||
@ -55,23 +59,20 @@ def mock_followup_prompt_template():
|
||||
return 'Issue context: {{ issues }}\n\nReview comments: {{ review_comments }}\n\nReview threads: {{ review_threads }}\n\nFiles: {{ files }}\n\nThread comments: {{ thread_context }}\n\nPlease fix this issue.'
|
||||
|
||||
|
||||
def create_cmd_output(exit_code: int, content: str, command_id: int, command: str):
|
||||
def create_cmd_output(exit_code: int, content: str, command: str):
|
||||
return CmdOutputObservation(
|
||||
exit_code=exit_code, content=content, command_id=command_id, command=command
|
||||
content=content,
|
||||
command=command,
|
||||
metadata=CmdOutputMetadata(exit_code=exit_code),
|
||||
)
|
||||
|
||||
|
||||
def test_initialize_runtime():
|
||||
mock_runtime = MagicMock()
|
||||
mock_runtime.run_action.side_effect = [
|
||||
create_cmd_output(exit_code=0, content='', command='cd /workspace'),
|
||||
create_cmd_output(
|
||||
exit_code=0, content='', command_id=1, command='cd /workspace'
|
||||
),
|
||||
create_cmd_output(
|
||||
exit_code=0,
|
||||
content='',
|
||||
command_id=2,
|
||||
command='git config --global core.pager ""',
|
||||
exit_code=0, content='', command='git config --global core.pager ""'
|
||||
),
|
||||
]
|
||||
|
||||
@ -291,30 +292,19 @@ def test_download_pr_from_github():
|
||||
async def test_complete_runtime():
|
||||
mock_runtime = MagicMock()
|
||||
mock_runtime.run_action.side_effect = [
|
||||
create_cmd_output(exit_code=0, content='', command='cd /workspace'),
|
||||
create_cmd_output(
|
||||
exit_code=0, content='', command_id=1, command='cd /workspace'
|
||||
exit_code=0, content='', command='git config --global core.pager ""'
|
||||
),
|
||||
create_cmd_output(
|
||||
exit_code=0,
|
||||
content='',
|
||||
command_id=2,
|
||||
command='git config --global core.pager ""',
|
||||
),
|
||||
create_cmd_output(
|
||||
exit_code=0,
|
||||
content='',
|
||||
command_id=3,
|
||||
command='git config --global --add safe.directory /workspace',
|
||||
),
|
||||
create_cmd_output(
|
||||
exit_code=0,
|
||||
content='',
|
||||
command_id=4,
|
||||
command='git diff base_commit_hash fix',
|
||||
),
|
||||
create_cmd_output(
|
||||
exit_code=0, content='git diff content', command_id=5, command='git apply'
|
||||
exit_code=0, content='', command='git diff base_commit_hash fix'
|
||||
),
|
||||
create_cmd_output(exit_code=0, content='git diff content', command='git apply'),
|
||||
]
|
||||
|
||||
result = await complete_runtime(mock_runtime, 'base_commit_hash')
|
||||
@ -614,11 +604,7 @@ def test_guess_success():
|
||||
title='Test Issue',
|
||||
body='This is a test issue',
|
||||
)
|
||||
mock_history = [
|
||||
create_cmd_output(
|
||||
exit_code=0, content='', command_id=1, command='cd /workspace'
|
||||
)
|
||||
]
|
||||
mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
|
||||
mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
|
||||
|
||||
mock_completion_response = MagicMock()
|
||||
@ -758,11 +744,7 @@ def test_guess_success_negative_case():
|
||||
title='Test Issue',
|
||||
body='This is a test issue',
|
||||
)
|
||||
mock_history = [
|
||||
create_cmd_output(
|
||||
exit_code=0, content='', command_id=1, command='cd /workspace'
|
||||
)
|
||||
]
|
||||
mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
|
||||
mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
|
||||
|
||||
mock_completion_response = MagicMock()
|
||||
@ -795,11 +777,7 @@ def test_guess_success_invalid_output():
|
||||
title='Test Issue',
|
||||
body='This is a test issue',
|
||||
)
|
||||
mock_history = [
|
||||
create_cmd_output(
|
||||
exit_code=0, content='', command_id=1, command='cd /workspace'
|
||||
)
|
||||
]
|
||||
mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
|
||||
mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
|
||||
|
||||
mock_completion_response = MagicMock()
|
||||
|
||||
@ -41,11 +41,10 @@ def serialization_deserialization(
|
||||
serialized_action_memory = event_to_memory(action_instance, max_message_chars)
|
||||
original_memory_dict = original_action_dict.copy()
|
||||
|
||||
# we don't send backend properties like id or 'keep_prompt'
|
||||
# we don't send backend properties like id
|
||||
original_memory_dict.pop('id', None)
|
||||
original_memory_dict.pop('timestamp', None)
|
||||
if 'args' in original_memory_dict:
|
||||
original_memory_dict['args'].pop('keep_prompt', None)
|
||||
original_memory_dict['args'].pop('blocking', None)
|
||||
original_memory_dict['args'].pop('confirmation_state', None)
|
||||
|
||||
@ -99,7 +98,6 @@ def test_cmd_run_action_serialization_deserialization():
|
||||
'blocking': False,
|
||||
'command': 'echo "Hello world"',
|
||||
'thought': '',
|
||||
'keep_prompt': True,
|
||||
'hidden': False,
|
||||
'confirmation_state': ActionConfirmationStatus.CONFIRMED,
|
||||
},
|
||||
@ -154,3 +152,32 @@ def test_file_write_action_serialization_deserialization():
|
||||
},
|
||||
}
|
||||
serialization_deserialization(original_action_dict, FileWriteAction)
|
||||
|
||||
|
||||
def test_legacy_serialization():
    """A 'run' action dict carrying the legacy ``keep_prompt`` arg still loads.

    The assertions check that the resulting ``CmdRunAction`` exposes no
    ``keep_prompt`` attribute, that the key is absent once the event is
    serialized again, and that the other args survive the round trip.
    """
    command = 'echo "Hello world"'
    legacy_args = {
        'blocking': False,
        'command': command,
        'thought': '',
        'hidden': False,
        'confirmation_state': ActionConfirmationStatus.CONFIRMED,
        'keep_prompt': False,  # will be treated as no-op
    }

    # Deserialize: the legacy key must not surface on the event object.
    event = event_from_dict({'action': 'run', 'args': legacy_args})
    assert isinstance(event, Action)
    assert isinstance(event, CmdRunAction)
    assert event.command == command
    assert event.hidden is False
    assert not hasattr(event, 'keep_prompt')

    # Serialize again: the legacy key stays gone, everything else is intact.
    event_dict = event_to_dict(event)
    assert 'keep_prompt' not in event_dict['args']
    serialized_args = event_dict['args']
    assert serialized_args['confirmation_state'] == ActionConfirmationStatus.CONFIRMED
    assert serialized_args['blocking'] is False
    assert serialized_args['command'] == command
    assert serialized_args['thought'] == ''
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import pytest
|
||||
|
||||
from openhands.runtime.utils.bash import split_bash_commands
|
||||
from openhands.runtime.utils.bash import escape_bash_special_chars, split_bash_commands
|
||||
|
||||
|
||||
def test_split_commands_util():
|
||||
@ -257,3 +257,172 @@ def test_split_commands_with_invalid_input():
|
||||
for input_command in invalid_inputs:
|
||||
# it will fall back to return the original input
|
||||
assert split_bash_commands(input_command) == [input_command]
|
||||
|
||||
|
||||
def test_escape_bash_special_chars():
    """Table-driven check of ``escape_bash_special_chars`` on basic inputs.

    Each entry is ``(command, expected_result)``. Raw strings are used so that
    every backslash below is a literal backslash in the command text.
    """
    cases = [
        # Backslash-escaped operators outside quotes gain a second backslash
        (r'echo test \; ls', r'echo test \\; ls'),
        (r'grep pattern \| sort', r'grep pattern \\| sort'),
        (r'cmd1 \&\& cmd2', r'cmd1 \\&\\& cmd2'),
        (r'cat file \> output.txt', r'cat file \\> output.txt'),
        (r'cat \< input.txt', r'cat \\< input.txt'),
        # Quoted strings should remain unchanged
        (r'echo "test \; unchanged"', r'echo "test \; unchanged"'),
        (r"echo 'test \| unchanged'", r"echo 'test \| unchanged'"),
        # Mixed quoted and unquoted
        (
            r'echo "quoted \;" \; "more" \| grep',
            r'echo "quoted \;" \\; "more" \\| grep',
        ),
        # Multiple escapes in sequence
        (r'cmd1 \;\|\& cmd2', r'cmd1 \\;\\|\\& cmd2'),
        # Commands with other backslashes
        (r'echo test\ntest', r'echo test\ntest'),
        (r'echo "test\ntest"', r'echo "test\ntest"'),
        # Edge cases
        ('', ''),  # Empty string
        (r'\\', r'\\'),  # Double backslash
        (r'\"', r'\"'),  # Escaped quote
    ]

    for command, wanted in cases:
        actual = escape_bash_special_chars(command)
        assert (
            actual == wanted
        ), f'Failed on input "{command}"\nExpected: "{wanted}"\nGot: "{actual}"'
|
||||
|
||||
|
||||
def test_escape_bash_special_chars_with_invalid_syntax():
    """Unparseable commands must come back from the escaper unchanged."""
    unparseable_commands = (
        'echo "unclosed quote',
        "echo 'unclosed quote",
        'cat <<EOF\nunclosed heredoc',
    )
    for command in unparseable_commands:
        # Should return original input when parsing fails
        escaped = escape_bash_special_chars(command)
        assert escaped == command, f'Failed to handle invalid input: {command}'
|
||||
|
||||
|
||||
def test_escape_bash_special_chars_with_heredoc():
    """Escaped operators inside a heredoc body must be left untouched."""
    heredoc_command = '\n'.join(
        [
            'cat <<EOF',
            r'line1 \; not escaped',
            r'line2 \| not escaped',
            'EOF',
        ]
    )

    actual = escape_bash_special_chars(heredoc_command)

    # Heredoc content should not be escaped, so the output equals the input.
    wanted = heredoc_command
    assert (
        actual == wanted
    ), f'Failed to handle heredoc correctly\nExpected: {wanted}\nGot: {actual}'
|
||||
|
||||
|
||||
def test_escape_bash_special_chars_with_parameter_expansion():
    """Parameter expansions pass through; unquoted escaped operators are doubled."""
    # Maps each command to the result expected from the escaper.
    expectations = {
        # Parameter expansion should be preserved
        'echo $HOME': 'echo $HOME',
        'echo ${HOME}': 'echo ${HOME}',
        'echo ${HOME:-default}': 'echo ${HOME:-default}',
        # Mixed with special chars
        r'echo $HOME \; ls': r'echo $HOME \\; ls',
        r'echo ${PATH} \| grep bin': r'echo ${PATH} \\| grep bin',
        # Quoted parameter expansion
        'echo "$HOME"': 'echo "$HOME"',
        'echo "${HOME}"': 'echo "${HOME}"',
        # Complex parameter expansions
        r'echo ${var:=default} \; ls': r'echo ${var:=default} \\; ls',
        r'echo ${!prefix*} \| sort': r'echo ${!prefix*} \\| sort',
    }

    for command, wanted in expectations.items():
        actual = escape_bash_special_chars(command)
        assert (
            actual == wanted
        ), f'Failed on input "{command}"\nExpected: "{wanted}"\nGot: "{actual}"'
|
||||
|
||||
|
||||
def test_escape_bash_special_chars_with_command_substitution():
    """Command substitutions pass through; only outer escaped operators change."""
    cases = [
        # Basic command substitution
        ('echo $(pwd)', 'echo $(pwd)'),
        ('echo `pwd`', 'echo `pwd`'),
        # Mixed with special chars
        (r'echo $(pwd) \; ls', r'echo $(pwd) \\; ls'),
        (r'echo `pwd` \| grep home', r'echo `pwd` \\| grep home'),
        # Nested command substitution
        ('echo $(echo `pwd`)', 'echo $(echo `pwd`)'),
        # Complex command substitution: the escape inside $(...) is kept as-is
        (
            r'echo $(find . -name "*.txt" \; ls)',
            r'echo $(find . -name "*.txt" \; ls)',
        ),
        # Mixed with quotes
        ('echo "$(pwd)"', 'echo "$(pwd)"'),
        ('echo "`pwd`"', 'echo "`pwd`"'),
    ]

    for command, wanted in cases:
        actual = escape_bash_special_chars(command)
        assert (
            actual == wanted
        ), f'Failed on input "{command}"\nExpected: "{wanted}"\nGot: "{actual}"'
|
||||
|
||||
|
||||
def test_escape_bash_special_chars_mixed_nodes():
    """Commands mixing expansions, substitutions, quotes and escaped operators."""
    cases = [
        # Mix of parameter expansion and command substitution
        ('echo $HOME/$(pwd)', 'echo $HOME/$(pwd)'),
        # Mix with special chars
        (r'echo $HOME/$(pwd) \; ls', r'echo $HOME/$(pwd) \\; ls'),
        # Complex mixed cases
        (
            r'echo "${HOME}/$(basename `pwd`) \; next"',
            r'echo "${HOME}/$(basename `pwd`) \; next"',
        ),
        (
            r'VAR=${HOME} \; echo $(pwd)',
            r'VAR=${HOME} \\; echo $(pwd)',
        ),
        # Real-world examples
        (
            r'find . -name "*.txt" -exec grep "${PATTERN:-default}" {} \;',
            r'find . -name "*.txt" -exec grep "${PATTERN:-default}" {} \\;',
        ),
        (
            r'echo "Current path: ${PWD}/$(basename `pwd`)" \| grep home',
            r'echo "Current path: ${PWD}/$(basename `pwd`)" \\| grep home',
        ),
    ]

    for command, wanted in cases:
        actual = escape_bash_special_chars(command)
        assert (
            actual == wanted
        ), f'Failed on input "{command}"\nExpected: "{wanted}"\nGot: "{actual}"'
|
||||
|
||||
|
||||
def test_escape_bash_special_chars_with_chained_commands():
    """``&&`` chains stay intact while escaped operators in each part are doubled."""
    # Maps each command to the result expected from the escaper.
    expectations = {
        # Basic chained commands
        'ls && pwd': 'ls && pwd',
        'echo "hello" && ls': 'echo "hello" && ls',
        # Chained commands with special chars
        r'ls \; pwd && echo test': r'ls \\; pwd && echo test',
        r'echo test && grep pattern \| sort': r'echo test && grep pattern \\| sort',
        # Complex chained cases
        r'echo ${HOME} && ls \; pwd': r'echo ${HOME} && ls \\; pwd',
        r'echo "$(pwd)" && cat file \> out.txt': (
            r'echo "$(pwd)" && cat file \\> out.txt'
        ),
        # Multiple chains
        'cmd1 && cmd2 && cmd3': 'cmd1 && cmd2 && cmd3',
        r'cmd1 \; ls && cmd2 \| grep && cmd3': (
            r'cmd1 \\; ls && cmd2 \\| grep && cmd3'
        ),
    }

    for command, wanted in expectations.items():
        actual = escape_bash_special_chars(command)
        assert (
            actual == wanted
        ), f'Failed on input "{command}"\nExpected: "{wanted}"\nGot: "{actual}"'
|
||||
|
||||
339
tests/unit/test_bash_ps1_metadata.py
Normal file
339
tests/unit/test_bash_ps1_metadata.py
Normal file
@ -0,0 +1,339 @@
|
||||
import json
|
||||
|
||||
from openhands.events.observation.commands import (
|
||||
CMD_OUTPUT_METADATA_PS1_REGEX,
|
||||
CMD_OUTPUT_PS1_BEGIN,
|
||||
CMD_OUTPUT_PS1_END,
|
||||
CmdOutputMetadata,
|
||||
CmdOutputObservation,
|
||||
)
|
||||
|
||||
|
||||
def test_ps1_metadata_format():
    """The generated PS1 prompt is framed by the begin/end markers and escapes quotes"""
    ps1_prompt = CmdOutputMetadata.to_ps1_prompt()
    print(ps1_prompt)

    opening_marker = '\n###PS1JSON###\n'
    closing_marker = '\n###PS1END###\n'
    assert ps1_prompt.startswith(opening_marker)
    assert ps1_prompt.endswith(closing_marker)

    # Keys are expected to appear with backslash-escaped double quotes.
    assert r'\"exit_code\"' in ps1_prompt, 'PS1 prompt should contain escaped double quotes'
|
||||
|
||||
|
||||
def test_ps1_metadata_json_structure():
    """The PS1 prompt wraps a JSON object whose keys are exactly the expected set"""
    raw_prompt = CmdOutputMetadata.to_ps1_prompt()

    # Strip the begin/end markers that surround the JSON payload.
    payload = raw_prompt.replace('###PS1JSON###\n', '')
    payload = payload.replace('\n###PS1END###\n', '')
    # Undo the backslash-escaping of double quotes before parsing.
    payload = payload.replace(r'\"', '"')
    # Keep only what precedes any remaining end marker, trimmed of whitespace.
    payload = payload.split('###PS1END###')[0].strip()

    parsed = json.loads(payload)

    # The parsed object must have these keys and no others.
    assert set(parsed.keys()) == {
        'pid',
        'exit_code',
        'username',
        'hostname',
        'working_dir',
        'py_interpreter_path',
    }
|
||||
|
||||
|
||||
def test_ps1_metadata_parsing():
    """A single marker-wrapped JSON block parses into matching CmdOutputMetadata fields"""
    expected = {
        'exit_code': 0,
        'username': 'testuser',
        'hostname': 'localhost',
        'working_dir': '/home/testuser',
        'py_interpreter_path': '/usr/bin/python',
    }

    # Begin marker, pretty-printed JSON, end marker - each on its own line.
    ps1_output = (
        '###PS1JSON###\n' + json.dumps(expected, indent=2) + '\n###PS1END###\n'
    )

    found = CmdOutputMetadata.matches_ps1_metadata(ps1_output)
    assert len(found) == 1

    parsed = CmdOutputMetadata.from_ps1_match(found[0])
    for field_name, value in expected.items():
        assert getattr(parsed, field_name) == value
|
||||
|
||||
|
||||
def test_ps1_metadata_parsing_string():
    """A hand-written PS1 block with a string exit code parses; exit_code becomes int 0"""
    ps1_output = r"""###PS1JSON###
{
"exit_code": "0",
"username": "myname",
"hostname": "myhostname",
"working_dir": "~/mydir",
"py_interpreter_path": "/my/python/path"
}
###PS1END###
"""
    found = CmdOutputMetadata.matches_ps1_metadata(ps1_output)
    assert len(found) == 1

    parsed = CmdOutputMetadata.from_ps1_match(found[0])
    # exit_code is given as the string "0" above but is compared against the int 0.
    expected = {
        'exit_code': 0,
        'username': 'myname',
        'hostname': 'myhostname',
        'working_dir': '~/mydir',
        'py_interpreter_path': '/my/python/path',
    }
    for field_name, value in expected.items():
        assert getattr(parsed, field_name) == value
|
||||
|
||||
|
||||
def test_ps1_metadata_parsing_string_real_example():
    """A captured PS1 block (empty pid, leading newline) parses into the expected fields"""
    ps1_output = r"""
###PS1JSON###
{
"pid": "",
"exit_code": "0",
"username": "runner",
"hostname": "fv-az1055-610",
"working_dir": "/home/runner/work/OpenHands/OpenHands",
"py_interpreter_path": "/home/runner/.cache/pypoetry/virtualenvs/openhands-ai-ULPBlkAi-py3.12/bin/python"
}
###PS1END###
"""
    found = CmdOutputMetadata.matches_ps1_metadata(ps1_output)
    assert len(found) == 1

    parsed = CmdOutputMetadata.from_ps1_match(found[0])
    expected = {
        'exit_code': 0,
        'username': 'runner',
        'hostname': 'fv-az1055-610',
        'working_dir': '/home/runner/work/OpenHands/OpenHands',
        'py_interpreter_path': (
            '/home/runner/.cache/pypoetry/virtualenvs/'
            'openhands-ai-ULPBlkAi-py3.12/bin/python'
        ),
    }
    for field_name, value in expected.items():
        assert getattr(parsed, field_name) == value
|
||||
|
||||
|
||||
def test_ps1_metadata_parsing_additional_prefix():
    """Unrelated text before the PS1 block does not prevent it from being parsed"""
    expected = {
        'exit_code': 0,
        'username': 'testuser',
        'hostname': 'localhost',
        'working_dir': '/home/testuser',
        'py_interpreter_path': '/usr/bin/python',
    }

    # Leading newline, one line of noise, a blank line, then the PS1 block.
    noise = '\nThis is something that not part of the PS1 prompt\n\n'
    ps1_block = (
        '###PS1JSON###\n' + json.dumps(expected, indent=2) + '\n###PS1END###\n'
    )
    ps1_output = noise + ps1_block

    found = CmdOutputMetadata.matches_ps1_metadata(ps1_output)
    assert len(found) == 1

    parsed = CmdOutputMetadata.from_ps1_match(found[0])
    for field_name, value in expected.items():
        assert getattr(parsed, field_name) == value
|
||||
|
||||
|
||||
def test_ps1_metadata_parsing_invalid():
    """Invalid, marker-less or empty PS1 text yields no matches; padded JSON still parses"""
    # Every one of these inputs must produce zero matches.
    rejected_inputs = [
        # Invalid JSON between the markers
        """###PS1JSON###
{invalid json}
###PS1END###
""",
        # Missing markers
        """NOT A VALID PS1 PROMPT""",
        # Markers with nothing but a blank line between them
        """###PS1JSON###

###PS1END###
""",
    ]
    for text in rejected_inputs:
        found = CmdOutputMetadata.matches_ps1_metadata(text)
        assert len(found) == 0

    # Valid JSON surrounded by blank lines inside the markers is still accepted.
    padded_block = """###PS1JSON###

{
"exit_code": "0",
"pid": "123",
"username": "test",
"hostname": "localhost",
"working_dir": "/home/test",
"py_interpreter_path": "/usr/bin/python"
}

###PS1END###
"""
    found = CmdOutputMetadata.matches_ps1_metadata(padded_block)
    assert len(found) == 1
    parsed = CmdOutputMetadata.from_ps1_match(found[0])
    assert parsed.exit_code == 0
    assert parsed.pid == 123
|
||||
|
||||
|
||||
def test_ps1_metadata_missing_fields():
    """Fields absent from the PS1 JSON come back as None, or -1 for exit_code"""

    def parse_single(fields):
        # Wrap the JSON in PS1 markers, require exactly one match, and parse it.
        text = '###PS1JSON###\n' + json.dumps(fields) + '\n###PS1END###\n'
        found = CmdOutputMetadata.matches_ps1_metadata(text)
        assert len(found) == 1
        return CmdOutputMetadata.from_ps1_match(found[0])

    # Only exit_code and pid supplied: the remaining fields are None.
    parsed = parse_single({'exit_code': 0, 'pid': 123})
    assert parsed.exit_code == 0
    assert parsed.pid == 123
    assert parsed.username is None
    assert parsed.hostname is None
    assert parsed.working_dir is None
    assert parsed.py_interpreter_path is None

    # exit_code omitted but pid present.
    parsed = parse_single({'pid': 123, 'username': 'test'})
    assert parsed.exit_code == -1  # default value
    assert parsed.pid == 123
    assert parsed.username == 'test'
|
||||
|
||||
|
||||
def test_ps1_metadata_multiple_blocks():
    """Two PS1 blocks separated by other output are both found and both parse"""
    fields = {
        'exit_code': 0,
        'username': 'testuser',
        'hostname': 'localhost',
        'working_dir': '/home/testuser',
        'py_interpreter_path': '/usr/bin/python',
    }

    # The same block appears twice with one line of unrelated text in between.
    ps1_block = '###PS1JSON###\n' + json.dumps(fields, indent=2) + '\n###PS1END###\n'
    ps1_output = ps1_block + 'Some other content\n' + ps1_block

    found = CmdOutputMetadata.matches_ps1_metadata(ps1_output)
    assert len(found) == 2  # Should find both blocks

    # Both blocks should parse successfully
    first = CmdOutputMetadata.from_ps1_match(found[0])
    second = CmdOutputMetadata.from_ps1_match(found[1])
    for parsed in (first, second):
        assert parsed.exit_code == fields['exit_code']
|
||||
|
||||
|
||||
def test_ps1_metadata_regex_pattern():
    """The PS1 regex captures the text between the begin and end markers as group 1"""
    begin, end = CMD_OUTPUT_PS1_BEGIN, CMD_OUTPUT_PS1_END

    # (input text, expected stripped content of capture group 1)
    cases = [
        # Basic pattern matching
        (f'{begin}test\n{end}', 'test'),
        # Content before and after the markers
        (f'prefix\n{begin}test\n{end}suffix', 'test'),
        # Multiline content
        (f'{begin}line1\nline2\nline3\n{end}', 'line1\nline2\nline3'),
    ]

    for text, inner in cases:
        # Only the first match is inspected.
        first_match = next(CMD_OUTPUT_METADATA_PS1_REGEX.finditer(text))
        assert first_match.group(1).strip() == inner
|
||||
|
||||
|
||||
def test_cmd_output_observation_properties():
    """CmdOutputObservation exposes command_id, exit_code, error and message from its metadata"""
    # Successful command: exit code 0, so ``error`` is falsy.
    ok_obs = CmdOutputObservation(
        command='ls',
        content='file1\nfile2',
        metadata=CmdOutputMetadata(exit_code=0, pid=123),
    )
    assert ok_obs.command_id == 123
    assert ok_obs.exit_code == 0
    assert not ok_obs.error
    assert 'exit code 0' in ok_obs.message
    assert 'ls' in ok_obs.message
    for fragment in ('file1', 'file2', 'metadata'):
        assert fragment in str(ok_obs)

    # Failed command: non-zero exit code, so ``error`` is truthy.
    failed_obs = CmdOutputObservation(
        command='invalid',
        content='error',
        metadata=CmdOutputMetadata(exit_code=1, pid=456),
    )
    assert failed_obs.command_id == 456
    assert failed_obs.exit_code == 1
    assert failed_obs.error
    assert 'exit code 1' in failed_obs.message
    assert 'invalid' in failed_obs.message
    assert 'error' in str(failed_obs)
|
||||
|
||||
|
||||
def test_ps1_metadata_empty_fields():
    """Empty-string fields are preserved, and irregularly spaced JSON still parses"""

    def parse_single(text):
        # Require exactly one PS1 match in ``text`` and return its parsed metadata.
        found = CmdOutputMetadata.matches_ps1_metadata(text)
        assert len(found) == 1
        return CmdOutputMetadata.from_ps1_match(found[0])

    # Empty strings must come back as empty strings.
    blank_fields = {
        'exit_code': 0,
        'pid': 123,
        'username': '',
        'hostname': '',
        'working_dir': '',
        'py_interpreter_path': '',
    }
    parsed = parse_single(
        '###PS1JSON###\n' + json.dumps(blank_fields) + '\n###PS1END###\n'
    )
    assert parsed.exit_code == 0
    assert parsed.pid == 123
    assert parsed.username == ''
    assert parsed.hostname == ''
    assert parsed.working_dir == ''
    assert parsed.py_interpreter_path == ''

    # Oddly spaced, but still valid, JSON.
    oddly_spaced = """###PS1JSON###
{
"exit_code":0,
"pid" : 123,
"username": "test" ,
"hostname": "host",
"working_dir" :"dir",
"py_interpreter_path":"path"
}
###PS1END###
"""
    parsed = parse_single(oddly_spaced)
    assert parsed.exit_code == 0
    assert parsed.pid == 123
    assert parsed.username == 'test'
    assert parsed.hostname == 'host'
    assert parsed.working_dir == 'dir'
    assert parsed.py_interpreter_path == 'path'
|
||||
384
tests/unit/test_bash_session.py
Normal file
384
tests/unit/test_bash_session.py
Normal file
@ -0,0 +1,384 @@
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action import CmdRunAction
|
||||
from openhands.runtime.utils.bash import BashCommandStatus, BashSession
|
||||
|
||||
|
||||
def test_session_initialization():
    """BashSession honours a custom working directory and a custom username.

    Each session is closed in a ``finally`` block so that a failing assertion
    does not leave the session open for the rest of the test run.
    """
    # Test with custom working directory
    with tempfile.TemporaryDirectory() as temp_dir:
        session = BashSession(work_dir=temp_dir)
        session.initialize()
        try:
            obs = session.execute(CmdRunAction('pwd'))
            logger.info(obs, extra={'msg_type': 'OBSERVATION'})
            assert temp_dir in obs.content
            assert '[The command completed with exit code 0.]' in obs.metadata.suffix
        finally:
            session.close()

    # Test with custom username
    session = BashSession(work_dir=os.getcwd(), username='nobody')
    session.initialize()
    try:
        assert 'openhands-nobody' in session.session.name
    finally:
        session.close()
|
||||
|
||||
|
||||
def test_cwd_property(tmp_path):
    """``session.cwd`` reflects the directory after a ``cd`` command.

    Args:
        tmp_path: pytest-provided temporary directory used as the work dir.

    The session is closed in a ``finally`` block so that a failing assertion
    does not leave it open.
    """
    session = BashSession(work_dir=tmp_path)
    session.initialize()
    try:
        # Change directory and verify pwd updates
        random_dir = tmp_path / 'random'
        random_dir.mkdir()
        session.execute(CmdRunAction(f'cd {random_dir}'))
        assert session.cwd == str(random_dir)
    finally:
        session.close()
|
||||
|
||||
|
||||
def test_basic_command():
    """Run a succeeding, a failing and a chained command and check the metadata.

    For each command the test checks the output content, exit code, the
    prefix/suffix text in the metadata, and that the session reports
    ``BashCommandStatus.COMPLETED``. The session is closed in a ``finally``
    block so that a failing assertion does not leave it open.
    """
    session = BashSession(work_dir=os.getcwd())
    session.initialize()
    try:
        # Test simple command
        obs = session.execute(CmdRunAction("echo 'hello world'"))
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'hello world' in obs.content
        assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
        assert obs.metadata.prefix == ''
        assert obs.metadata.exit_code == 0
        assert session.prev_status == BashCommandStatus.COMPLETED

        # Test command with error
        obs = session.execute(CmdRunAction('nonexistent_command'))
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert obs.metadata.exit_code == 127
        assert 'nonexistent_command: command not found' in obs.content
        assert obs.metadata.suffix == '\n[The command completed with exit code 127.]'
        assert obs.metadata.prefix == ''
        assert session.prev_status == BashCommandStatus.COMPLETED

        # Test multiple commands in sequence
        obs = session.execute(
            CmdRunAction('echo "first" && echo "second" && echo "third"')
        )
        assert 'first' in obs.content
        assert 'second' in obs.content
        assert 'third' in obs.content
        assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
        assert obs.metadata.prefix == ''
        assert obs.metadata.exit_code == 0
        assert session.prev_status == BashCommandStatus.COMPLETED
    finally:
        session.close()
|
||||
|
||||
|
||||
def test_long_running_command_follow_by_execute():
    """A slow command hits the no-change timeout; later calls continue its output.

    With ``no_change_timeout_seconds=2`` the first call returns while the loop
    is still running (exit code -1). The two follow-up calls are expected to
    carry the "continued" prefix and the same no-change suffix. The session is
    closed in a ``finally`` block so that a failing assertion does not leave
    it open.
    """
    session = BashSession(work_dir=os.getcwd(), no_change_timeout_seconds=2)
    session.initialize()

    # Suffix expected on every observation while the command keeps running.
    no_change_suffix = (
        '\n[The command has no new output after 2 seconds. '
        "You may wait longer to see additional output by sending empty command '', "
        'send other commands to interact with the current process, '
        'or send keys to interrupt/kill the command.]'
    )
    continued_prefix = '[Command output continued from previous command]\n'

    try:
        # Test command that produces output slowly
        obs = session.execute(
            CmdRunAction('for i in {1..3}; do echo $i; sleep 3; done', blocking=False)
        )
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert '1' in obs.content  # First number should appear before timeout
        assert obs.metadata.exit_code == -1  # -1 indicates command is still running
        assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
        assert obs.metadata.suffix == no_change_suffix
        assert obs.metadata.prefix == ''

        # Continue watching output
        obs = session.execute(CmdRunAction(''))
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert '2' in obs.content
        assert obs.metadata.prefix == continued_prefix
        assert obs.metadata.suffix == no_change_suffix
        assert obs.metadata.exit_code == -1  # -1 indicates command is still running
        assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT

        # Send a command that itself prints nothing; the observation is still
        # expected to contain the earlier loop's output ('3').
        obs = session.execute(CmdRunAction('sleep 15'))
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert '3' in obs.content
        assert obs.metadata.prefix == continued_prefix
        assert obs.metadata.suffix == no_change_suffix
        assert obs.metadata.exit_code == -1  # -1 indicates command is still running
        assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
    finally:
        session.close()
|
||||
|
||||
|
||||
def test_interactive_command():
|
||||
session = BashSession(work_dir=os.getcwd(), no_change_timeout_seconds=3)
|
||||
session.initialize()
|
||||
|
||||
# Test interactive command with blocking=True
|
||||
obs = session.execute(
|
||||
CmdRunAction(
|
||||
'read -p \'Enter name: \' name && echo "Hello $name"',
|
||||
)
|
||||
)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'Enter name:' in obs.content
|
||||
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
assert obs.metadata.suffix == (
|
||||
'\n[The command has no new output after 3 seconds. '
|
||||
"You may wait longer to see additional output by sending empty command '', "
|
||||
'send other commands to interact with the current process, '
|
||||
'or send keys to interrupt/kill the command.]'
|
||||
)
|
||||
assert obs.metadata.prefix == ''
|
||||
|
||||
# Send input
|
||||
obs = session.execute(CmdRunAction('John'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'Hello John' in obs.content
|
||||
assert obs.metadata.exit_code == 0
|
||||
assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
|
||||
assert obs.metadata.prefix == ''
|
||||
assert session.prev_status == BashCommandStatus.COMPLETED
|
||||
|
||||
# Test multiline command input
|
||||
obs = session.execute(CmdRunAction('cat << EOF'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.metadata.exit_code == -1
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
assert obs.metadata.suffix == (
|
||||
'\n[The command has no new output after 3 seconds. '
|
||||
"You may wait longer to see additional output by sending empty command '', "
|
||||
'send other commands to interact with the current process, '
|
||||
'or send keys to interrupt/kill the command.]'
|
||||
)
|
||||
assert obs.metadata.prefix == ''
|
||||
|
||||
obs = session.execute(CmdRunAction('line 1'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.metadata.exit_code == -1
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
assert obs.metadata.suffix == (
|
||||
'\n[The command has no new output after 3 seconds. '
|
||||
"You may wait longer to see additional output by sending empty command '', "
|
||||
'send other commands to interact with the current process, '
|
||||
'or send keys to interrupt/kill the command.]'
|
||||
)
|
||||
assert obs.metadata.prefix == '[Command output continued from previous command]\n'
|
||||
|
||||
obs = session.execute(CmdRunAction('line 2'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.metadata.exit_code == -1
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
assert obs.metadata.suffix == (
|
||||
'\n[The command has no new output after 3 seconds. '
|
||||
"You may wait longer to see additional output by sending empty command '', "
|
||||
'send other commands to interact with the current process, '
|
||||
'or send keys to interrupt/kill the command.]'
|
||||
)
|
||||
assert obs.metadata.prefix == '[Command output continued from previous command]\n'
|
||||
|
||||
obs = session.execute(CmdRunAction('EOF'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'line 1' in obs.content and 'line 2' in obs.content
|
||||
assert obs.metadata.exit_code == 0
|
||||
assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
|
||||
assert obs.metadata.prefix == ''
|
||||
|
||||
session.close()
|
||||
|
||||
|
||||
def test_ctrl_c():
|
||||
session = BashSession(work_dir=os.getcwd(), no_change_timeout_seconds=2)
|
||||
session.initialize()
|
||||
|
||||
# Start infinite loop
|
||||
obs = session.execute(
|
||||
CmdRunAction("while true; do echo 'looping'; sleep 3; done"),
|
||||
)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'looping' in obs.content
|
||||
assert obs.metadata.suffix == (
|
||||
'\n[The command has no new output after 2 seconds. '
|
||||
"You may wait longer to see additional output by sending empty command '', "
|
||||
'send other commands to interact with the current process, '
|
||||
'or send keys to interrupt/kill the command.]'
|
||||
)
|
||||
assert obs.metadata.prefix == ''
|
||||
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
# Send Ctrl+C
|
||||
obs = session.execute(CmdRunAction('C-c'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.metadata.exit_code == 130 # Standard exit code for Ctrl+C
|
||||
assert (
|
||||
obs.metadata.suffix
|
||||
== '\n[The command completed with exit code 130. CTRL+C was sent.]'
|
||||
)
|
||||
assert obs.metadata.prefix == ''
|
||||
assert session.prev_status == BashCommandStatus.COMPLETED
|
||||
|
||||
session.close()
|
||||
|
||||
|
||||
def test_empty_command_errors():
|
||||
session = BashSession(work_dir=os.getcwd())
|
||||
session.initialize()
|
||||
|
||||
# Test empty command without previous command
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert (
|
||||
obs.content
|
||||
== 'ERROR: No previous command to continue from. Previous command has to be timeout to be continued.'
|
||||
)
|
||||
assert obs.metadata.exit_code == -1
|
||||
assert obs.metadata.prefix == ''
|
||||
assert obs.metadata.suffix == ''
|
||||
assert session.prev_status is None
|
||||
|
||||
session.close()
|
||||
|
||||
|
||||
def test_command_output_continuation():
|
||||
session = BashSession(work_dir=os.getcwd(), no_change_timeout_seconds=2)
|
||||
session.initialize()
|
||||
|
||||
# Start a command that produces output slowly
|
||||
obs = session.execute(CmdRunAction('for i in {1..5}; do echo $i; sleep 3; done'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.content.strip() == '1'
|
||||
assert obs.metadata.prefix == ''
|
||||
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '[Command output continued from previous command]' in obs.metadata.prefix
|
||||
assert obs.content.strip() == '2'
|
||||
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '[Command output continued from previous command]' in obs.metadata.prefix
|
||||
assert obs.content.strip() == '3'
|
||||
|
||||
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '[Command output continued from previous command]' in obs.metadata.prefix
|
||||
assert obs.content.strip() == '4'
|
||||
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '[Command output continued from previous command]' in obs.metadata.prefix
|
||||
assert obs.content.strip() == '5'
|
||||
assert '[The command has no new output after 2 seconds.' in obs.metadata.suffix
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
obs = session.execute(CmdRunAction(''))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert '[The command completed with exit code 0.]' in obs.metadata.suffix
|
||||
assert session.prev_status == BashCommandStatus.COMPLETED
|
||||
|
||||
session.close()
|
||||
|
||||
|
||||
def test_long_output():
|
||||
session = BashSession(work_dir=os.getcwd())
|
||||
session.initialize()
|
||||
|
||||
# Generate a long output that may exceed buffer size
|
||||
obs = session.execute(CmdRunAction('for i in {1..5000}; do echo "Line $i"; done'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'Line 1' in obs.content
|
||||
assert 'Line 5000' in obs.content
|
||||
assert obs.metadata.exit_code == 0
|
||||
assert obs.metadata.prefix == ''
|
||||
assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
|
||||
|
||||
session.close()
|
||||
|
||||
|
||||
def test_long_output_exceed_history_limit():
|
||||
session = BashSession(work_dir=os.getcwd())
|
||||
session.initialize()
|
||||
|
||||
# Generate a long output that may exceed buffer size
|
||||
obs = session.execute(CmdRunAction('for i in {1..50000}; do echo "Line $i"; done'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'Previous command outputs are truncated' in obs.metadata.prefix
|
||||
assert 'Line 40000' in obs.content
|
||||
assert 'Line 50000' in obs.content
|
||||
assert obs.metadata.exit_code == 0
|
||||
assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
|
||||
|
||||
session.close()
|
||||
|
||||
|
||||
def test_multiline_command():
|
||||
session = BashSession(work_dir=os.getcwd())
|
||||
session.initialize()
|
||||
|
||||
# Test multiline command with PS2 prompt disabled
|
||||
obs = session.execute(
|
||||
CmdRunAction("""if true; then
|
||||
echo "inside if"
|
||||
fi""")
|
||||
)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'inside if' in obs.content
|
||||
assert obs.metadata.exit_code == 0
|
||||
assert obs.metadata.prefix == ''
|
||||
assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
|
||||
|
||||
session.close()
|
||||
|
||||
|
||||
def test_python_interactive_input():
|
||||
session = BashSession(work_dir=os.getcwd(), no_change_timeout_seconds=2)
|
||||
session.initialize()
|
||||
|
||||
# Test Python program that asks for input - properly escaped for bash
|
||||
python_script = """name = input('Enter your name: '); age = input('Enter your age: '); print(f'Hello {name}, you are {age} years old')"""
|
||||
|
||||
# Start Python with the interactive script
|
||||
obs = session.execute(CmdRunAction(f'python3 -c "{python_script}"'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'Enter your name:' in obs.content
|
||||
assert obs.metadata.exit_code == -1 # -1 indicates command is still running
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
# Send first input (name)
|
||||
obs = session.execute(CmdRunAction('Alice'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'Enter your age:' in obs.content
|
||||
assert obs.metadata.exit_code == -1
|
||||
assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
# Send second input (age)
|
||||
obs = session.execute(CmdRunAction('25'))
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert 'Hello Alice, you are 25 years old' in obs.content
|
||||
assert obs.metadata.exit_code == 0
|
||||
assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
|
||||
assert session.prev_status == BashCommandStatus.COMPLETED
|
||||
|
||||
session.close()
|
||||
@ -26,6 +26,7 @@ from openhands.events.action import (
|
||||
from openhands.events.event import EventSource, FileEditSource, FileReadSource
|
||||
from openhands.events.observation.browse import BrowserOutputObservation
|
||||
from openhands.events.observation.commands import (
|
||||
CmdOutputMetadata,
|
||||
CmdOutputObservation,
|
||||
IPythonRunCellObservation,
|
||||
)
|
||||
@ -50,7 +51,11 @@ def agent() -> CodeActAgent:
|
||||
def test_cmd_output_observation_message(agent: CodeActAgent):
|
||||
agent.config.function_calling = False
|
||||
obs = CmdOutputObservation(
|
||||
command='echo hello', content='Command output', command_id=1, exit_code=0
|
||||
command='echo hello',
|
||||
content='Command output',
|
||||
metadata=CmdOutputMetadata(
|
||||
exit_code=0,
|
||||
),
|
||||
)
|
||||
|
||||
results = agent.get_observation_message(obs, tool_call_id_to_message={})
|
||||
@ -62,7 +67,7 @@ def test_cmd_output_observation_message(agent: CodeActAgent):
|
||||
assert len(result.content) == 1
|
||||
assert isinstance(result.content[0], TextContent)
|
||||
assert 'Command output' in result.content[0].text
|
||||
assert 'Command finished with exit code 0' in result.content[0].text
|
||||
assert '[Command finished with exit code 0]' in result.content[0].text
|
||||
|
||||
|
||||
def test_ipython_run_cell_observation_message(agent: CodeActAgent):
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
from openhands.events.observation.commands import (
|
||||
CmdOutputMetadata,
|
||||
CmdOutputObservation,
|
||||
IPythonRunCellObservation,
|
||||
)
|
||||
@ -7,14 +8,18 @@ from openhands.events.observation.commands import (
|
||||
def test_cmd_output_success():
|
||||
# Test successful command
|
||||
obs = CmdOutputObservation(
|
||||
command_id=1, command='ls', content='file1.txt\nfile2.txt', exit_code=0
|
||||
command='ls',
|
||||
content='file1.txt\nfile2.txt',
|
||||
metadata=CmdOutputMetadata(exit_code=0),
|
||||
)
|
||||
assert obs.success is True
|
||||
assert obs.error is False
|
||||
|
||||
# Test failed command
|
||||
obs = CmdOutputObservation(
|
||||
command_id=2, command='ls', content='No such file or directory', exit_code=1
|
||||
command='ls',
|
||||
content='No such file or directory',
|
||||
metadata=CmdOutputMetadata(exit_code=1),
|
||||
)
|
||||
assert obs.success is False
|
||||
assert obs.error is True
|
||||
|
||||
@ -1,18 +1,22 @@
|
||||
from openhands.events.observation import CmdOutputObservation
|
||||
from openhands.events.observation import CmdOutputMetadata, CmdOutputObservation
|
||||
from openhands.events.serialization import event_to_dict
|
||||
|
||||
|
||||
def test_command_output_success_serialization():
|
||||
# Test successful command
|
||||
obs = CmdOutputObservation(
|
||||
command_id=1, command='ls', content='file1.txt\nfile2.txt', exit_code=0
|
||||
command='ls',
|
||||
content='file1.txt\nfile2.txt',
|
||||
metadata=CmdOutputMetadata(exit_code=0),
|
||||
)
|
||||
serialized = event_to_dict(obs)
|
||||
assert serialized['success'] is True
|
||||
|
||||
# Test failed command
|
||||
obs = CmdOutputObservation(
|
||||
command_id=2, command='ls', content='No such file or directory', exit_code=1
|
||||
command='ls',
|
||||
content='No such file or directory',
|
||||
metadata=CmdOutputMetadata(exit_code=1),
|
||||
)
|
||||
serialized = event_to_dict(obs)
|
||||
assert serialized['success'] is False
|
||||
|
||||
@ -107,7 +107,7 @@ class TestStuckDetector:
|
||||
cmd_action = CmdRunAction(command='ls')
|
||||
state.history.append(cmd_action)
|
||||
cmd_observation = CmdOutputObservation(
|
||||
command_id=1, command='ls', content='file1.txt\nfile2.txt'
|
||||
command='ls', content='file1.txt\nfile2.txt'
|
||||
)
|
||||
# cmd_observation._cause = cmd_action._id
|
||||
state.history.append(cmd_observation)
|
||||
@ -188,7 +188,7 @@ class TestStuckDetector:
|
||||
cmd_action_1 = CmdRunAction(command='ls')
|
||||
cmd_action_1._id = 1
|
||||
state.history.append(cmd_action_1)
|
||||
cmd_observation_1 = CmdOutputObservation(content='', command='ls', command_id=1)
|
||||
cmd_observation_1 = CmdOutputObservation(content='', command='ls')
|
||||
cmd_observation_1._cause = cmd_action_1._id
|
||||
state.history.append(cmd_observation_1)
|
||||
# 4 events
|
||||
@ -196,7 +196,7 @@ class TestStuckDetector:
|
||||
cmd_action_2 = CmdRunAction(command='ls')
|
||||
cmd_action_2._id = 2
|
||||
state.history.append(cmd_action_2)
|
||||
cmd_observation_2 = CmdOutputObservation(content='', command='ls', command_id=2)
|
||||
cmd_observation_2 = CmdOutputObservation(content='', command='ls')
|
||||
cmd_observation_2._cause = cmd_action_2._id
|
||||
state.history.append(cmd_observation_2)
|
||||
# 6 events
|
||||
@ -212,7 +212,7 @@ class TestStuckDetector:
|
||||
cmd_action_3 = CmdRunAction(command='ls')
|
||||
cmd_action_3._id = 3
|
||||
state.history.append(cmd_action_3)
|
||||
cmd_observation_3 = CmdOutputObservation(content='', command='ls', command_id=3)
|
||||
cmd_observation_3 = CmdOutputObservation(content='', command='ls')
|
||||
cmd_observation_3._cause = cmd_action_3._id
|
||||
state.history.append(cmd_observation_3)
|
||||
# 10 events
|
||||
@ -223,7 +223,7 @@ class TestStuckDetector:
|
||||
cmd_action_4 = CmdRunAction(command='ls')
|
||||
cmd_action_4._id = 4
|
||||
state.history.append(cmd_action_4)
|
||||
cmd_observation_4 = CmdOutputObservation(content='', command='ls', command_id=4)
|
||||
cmd_observation_4 = CmdOutputObservation(content='', command='ls')
|
||||
cmd_observation_4._cause = cmd_action_4._id
|
||||
state.history.append(cmd_observation_4)
|
||||
# 12 events
|
||||
@ -436,7 +436,7 @@ class TestStuckDetector:
|
||||
cmd_action_1 = CmdRunAction(command='ls')
|
||||
state.history.append(cmd_action_1)
|
||||
cmd_observation_1 = CmdOutputObservation(
|
||||
command_id=1, command='ls', content='file1.txt\nfile2.txt'
|
||||
command='ls', content='file1.txt\nfile2.txt'
|
||||
)
|
||||
# cmd_observation_1._cause = cmd_action_1._id
|
||||
state.history.append(cmd_observation_1)
|
||||
@ -452,7 +452,7 @@ class TestStuckDetector:
|
||||
cmd_action_2 = CmdRunAction(command='ls')
|
||||
state.history.append(cmd_action_2)
|
||||
cmd_observation_2 = CmdOutputObservation(
|
||||
command_id=2, command='ls', content='file1.txt\nfile2.txt'
|
||||
command='ls', content='file1.txt\nfile2.txt'
|
||||
)
|
||||
# cmd_observation_2._cause = cmd_action_2._id
|
||||
state.history.append(cmd_observation_2)
|
||||
@ -475,7 +475,7 @@ class TestStuckDetector:
|
||||
cmd_action_3 = CmdRunAction(command='ls')
|
||||
state.history.append(cmd_action_3)
|
||||
cmd_observation_3 = CmdOutputObservation(
|
||||
command_id=3, command='ls', content='file1.txt\nfile2.txt'
|
||||
command='ls', content='file1.txt\nfile2.txt'
|
||||
)
|
||||
# cmd_observation_3._cause = cmd_action_3._id
|
||||
state.history.append(cmd_observation_3)
|
||||
@ -506,7 +506,7 @@ class TestStuckDetector:
|
||||
cmd_action_1 = CmdRunAction(command='ls')
|
||||
state.history.append(cmd_action_1)
|
||||
cmd_observation_1 = CmdOutputObservation(
|
||||
command_id=cmd_action_1.id, command='ls', content='file1.txt\nfile2.txt'
|
||||
command='ls', content='file1.txt\nfile2.txt'
|
||||
)
|
||||
# cmd_observation_1._cause = cmd_action_1._id
|
||||
state.history.append(cmd_observation_1)
|
||||
@ -521,9 +521,7 @@ class TestStuckDetector:
|
||||
|
||||
cmd_action_2 = CmdRunAction(command='pwd')
|
||||
state.history.append(cmd_action_2)
|
||||
cmd_observation_2 = CmdOutputObservation(
|
||||
command_id=2, command='pwd', content='/home/user'
|
||||
)
|
||||
cmd_observation_2 = CmdOutputObservation(command='pwd', content='/home/user')
|
||||
# cmd_observation_2._cause = cmd_action_2._id
|
||||
state.history.append(cmd_observation_2)
|
||||
|
||||
@ -541,9 +539,7 @@ class TestStuckDetector:
|
||||
|
||||
cmd_action_3 = CmdRunAction(command='pwd')
|
||||
state.history.append(cmd_action_3)
|
||||
cmd_observation_3 = CmdOutputObservation(
|
||||
command_id=cmd_action_3.id, command='pwd', content='/home/user'
|
||||
)
|
||||
cmd_observation_3 = CmdOutputObservation(command='pwd', content='/home/user')
|
||||
# cmd_observation_3._cause = cmd_action_3._id
|
||||
state.history.append(cmd_observation_3)
|
||||
|
||||
@ -590,7 +586,6 @@ class TestStuckDetector:
|
||||
# Add an observation event between the repeated message actions
|
||||
cmd_output_observation = CmdOutputObservation(
|
||||
content='OK, I was stuck, but no more.',
|
||||
command_id=42,
|
||||
command='storybook',
|
||||
exit_code=0,
|
||||
)
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
from openhands.events.observation import (
|
||||
CmdOutputMetadata,
|
||||
CmdOutputObservation,
|
||||
Observation,
|
||||
)
|
||||
@ -40,13 +41,65 @@ def serialization_deserialization(
|
||||
|
||||
|
||||
# Additional tests for various observation subclasses can be included here
|
||||
def test_observation_event_props_serialization_deserialization():
|
||||
original_observation_dict = {
|
||||
'id': 42,
|
||||
'source': 'agent',
|
||||
'timestamp': '2021-08-01T12:00:00',
|
||||
'observation': 'run',
|
||||
'message': 'Command `ls -l` executed with exit code 0.',
|
||||
'extras': {
|
||||
'command': 'ls -l',
|
||||
'hidden': False,
|
||||
'metadata': {
|
||||
'exit_code': 0,
|
||||
'hostname': None,
|
||||
'pid': -1,
|
||||
'prefix': '',
|
||||
'py_interpreter_path': None,
|
||||
'suffix': '',
|
||||
'username': None,
|
||||
'working_dir': None,
|
||||
},
|
||||
},
|
||||
'content': 'foo.txt',
|
||||
'success': True,
|
||||
}
|
||||
serialization_deserialization(original_observation_dict, CmdOutputObservation)
|
||||
|
||||
|
||||
def test_command_output_observation_serialization_deserialization():
|
||||
original_observation_dict = {
|
||||
'observation': 'run',
|
||||
'extras': {
|
||||
'command': 'ls -l',
|
||||
'hidden': False,
|
||||
'metadata': {
|
||||
'exit_code': 0,
|
||||
'hostname': None,
|
||||
'pid': -1,
|
||||
'prefix': '',
|
||||
'py_interpreter_path': None,
|
||||
'suffix': '',
|
||||
'username': None,
|
||||
'working_dir': None,
|
||||
},
|
||||
},
|
||||
'message': 'Command `ls -l` executed with exit code 0.',
|
||||
'content': 'foo.txt',
|
||||
'success': True,
|
||||
}
|
||||
serialization_deserialization(original_observation_dict, CmdOutputObservation)
|
||||
|
||||
|
||||
def test_success_field_serialization():
|
||||
# Test success=True
|
||||
obs = CmdOutputObservation(
|
||||
content='Command succeeded',
|
||||
exit_code=0,
|
||||
command='ls -l',
|
||||
command_id=3,
|
||||
metadata=CmdOutputMetadata(
|
||||
exit_code=0,
|
||||
),
|
||||
)
|
||||
serialized = event_to_dict(obs)
|
||||
assert serialized['success'] is True
|
||||
@ -54,9 +107,42 @@ def test_success_field_serialization():
|
||||
# Test success=False
|
||||
obs = CmdOutputObservation(
|
||||
content='No such file or directory',
|
||||
exit_code=1,
|
||||
command='ls -l',
|
||||
command_id=3,
|
||||
metadata=CmdOutputMetadata(
|
||||
exit_code=1,
|
||||
),
|
||||
)
|
||||
serialized = event_to_dict(obs)
|
||||
assert serialized['success'] is False
|
||||
|
||||
|
||||
def test_legacy_serialization():
|
||||
original_observation_dict = {
|
||||
'id': 42,
|
||||
'source': 'agent',
|
||||
'timestamp': '2021-08-01T12:00:00',
|
||||
'observation': 'run',
|
||||
'message': 'Command `ls -l` executed with exit code 0.',
|
||||
'extras': {
|
||||
'command': 'ls -l',
|
||||
'hidden': False,
|
||||
'exit_code': 0,
|
||||
'command_id': 3,
|
||||
},
|
||||
'content': 'foo.txt',
|
||||
'success': True,
|
||||
}
|
||||
event = event_from_dict(original_observation_dict)
|
||||
assert isinstance(event, Observation)
|
||||
assert isinstance(event, CmdOutputObservation)
|
||||
assert event.metadata.exit_code == 0
|
||||
assert event.success is True
|
||||
assert event.command == 'ls -l'
|
||||
assert event.hidden is False
|
||||
|
||||
event_dict = event_to_dict(event)
|
||||
assert event_dict['success'] is True
|
||||
assert event_dict['extras']['metadata']['exit_code'] == 0
|
||||
assert event_dict['extras']['metadata']['pid'] == 3
|
||||
assert event_dict['extras']['command'] == 'ls -l'
|
||||
assert event_dict['extras']['hidden'] is False
|
||||
|
||||
@ -368,7 +368,6 @@ async def test_unsafe_bash_command(temp_dir: str):
|
||||
'blocking': False,
|
||||
'command': 'ls',
|
||||
'hidden': False,
|
||||
'keep_prompt': True,
|
||||
'confirmation_state': ActionConfirmationStatus.CONFIRMED,
|
||||
},
|
||||
),
|
||||
@ -495,9 +494,7 @@ def test_parse_action(action, expected_trace):
|
||||
],
|
||||
),
|
||||
(
|
||||
CmdOutputObservation(
|
||||
content='cmd output content', command_id=1, command='ls'
|
||||
),
|
||||
CmdOutputObservation(content='cmd output content', command='ls'),
|
||||
[
|
||||
ToolOutput(
|
||||
metadata={},
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user