mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Migrate multi-line-bash-related sandbox tests into runtime tests and fix multi-line issue (#3128)
* Remove global config from memory * Remove runtime global config * Remove from storage * Remove global config * Fix event stream tests * Fix sandbox issue * Change config * Removed transferred tests * Add swe env box * Fixes on testing * Fixed some tests * Merge with stashed changes * Fix typing * Fix ipython test * Revive function * Make temp_dir fixture * Remove test to avoid circular import * fix eventstream filestore for test_runtime * fix parse arg issue that cause integration test to fail * support swebench pull from custom namespace * add back simple tests for runtime * move multi-line bash tests to test_runtime; support multi-line bash for esruntime; * add testcase to handle PS2 prompt * use bashlex for bash parsing to handle multi-line commands; add testcases for multi-line commands * revert ghcr runtime change --------- Co-authored-by: Graham Neubig <neubig@gmail.com>
This commit is contained in:
parent
8b77e8a0ff
commit
b1ea204c5b
@ -9,7 +9,7 @@ from typing import Literal, Mapping
|
||||
from termcolor import colored
|
||||
|
||||
DISABLE_COLOR_PRINTING = False
|
||||
DEBUG = False
|
||||
DEBUG = os.getenv('DEBUG', 'False').lower() in ['true', '1', 'yes']
|
||||
|
||||
ColorType = Literal[
|
||||
'red',
|
||||
|
||||
@ -46,6 +46,7 @@ from opendevin.runtime.plugins import (
|
||||
Plugin,
|
||||
)
|
||||
from opendevin.runtime.server.files import insert_lines, read_lines
|
||||
from opendevin.runtime.utils import split_bash_commands
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
@ -79,7 +80,7 @@ class RuntimeClient:
|
||||
r'\[PEXPECT_BEGIN\] ([a-z0-9_-]*)@([a-zA-Z0-9.-]*):(.+) \[PEXPECT_END\]'
|
||||
)
|
||||
|
||||
self.shell.sendline(f'export PS1="{self.__bash_PS1}"')
|
||||
self.shell.sendline(f'export PS1="{self.__bash_PS1}"; export PS2=""')
|
||||
self.shell.expect(self.__bash_expect_regex)
|
||||
|
||||
self.shell.sendline(f'cd {work_dir}')
|
||||
@ -87,6 +88,15 @@ class RuntimeClient:
|
||||
|
||||
def _get_bash_prompt(self):
|
||||
ps1 = self.shell.after
|
||||
|
||||
# begin at the last occurrence of '[PEXPECT_BEGIN]'.
|
||||
# In multi-line bash commands, the prompt will be repeated
|
||||
# and the matched regex captures all of them
|
||||
# - we only want the last one (newest prompt)
|
||||
_begin_pos = ps1.rfind('[PEXPECT_BEGIN]')
|
||||
if _begin_pos != -1:
|
||||
ps1 = ps1[_begin_pos:]
|
||||
|
||||
# parse the ps1 to get username, hostname, and working directory
|
||||
matched = re.match(self.__bash_expect_regex, ps1)
|
||||
assert (
|
||||
@ -102,7 +112,7 @@ class RuntimeClient:
|
||||
prompt += '$'
|
||||
return prompt + ' '
|
||||
|
||||
def _execute_bash(self, command, keep_prompt: bool = True) -> tuple[str, int]:
|
||||
def _execute_bash(self, command: str, keep_prompt: bool = True) -> tuple[str, int]:
|
||||
logger.debug(f'Executing command: {command}')
|
||||
self.shell.sendline(command)
|
||||
self.shell.expect(self.__bash_expect_regex)
|
||||
@ -129,10 +139,22 @@ class RuntimeClient:
|
||||
|
||||
async def run(self, action: CmdRunAction) -> CmdOutputObservation:
|
||||
try:
|
||||
output, exit_code = self._execute_bash(action.command)
|
||||
commands = split_bash_commands(action.command)
|
||||
all_output = ''
|
||||
for command in commands:
|
||||
output, exit_code = self._execute_bash(command)
|
||||
if all_output:
|
||||
# previous output already exists with prompt "user@hostname:working_dir #"
|
||||
# we need to add the command to the previous output,
|
||||
# so the model knows the following is the output of another action
|
||||
all_output = all_output.rstrip() + ' ' + command + '\r\n'
|
||||
|
||||
all_output += str(output) + '\r\n'
|
||||
if exit_code != 0:
|
||||
break
|
||||
return CmdOutputObservation(
|
||||
command_id=-1,
|
||||
content=str(output),
|
||||
content=all_output.rstrip('\r\n'),
|
||||
command=action.command,
|
||||
exit_code=exit_code,
|
||||
)
|
||||
|
||||
@ -58,7 +58,7 @@ class EventStreamRuntime(Runtime):
|
||||
# TODO: We can switch to aiodocker when `get_od_sandbox_image` is updated to use aiodocker
|
||||
self.docker_client: docker.DockerClient = self._init_docker_client()
|
||||
self.container_image = (
|
||||
config.sandbox.container_image
|
||||
self.config.sandbox.container_image
|
||||
if container_image is None
|
||||
else container_image
|
||||
)
|
||||
@ -103,7 +103,7 @@ class EventStreamRuntime(Runtime):
|
||||
async def _init_container(
|
||||
self,
|
||||
sandbox_workspace_dir: str,
|
||||
mount_dir: str,
|
||||
mount_dir: str | None = None,
|
||||
plugins: list[PluginRequirement] | None = None,
|
||||
):
|
||||
try:
|
||||
@ -124,6 +124,14 @@ class EventStreamRuntime(Runtime):
|
||||
else:
|
||||
port_mapping = {f'{self._port}/tcp': self._port}
|
||||
|
||||
if mount_dir is not None:
|
||||
volumes = {mount_dir: {'bind': sandbox_workspace_dir, 'mode': 'rw'}}
|
||||
else:
|
||||
logger.warn(
|
||||
'Mount dir is not set, will not mount the workspace directory to the container.'
|
||||
)
|
||||
volumes = None
|
||||
|
||||
container = self.docker_client.containers.run(
|
||||
self.container_image,
|
||||
command=(
|
||||
@ -139,7 +147,7 @@ class EventStreamRuntime(Runtime):
|
||||
name=self.container_name,
|
||||
detach=True,
|
||||
environment={'DEBUG': 'true'} if self.config.debug else None,
|
||||
volumes={mount_dir: {'bind': sandbox_workspace_dir, 'mode': 'rw'}},
|
||||
volumes=volumes,
|
||||
)
|
||||
logger.info(f'Container started. Server url: {self.api_url}')
|
||||
return container
|
||||
|
||||
@ -33,13 +33,13 @@ from opendevin.runtime.tools import RuntimeTool
|
||||
from opendevin.storage import FileStore
|
||||
|
||||
|
||||
def _default_env_vars(config: SandboxConfig) -> dict[str, str]:
|
||||
def _default_env_vars(sandbox_config: SandboxConfig) -> dict[str, str]:
|
||||
ret = {}
|
||||
for key in os.environ:
|
||||
if key.startswith('SANDBOX_ENV_'):
|
||||
sandbox_key = key.removeprefix('SANDBOX_ENV_')
|
||||
ret[sandbox_key] = os.environ[key]
|
||||
if config.enable_auto_lint:
|
||||
if sandbox_config.enable_auto_lint:
|
||||
ret['ENABLE_AUTO_LINT'] = 'true'
|
||||
return ret
|
||||
|
||||
|
||||
@ -115,7 +115,7 @@ class ServerRuntime(Runtime):
|
||||
|
||||
async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
|
||||
self._run_command(
|
||||
("cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n" f'{action.code}\n' 'EOL'),
|
||||
f"cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n{action.code}\nEOL"
|
||||
)
|
||||
|
||||
# run the code
|
||||
|
||||
@ -1,87 +1,49 @@
|
||||
import bashlex
|
||||
|
||||
from opendevin.core.logger import opendevin_logger as logger
|
||||
|
||||
|
||||
def split_bash_commands(commands):
|
||||
# States
|
||||
NORMAL = 0
|
||||
IN_SINGLE_QUOTE = 1
|
||||
IN_DOUBLE_QUOTE = 2
|
||||
IN_HEREDOC = 3
|
||||
try:
|
||||
parsed = bashlex.parse(commands)
|
||||
except bashlex.errors.ParsingError as e:
|
||||
logger.error(
|
||||
f'Failed to parse bash commands\n[input]: {commands}\n[error]: {e}'
|
||||
)
|
||||
# If parsing fails, return the original commands
|
||||
return [commands]
|
||||
|
||||
state = NORMAL
|
||||
heredoc_trigger = None
|
||||
result = []
|
||||
current_command: list[str] = []
|
||||
result: list[str] = []
|
||||
last_end = 0
|
||||
|
||||
i = 0
|
||||
while i < len(commands):
|
||||
char = commands[i]
|
||||
for node in parsed:
|
||||
start, end = node.pos
|
||||
|
||||
if state == NORMAL:
|
||||
if char == "'":
|
||||
state = IN_SINGLE_QUOTE
|
||||
elif char == '"':
|
||||
state = IN_DOUBLE_QUOTE
|
||||
elif char == '\\':
|
||||
# Check if this is escaping a newline
|
||||
if i + 1 < len(commands) and commands[i + 1] == '\n':
|
||||
i += 1 # Skip the newline
|
||||
# Continue with the next line as part of the same command
|
||||
i += 1 # Move to the first character of the next line
|
||||
continue
|
||||
elif char == '\n':
|
||||
if not heredoc_trigger and current_command:
|
||||
result.append(''.join(current_command).strip())
|
||||
current_command = []
|
||||
elif char == '<' and commands[i : i + 2] == '<<':
|
||||
# Detect heredoc
|
||||
state = IN_HEREDOC
|
||||
i += 2 # Skip '<<'
|
||||
while commands[i] == ' ':
|
||||
i += 1
|
||||
start = i
|
||||
while commands[i] not in [' ', '\n']:
|
||||
i += 1
|
||||
heredoc_trigger = commands[start:i]
|
||||
current_command.append(commands[start - 2 : i]) # Include '<<'
|
||||
continue # Skip incrementing i at the end of the loop
|
||||
current_command.append(char)
|
||||
# Include any text between the last command and this one
|
||||
if start > last_end:
|
||||
between = commands[last_end:start]
|
||||
logger.debug(f'BASH PARSING between: {between}')
|
||||
if result:
|
||||
result[-1] += between.rstrip()
|
||||
elif between.strip():
|
||||
# THIS SHOULD NOT HAPPEN
|
||||
result.append(between.rstrip())
|
||||
|
||||
elif state == IN_SINGLE_QUOTE:
|
||||
current_command.append(char)
|
||||
if char == "'" and commands[i - 1] != '\\':
|
||||
state = NORMAL
|
||||
# Extract the command, preserving original formatting
|
||||
command = commands[start:end].rstrip()
|
||||
logger.debug(f'BASH PARSING command: {command}')
|
||||
result.append(command)
|
||||
|
||||
elif state == IN_DOUBLE_QUOTE:
|
||||
current_command.append(char)
|
||||
if char == '"' and commands[i - 1] != '\\':
|
||||
state = NORMAL
|
||||
|
||||
elif state == IN_HEREDOC:
|
||||
current_command.append(char)
|
||||
if (
|
||||
char == '\n'
|
||||
and heredoc_trigger
|
||||
and commands[i + 1 : i + 1 + len(heredoc_trigger) + 1]
|
||||
== heredoc_trigger + '\n'
|
||||
):
|
||||
# Check if the next line starts with the heredoc trigger followed by a newline
|
||||
i += (
|
||||
len(heredoc_trigger) + 1
|
||||
) # Move past the heredoc trigger and newline
|
||||
current_command.append(
|
||||
heredoc_trigger + '\n'
|
||||
) # Include the heredoc trigger and newline
|
||||
result.append(''.join(current_command).strip())
|
||||
current_command = []
|
||||
heredoc_trigger = None
|
||||
state = NORMAL
|
||||
continue
|
||||
|
||||
i += 1
|
||||
|
||||
# Add the last command if any
|
||||
if current_command:
|
||||
result.append(''.join(current_command).strip())
|
||||
|
||||
# Remove any empty strings from the result
|
||||
result = [cmd for cmd in result if cmd]
|
||||
last_end = end
|
||||
|
||||
# Add any remaining text after the last command to the last command
|
||||
remaining = commands[last_end:].rstrip()
|
||||
logger.debug(f'BASH PARSING remaining: {remaining}')
|
||||
if last_end < len(commands) and result:
|
||||
result[-1] += remaining
|
||||
logger.debug(f'BASH PARSING result[-1] += remaining: {result[-1]}')
|
||||
elif last_end < len(commands):
|
||||
if remaining:
|
||||
result.append(remaining)
|
||||
logger.debug(f'BASH PARSING result.append(remaining): {result[-1]}')
|
||||
return result
|
||||
|
||||
15
poetry.lock
generated
15
poetry.lock
generated
@ -1,4 +1,4 @@
|
||||
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aenum"
|
||||
@ -398,6 +398,17 @@ files = [
|
||||
{file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bashlex"
|
||||
version = "0.18"
|
||||
description = "Python parser for bash"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4"
|
||||
files = [
|
||||
{file = "bashlex-0.18-py2.py3-none-any.whl", hash = "sha256:91d73a23a3e51711919c1c899083890cdecffc91d8c088942725ac13e9dcfffa"},
|
||||
{file = "bashlex-0.18.tar.gz", hash = "sha256:5bb03a01c6d5676338c36fd1028009c8ad07e7d61d8a1ce3f513b7fff52796ee"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bcrypt"
|
||||
version = "4.1.3"
|
||||
@ -9109,4 +9120,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.11"
|
||||
content-hash = "e1520f1342ab527bc3bb2619f8909cbdddeb227c14614eb3d82e133961f1f4d2"
|
||||
content-hash = "6d6cfaf3a614a4bf766d9a0e886e82dc9f8cfb8bf08a642f0207f260e72dd6da"
|
||||
|
||||
@ -39,6 +39,7 @@ pathspec = "^0.12.1"
|
||||
google-cloud-aiplatform = "*"
|
||||
grep-ast = "0.3.2"
|
||||
tree-sitter = "0.21.3"
|
||||
bashlex = "^0.18"
|
||||
|
||||
[tool.poetry.group.llama-index.dependencies]
|
||||
llama-index = "*"
|
||||
@ -72,6 +73,7 @@ reportlab = "*"
|
||||
[tool.coverage.run]
|
||||
concurrency = ["gevent"]
|
||||
|
||||
|
||||
[tool.poetry.group.runtime.dependencies]
|
||||
jupyterlab = "*"
|
||||
notebook = "*"
|
||||
@ -105,6 +107,7 @@ ignore = ["D1"]
|
||||
[tool.ruff.lint.pydocstyle]
|
||||
convention = "google"
|
||||
|
||||
|
||||
[tool.poetry.group.evaluation.dependencies]
|
||||
streamlit = "*"
|
||||
whatthepatch = "*"
|
||||
|
||||
283
tests/unit/test_bash_parsing.py
Normal file
283
tests/unit/test_bash_parsing.py
Normal file
@ -0,0 +1,283 @@
|
||||
import pytest
|
||||
|
||||
from opendevin.runtime.utils.bash import split_bash_commands
|
||||
|
||||
|
||||
def test_split_commands_util():
|
||||
cmds = [
|
||||
'ls -l',
|
||||
'echo -e "hello\nworld"',
|
||||
"""
|
||||
echo -e "hello it\\'s me"
|
||||
""".strip(),
|
||||
"""
|
||||
echo \\
|
||||
-e 'hello' \\
|
||||
-v
|
||||
""".strip(),
|
||||
"""
|
||||
echo -e 'hello\\nworld\\nare\\nyou\\nthere?'
|
||||
""".strip(),
|
||||
"""
|
||||
echo -e 'hello
|
||||
world
|
||||
are
|
||||
you\\n
|
||||
there?'
|
||||
""".strip(),
|
||||
"""
|
||||
echo -e 'hello
|
||||
world "
|
||||
'
|
||||
""".strip(),
|
||||
"""
|
||||
kubectl apply -f - <<EOF
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: busybox-sleep
|
||||
spec:
|
||||
containers:
|
||||
- name: busybox
|
||||
image: busybox:1.28
|
||||
args:
|
||||
- sleep
|
||||
- "1000000"
|
||||
EOF
|
||||
""".strip(),
|
||||
"""
|
||||
mkdir -p _modules && \
|
||||
for month in {01..04}; do
|
||||
for day in {01..05}; do
|
||||
touch "_modules/2024-${month}-${day}-sample.md"
|
||||
done
|
||||
done
|
||||
""".strip(),
|
||||
]
|
||||
joined_cmds = '\n'.join(cmds)
|
||||
split_cmds = split_bash_commands(joined_cmds)
|
||||
for s in split_cmds:
|
||||
print('\nCMD')
|
||||
print(s)
|
||||
for i in range(len(cmds)):
|
||||
assert (
|
||||
split_cmds[i].strip() == cmds[i].strip()
|
||||
), f'At index {i}: {split_cmds[i]} != {cmds[i]}.'
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'input_command, expected_output',
|
||||
[
|
||||
('ls -l', ['ls -l']),
|
||||
("echo 'Hello, world!'", ["echo 'Hello, world!'"]),
|
||||
('cd /tmp && touch test.txt', ['cd /tmp && touch test.txt']),
|
||||
("echo -e 'line1\\nline2\\nline3'", ["echo -e 'line1\\nline2\\nline3'"]),
|
||||
(
|
||||
"grep 'pattern' file.txt | sort | uniq",
|
||||
["grep 'pattern' file.txt | sort | uniq"],
|
||||
),
|
||||
('for i in {1..5}; do echo $i; done', ['for i in {1..5}; do echo $i; done']),
|
||||
(
|
||||
"echo 'Single quotes don\\'t escape'",
|
||||
["echo 'Single quotes don\\'t escape'"],
|
||||
),
|
||||
(
|
||||
'echo "Double quotes \\"do\\" escape"',
|
||||
['echo "Double quotes \\"do\\" escape"'],
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_single_commands(input_command, expected_output):
|
||||
assert split_bash_commands(input_command) == expected_output
|
||||
|
||||
|
||||
def test_heredoc():
|
||||
input_commands = """
|
||||
cat <<EOF
|
||||
multiline
|
||||
text
|
||||
EOF
|
||||
echo "Done"
|
||||
"""
|
||||
expected_output = ['cat <<EOF\nmultiline\ntext\nEOF', 'echo "Done"']
|
||||
assert split_bash_commands(input_commands) == expected_output
|
||||
|
||||
|
||||
def test_jupyter_heredoc():
|
||||
"""This tests specifically test the behavior of the bash parser
|
||||
when the input is a heredoc for a Jupyter cell (used in ServerRuntime).
|
||||
|
||||
It will fail to parse the bash commands AND fall back to the original input,
|
||||
which won't cause issues in actual execution.
|
||||
|
||||
[input]: cat > /tmp/opendevin_jupyter_temp.py <<'EOL'
|
||||
print('Hello, `World`!
|
||||
')
|
||||
EOL
|
||||
[error]: here-document at line 0 delimited by end-of-file (wanted "'EOL'") (position 75)
|
||||
|
||||
TODO: remove this test after the deprecation of ServerRuntime
|
||||
"""
|
||||
|
||||
code = "print('Hello, `World`!\n')"
|
||||
input_commands = f"""cat > /tmp/opendevin_jupyter_temp.py <<'EOL'
|
||||
{code}
|
||||
EOL"""
|
||||
expected_output = [f"cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n{code}\nEOL"]
|
||||
assert split_bash_commands(input_commands) == expected_output
|
||||
|
||||
|
||||
def test_backslash_continuation():
|
||||
input_commands = """
|
||||
echo "This is a long \
|
||||
command that spans \
|
||||
multiple lines"
|
||||
echo "Next command"
|
||||
"""
|
||||
expected_output = [
|
||||
'echo "This is a long command that spans multiple lines"',
|
||||
'echo "Next command"',
|
||||
]
|
||||
assert split_bash_commands(input_commands) == expected_output
|
||||
|
||||
|
||||
def test_comments():
|
||||
input_commands = """
|
||||
echo "Hello" # This is a comment
|
||||
# This is another comment
|
||||
ls -l
|
||||
"""
|
||||
expected_output = [
|
||||
'echo "Hello" # This is a comment\n# This is another comment',
|
||||
'ls -l',
|
||||
]
|
||||
assert split_bash_commands(input_commands) == expected_output
|
||||
|
||||
|
||||
def test_complex_quoting():
|
||||
input_commands = """
|
||||
echo "This is a \\"quoted\\" string"
|
||||
echo 'This is a '\''single-quoted'\'' string'
|
||||
echo "Mixed 'quotes' in \\"double quotes\\""
|
||||
"""
|
||||
expected_output = [
|
||||
'echo "This is a \\"quoted\\" string"',
|
||||
"echo 'This is a '''single-quoted''' string'",
|
||||
'echo "Mixed \'quotes\' in \\"double quotes\\""',
|
||||
]
|
||||
assert split_bash_commands(input_commands) == expected_output
|
||||
|
||||
|
||||
def test_invalid_syntax():
|
||||
invalid_inputs = [
|
||||
'echo "Unclosed quote',
|
||||
"echo 'Unclosed quote",
|
||||
'cat <<EOF\nUnclosed heredoc',
|
||||
]
|
||||
for input_command in invalid_inputs:
|
||||
# it will fall back to returning the original input
|
||||
assert split_bash_commands(input_command) == [input_command]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_commands():
|
||||
return [
|
||||
'ls -l',
|
||||
'echo "Hello, world!"',
|
||||
'cd /tmp && touch test.txt',
|
||||
'echo -e "line1\\nline2\\nline3"',
|
||||
'grep "pattern" file.txt | sort | uniq',
|
||||
'for i in {1..5}; do echo $i; done',
|
||||
'cat <<EOF\nmultiline\ntext\nEOF',
|
||||
'echo "Escaped \\"quotes\\""',
|
||||
"echo 'Single quotes don\\'t escape'",
|
||||
'echo "Command with a trailing backslash \\\n and continuation"',
|
||||
]
|
||||
|
||||
|
||||
def test_split_single_commands(sample_commands):
|
||||
for cmd in sample_commands:
|
||||
result = split_bash_commands(cmd)
|
||||
assert len(result) == 1, f'Expected single command, got: {result}'
|
||||
|
||||
|
||||
def test_split_commands_with_heredoc():
|
||||
input_commands = """
|
||||
cat <<EOF
|
||||
multiline
|
||||
text
|
||||
EOF
|
||||
echo "Done"
|
||||
"""
|
||||
expected_output = ['cat <<EOF\nmultiline\ntext\nEOF', 'echo "Done"']
|
||||
result = split_bash_commands(input_commands)
|
||||
assert result == expected_output, f'Expected {expected_output}, got {result}'
|
||||
|
||||
|
||||
def test_split_commands_with_backslash_continuation():
|
||||
input_commands = """
|
||||
echo "This is a long \
|
||||
command that spans \
|
||||
multiple lines"
|
||||
echo "Next command"
|
||||
"""
|
||||
expected_output = [
|
||||
'echo "This is a long command that spans multiple lines"',
|
||||
'echo "Next command"',
|
||||
]
|
||||
result = split_bash_commands(input_commands)
|
||||
assert result == expected_output, f'Expected {expected_output}, got {result}'
|
||||
|
||||
|
||||
def test_split_commands_with_empty_lines():
|
||||
input_commands = """
|
||||
ls -l
|
||||
|
||||
echo "Hello"
|
||||
|
||||
cd /tmp
|
||||
"""
|
||||
expected_output = ['ls -l', 'echo "Hello"', 'cd /tmp']
|
||||
result = split_bash_commands(input_commands)
|
||||
assert result == expected_output, f'Expected {expected_output}, got {result}'
|
||||
|
||||
|
||||
def test_split_commands_with_comments():
|
||||
input_commands = """
|
||||
echo "Hello" # This is a comment
|
||||
# This is another comment
|
||||
ls -l
|
||||
"""
|
||||
expected_output = [
|
||||
'echo "Hello" # This is a comment\n# This is another comment',
|
||||
'ls -l',
|
||||
]
|
||||
result = split_bash_commands(input_commands)
|
||||
assert result == expected_output, f'Expected {expected_output}, got {result}'
|
||||
|
||||
|
||||
def test_split_commands_with_complex_quoting():
|
||||
input_commands = """
|
||||
echo "This is a \\"quoted\\" string"
|
||||
echo "Mixed 'quotes' in \\"double quotes\\""
|
||||
"""
|
||||
# echo 'This is a '\''single-quoted'\'' string'
|
||||
|
||||
expected_output = [
|
||||
'echo "This is a \\"quoted\\" string"',
|
||||
'echo "Mixed \'quotes\' in \\"double quotes\\""',
|
||||
]
|
||||
# "echo 'This is a '\\''single-quoted'\\'' string'",
|
||||
result = split_bash_commands(input_commands)
|
||||
assert result == expected_output, f'Expected {expected_output}, got {result}'
|
||||
|
||||
|
||||
def test_split_commands_with_invalid_input():
|
||||
invalid_inputs = [
|
||||
'echo "Unclosed quote',
|
||||
"echo 'Unclosed quote",
|
||||
'cat <<EOF\nUnclosed heredoc',
|
||||
]
|
||||
for input_command in invalid_inputs:
|
||||
# it will fall back to returning the original input
|
||||
assert split_bash_commands(input_command) == [input_command]
|
||||
@ -9,14 +9,23 @@ from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from opendevin.core.config import AppConfig, SandboxConfig
|
||||
from opendevin.core.config import AppConfig, SandboxConfig, load_from_env
|
||||
from opendevin.core.logger import opendevin_logger as logger
|
||||
from opendevin.events import EventStream
|
||||
from opendevin.events.action import (
|
||||
BrowseURLAction,
|
||||
CmdRunAction,
|
||||
FileReadAction,
|
||||
FileWriteAction,
|
||||
IPythonRunCellAction,
|
||||
)
|
||||
from opendevin.events.observation import (
|
||||
BrowserOutputObservation,
|
||||
CmdOutputObservation,
|
||||
ErrorObservation,
|
||||
FileReadObservation,
|
||||
FileWriteObservation,
|
||||
IPythonRunCellObservation,
|
||||
)
|
||||
from opendevin.runtime.client.runtime import EventStreamRuntime
|
||||
from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
|
||||
@ -58,6 +67,8 @@ async def _load_runtime(temp_dir, box_class):
|
||||
use_host_network=True,
|
||||
),
|
||||
)
|
||||
load_from_env(config, os.environ)
|
||||
|
||||
file_store = get_file_store(config.file_store, config.file_store_path)
|
||||
event_stream = EventStream(cli_session, file_store)
|
||||
|
||||
@ -223,3 +234,218 @@ async def test_bash_command_pexcept(temp_dir, box_class):
|
||||
|
||||
await runtime.close()
|
||||
await asyncio.sleep(1)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_simple_cmd_ipython_and_fileop(temp_dir, box_class):
|
||||
runtime = await _load_runtime(temp_dir, box_class)
|
||||
|
||||
# Test run command
|
||||
action_cmd = CmdRunAction(command='ls -l')
|
||||
logger.info(action_cmd, extra={'msg_type': 'ACTION'})
|
||||
obs = await runtime.run_action(action_cmd)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert 'total 0' in obs.content
|
||||
|
||||
# Test run ipython
|
||||
test_code = "print('Hello, `World`!\\n')"
|
||||
action_ipython = IPythonRunCellAction(code=test_code)
|
||||
logger.info(action_ipython, extra={'msg_type': 'ACTION'})
|
||||
obs = await runtime.run_action(action_ipython)
|
||||
assert isinstance(obs, IPythonRunCellObservation)
|
||||
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.content.strip() == 'Hello, `World`!'
|
||||
|
||||
# Test read file (file should not exist)
|
||||
action_read = FileReadAction(path='hello.sh')
|
||||
logger.info(action_read, extra={'msg_type': 'ACTION'})
|
||||
obs = await runtime.run_action(action_read)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, ErrorObservation)
|
||||
assert 'File not found' in obs.content
|
||||
|
||||
# Test write file
|
||||
action_write = FileWriteAction(content='echo "Hello, World!"', path='hello.sh')
|
||||
logger.info(action_write, extra={'msg_type': 'ACTION'})
|
||||
obs = await runtime.run_action(action_write)
|
||||
assert isinstance(obs, FileWriteObservation)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
assert obs.content == ''
|
||||
if box_class == ServerRuntime:
|
||||
assert obs.path == 'hello.sh'
|
||||
else:
|
||||
# event stream runtime will always use absolute path
|
||||
assert obs.path == '/workspace/hello.sh'
|
||||
|
||||
# Test read file (file should exist)
|
||||
action_read = FileReadAction(path='hello.sh')
|
||||
logger.info(action_read, extra={'msg_type': 'ACTION'})
|
||||
obs = await runtime.run_action(action_read)
|
||||
assert isinstance(
|
||||
obs, FileReadObservation
|
||||
), 'The observation should be a FileReadObservation.'
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
assert obs.content == 'echo "Hello, World!"\n'
|
||||
if box_class == ServerRuntime:
|
||||
assert obs.path == 'hello.sh'
|
||||
else:
|
||||
assert obs.path == '/workspace/hello.sh'
|
||||
|
||||
await runtime.close()
|
||||
await asyncio.sleep(1)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_simple_browse(temp_dir, box_class):
|
||||
runtime = await _load_runtime(temp_dir, box_class)
|
||||
|
||||
# Test browse
|
||||
action_cmd = CmdRunAction(command='python -m http.server 8000 > server.log 2>&1 &')
|
||||
logger.info(action_cmd, extra={'msg_type': 'ACTION'})
|
||||
obs = await runtime.run_action(action_cmd)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
assert '[1]' in obs.content
|
||||
|
||||
action_browse = BrowseURLAction(url='http://localhost:8000')
|
||||
logger.info(action_browse, extra={'msg_type': 'ACTION'})
|
||||
obs = await runtime.run_action(action_browse)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
assert isinstance(obs, BrowserOutputObservation)
|
||||
assert 'http://localhost:8000' in obs.url
|
||||
assert obs.status_code == 200
|
||||
assert not obs.error
|
||||
assert obs.open_pages_urls == ['http://localhost:8000/']
|
||||
assert obs.active_page_index == 0
|
||||
assert obs.last_browser_action == 'goto("http://localhost:8000")'
|
||||
assert obs.last_browser_action_error == ''
|
||||
assert 'Directory listing for /' in obs.content
|
||||
assert 'server.log' in obs.content
|
||||
|
||||
await runtime.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_multiline_commands(temp_dir, box_class):
|
||||
cmds = [
|
||||
'ls -l',
|
||||
'echo -e "hello\nworld"',
|
||||
"""
|
||||
echo -e "hello it\\'s me"
|
||||
""".strip(),
|
||||
"""
|
||||
echo \\
|
||||
-e 'hello' \\
|
||||
-v
|
||||
""".strip(),
|
||||
"""
|
||||
echo -e 'hello\\nworld\\nare\\nyou\\nthere?'
|
||||
""".strip(),
|
||||
"""
|
||||
echo -e 'hello
|
||||
world
|
||||
are
|
||||
you\\n
|
||||
there?'
|
||||
""".strip(),
|
||||
"""
|
||||
echo -e 'hello
|
||||
world "
|
||||
'
|
||||
""".strip(),
|
||||
]
|
||||
joined_cmds = '\n'.join(cmds)
|
||||
|
||||
runtime = await _load_runtime(temp_dir, box_class)
|
||||
|
||||
action = CmdRunAction(command=joined_cmds)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = await runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
|
||||
assert 'total 0' in obs.content
|
||||
assert 'hello\r\nworld' in obs.content
|
||||
assert "hello it\\'s me" in obs.content
|
||||
assert 'hello -v' in obs.content
|
||||
assert 'hello\r\nworld\r\nare\r\nyou\r\nthere?' in obs.content
|
||||
assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content
|
||||
assert 'hello\r\nworld "\r\n' in obs.content
|
||||
|
||||
await runtime.close()
|
||||
await asyncio.sleep(1)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_ps2_in_output(temp_dir, box_class):
|
||||
"""Test that the PS2 sign is not added to the output of a multiline command."""
|
||||
runtime = await _load_runtime(temp_dir, box_class)
|
||||
|
||||
action = CmdRunAction(command='echo -e "hello\nworld"')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = await runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
if box_class == ServerRuntime:
|
||||
# the extra PS2 '>' is NOT handled by the ServerRuntime
|
||||
assert 'hello\r\nworld' in obs.content
|
||||
assert '>' in obs.content
|
||||
assert obs.content.count('>') == 1
|
||||
else:
|
||||
assert 'hello\r\nworld' in obs.content
|
||||
assert '>' not in obs.content
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_multiline_command_loop(temp_dir, box_class):
|
||||
# https://github.com/OpenDevin/OpenDevin/issues/3143
|
||||
|
||||
runtime = await _load_runtime(temp_dir, box_class)
|
||||
|
||||
init_cmd = """
|
||||
mkdir -p _modules && \
|
||||
for month in {01..04}; do
|
||||
for day in {01..05}; do
|
||||
touch "_modules/2024-${month}-${day}-sample.md"
|
||||
done
|
||||
done
|
||||
echo "created files"
|
||||
"""
|
||||
action = CmdRunAction(command=init_cmd)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = await runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert 'created files' in obs.content
|
||||
|
||||
follow_up_cmd = """
|
||||
for file in _modules/*.md; do
|
||||
new_date=$(echo $file | sed -E 's/2024-(01|02|03|04)-/2024-/;s/2024-01/2024-08/;s/2024-02/2024-09/;s/2024-03/2024-10/;s/2024-04/2024-11/')
|
||||
mv "$file" "$new_date"
|
||||
done
|
||||
echo "success"
|
||||
"""
|
||||
action = CmdRunAction(command=follow_up_cmd)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = await runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert 'success' in obs.content
|
||||
|
||||
await runtime.close()
|
||||
await asyncio.sleep(1)
|
||||
|
||||
@ -7,7 +7,6 @@ import pytest
|
||||
from opendevin.core.config import AppConfig, SandboxConfig
|
||||
from opendevin.runtime.docker.ssh_box import DockerSSHBox
|
||||
from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
|
||||
from opendevin.runtime.utils import split_bash_commands
|
||||
|
||||
|
||||
def create_docker_box_from_app_config(
|
||||
@ -41,62 +40,6 @@ def temp_dir(monkeypatch):
|
||||
yield temp_dir
|
||||
|
||||
|
||||
def test_split_commands():
|
||||
cmds = [
|
||||
'ls -l',
|
||||
'echo -e "hello\nworld"',
|
||||
"""
|
||||
echo -e 'hello it\\'s me'
|
||||
""".strip(),
|
||||
"""
|
||||
echo \\
|
||||
-e 'hello' \\
|
||||
-v
|
||||
""".strip(),
|
||||
"""
|
||||
echo -e 'hello\\nworld\\nare\\nyou\\nthere?'
|
||||
""".strip(),
|
||||
"""
|
||||
echo -e 'hello
|
||||
world
|
||||
are
|
||||
you\\n
|
||||
there?'
|
||||
""".strip(),
|
||||
"""
|
||||
echo -e 'hello
|
||||
world "
|
||||
'
|
||||
""".strip(),
|
||||
"""
|
||||
kubectl apply -f - <<EOF
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: busybox-sleep
|
||||
spec:
|
||||
containers:
|
||||
- name: busybox
|
||||
image: busybox:1.28
|
||||
args:
|
||||
- sleep
|
||||
- "1000000"
|
||||
EOF
|
||||
""".strip(),
|
||||
]
|
||||
joined_cmds = '\n'.join(cmds)
|
||||
split_cmds = split_bash_commands(joined_cmds)
|
||||
for s in split_cmds:
|
||||
print('\nCMD')
|
||||
print(s)
|
||||
cmds = [
|
||||
c.replace('\\\n', '') for c in cmds
|
||||
] # The function strips escaped newlines, but this shouldn't matter
|
||||
assert (
|
||||
split_cmds == cmds
|
||||
), 'The split commands should be the same as the input commands.'
|
||||
|
||||
|
||||
def test_ssh_box_run_as_devin(temp_dir):
|
||||
# get a temporary directory
|
||||
for box in [
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user