Migrate multi-line-bash-related sandbox tests into runtime tests and fix multi-line issue (#3128)

* Remove global config from memory

* Remove runtime global config

* Remove from storage

* Remove global config

* Fix event stream tests

* Fix sandbox issue

* Change config

* Removed transferred tests

* Add swe env box

* Fixes on testing

* Fixed some tests

* Merge with stashed changes

* Fix typing

* Fix ipython test

* Revive function

* Make temp_dir fixture

* Remove test to avoid circular import

* fix eventstream filestore for test_runtime

* fix parse arg issue that causes integration test to fail

* support swebench pull from custom namespace

* add back simple tests for runtime

* move multi-line bash tests to test_runtime;
support multi-line bash for esruntime;

* add testcase to handle PS2 prompt

* use bashlex for bash parsing to handle multi-line commands;
add testcases for multi-line commands

* revert ghcr runtime change

---------

Co-authored-by: Graham Neubig <neubig@gmail.com>
This commit is contained in:
Xingyao Wang 2024-07-28 04:12:57 +08:00 committed by GitHub
parent 8b77e8a0ff
commit b1ea204c5b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 608 additions and 150 deletions

View File

@ -9,7 +9,7 @@ from typing import Literal, Mapping
from termcolor import colored
DISABLE_COLOR_PRINTING = False
DEBUG = False
DEBUG = os.getenv('DEBUG', 'False').lower() in ['true', '1', 'yes']
ColorType = Literal[
'red',

View File

@ -46,6 +46,7 @@ from opendevin.runtime.plugins import (
Plugin,
)
from opendevin.runtime.server.files import insert_lines, read_lines
from opendevin.runtime.utils import split_bash_commands
app = FastAPI()
@ -79,7 +80,7 @@ class RuntimeClient:
r'\[PEXPECT_BEGIN\] ([a-z0-9_-]*)@([a-zA-Z0-9.-]*):(.+) \[PEXPECT_END\]'
)
self.shell.sendline(f'export PS1="{self.__bash_PS1}"')
self.shell.sendline(f'export PS1="{self.__bash_PS1}"; export PS2=""')
self.shell.expect(self.__bash_expect_regex)
self.shell.sendline(f'cd {work_dir}')
@ -87,6 +88,15 @@ class RuntimeClient:
def _get_bash_prompt(self):
ps1 = self.shell.after
# begin at the last occurrence of '[PEXPECT_BEGIN]'.
# In multi-line bash commands, the prompt will be repeated
# and the matched regex captures all of them
# - we only want the last one (newest prompt)
_begin_pos = ps1.rfind('[PEXPECT_BEGIN]')
if _begin_pos != -1:
ps1 = ps1[_begin_pos:]
# parse the ps1 to get username, hostname, and working directory
matched = re.match(self.__bash_expect_regex, ps1)
assert (
@ -102,7 +112,7 @@ class RuntimeClient:
prompt += '$'
return prompt + ' '
def _execute_bash(self, command, keep_prompt: bool = True) -> tuple[str, int]:
def _execute_bash(self, command: str, keep_prompt: bool = True) -> tuple[str, int]:
logger.debug(f'Executing command: {command}')
self.shell.sendline(command)
self.shell.expect(self.__bash_expect_regex)
@ -129,10 +139,22 @@ class RuntimeClient:
async def run(self, action: CmdRunAction) -> CmdOutputObservation:
try:
output, exit_code = self._execute_bash(action.command)
commands = split_bash_commands(action.command)
all_output = ''
for command in commands:
output, exit_code = self._execute_bash(command)
if all_output:
# previous output already exists with prompt "user@hostname:working_dir #"
# we need to add the command to the previous output,
# so the model knows the following is the output of another action
all_output = all_output.rstrip() + ' ' + command + '\r\n'
all_output += str(output) + '\r\n'
if exit_code != 0:
break
return CmdOutputObservation(
command_id=-1,
content=str(output),
content=all_output.rstrip('\r\n'),
command=action.command,
exit_code=exit_code,
)

View File

@ -58,7 +58,7 @@ class EventStreamRuntime(Runtime):
# TODO: We can switch to aiodocker when `get_od_sandbox_image` is updated to use aiodocker
self.docker_client: docker.DockerClient = self._init_docker_client()
self.container_image = (
config.sandbox.container_image
self.config.sandbox.container_image
if container_image is None
else container_image
)
@ -103,7 +103,7 @@ class EventStreamRuntime(Runtime):
async def _init_container(
self,
sandbox_workspace_dir: str,
mount_dir: str,
mount_dir: str | None = None,
plugins: list[PluginRequirement] | None = None,
):
try:
@ -124,6 +124,14 @@ class EventStreamRuntime(Runtime):
else:
port_mapping = {f'{self._port}/tcp': self._port}
if mount_dir is not None:
volumes = {mount_dir: {'bind': sandbox_workspace_dir, 'mode': 'rw'}}
else:
logger.warn(
'Mount dir is not set, will not mount the workspace directory to the container.'
)
volumes = None
container = self.docker_client.containers.run(
self.container_image,
command=(
@ -139,7 +147,7 @@ class EventStreamRuntime(Runtime):
name=self.container_name,
detach=True,
environment={'DEBUG': 'true'} if self.config.debug else None,
volumes={mount_dir: {'bind': sandbox_workspace_dir, 'mode': 'rw'}},
volumes=volumes,
)
logger.info(f'Container started. Server url: {self.api_url}')
return container

View File

@ -33,13 +33,13 @@ from opendevin.runtime.tools import RuntimeTool
from opendevin.storage import FileStore
def _default_env_vars(config: SandboxConfig) -> dict[str, str]:
def _default_env_vars(sandbox_config: SandboxConfig) -> dict[str, str]:
ret = {}
for key in os.environ:
if key.startswith('SANDBOX_ENV_'):
sandbox_key = key.removeprefix('SANDBOX_ENV_')
ret[sandbox_key] = os.environ[key]
if config.enable_auto_lint:
if sandbox_config.enable_auto_lint:
ret['ENABLE_AUTO_LINT'] = 'true'
return ret

View File

@ -115,7 +115,7 @@ class ServerRuntime(Runtime):
async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
self._run_command(
("cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n" f'{action.code}\n' 'EOL'),
f"cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n{action.code}\nEOL"
)
# run the code

View File

@ -1,87 +1,49 @@
import bashlex
from opendevin.core.logger import opendevin_logger as logger
def split_bash_commands(commands):
# States
NORMAL = 0
IN_SINGLE_QUOTE = 1
IN_DOUBLE_QUOTE = 2
IN_HEREDOC = 3
try:
parsed = bashlex.parse(commands)
except bashlex.errors.ParsingError as e:
logger.error(
f'Failed to parse bash commands\n[input]: {commands}\n[error]: {e}'
)
# If parsing fails, return the original commands
return [commands]
state = NORMAL
heredoc_trigger = None
result = []
current_command: list[str] = []
result: list[str] = []
last_end = 0
i = 0
while i < len(commands):
char = commands[i]
for node in parsed:
start, end = node.pos
if state == NORMAL:
if char == "'":
state = IN_SINGLE_QUOTE
elif char == '"':
state = IN_DOUBLE_QUOTE
elif char == '\\':
# Check if this is escaping a newline
if i + 1 < len(commands) and commands[i + 1] == '\n':
i += 1 # Skip the newline
# Continue with the next line as part of the same command
i += 1 # Move to the first character of the next line
continue
elif char == '\n':
if not heredoc_trigger and current_command:
result.append(''.join(current_command).strip())
current_command = []
elif char == '<' and commands[i : i + 2] == '<<':
# Detect heredoc
state = IN_HEREDOC
i += 2 # Skip '<<'
while commands[i] == ' ':
i += 1
start = i
while commands[i] not in [' ', '\n']:
i += 1
heredoc_trigger = commands[start:i]
current_command.append(commands[start - 2 : i]) # Include '<<'
continue # Skip incrementing i at the end of the loop
current_command.append(char)
# Include any text between the last command and this one
if start > last_end:
between = commands[last_end:start]
logger.debug(f'BASH PARSING between: {between}')
if result:
result[-1] += between.rstrip()
elif between.strip():
# THIS SHOULD NOT HAPPEN
result.append(between.rstrip())
elif state == IN_SINGLE_QUOTE:
current_command.append(char)
if char == "'" and commands[i - 1] != '\\':
state = NORMAL
# Extract the command, preserving original formatting
command = commands[start:end].rstrip()
logger.debug(f'BASH PARSING command: {command}')
result.append(command)
elif state == IN_DOUBLE_QUOTE:
current_command.append(char)
if char == '"' and commands[i - 1] != '\\':
state = NORMAL
elif state == IN_HEREDOC:
current_command.append(char)
if (
char == '\n'
and heredoc_trigger
and commands[i + 1 : i + 1 + len(heredoc_trigger) + 1]
== heredoc_trigger + '\n'
):
# Check if the next line starts with the heredoc trigger followed by a newline
i += (
len(heredoc_trigger) + 1
) # Move past the heredoc trigger and newline
current_command.append(
heredoc_trigger + '\n'
) # Include the heredoc trigger and newline
result.append(''.join(current_command).strip())
current_command = []
heredoc_trigger = None
state = NORMAL
continue
i += 1
# Add the last command if any
if current_command:
result.append(''.join(current_command).strip())
# Remove any empty strings from the result
result = [cmd for cmd in result if cmd]
last_end = end
# Add any remaining text after the last command to the last command
remaining = commands[last_end:].rstrip()
logger.debug(f'BASH PARSING remaining: {remaining}')
if last_end < len(commands) and result:
result[-1] += remaining
logger.debug(f'BASH PARSING result[-1] += remaining: {result[-1]}')
elif last_end < len(commands):
if remaining:
result.append(remaining)
logger.debug(f'BASH PARSING result.append(remaining): {result[-1]}')
return result

15
poetry.lock generated
View File

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
[[package]]
name = "aenum"
@ -398,6 +398,17 @@ files = [
{file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"},
]
[[package]]
name = "bashlex"
version = "0.18"
description = "Python parser for bash"
optional = false
python-versions = ">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4"
files = [
{file = "bashlex-0.18-py2.py3-none-any.whl", hash = "sha256:91d73a23a3e51711919c1c899083890cdecffc91d8c088942725ac13e9dcfffa"},
{file = "bashlex-0.18.tar.gz", hash = "sha256:5bb03a01c6d5676338c36fd1028009c8ad07e7d61d8a1ce3f513b7fff52796ee"},
]
[[package]]
name = "bcrypt"
version = "4.1.3"
@ -9109,4 +9120,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "e1520f1342ab527bc3bb2619f8909cbdddeb227c14614eb3d82e133961f1f4d2"
content-hash = "6d6cfaf3a614a4bf766d9a0e886e82dc9f8cfb8bf08a642f0207f260e72dd6da"

View File

@ -39,6 +39,7 @@ pathspec = "^0.12.1"
google-cloud-aiplatform = "*"
grep-ast = "0.3.2"
tree-sitter = "0.21.3"
bashlex = "^0.18"
[tool.poetry.group.llama-index.dependencies]
llama-index = "*"
@ -72,6 +73,7 @@ reportlab = "*"
[tool.coverage.run]
concurrency = ["gevent"]
[tool.poetry.group.runtime.dependencies]
jupyterlab = "*"
notebook = "*"
@ -105,6 +107,7 @@ ignore = ["D1"]
[tool.ruff.lint.pydocstyle]
convention = "google"
[tool.poetry.group.evaluation.dependencies]
streamlit = "*"
whatthepatch = "*"

View File

@ -0,0 +1,283 @@
import pytest
from opendevin.runtime.utils.bash import split_bash_commands
def test_split_commands_util():
cmds = [
'ls -l',
'echo -e "hello\nworld"',
"""
echo -e "hello it\\'s me"
""".strip(),
"""
echo \\
-e 'hello' \\
-v
""".strip(),
"""
echo -e 'hello\\nworld\\nare\\nyou\\nthere?'
""".strip(),
"""
echo -e 'hello
world
are
you\\n
there?'
""".strip(),
"""
echo -e 'hello
world "
'
""".strip(),
"""
kubectl apply -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
name: busybox-sleep
spec:
containers:
- name: busybox
image: busybox:1.28
args:
- sleep
- "1000000"
EOF
""".strip(),
"""
mkdir -p _modules && \
for month in {01..04}; do
for day in {01..05}; do
touch "_modules/2024-${month}-${day}-sample.md"
done
done
""".strip(),
]
joined_cmds = '\n'.join(cmds)
split_cmds = split_bash_commands(joined_cmds)
for s in split_cmds:
print('\nCMD')
print(s)
for i in range(len(cmds)):
assert (
split_cmds[i].strip() == cmds[i].strip()
), f'At index {i}: {split_cmds[i]} != {cmds[i]}.'
@pytest.mark.parametrize(
'input_command, expected_output',
[
('ls -l', ['ls -l']),
("echo 'Hello, world!'", ["echo 'Hello, world!'"]),
('cd /tmp && touch test.txt', ['cd /tmp && touch test.txt']),
("echo -e 'line1\\nline2\\nline3'", ["echo -e 'line1\\nline2\\nline3'"]),
(
"grep 'pattern' file.txt | sort | uniq",
["grep 'pattern' file.txt | sort | uniq"],
),
('for i in {1..5}; do echo $i; done', ['for i in {1..5}; do echo $i; done']),
(
"echo 'Single quotes don\\'t escape'",
["echo 'Single quotes don\\'t escape'"],
),
(
'echo "Double quotes \\"do\\" escape"',
['echo "Double quotes \\"do\\" escape"'],
),
],
)
def test_single_commands(input_command, expected_output):
assert split_bash_commands(input_command) == expected_output
def test_heredoc():
input_commands = """
cat <<EOF
multiline
text
EOF
echo "Done"
"""
expected_output = ['cat <<EOF\nmultiline\ntext\nEOF', 'echo "Done"']
assert split_bash_commands(input_commands) == expected_output
def test_jupyter_heredoc():
"""This test specifically checks the behavior of the bash parser
when the input is a heredoc for a Jupyter cell (used in ServerRuntime).
It will fail to parse the bash commands AND fall back to the original input,
which won't cause issues in actual execution.
[input]: cat > /tmp/opendevin_jupyter_temp.py <<'EOL'
print('Hello, `World`!
')
EOL
[error]: here-document at line 0 delimited by end-of-file (wanted "'EOL'") (position 75)
TODO: remove this test after the deprecation of ServerRuntime
"""
code = "print('Hello, `World`!\n')"
input_commands = f"""cat > /tmp/opendevin_jupyter_temp.py <<'EOL'
{code}
EOL"""
expected_output = [f"cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n{code}\nEOL"]
assert split_bash_commands(input_commands) == expected_output
def test_backslash_continuation():
input_commands = """
echo "This is a long \
command that spans \
multiple lines"
echo "Next command"
"""
expected_output = [
'echo "This is a long command that spans multiple lines"',
'echo "Next command"',
]
assert split_bash_commands(input_commands) == expected_output
def test_comments():
input_commands = """
echo "Hello" # This is a comment
# This is another comment
ls -l
"""
expected_output = [
'echo "Hello" # This is a comment\n# This is another comment',
'ls -l',
]
assert split_bash_commands(input_commands) == expected_output
def test_complex_quoting():
input_commands = """
echo "This is a \\"quoted\\" string"
echo 'This is a '\''single-quoted'\'' string'
echo "Mixed 'quotes' in \\"double quotes\\""
"""
expected_output = [
'echo "This is a \\"quoted\\" string"',
"echo 'This is a '''single-quoted''' string'",
'echo "Mixed \'quotes\' in \\"double quotes\\""',
]
assert split_bash_commands(input_commands) == expected_output
def test_invalid_syntax():
invalid_inputs = [
'echo "Unclosed quote',
"echo 'Unclosed quote",
'cat <<EOF\nUnclosed heredoc',
]
for input_command in invalid_inputs:
# it will fall back to return the original input
assert split_bash_commands(input_command) == [input_command]
@pytest.fixture
def sample_commands():
return [
'ls -l',
'echo "Hello, world!"',
'cd /tmp && touch test.txt',
'echo -e "line1\\nline2\\nline3"',
'grep "pattern" file.txt | sort | uniq',
'for i in {1..5}; do echo $i; done',
'cat <<EOF\nmultiline\ntext\nEOF',
'echo "Escaped \\"quotes\\""',
"echo 'Single quotes don\\'t escape'",
'echo "Command with a trailing backslash \\\n and continuation"',
]
def test_split_single_commands(sample_commands):
for cmd in sample_commands:
result = split_bash_commands(cmd)
assert len(result) == 1, f'Expected single command, got: {result}'
def test_split_commands_with_heredoc():
input_commands = """
cat <<EOF
multiline
text
EOF
echo "Done"
"""
expected_output = ['cat <<EOF\nmultiline\ntext\nEOF', 'echo "Done"']
result = split_bash_commands(input_commands)
assert result == expected_output, f'Expected {expected_output}, got {result}'
def test_split_commands_with_backslash_continuation():
input_commands = """
echo "This is a long \
command that spans \
multiple lines"
echo "Next command"
"""
expected_output = [
'echo "This is a long command that spans multiple lines"',
'echo "Next command"',
]
result = split_bash_commands(input_commands)
assert result == expected_output, f'Expected {expected_output}, got {result}'
def test_split_commands_with_empty_lines():
input_commands = """
ls -l
echo "Hello"
cd /tmp
"""
expected_output = ['ls -l', 'echo "Hello"', 'cd /tmp']
result = split_bash_commands(input_commands)
assert result == expected_output, f'Expected {expected_output}, got {result}'
def test_split_commands_with_comments():
input_commands = """
echo "Hello" # This is a comment
# This is another comment
ls -l
"""
expected_output = [
'echo "Hello" # This is a comment\n# This is another comment',
'ls -l',
]
result = split_bash_commands(input_commands)
assert result == expected_output, f'Expected {expected_output}, got {result}'
def test_split_commands_with_complex_quoting():
input_commands = """
echo "This is a \\"quoted\\" string"
echo "Mixed 'quotes' in \\"double quotes\\""
"""
# echo 'This is a '\''single-quoted'\'' string'
expected_output = [
'echo "This is a \\"quoted\\" string"',
'echo "Mixed \'quotes\' in \\"double quotes\\""',
]
# "echo 'This is a '\\''single-quoted'\\'' string'",
result = split_bash_commands(input_commands)
assert result == expected_output, f'Expected {expected_output}, got {result}'
def test_split_commands_with_invalid_input():
invalid_inputs = [
'echo "Unclosed quote',
"echo 'Unclosed quote",
'cat <<EOF\nUnclosed heredoc',
]
for input_command in invalid_inputs:
# it will fall back to return the original input
assert split_bash_commands(input_command) == [input_command]

View File

@ -9,14 +9,23 @@ from unittest.mock import patch
import pytest
from opendevin.core.config import AppConfig, SandboxConfig
from opendevin.core.config import AppConfig, SandboxConfig, load_from_env
from opendevin.core.logger import opendevin_logger as logger
from opendevin.events import EventStream
from opendevin.events.action import (
BrowseURLAction,
CmdRunAction,
FileReadAction,
FileWriteAction,
IPythonRunCellAction,
)
from opendevin.events.observation import (
BrowserOutputObservation,
CmdOutputObservation,
ErrorObservation,
FileReadObservation,
FileWriteObservation,
IPythonRunCellObservation,
)
from opendevin.runtime.client.runtime import EventStreamRuntime
from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
@ -58,6 +67,8 @@ async def _load_runtime(temp_dir, box_class):
use_host_network=True,
),
)
load_from_env(config, os.environ)
file_store = get_file_store(config.file_store, config.file_store_path)
event_stream = EventStream(cli_session, file_store)
@ -223,3 +234,218 @@ async def test_bash_command_pexcept(temp_dir, box_class):
await runtime.close()
await asyncio.sleep(1)
@pytest.mark.asyncio
async def test_simple_cmd_ipython_and_fileop(temp_dir, box_class):
runtime = await _load_runtime(temp_dir, box_class)
# Test run command
action_cmd = CmdRunAction(command='ls -l')
logger.info(action_cmd, extra={'msg_type': 'ACTION'})
obs = await runtime.run_action(action_cmd)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert 'total 0' in obs.content
# Test run ipython
test_code = "print('Hello, `World`!\\n')"
action_ipython = IPythonRunCellAction(code=test_code)
logger.info(action_ipython, extra={'msg_type': 'ACTION'})
obs = await runtime.run_action(action_ipython)
assert isinstance(obs, IPythonRunCellObservation)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.content.strip() == 'Hello, `World`!'
# Test read file (file should not exist)
action_read = FileReadAction(path='hello.sh')
logger.info(action_read, extra={'msg_type': 'ACTION'})
obs = await runtime.run_action(action_read)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, ErrorObservation)
assert 'File not found' in obs.content
# Test write file
action_write = FileWriteAction(content='echo "Hello, World!"', path='hello.sh')
logger.info(action_write, extra={'msg_type': 'ACTION'})
obs = await runtime.run_action(action_write)
assert isinstance(obs, FileWriteObservation)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.content == ''
if box_class == ServerRuntime:
assert obs.path == 'hello.sh'
else:
# event stream runtime will always use absolute path
assert obs.path == '/workspace/hello.sh'
# Test read file (file should exist)
action_read = FileReadAction(path='hello.sh')
logger.info(action_read, extra={'msg_type': 'ACTION'})
obs = await runtime.run_action(action_read)
assert isinstance(
obs, FileReadObservation
), 'The observation should be a FileReadObservation.'
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.content == 'echo "Hello, World!"\n'
if box_class == ServerRuntime:
assert obs.path == 'hello.sh'
else:
assert obs.path == '/workspace/hello.sh'
await runtime.close()
await asyncio.sleep(1)
@pytest.mark.asyncio
async def test_simple_browse(temp_dir, box_class):
runtime = await _load_runtime(temp_dir, box_class)
# Test browse
action_cmd = CmdRunAction(command='python -m http.server 8000 > server.log 2>&1 &')
logger.info(action_cmd, extra={'msg_type': 'ACTION'})
obs = await runtime.run_action(action_cmd)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert '[1]' in obs.content
action_browse = BrowseURLAction(url='http://localhost:8000')
logger.info(action_browse, extra={'msg_type': 'ACTION'})
obs = await runtime.run_action(action_browse)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, BrowserOutputObservation)
assert 'http://localhost:8000' in obs.url
assert obs.status_code == 200
assert not obs.error
assert obs.open_pages_urls == ['http://localhost:8000/']
assert obs.active_page_index == 0
assert obs.last_browser_action == 'goto("http://localhost:8000")'
assert obs.last_browser_action_error == ''
assert 'Directory listing for /' in obs.content
assert 'server.log' in obs.content
await runtime.close()
@pytest.mark.asyncio
async def test_multiline_commands(temp_dir, box_class):
cmds = [
'ls -l',
'echo -e "hello\nworld"',
"""
echo -e "hello it\\'s me"
""".strip(),
"""
echo \\
-e 'hello' \\
-v
""".strip(),
"""
echo -e 'hello\\nworld\\nare\\nyou\\nthere?'
""".strip(),
"""
echo -e 'hello
world
are
you\\n
there?'
""".strip(),
"""
echo -e 'hello
world "
'
""".strip(),
]
joined_cmds = '\n'.join(cmds)
runtime = await _load_runtime(temp_dir, box_class)
action = CmdRunAction(command=joined_cmds)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = await runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'total 0' in obs.content
assert 'hello\r\nworld' in obs.content
assert "hello it\\'s me" in obs.content
assert 'hello -v' in obs.content
assert 'hello\r\nworld\r\nare\r\nyou\r\nthere?' in obs.content
assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content
assert 'hello\r\nworld "\r\n' in obs.content
await runtime.close()
await asyncio.sleep(1)
@pytest.mark.asyncio
async def test_no_ps2_in_output(temp_dir, box_class):
"""Test that the PS2 sign is not added to the output of a multiline command."""
runtime = await _load_runtime(temp_dir, box_class)
action = CmdRunAction(command='echo -e "hello\nworld"')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = await runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
if box_class == ServerRuntime:
# the extra PS2 '>' is NOT handled by the ServerRuntime
assert 'hello\r\nworld' in obs.content
assert '>' in obs.content
assert obs.content.count('>') == 1
else:
assert 'hello\r\nworld' in obs.content
assert '>' not in obs.content
@pytest.mark.asyncio
async def test_multiline_command_loop(temp_dir, box_class):
# https://github.com/OpenDevin/OpenDevin/issues/3143
runtime = await _load_runtime(temp_dir, box_class)
init_cmd = """
mkdir -p _modules && \
for month in {01..04}; do
for day in {01..05}; do
touch "_modules/2024-${month}-${day}-sample.md"
done
done
echo "created files"
"""
action = CmdRunAction(command=init_cmd)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = await runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'created files' in obs.content
follow_up_cmd = """
for file in _modules/*.md; do
new_date=$(echo $file | sed -E 's/2024-(01|02|03|04)-/2024-/;s/2024-01/2024-08/;s/2024-02/2024-09/;s/2024-03/2024-10/;s/2024-04/2024-11/')
mv "$file" "$new_date"
done
echo "success"
"""
action = CmdRunAction(command=follow_up_cmd)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = await runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'success' in obs.content
await runtime.close()
await asyncio.sleep(1)

View File

@ -7,7 +7,6 @@ import pytest
from opendevin.core.config import AppConfig, SandboxConfig
from opendevin.runtime.docker.ssh_box import DockerSSHBox
from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
from opendevin.runtime.utils import split_bash_commands
def create_docker_box_from_app_config(
@ -41,62 +40,6 @@ def temp_dir(monkeypatch):
yield temp_dir
def test_split_commands():
cmds = [
'ls -l',
'echo -e "hello\nworld"',
"""
echo -e 'hello it\\'s me'
""".strip(),
"""
echo \\
-e 'hello' \\
-v
""".strip(),
"""
echo -e 'hello\\nworld\\nare\\nyou\\nthere?'
""".strip(),
"""
echo -e 'hello
world
are
you\\n
there?'
""".strip(),
"""
echo -e 'hello
world "
'
""".strip(),
"""
kubectl apply -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
name: busybox-sleep
spec:
containers:
- name: busybox
image: busybox:1.28
args:
- sleep
- "1000000"
EOF
""".strip(),
]
joined_cmds = '\n'.join(cmds)
split_cmds = split_bash_commands(joined_cmds)
for s in split_cmds:
print('\nCMD')
print(s)
cmds = [
c.replace('\\\n', '') for c in cmds
] # The function strips escaped newlines, but this shouldn't matter
assert (
split_cmds == cmds
), 'The split commands should be the same as the input commands.'
def test_ssh_box_run_as_devin(temp_dir):
# get a temporary directory
for box in [