Revert ssh box implemetation, fix multi-line command issues and add unit tests (#1460)

* update dev md for instruction of running unit tests

* add back unit tests

* revert back to the original sandbox implementation to fix testcases

* allow unit test workflow to find docker.sock

* make sandbox test working via patch

* fix arg parser that's broken for some reason

* fix integration test

* Revert "fix arg parser that's broken for some reason"

This reverts commit 6cc89611337bb74555fd16b4be78681fb7e36573.

* update Development.md
This commit is contained in:
Xingyao Wang 2024-05-01 00:46:35 +08:00 committed by GitHub
parent ccbbabac1c
commit fe43aeb9b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 163 additions and 7 deletions

View File

@ -34,11 +34,16 @@ jobs:
brew install colima docker
colima start
# For testcontainers to find the Colima socket
# https://github.com/abiosoft/colima/blob/main/docs/FAQ.md#cannot-connect-to-the-docker-daemon-at-unixvarrundockersock-is-the-docker-daemon-running
sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock
- name: Build Environment
run: make build
- name: Run Tests
run: poetry run pytest --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
env:
@ -70,6 +75,7 @@ jobs:
- name: Run Tests
run: poetry run pytest --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
env:

View File

@ -82,3 +82,15 @@ If you encounter any issues with the Language Model (LM) or you're simply curiou
```bash
make help
```
### 8. Testing
#### Unit tests
```bash
poetry run pytest ./tests/unit/test_sandbox.py
```
#### Integration tests
Please refer to [this README](./tests/integration/README.md) for details.

View File

@ -171,7 +171,7 @@ class DockerSSHBox(Sandbox):
def start_ssh_session(self):
# start ssh session at the background
self.ssh = pxssh.pxssh(echo=False)
self.ssh = pxssh.pxssh()
hostname = SSH_HOSTNAME
if RUN_AS_DEVIN:
username = 'opendevin'
@ -215,14 +215,36 @@ class DockerSSHBox(Sandbox):
# send a SIGINT to the process
self.ssh.sendintr()
self.ssh.prompt()
command_output = self.ssh.before.decode('utf-8').strip()
command_output = self.ssh.before.decode(
'utf-8').lstrip(cmd).strip()
return -1, f'Command: "{cmd}" timed out. Sending SIGINT to the process: {command_output}'
command_output = self.ssh.before.decode('utf-8').strip()
# once out, make sure that we have *every* output, we while loop until we get an empty output
while True:
logger.debug('WAITING FOR .prompt()')
self.ssh.sendline('\n')
timeout_not_reached = self.ssh.prompt(timeout=1)
if not timeout_not_reached:
logger.debug('TIMEOUT REACHED')
break
logger.debug('WAITING FOR .before')
output = self.ssh.before.decode('utf-8').strip()
logger.debug(f'WAITING FOR END OF command output ({bool(output)}): {output}')
if output == '':
break
command_output += output
command_output = command_output.lstrip(cmd).strip()
# get the exit code
self.ssh.sendline('echo $?')
self.ssh.prompt(timeout=10)
exit_code = int(self.ssh.before.decode('utf-8').strip())
self.ssh.prompt()
exit_code = self.ssh.before.decode('utf-8')
while not exit_code.startswith('echo $?'):
self.ssh.prompt()
exit_code = self.ssh.before.decode('utf-8')
logger.debug(f'WAITING FOR exit code: {exit_code}')
exit_code = int(exit_code.lstrip('echo $?').strip())
return exit_code, command_output
def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
@ -307,9 +329,10 @@ class DockerSSHBox(Sandbox):
pass
def get_working_directory(self):
self.ssh.sendline('pwd')
self.ssh.prompt(timeout=10)
return self.ssh.before.decode('utf-8').strip()
exit_code, result = self.execute('pwd')
if exit_code != 0:
raise Exception('Failed to get working directory')
return result.strip()
def is_container_running(self):
try:

115
tests/unit/test_sandbox.py Normal file
View File

@ -0,0 +1,115 @@
import pathlib
import tempfile
from unittest.mock import patch
import pytest
from opendevin import config
from opendevin.sandbox.docker.ssh_box import DockerSSHBox
@pytest.fixture
def temp_dir():
# get a temporary directory
with tempfile.TemporaryDirectory() as temp_dir:
pathlib.Path().mkdir(parents=True, exist_ok=True)
yield temp_dir
def test_ssh_box_run_as_devin(temp_dir):
# get a temporary directory
with patch.dict(
config.config,
{
config.ConfigType.WORKSPACE_BASE: temp_dir,
config.ConfigType.RUN_AS_DEVIN: 'true',
config.ConfigType.SANDBOX_TYPE: 'ssh',
},
clear=True
):
ssh_box = DockerSSHBox()
# test the ssh box
exit_code, output = ssh_box.execute('ls -l')
assert exit_code == 0, 'The exit code should be 0.'
assert output.strip() == 'total 0'
exit_code, output = ssh_box.execute('mkdir test')
assert exit_code == 0, 'The exit code should be 0.'
assert output.strip() == ''
exit_code, output = ssh_box.execute('ls -l')
assert exit_code == 0, 'The exit code should be 0.'
assert 'opendevin' in output, "The output should contain username 'opendevin'"
assert 'test' in output, 'The output should contain the test directory'
exit_code, output = ssh_box.execute('touch test/foo.txt')
assert exit_code == 0, 'The exit code should be 0.'
assert output.strip() == ''
exit_code, output = ssh_box.execute('ls -l test')
assert exit_code == 0, 'The exit code should be 0.'
assert 'foo.txt' in output, 'The output should contain the foo.txt file'
def test_ssh_box_multi_line_cmd_run_as_devin(temp_dir):
# get a temporary directory
with patch.dict(
config.config,
{
config.ConfigType.WORKSPACE_BASE: temp_dir,
config.ConfigType.RUN_AS_DEVIN: 'true',
config.ConfigType.SANDBOX_TYPE: 'ssh',
},
clear=True
):
ssh_box = DockerSSHBox()
# test the ssh box
exit_code, output = ssh_box.execute('pwd\nls -l')
assert exit_code == 0, 'The exit code should be 0.'
expected_lines = ['/workspacels -l', 'total 0']
assert output.strip().splitlines() == expected_lines
def test_ssh_box_stateful_cmd_run_as_devin(temp_dir):
# get a temporary directory
with patch.dict(
config.config,
{
config.ConfigType.WORKSPACE_BASE: temp_dir,
config.ConfigType.RUN_AS_DEVIN: 'true',
config.ConfigType.SANDBOX_TYPE: 'ssh',
},
clear=True
):
ssh_box = DockerSSHBox()
# test the ssh box
exit_code, output = ssh_box.execute('mkdir test')
assert exit_code == 0, 'The exit code should be 0.'
assert output.strip() == ''
exit_code, output = ssh_box.execute('cd test')
assert exit_code == 0, 'The exit code should be 0.'
assert output.strip() == ''
exit_code, output = ssh_box.execute('pwd')
assert exit_code == 0, 'The exit code should be 0.'
assert output.strip() == '/workspace/test'
def test_ssh_box_failed_cmd_run_as_devin(temp_dir):
# get a temporary directory
with patch.dict(
config.config,
{
config.ConfigType.WORKSPACE_BASE: temp_dir,
config.ConfigType.RUN_AS_DEVIN: 'true',
config.ConfigType.SANDBOX_TYPE: 'ssh',
},
clear=True
):
ssh_box = DockerSSHBox()
# test the ssh box with a command that fails
exit_code, output = ssh_box.execute('non_existing_command')
assert exit_code != 0, 'The exit code should not be 0 for a failed command.'