diff --git a/.github/workflows/run-unit-tests.yml b/.github/workflows/run-unit-tests.yml index 4e34ec2474..47697da73d 100644 --- a/.github/workflows/run-unit-tests.yml +++ b/.github/workflows/run-unit-tests.yml @@ -34,11 +34,16 @@ jobs: brew install colima docker colima start + # For testcontainers to find the Colima socket + # https://github.com/abiosoft/colima/blob/main/docs/FAQ.md#cannot-connect-to-the-docker-daemon-at-unixvarrundockersock-is-the-docker-daemon-running + sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock + - name: Build Environment run: make build - name: Run Tests run: poetry run pytest --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit + - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 env: @@ -70,6 +75,7 @@ jobs: - name: Run Tests run: poetry run pytest --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit + - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 env: diff --git a/Development.md b/Development.md index f98e3e437d..c67b569014 100644 --- a/Development.md +++ b/Development.md @@ -82,3 +82,15 @@ If you encounter any issues with the Language Model (LM) or you're simply curiou ```bash make help ``` + +### 8. Testing + +#### Unit tests + +```bash +poetry run pytest ./tests/unit/test_sandbox.py +``` + +#### Integration tests + +Please refer to [this README](./tests/integration/README.md) for details. diff --git a/opendevin/sandbox/docker/ssh_box.py b/opendevin/sandbox/docker/ssh_box.py index d0aef06fb1..71f46a7864 100644 --- a/opendevin/sandbox/docker/ssh_box.py +++ b/opendevin/sandbox/docker/ssh_box.py @@ -171,7 +171,7 @@ class DockerSSHBox(Sandbox): def start_ssh_session(self): # start ssh session at the background - self.ssh = pxssh.pxssh(echo=False) + self.ssh = pxssh.pxssh() hostname = SSH_HOSTNAME if RUN_AS_DEVIN: username = 'opendevin' @@ -215,14 +215,36 @@ class DockerSSHBox(Sandbox): # send a SIGINT to the process self.ssh.sendintr() self.ssh.prompt() - command_output = self.ssh.before.decode('utf-8').strip() + command_output = self.ssh.before.decode( + 'utf-8').lstrip(cmd).strip() return -1, f'Command: "{cmd}" timed out. Sending SIGINT to the process: {command_output}' command_output = self.ssh.before.decode('utf-8').strip() + # once out, make sure that we have *every* output, we while loop until we get an empty output + while True: + logger.debug('WAITING FOR .prompt()') + self.ssh.sendline('\n') + timeout_not_reached = self.ssh.prompt(timeout=1) + if not timeout_not_reached: + logger.debug('TIMEOUT REACHED') + break + logger.debug('WAITING FOR .before') + output = self.ssh.before.decode('utf-8').strip() + logger.debug(f'WAITING FOR END OF command output ({bool(output)}): {output}') + if output == '': + break + command_output += output + command_output = command_output.lstrip(cmd).strip() + # get the exit code self.ssh.sendline('echo $?') - self.ssh.prompt(timeout=10) - exit_code = int(self.ssh.before.decode('utf-8').strip()) + self.ssh.prompt() + exit_code = self.ssh.before.decode('utf-8') + while not exit_code.startswith('echo $?'): + self.ssh.prompt() + exit_code = self.ssh.before.decode('utf-8') + logger.debug(f'WAITING FOR exit code: {exit_code}') + exit_code = int(exit_code.lstrip('echo $?').strip()) return exit_code, command_output def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False): @@ -307,9 +329,10 @@ class DockerSSHBox(Sandbox): pass def get_working_directory(self): - self.ssh.sendline('pwd') - self.ssh.prompt(timeout=10) - return self.ssh.before.decode('utf-8').strip() + exit_code, result = self.execute('pwd') + if exit_code != 0: + raise Exception('Failed to get working directory') + return result.strip() def is_container_running(self): try: diff --git a/tests/unit/test_sandbox.py b/tests/unit/test_sandbox.py new file mode 100644 index 0000000000..e815033c9a --- /dev/null +++ b/tests/unit/test_sandbox.py @@ -0,0 +1,115 @@ +import pathlib +import tempfile +from unittest.mock import patch + +import pytest + +from opendevin import config +from opendevin.sandbox.docker.ssh_box import DockerSSHBox + + +@pytest.fixture +def temp_dir(): + # get a temporary directory + with tempfile.TemporaryDirectory() as temp_dir: + pathlib.Path().mkdir(parents=True, exist_ok=True) + yield temp_dir + + +def test_ssh_box_run_as_devin(temp_dir): + # get a temporary directory + with patch.dict( + config.config, + { + config.ConfigType.WORKSPACE_BASE: temp_dir, + config.ConfigType.RUN_AS_DEVIN: 'true', + config.ConfigType.SANDBOX_TYPE: 'ssh', + }, + clear=True + ): + ssh_box = DockerSSHBox() + + # test the ssh box + exit_code, output = ssh_box.execute('ls -l') + assert exit_code == 0, 'The exit code should be 0.' + assert output.strip() == 'total 0' + + exit_code, output = ssh_box.execute('mkdir test') + assert exit_code == 0, 'The exit code should be 0.' + assert output.strip() == '' + + exit_code, output = ssh_box.execute('ls -l') + assert exit_code == 0, 'The exit code should be 0.' + assert 'opendevin' in output, "The output should contain username 'opendevin'" + assert 'test' in output, 'The output should contain the test directory' + + exit_code, output = ssh_box.execute('touch test/foo.txt') + assert exit_code == 0, 'The exit code should be 0.' + assert output.strip() == '' + + exit_code, output = ssh_box.execute('ls -l test') + assert exit_code == 0, 'The exit code should be 0.' + assert 'foo.txt' in output, 'The output should contain the foo.txt file' + + +def test_ssh_box_multi_line_cmd_run_as_devin(temp_dir): + # get a temporary directory + with patch.dict( + config.config, + { + config.ConfigType.WORKSPACE_BASE: temp_dir, + config.ConfigType.RUN_AS_DEVIN: 'true', + config.ConfigType.SANDBOX_TYPE: 'ssh', + }, + clear=True + ): + ssh_box = DockerSSHBox() + + # test the ssh box + exit_code, output = ssh_box.execute('pwd\nls -l') + assert exit_code == 0, 'The exit code should be 0.' + expected_lines = ['/workspacels -l', 'total 0'] + assert output.strip().splitlines() == expected_lines + +def test_ssh_box_stateful_cmd_run_as_devin(temp_dir): + # get a temporary directory + with patch.dict( + config.config, + { + config.ConfigType.WORKSPACE_BASE: temp_dir, + config.ConfigType.RUN_AS_DEVIN: 'true', + config.ConfigType.SANDBOX_TYPE: 'ssh', + }, + clear=True + ): + ssh_box = DockerSSHBox() + + # test the ssh box + exit_code, output = ssh_box.execute('mkdir test') + assert exit_code == 0, 'The exit code should be 0.' + assert output.strip() == '' + + exit_code, output = ssh_box.execute('cd test') + assert exit_code == 0, 'The exit code should be 0.' + assert output.strip() == '' + + exit_code, output = ssh_box.execute('pwd') + assert exit_code == 0, 'The exit code should be 0.' + assert output.strip() == '/workspace/test' + +def test_ssh_box_failed_cmd_run_as_devin(temp_dir): + # get a temporary directory + with patch.dict( + config.config, + { + config.ConfigType.WORKSPACE_BASE: temp_dir, + config.ConfigType.RUN_AS_DEVIN: 'true', + config.ConfigType.SANDBOX_TYPE: 'ssh', + }, + clear=True + ): + ssh_box = DockerSSHBox() + + # test the ssh box with a command that fails + exit_code, output = ssh_box.execute('non_existing_command') + assert exit_code != 0, 'The exit code should not be 0 for a failed command.'