From 44aea95ddee766dd25d20aa6eeab8759d383966f Mon Sep 17 00:00:00 2001 From: Christian Balcom Date: Sat, 27 Apr 2024 07:58:34 -0400 Subject: [PATCH] fix(backend) changes to improve Command-R+ behavior, plus file i/o error improvements. (#1347) * Some improvements to prompts, some better exception handling for various file IO errors, added timeout and max return token configurations for the LLM api. * More monologue prompt improvements * Dynamically set username provided in prompt. * Remove absolute paths from llm prompts, fetch working directory from sandbox when resolving paths in fileio operations, add customizable timeout for bash commands, mention said timeout in llm prompt. * Switched ssh_box to disabling tty echo and removed the logic attempting to delete it from the response afterwards, fixed get_working_directory for ssh_box. * Update prompts in integration tests to match monologue agent changes. * Minor tweaks to make merge easier. * Another minor prompt tweak, better invalid json handling. * Fix lint error * More catch-up to fix lint errors introduced by merge. --------- Co-authored-by: Jim Su Co-authored-by: Robert Brennan --- agenthub/monologue_agent/agent.py | 2 +- agenthub/monologue_agent/utils/prompts.py | 22 +++++++-- opendevin/action/fileop.py | 16 +++++-- opendevin/config.py | 3 ++ opendevin/controller/action_manager.py | 12 +++-- opendevin/llm/llm.py | 13 +++-- opendevin/sandbox/docker/exec_box.py | 3 ++ opendevin/sandbox/docker/local_box.py | 3 ++ opendevin/sandbox/docker/ssh_box.py | 48 ++++--------------- opendevin/sandbox/e2b/sandbox.py | 3 ++ opendevin/sandbox/sandbox.py | 4 ++ opendevin/schema/config.py | 3 ++ .../test_write_simple_script/prompt_001.log | 16 ++++--- .../test_write_simple_script/prompt_002.log | 16 ++++--- .../test_write_simple_script/prompt_003.log | 16 ++++--- .../test_write_simple_script/prompt_004.log | 16 ++++--- .../test_write_simple_script/prompt_005.log | 16 ++++--- .../test_write_simple_script/prompt_006.log | 16 ++++--- tests/test_fileops.py | 28 ++++++----- 19 files changed, 151 insertions(+), 105 deletions(-) diff --git a/agenthub/monologue_agent/agent.py b/agenthub/monologue_agent/agent.py index c1fbab4148..f4c1ee3d70 100644 --- a/agenthub/monologue_agent/agent.py +++ b/agenthub/monologue_agent/agent.py @@ -77,7 +77,7 @@ INITIAL_THOUGHTS = [ "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.", 'In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.', "OK so my task is to $TASK. I haven't made any progress yet. Where should I start?", - "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here.", + 'It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.', ] diff --git a/agenthub/monologue_agent/utils/prompts.py b/agenthub/monologue_agent/utils/prompts.py index 13763fb72b..f0c18dce77 100644 --- a/agenthub/monologue_agent/utils/prompts.py +++ b/agenthub/monologue_agent/utils/prompts.py @@ -28,8 +28,8 @@ This is your internal monologue, in JSON format: Your most recent thought is at the bottom of that monologue. Continue your train of thought. -What is your next thought or action? Your response must be in JSON format. -It must be an object, and it must contain two fields: +What is your next single thought or action? Your response must be in JSON format. +It must be a single object, and it must contain two fields: * `action`, which is one of the actions below * `args`, which is a map of key-value pairs, specifying the arguments for that action @@ -59,11 +59,15 @@ You should never act twice in a row without thinking. But if your last several actions are all "think" actions, you should consider taking a different action. Notes: -* your environment is Debian Linux. You can install software with `apt` -* your working directory will not change, even if you run `cd`. All commands will be run in the `%(WORKSPACE_MOUNT_PATH_IN_SANDBOX)s` directory. +* you are logged in as %(user)s, but sudo will always work without a password. +* all non-background commands will be forcibly stopped if they remain running for over %(timeout)s seconds. +* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y. * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`) +* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action. +* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead. +* whenever an action fails, always `think` about why it may have happened before acting again. -What is your next thought or action? Again, you must reply with JSON, and only with JSON. +What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object. %(hint)s """ @@ -142,11 +146,15 @@ def get_request_action_prompt( ) bg_commands_message += '\nYou can end any process by sending a `kill` action with the numerical `id` above.' + user = 'opendevin' if config.get(ConfigType.RUN_AS_DEVIN) else 'root' + return ACTION_PROMPT % { 'task': task, 'monologue': json.dumps(thoughts, indent=2), 'background_commands': bg_commands_message, 'hint': hint, + 'user': user, + 'timeout': config.get(ConfigType.SANDBOX_TIMEOUT), 'WORKSPACE_MOUNT_PATH_IN_SANDBOX': config.get(ConfigType.WORKSPACE_MOUNT_PATH_IN_SANDBOX), } @@ -181,6 +189,10 @@ def parse_action_response(response: str) -> Action: raise LLMOutputError( 'Invalid JSON, the response must be well-formed JSON as specified in the prompt.' ) + except TypeError: + raise LLMOutputError( + 'Invalid JSON, the response must be well-formed JSON as specified in the prompt.' + ) if 'content' in action_dict: # The LLM gets confused here. Might as well be robust action_dict['contents'] = action_dict.pop('content') diff --git a/opendevin/action/fileop.py b/opendevin/action/fileop.py index f8068f26f2..610327af25 100644 --- a/opendevin/action/fileop.py +++ b/opendevin/action/fileop.py @@ -20,10 +20,16 @@ from .base import ExecutableAction SANDBOX_PATH_PREFIX = '/workspace/' -def resolve_path(file_path): +def resolve_path(file_path, working_directory): + path_in_sandbox = Path(file_path) + + # Apply working directory + if not path_in_sandbox.is_absolute(): + path_in_sandbox = Path(working_directory) / path_in_sandbox + # Sanitize the path with respect to the root of the full sandbox - # (deny any .. path traversal to parent directories of this) - abs_path_in_sandbox = (Path(SANDBOX_PATH_PREFIX) / Path(file_path)).resolve() + # (deny any .. path traversal to parent directories of the sandbox) + abs_path_in_sandbox = path_in_sandbox.resolve() # If the path is outside the workspace, deny it if not abs_path_in_sandbox.is_relative_to(SANDBOX_PATH_PREFIX): @@ -71,7 +77,7 @@ class FileReadAction(ExecutableAction): code_view = ''.join(read_lines) else: try: - whole_path = resolve_path(self.path) + whole_path = resolve_path(self.path, controller.action_manager.sandbox.get_working_directory()) self.start = max(self.start, 0) try: with open(whole_path, 'r', encoding='utf-8') as file: @@ -121,7 +127,7 @@ class FileWriteAction(ExecutableAction): return AgentErrorObservation(f'File not found: {self.path}') else: try: - whole_path = resolve_path(self.path) + whole_path = resolve_path(self.path, controller.action_manager.sandbox.get_working_directory()) mode = 'w' if not os.path.exists(whole_path) else 'r+' try: with open(whole_path, mode, encoding='utf-8') as file: diff --git a/opendevin/config.py b/opendevin/config.py index 7cd60653f5..5a7a747142 100644 --- a/opendevin/config.py +++ b/opendevin/config.py @@ -36,6 +36,8 @@ DEFAULT_CONFIG: dict = { ConfigType.LLM_RETRY_MIN_WAIT: 3, ConfigType.LLM_RETRY_MAX_WAIT: 60, ConfigType.MAX_ITERATIONS: 100, + ConfigType.LLM_TIMEOUT: None, + ConfigType.LLM_MAX_RETURN_TOKENS: None, ConfigType.AGENT_MEMORY_MAX_THREADS: 2, ConfigType.AGENT_MEMORY_ENABLED: False, # GPT-4 pricing is $10 per 1M input tokens. Since tokenization happens on LLM side, @@ -48,6 +50,7 @@ DEFAULT_CONFIG: dict = { ConfigType.USE_HOST_NETWORK: 'false', ConfigType.SSH_HOSTNAME: 'localhost', ConfigType.DISABLE_COLOR: 'false', + ConfigType.SANDBOX_TIMEOUT: 120 } config_str = '' diff --git a/opendevin/controller/action_manager.py b/opendevin/controller/action_manager.py index 8be460e1f2..bc4f6d67de 100644 --- a/opendevin/controller/action_manager.py +++ b/opendevin/controller/action_manager.py @@ -26,15 +26,21 @@ class ActionManager: if sandbox_type == 'exec': self.sandbox = DockerExecBox( sid=(sid or 'default'), + timeout=config.get(ConfigType.SANDBOX_TIMEOUT) ) elif sandbox_type == 'local': - self.sandbox = LocalBox() + self.sandbox = LocalBox( + timeout=config.get(ConfigType.SANDBOX_TIMEOUT) + ) elif sandbox_type == 'ssh': self.sandbox = DockerSSHBox( - sid=(sid or 'default') + sid=(sid or 'default'), + timeout=config.get(ConfigType.SANDBOX_TIMEOUT) ) elif sandbox_type == 'e2b': - self.sandbox = E2BBox() + self.sandbox = E2BBox( + timeout=config.get(ConfigType.SANDBOX_TIMEOUT) + ) else: raise ValueError(f'Invalid sandbox type: {sandbox_type}') diff --git a/opendevin/llm/llm.py b/opendevin/llm/llm.py index 04fa6cecb8..2677c1d18c 100644 --- a/opendevin/llm/llm.py +++ b/opendevin/llm/llm.py @@ -4,10 +4,9 @@ from litellm.exceptions import APIConnectionError, RateLimitError, ServiceUnavai from functools import partial from opendevin import config -from opendevin.schema.config import ConfigType from opendevin.logger import llm_prompt_logger, llm_response_logger from opendevin.logger import opendevin_logger as logger - +from opendevin.schema import ConfigType DEFAULT_API_KEY = config.get(ConfigType.LLM_API_KEY) DEFAULT_BASE_URL = config.get(ConfigType.LLM_BASE_URL) @@ -16,6 +15,8 @@ DEFAULT_API_VERSION = config.get(ConfigType.LLM_API_VERSION) LLM_NUM_RETRIES = config.get(ConfigType.LLM_NUM_RETRIES) LLM_RETRY_MIN_WAIT = config.get(ConfigType.LLM_RETRY_MIN_WAIT) LLM_RETRY_MAX_WAIT = config.get(ConfigType.LLM_RETRY_MAX_WAIT) +LLM_TIMEOUT = config.get(ConfigType.LLM_TIMEOUT) +LLM_MAX_RETURN_TOKENS = config.get(ConfigType.LLM_MAX_RETURN_TOKENS) class LLM: @@ -31,6 +32,8 @@ class LLM: num_retries=LLM_NUM_RETRIES, retry_min_wait=LLM_RETRY_MIN_WAIT, retry_max_wait=LLM_RETRY_MAX_WAIT, + llm_timeout=LLM_TIMEOUT, + llm_max_return_tokens=LLM_MAX_RETURN_TOKENS ): """ Args: @@ -41,6 +44,8 @@ class LLM: num_retries (int, optional): The number of retries for API calls. Defaults to LLM_NUM_RETRIES. retry_min_wait (int, optional): The minimum time to wait between retries in seconds. Defaults to LLM_RETRY_MIN_TIME. retry_max_wait (int, optional): The maximum time to wait between retries in seconds. Defaults to LLM_RETRY_MAX_TIME. + llm_timeout (int, optional): The maximum time to wait for a response in seconds. Defaults to LLM_TIMEOUT. + llm_max_return_tokens (int, optional): The maximum number of tokens to return. Defaults to LLM_MAX_RETURN_TOKENS. Attributes: model_name (str): The name of the language model. @@ -54,9 +59,11 @@ class LLM: self.api_key = api_key self.base_url = base_url self.api_version = api_version + self.llm_timeout = llm_timeout + self.llm_max_return_tokens = llm_max_return_tokens self._completion = partial( - litellm_completion, model=self.model_name, api_key=self.api_key, base_url=self.base_url, api_version=self.api_version) + litellm_completion, model=self.model_name, api_key=self.api_key, base_url=self.base_url, api_version=self.api_version, max_tokens=self.llm_max_return_tokens, timeout=self.llm_timeout) completion_unwrapped = self._completion diff --git a/opendevin/sandbox/docker/exec_box.py b/opendevin/sandbox/docker/exec_box.py index 89f634ef6c..7e86ed0ad5 100644 --- a/opendevin/sandbox/docker/exec_box.py +++ b/opendevin/sandbox/docker/exec_box.py @@ -268,6 +268,9 @@ class DockerExecBox(Sandbox): except docker.errors.NotFound: pass + def get_working_directory(self): + return SANDBOX_WORKSPACE_DIR + if __name__ == '__main__': try: diff --git a/opendevin/sandbox/docker/local_box.py b/opendevin/sandbox/docker/local_box.py index 4d84fdfb9f..a5d54d4b68 100644 --- a/opendevin/sandbox/docker/local_box.py +++ b/opendevin/sandbox/docker/local_box.py @@ -96,3 +96,6 @@ class LocalBox(Sandbox): def cleanup(self): self.close() + + def get_working_directory(self): + return config.get(ConfigType.WORKSPACE_BASE) diff --git a/opendevin/sandbox/docker/ssh_box.py b/opendevin/sandbox/docker/ssh_box.py index e21895fa41..4a13a95f1b 100644 --- a/opendevin/sandbox/docker/ssh_box.py +++ b/opendevin/sandbox/docker/ssh_box.py @@ -169,7 +169,7 @@ class DockerSSHBox(Sandbox): def start_ssh_session(self): # start ssh session at the background - self.ssh = pxssh.pxssh() + self.ssh = pxssh.pxssh(echo=False) hostname = SSH_HOSTNAME if RUN_AS_DEVIN: username = 'opendevin' @@ -211,49 +211,14 @@ class DockerSSHBox(Sandbox): # send a SIGINT to the process self.ssh.sendintr() self.ssh.prompt() - command_output = self.ssh.before.decode( - 'utf-8').lstrip(cmd).strip() + command_output = self.ssh.before.decode('utf-8').strip() return -1, f'Command: "{cmd}" timed out. Sending SIGINT to the process: {command_output}' command_output = self.ssh.before.decode('utf-8').strip() - # NOTE: there's some weird behavior with the prompt (it may come AFTER the command output) - # so we need to check if the command is in the output - n_tries = 5 - while not command_output.startswith(cmd) and n_tries > 0: - self.ssh.prompt() - command_output = self.ssh.before.decode('utf-8').strip() - time.sleep(0.5) - n_tries -= 1 - if n_tries == 0 and not command_output.startswith(cmd): - raise Exception( - f'Something went wrong with the SSH sanbox, cannot get output for command [{cmd}] after 5 retries' - ) - logger.debug(f'Command output GOT SO FAR: {command_output}') - # once out, make sure that we have *every* output, we while loop until we get an empty output - while True: - logger.debug('WAITING FOR .prompt()') - self.ssh.sendline('\n') - timeout_not_reached = self.ssh.prompt(timeout=1) - if not timeout_not_reached: - logger.debug('TIMEOUT REACHED') - break - logger.debug('WAITING FOR .before') - output = self.ssh.before.decode('utf-8').strip() - logger.debug(f'WAITING FOR END OF command output ({bool(output)}): {output}') - if output == '': - break - command_output += output - command_output = command_output.lstrip(cmd).strip() - # get the exit code self.ssh.sendline('echo $?') - self.ssh.prompt() - exit_code = self.ssh.before.decode('utf-8') - while not exit_code.startswith('echo $?'): - self.ssh.prompt() - exit_code = self.ssh.before.decode('utf-8') - logger.debug(f'WAITING FOR exit code: {exit_code}') - exit_code = int(exit_code.lstrip('echo $?').strip()) + self.ssh.prompt(timeout=10) + exit_code = int(self.ssh.before.decode('utf-8').strip()) return exit_code, command_output def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False): @@ -337,6 +302,11 @@ class DockerSSHBox(Sandbox): except docker.errors.NotFound: pass + def get_working_directory(self): + self.ssh.sendline('pwd') + self.ssh.prompt(timeout=10) + return self.ssh.before.decode('utf-8').strip() + def is_container_running(self): try: container = self.docker_client.containers.get(self.container_name) diff --git a/opendevin/sandbox/e2b/sandbox.py b/opendevin/sandbox/e2b/sandbox.py index 58511e3ff0..ef7d53beb2 100644 --- a/opendevin/sandbox/e2b/sandbox.py +++ b/opendevin/sandbox/e2b/sandbox.py @@ -124,3 +124,6 @@ class E2BBox(Sandbox): def close(self): self.sandbox.close() + + def get_working_directory(self): + return self.sandbox.cwd diff --git a/opendevin/sandbox/sandbox.py b/opendevin/sandbox/sandbox.py index ed648c3559..fcbcfc5ab2 100644 --- a/opendevin/sandbox/sandbox.py +++ b/opendevin/sandbox/sandbox.py @@ -32,3 +32,7 @@ class Sandbox(ABC, PluginMixin): @abstractmethod def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False): pass + + @abstractmethod + def get_working_directory(self): + pass diff --git a/opendevin/schema/config.py b/opendevin/schema/config.py index 0d5e7eef03..8dcf7b4713 100644 --- a/opendevin/schema/config.py +++ b/opendevin/schema/config.py @@ -2,6 +2,8 @@ from enum import Enum class ConfigType(str, Enum): + LLM_MAX_RETURN_TOKENS = 'LLM_MAX_RETURN_TOKENS' + LLM_TIMEOUT = 'LLM_TIMEOUT' LLM_API_KEY = 'LLM_API_KEY' LLM_BASE_URL = 'LLM_BASE_URL' WORKSPACE_BASE = 'WORKSPACE_BASE' @@ -26,6 +28,7 @@ class ConfigType(str, Enum): E2B_API_KEY = 'E2B_API_KEY' SANDBOX_TYPE = 'SANDBOX_TYPE' SANDBOX_USER_ID = 'SANDBOX_USER_ID' + SANDBOX_TIMEOUT = 'SANDBOX_TIMEOUT' USE_HOST_NETWORK = 'USE_HOST_NETWORK' SSH_HOSTNAME = 'SSH_HOSTNAME' DISABLE_COLOR = 'DISABLE_COLOR' diff --git a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_001.log b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_001.log index 77fb6697e0..3f70cebcd1 100644 --- a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_001.log +++ b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_001.log @@ -281,15 +281,15 @@ This is your internal monologue, in JSON format: { "action": "think", "args": { - "thought": "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here." + "thought": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself." } } ] Your most recent thought is at the bottom of that monologue. Continue your train of thought. -What is your next thought or action? Your response must be in JSON format. -It must be an object, and it must contain two fields: +What is your next single thought or action? Your response must be in JSON format. +It must be a single object, and it must contain two fields: * `action`, which is one of the actions below * `args`, which is a map of key-value pairs, specifying the arguments for that action @@ -319,10 +319,14 @@ You should never act twice in a row without thinking. But if your last several actions are all "think" actions, you should consider taking a different action. Notes: -* your environment is Debian Linux. You can install software with `apt` -* your working directory will not change, even if you run `cd`. All commands will be run in the `/workspace` directory. +* you are logged in as opendevin, but sudo will always work without a password. +* all non-background commands will be forcibly stopped if they remain running for over 120 seconds. +* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y. * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`) +* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action. +* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead. +* whenever an action fails, always `think` about why it may have happened before acting again. -What is your next thought or action? Again, you must reply with JSON, and only with JSON. +What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object. You've been thinking a lot lately. Maybe it's time to take action? diff --git a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_002.log b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_002.log index 1be5f98dca..0f8e50655f 100644 --- a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_002.log +++ b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_002.log @@ -281,7 +281,7 @@ This is your internal monologue, in JSON format: { "action": "think", "args": { - "thought": "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here." + "thought": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself." } }, { @@ -304,8 +304,8 @@ This is your internal monologue, in JSON format: Your most recent thought is at the bottom of that monologue. Continue your train of thought. -What is your next thought or action? Your response must be in JSON format. -It must be an object, and it must contain two fields: +What is your next single thought or action? Your response must be in JSON format. +It must be a single object, and it must contain two fields: * `action`, which is one of the actions below * `args`, which is a map of key-value pairs, specifying the arguments for that action @@ -335,8 +335,12 @@ You should never act twice in a row without thinking. But if your last several actions are all "think" actions, you should consider taking a different action. Notes: -* your environment is Debian Linux. You can install software with `apt` -* your working directory will not change, even if you run `cd`. All commands will be run in the `/workspace` directory. +* you are logged in as opendevin, but sudo will always work without a password. +* all non-background commands will be forcibly stopped if they remain running for over 120 seconds. +* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y. * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`) +* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action. +* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead. +* whenever an action fails, always `think` about why it may have happened before acting again. -What is your next thought or action? Again, you must reply with JSON, and only with JSON. +What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object. diff --git a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_003.log b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_003.log index f6641dd139..e418873a50 100644 --- a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_003.log +++ b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_003.log @@ -281,7 +281,7 @@ This is your internal monologue, in JSON format: { "action": "think", "args": { - "thought": "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here." + "thought": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself." } }, { @@ -321,8 +321,8 @@ This is your internal monologue, in JSON format: Your most recent thought is at the bottom of that monologue. Continue your train of thought. -What is your next thought or action? Your response must be in JSON format. -It must be an object, and it must contain two fields: +What is your next single thought or action? Your response must be in JSON format. +It must be a single object, and it must contain two fields: * `action`, which is one of the actions below * `args`, which is a map of key-value pairs, specifying the arguments for that action @@ -352,8 +352,12 @@ You should never act twice in a row without thinking. But if your last several actions are all "think" actions, you should consider taking a different action. Notes: -* your environment is Debian Linux. You can install software with `apt` -* your working directory will not change, even if you run `cd`. All commands will be run in the `/workspace` directory. +* you are logged in as opendevin, but sudo will always work without a password. +* all non-background commands will be forcibly stopped if they remain running for over 120 seconds. +* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y. * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`) +* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action. +* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead. +* whenever an action fails, always `think` about why it may have happened before acting again. -What is your next thought or action? Again, you must reply with JSON, and only with JSON. +What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object. diff --git a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_004.log b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_004.log index ec12245158..8247b02985 100644 --- a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_004.log +++ b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_004.log @@ -281,7 +281,7 @@ This is your internal monologue, in JSON format: { "action": "think", "args": { - "thought": "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here." + "thought": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself." } }, { @@ -332,8 +332,8 @@ This is your internal monologue, in JSON format: Your most recent thought is at the bottom of that monologue. Continue your train of thought. -What is your next thought or action? Your response must be in JSON format. -It must be an object, and it must contain two fields: +What is your next single thought or action? Your response must be in JSON format. +It must be a single object, and it must contain two fields: * `action`, which is one of the actions below * `args`, which is a map of key-value pairs, specifying the arguments for that action @@ -363,8 +363,12 @@ You should never act twice in a row without thinking. But if your last several actions are all "think" actions, you should consider taking a different action. Notes: -* your environment is Debian Linux. You can install software with `apt` -* your working directory will not change, even if you run `cd`. All commands will be run in the `/workspace` directory. +* you are logged in as opendevin, but sudo will always work without a password. +* all non-background commands will be forcibly stopped if they remain running for over 120 seconds. +* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y. * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`) +* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action. +* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead. +* whenever an action fails, always `think` about why it may have happened before acting again. -What is your next thought or action? Again, you must reply with JSON, and only with JSON. +What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object. diff --git a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_005.log b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_005.log index 4878085a4c..aa7a9229e2 100644 --- a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_005.log +++ b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_005.log @@ -281,7 +281,7 @@ This is your internal monologue, in JSON format: { "action": "think", "args": { - "thought": "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here." + "thought": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself." } }, { @@ -348,8 +348,8 @@ This is your internal monologue, in JSON format: Your most recent thought is at the bottom of that monologue. Continue your train of thought. -What is your next thought or action? Your response must be in JSON format. -It must be an object, and it must contain two fields: +What is your next single thought or action? Your response must be in JSON format. +It must be a single object, and it must contain two fields: * `action`, which is one of the actions below * `args`, which is a map of key-value pairs, specifying the arguments for that action @@ -379,8 +379,12 @@ You should never act twice in a row without thinking. But if your last several actions are all "think" actions, you should consider taking a different action. Notes: -* your environment is Debian Linux. You can install software with `apt` -* your working directory will not change, even if you run `cd`. All commands will be run in the `/workspace` directory. +* you are logged in as opendevin, but sudo will always work without a password. +* all non-background commands will be forcibly stopped if they remain running for over 120 seconds. +* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y. * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`) +* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action. +* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead. +* whenever an action fails, always `think` about why it may have happened before acting again. -What is your next thought or action? Again, you must reply with JSON, and only with JSON. +What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object. diff --git a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_006.log b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_006.log index 06dbef8635..d164391e0a 100644 --- a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_006.log +++ b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_006.log @@ -281,7 +281,7 @@ This is your internal monologue, in JSON format: { "action": "think", "args": { - "thought": "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here." + "thought": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself." } }, { @@ -364,8 +364,8 @@ This is your internal monologue, in JSON format: Your most recent thought is at the bottom of that monologue. Continue your train of thought. -What is your next thought or action? Your response must be in JSON format. -It must be an object, and it must contain two fields: +What is your next single thought or action? Your response must be in JSON format. +It must be a single object, and it must contain two fields: * `action`, which is one of the actions below * `args`, which is a map of key-value pairs, specifying the arguments for that action @@ -395,8 +395,12 @@ You should never act twice in a row without thinking. But if your last several actions are all "think" actions, you should consider taking a different action. Notes: -* your environment is Debian Linux. You can install software with `apt` -* your working directory will not change, even if you run `cd`. All commands will be run in the `/workspace` directory. +* you are logged in as opendevin, but sudo will always work without a password. +* all non-background commands will be forcibly stopped if they remain running for over 120 seconds. +* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y. * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`) +* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action. +* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead. +* whenever an action fails, always `think` about why it may have happened before acting again. -What is your next thought or action? Again, you must reply with JSON, and only with JSON. +What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object. diff --git a/tests/test_fileops.py b/tests/test_fileops.py index fa9ca46e7e..e657ac7dc8 100644 --- a/tests/test_fileops.py +++ b/tests/test_fileops.py @@ -1,24 +1,26 @@ +from opendevin import config +from opendevin.schema import ConfigType +from opendevin.action import fileop from pathlib import Path - import pytest -from opendevin import config -from opendevin.schema.config import ConfigType -from opendevin.action import fileop def test_resolve_path(): - assert fileop.resolve_path('test.txt') == Path(config.get(ConfigType.WORKSPACE_BASE)) / 'test.txt' - assert fileop.resolve_path('subdir/test.txt') == Path(config.get(ConfigType.WORKSPACE_BASE)) / 'subdir' / 'test.txt' - assert fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / 'test.txt') == \ - Path(config.get(ConfigType.WORKSPACE_BASE)) / 'test.txt' - assert fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / 'subdir' / 'test.txt') == \ + assert fileop.resolve_path('test.txt', '/workspace') == Path(config.get(ConfigType.WORKSPACE_BASE)) / 'test.txt' + assert fileop.resolve_path('subdir/test.txt', '/workspace') == \ Path(config.get(ConfigType.WORKSPACE_BASE)) / 'subdir' / 'test.txt' - assert fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / 'subdir' / '..' / 'test.txt') == \ + assert fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / 'test.txt', '/workspace') == \ Path(config.get(ConfigType.WORKSPACE_BASE)) / 'test.txt' + assert fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / 'subdir' / 'test.txt', + '/workspace') == Path(config.get(ConfigType.WORKSPACE_BASE)) / 'subdir' / 'test.txt' + assert fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / 'subdir' / '..' / 'test.txt', + '/workspace') == Path(config.get(ConfigType.WORKSPACE_BASE)) / 'test.txt' with pytest.raises(PermissionError): - fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / '..' / 'test.txt') + fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / '..' / 'test.txt', '/workspace') with pytest.raises(PermissionError): - fileop.resolve_path(Path('..') / 'test.txt') + fileop.resolve_path(Path('..') / 'test.txt', '/workspace') with pytest.raises(PermissionError): - fileop.resolve_path(Path('/') / 'test.txt') + fileop.resolve_path(Path('/') / 'test.txt', '/workspace') + assert fileop.resolve_path('test.txt', '/workspace/test') == \ + Path(config.get(ConfigType.WORKSPACE_BASE)) / 'test' / 'test.txt'