From a84d19f03c8fbea5d0b468685a125e7706341068 Mon Sep 17 00:00:00 2001 From: Frank Xu Date: Wed, 15 May 2024 11:59:58 -0400 Subject: [PATCH] Enable CodeAct agents with browsing, and also enable arbitrary BrowserGym action support (#1807) * enable browsing in codeact, and arbitrary browsergym DSL support * fix * fix unit test case * update frontend for the new interactive browsing action * bump ver * Fix integration tests --------- Co-authored-by: OpenDevinBot --- agenthub/codeact_agent/codeact_agent.py | 20 +++++- agenthub/codeact_agent/prompt.py | 22 +++++- frontend/src/services/actions.ts | 5 ++ frontend/src/types/ActionType.tsx | 3 + opendevin/core/schema/action.py | 4 ++ opendevin/events/action/__init__.py | 3 +- opendevin/events/action/browse.py | 12 ++++ opendevin/events/serialization/action.py | 3 +- opendevin/runtime/runtime.py | 5 ++ opendevin/runtime/server/browse.py | 25 +++++-- opendevin/runtime/server/runtime.py | 4 ++ .../CodeActAgent/test_edits/prompt_001.log | 21 +++++- .../CodeActAgent/test_edits/prompt_002.log | 34 +++++++--- .../CodeActAgent/test_edits/prompt_003.log | 68 +++++++++++++------ .../CodeActAgent/test_edits/response_001.log | 4 +- .../CodeActAgent/test_edits/response_002.log | 7 +- .../CodeActAgent/test_edits/response_003.log | 9 ++- .../CodeActAgent/test_ipython/prompt_001.log | 21 +++++- .../CodeActAgent/test_ipython/prompt_002.log | 24 +++++-- .../test_ipython/response_001.log | 3 +- .../test_ipython/response_002.log | 2 +- .../test_write_simple_script/prompt_001.log | 21 +++++- .../test_write_simple_script/prompt_002.log | 25 +++++-- .../test_write_simple_script/response_001.log | 4 +- .../test_write_simple_script/response_002.log | 2 +- tests/unit/test_action_serialization.py | 9 +++ 26 files changed, 293 insertions(+), 67 deletions(-) diff --git a/agenthub/codeact_agent/codeact_agent.py b/agenthub/codeact_agent/codeact_agent.py index 3273e6d745..c0e7ad1053 100644 --- a/agenthub/codeact_agent/codeact_agent.py +++ 
b/agenthub/codeact_agent/codeact_agent.py @@ -13,11 +13,13 @@ from opendevin.core.logger import opendevin_logger as logger from opendevin.events.action import ( Action, AgentFinishAction, + BrowseInteractiveAction, CmdRunAction, IPythonRunCellAction, MessageAction, ) from opendevin.events.observation import ( + BrowserOutputObservation, CmdOutputObservation, IPythonRunCellObservation, ) @@ -33,7 +35,7 @@ ENABLE_GITHUB = True def parse_response(response) -> str: action = response.choices[0].message.content - for lang in ['bash', 'ipython']: + for lang in ['bash', 'ipython', 'browse']: if f'' in action and f'' not in action: action += f'' return action @@ -85,7 +87,7 @@ def swe_agent_edit_hack(bash_command: str) -> str: class CodeActAgent(Agent): - VERSION = '1.2' + VERSION = '1.3' """ The Code Act Agent is a minimalist agent. The agent works by passing the model a list of action-observation pairs and prompting the model to take the next step. @@ -171,6 +173,7 @@ class CodeActAgent(Agent): Returns: - CmdRunAction(command) - bash command to run - IPythonRunCellAction(code) - IPython code to run + - BrowseInteractiveAction(browsergym_command) - BrowserGym commands to run - MessageAction(content) - Message action to run (e.g. 
ask for clarification) - AgentFinishAction() - end the interaction """ @@ -205,6 +208,9 @@ class CodeActAgent(Agent): content = '\n'.join(splitted) content = truncate_observation(content) self.messages.append({'role': 'user', 'content': content}) + elif isinstance(obs, BrowserOutputObservation): + content = 'OBSERVATION:\n' + truncate_observation(obs.content) + self.messages.append({'role': 'user', 'content': content}) latest_user_message = [m for m in self.messages if m['role'] == 'user'][-1] if latest_user_message: @@ -217,6 +223,7 @@ class CodeActAgent(Agent): stop=[ '', '', + '', ], temperature=0.0, ) @@ -251,6 +258,15 @@ class CodeActAgent(Agent): code_group = python_code.group(1).strip() thought = action_str.replace(python_code.group(0), '').strip() return IPythonRunCellAction(code=code_group, thought=thought) + elif browse_command := re.search( + r'(.*)', action_str, re.DOTALL + ): + # BrowserGym actions was found + browse_actions = browse_command.group(1).strip() + thought = action_str.replace(browse_command.group(0), '').strip() + return BrowseInteractiveAction( + browser_actions=browse_actions, thought=thought + ) else: # We assume the LLM is GOOD enough that when it returns pure natural language # it want to talk to the user diff --git a/agenthub/codeact_agent/prompt.py b/agenthub/codeact_agent/prompt.py index e84f4d8e0e..aea5d4de69 100644 --- a/agenthub/codeact_agent/prompt.py +++ b/agenthub/codeact_agent/prompt.py @@ -34,6 +34,8 @@ print("Hello World!") The assistant can execute bash commands on behalf of the user by wrapping them with and . For example, you can list the files in the current directory by ls . +The assistant can browse the Internet with commands on behalf of the user by wrapping them with and . +For example, you can browse a given URL by goto("") . The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. 
The assistant can install Python packages through bash by pip install [package needed] and should always import packages and define variables before starting to use them. The assistant should stop and provide an answer when they have already obtained the answer from the execution result. @@ -49,8 +51,8 @@ For instance, to push a local branch `my_branch` to the github repo `owner/repo` If you require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it for you.""" SYSTEM_SUFFIX = """The assistant's response should be concise. -You should include or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. -IMPORTANT: Whenever possible, execute the code for the user using or instead of providing it. +You should include or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. """ EXAMPLES = """ @@ -154,6 +156,21 @@ Press CTRL+C to quit ASSISTANT: The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask! +USER: Now browse the newly started server's homepage and show me the content. + +ASSISTANT: +Sure! Let me browse the server's homepage at http://127.0.0.1:5000: + +goto("http://127.0.0.1:5000") + + +USER: +Observation: +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + +ASSISTANT: +The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask! + USER: Now kill the server, make it display the numbers in a table format. ASSISTANT: @@ -230,4 +247,5 @@ INVALID_INPUT_MESSAGE = ( "I don't understand your input. 
\n" 'If you want to execute a bash command, please use YOUR_COMMAND_HERE .\n' 'If you want to execute a block of Python code, please use YOUR_COMMAND_HERE .\n' + 'If you want to browse the Internet, please use YOUR_COMMAND_HERE .\n' ) diff --git a/frontend/src/services/actions.ts b/frontend/src/services/actions.ts index ea095ca3e4..4603eaa3ae 100644 --- a/frontend/src/services/actions.ts +++ b/frontend/src/services/actions.ts @@ -17,6 +17,11 @@ const messageActions = { store.dispatch(setUrl(url)); store.dispatch(setScreenshotSrc(screenshotSrc)); }, + [ActionType.BROWSE_INTERACTIVE]: (message: ActionMessage) => { + const { url, screenshotSrc } = message.args; + store.dispatch(setUrl(url)); + store.dispatch(setScreenshotSrc(screenshotSrc)); + }, [ActionType.WRITE]: (message: ActionMessage) => { const { path, content } = message.args; store.dispatch(updatePath(path)); diff --git a/frontend/src/types/ActionType.tsx b/frontend/src/types/ActionType.tsx index f6e170ea1d..37bd5b4d9d 100644 --- a/frontend/src/types/ActionType.tsx +++ b/frontend/src/types/ActionType.tsx @@ -23,6 +23,9 @@ enum ActionType { // Opens a web page. BROWSE = "browse", + // Interact with the browser instance. + BROWSE_INTERACTIVE = "browse_interactive", + // Searches long-term memory. RECALL = "recall", diff --git a/opendevin/core/schema/action.py b/opendevin/core/schema/action.py index b1d19373b0..52aa151e4a 100644 --- a/opendevin/core/schema/action.py +++ b/opendevin/core/schema/action.py @@ -40,6 +40,10 @@ class ActionTypeSchema(BaseModel): """Opens a web page. """ + BROWSE_INTERACTIVE: str = Field(default='browse_interactive') + """Interact with the browser instance. 
+ """ + RECALL: str = Field(default='recall') """Searches long-term memory """ diff --git a/opendevin/events/action/__init__.py b/opendevin/events/action/__init__.py index 9104a1e97e..f65e37fccc 100644 --- a/opendevin/events/action/__init__.py +++ b/opendevin/events/action/__init__.py @@ -7,7 +7,7 @@ from .agent import ( AgentSummarizeAction, ChangeAgentStateAction, ) -from .browse import BrowseURLAction +from .browse import BrowseInteractiveAction, BrowseURLAction from .commands import CmdKillAction, CmdRunAction, IPythonRunCellAction from .empty import NullAction from .files import FileReadAction, FileWriteAction @@ -20,6 +20,7 @@ __all__ = [ 'CmdRunAction', 'CmdKillAction', 'BrowseURLAction', + 'BrowseInteractiveAction', 'FileReadAction', 'FileWriteAction', 'AgentRecallAction', diff --git a/opendevin/events/action/browse.py b/opendevin/events/action/browse.py index 548548f25b..d536340aa1 100644 --- a/opendevin/events/action/browse.py +++ b/opendevin/events/action/browse.py @@ -16,3 +16,15 @@ class BrowseURLAction(Action): @property def message(self) -> str: return f'Browsing URL: {self.url}' + + +@dataclass +class BrowseInteractiveAction(Action): + browser_actions: str + thought: str = '' + action: str = ActionType.BROWSE_INTERACTIVE + runnable: ClassVar[bool] = True + + @property + def message(self) -> str: + return f'Executing browser actions: {self.browser_actions}' diff --git a/opendevin/events/serialization/action.py b/opendevin/events/serialization/action.py index fcc5c9df5c..f0f850d7ab 100644 --- a/opendevin/events/serialization/action.py +++ b/opendevin/events/serialization/action.py @@ -7,7 +7,7 @@ from opendevin.events.action.agent import ( AgentRejectAction, ChangeAgentStateAction, ) -from opendevin.events.action.browse import BrowseURLAction +from opendevin.events.action.browse import BrowseInteractiveAction, BrowseURLAction from opendevin.events.action.commands import ( CmdKillAction, CmdRunAction, @@ -22,6 +22,7 @@ actions = ( CmdRunAction, 
IPythonRunCellAction, BrowseURLAction, + BrowseInteractiveAction, FileReadAction, FileWriteAction, AgentRecallAction, diff --git a/opendevin/runtime/runtime.py b/opendevin/runtime/runtime.py index 3e2e47891b..d47c3af9c6 100644 --- a/opendevin/runtime/runtime.py +++ b/opendevin/runtime/runtime.py @@ -5,6 +5,7 @@ from opendevin.core.config import config from opendevin.events.action import ( Action, AgentRecallAction, + BrowseInteractiveAction, BrowseURLAction, CmdKillAction, CmdRunAction, @@ -154,6 +155,10 @@ class Runtime: async def browse(self, action: BrowseURLAction) -> Observation: pass + @abstractmethod + async def browse_interactive(self, action: BrowseInteractiveAction) -> Observation: + pass + @abstractmethod async def recall(self, action: AgentRecallAction) -> Observation: pass diff --git a/opendevin/runtime/server/browse.py b/opendevin/runtime/server/browse.py index 3b16bea06c..66cc46ff02 100644 --- a/opendevin/runtime/server/browse.py +++ b/opendevin/runtime/server/browse.py @@ -1,15 +1,23 @@ import os +from opendevin.core.schema import ActionType from opendevin.events.observation import BrowserOutputObservation async def browse(action, browser) -> BrowserOutputObservation: # type: ignore - asked_url = action.url - if not asked_url.startswith('http'): - asked_url = os.path.abspath(os.curdir) + action.url - try: - # action in BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/action/functions.py + if action.action == ActionType.BROWSE: + # legacy BrowseURLAction + asked_url = action.url + if not asked_url.startswith('http'): + asked_url = os.path.abspath(os.curdir) + action.url action_str = f'goto("{asked_url}")' + elif action.action == ActionType.BROWSE_INTERACTIVE: + # new BrowseInteractiveAction, supports full featured BrowserGym actions + # action in BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/action/functions.py + action_str = action.browser_actions + else: + 
raise ValueError(f'Invalid action type: {action.action}') + try: # obs provided by BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/env.py#L396 obs = browser.step(action_str) return BrowserOutputObservation( @@ -21,9 +29,12 @@ async def browse(action, browser) -> BrowserOutputObservation: # type: ignore last_browser_action=obs['last_action'], # last browser env action performed focused_element_bid=obs['focused_element_bid'], # focused element bid screenshot=obs['screenshot'], # base64-encoded screenshot, png - url=asked_url, + url=obs['url'], # URL of the page ) except Exception as e: return BrowserOutputObservation( - content=str(e), screenshot='', error=True, url=asked_url + content=str(e), + screenshot='', + error=True, + url=asked_url if action.action == ActionType.BROWSE else '', ) diff --git a/opendevin/runtime/server/runtime.py b/opendevin/runtime/server/runtime.py index d523e0ff38..fa0ee323e3 100644 --- a/opendevin/runtime/server/runtime.py +++ b/opendevin/runtime/server/runtime.py @@ -1,5 +1,6 @@ from opendevin.events.action import ( AgentRecallAction, + BrowseInteractiveAction, BrowseURLAction, CmdKillAction, CmdRunAction, @@ -58,6 +59,9 @@ class ServerRuntime(Runtime): async def browse(self, action: BrowseURLAction) -> Observation: return await browse(action, self.browser) + async def browse_interactive(self, action: BrowseInteractiveAction) -> Observation: + return await browse(action, self.browser) + async def recall(self, action: AgentRecallAction) -> Observation: return NullObservation('') diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log index 0c16e702dd..8901b1be7e 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log @@ -9,6 +9,8 @@ print("Hello World!") The assistant can execute bash commands on behalf of the user by wrapping 
them with and . For example, you can list the files in the current directory by ls . +The assistant can browse the Internet with commands on behalf of the user by wrapping them with and . +For example, you can browse a given URL by goto("") . The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages through bash by pip install [package needed] and should always import packages and define variables before starting to use them. The assistant should stop and provide an answer when they have already obtained the answer from the execution result. @@ -38,8 +40,8 @@ end_of_edit - replaces lines through (inclusive) with th Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. -You should include or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. -IMPORTANT: Whenever possible, execute the code for the user using or instead of providing it. +You should include or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. ---------- @@ -146,6 +148,21 @@ Press CTRL+C to quit ASSISTANT: The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask! +USER: Now browse the newly started server's homepage and show me the content. + +ASSISTANT: +Sure! 
Let me browse the server's homepage at http://127.0.0.1:5000: + +goto("http://127.0.0.1:5000") + + +USER: +Observation: +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + +ASSISTANT: +The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask! + USER: Now kill the server, make it display the numbers in a table format. ASSISTANT: diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log index 91e702a602..398be8edb1 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log @@ -9,6 +9,8 @@ print("Hello World!") The assistant can execute bash commands on behalf of the user by wrapping them with and . For example, you can list the files in the current directory by ls . +The assistant can browse the Internet with commands on behalf of the user by wrapping them with and . +For example, you can browse a given URL by goto("") . The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages through bash by pip install [package needed] and should always import packages and define variables before starting to use them. The assistant should stop and provide an answer when they have already obtained the answer from the execution result. @@ -38,8 +40,8 @@ end_of_edit - replaces lines through (inclusive) with th Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. 
-You should include or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. -IMPORTANT: Whenever possible, execute the code for the user using or instead of providing it. +You should include or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. ---------- @@ -146,6 +148,21 @@ Press CTRL+C to quit ASSISTANT: The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask! +USER: Now browse the newly started server's homepage and show me the content. + +ASSISTANT: +Sure! Let me browse the server's homepage at http://127.0.0.1:5000: + +goto("http://127.0.0.1:5000") + + +USER: +Observation: +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + +ASSISTANT: +The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask! + USER: Now kill the server, make it display the numbers in a table format. ASSISTANT: @@ -228,20 +245,19 @@ ENVIRONMENT REMINDER: You have 9 turns left to complete the task. ---------- -Certainly! I will start by opening the file `bad.txt` to see the content and then proceed to fix the typos. +Let's start by examining the contents of `bad.txt` to identify the typos. I'll read the file first. -open bad.txt +cat bad.txt ---------- OBSERVATION: -[File: /workspace/bad.txt (4 lines total)] -1:This is a stupid typoo. -2:Really? -3:No mor typos! -4:Enjoy! +This is a stupid typoo. +Really? +No mor typos! +Enjoy! [Command -1 finished with exit code 0]] ENVIRONMENT REMINDER: You have 8 turns left to complete the task. 
diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log index 163b657ee0..9a2f6b1ef6 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log +++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log @@ -9,6 +9,8 @@ print("Hello World!") The assistant can execute bash commands on behalf of the user by wrapping them with and . For example, you can list the files in the current directory by ls . +The assistant can browse the Internet with commands on behalf of the user by wrapping them with and . +For example, you can browse a given URL by goto("") . The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages through bash by pip install [package needed] and should always import packages and define variables before starting to use them. The assistant should stop and provide an answer when they have already obtained the answer from the execution result. @@ -38,8 +40,8 @@ end_of_edit - replaces lines through (inclusive) with th Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. -You should include or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. -IMPORTANT: Whenever possible, execute the code for the user using or instead of providing it. +You should include or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. 
---------- @@ -146,6 +148,21 @@ Press CTRL+C to quit ASSISTANT: The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask! +USER: Now browse the newly started server's homepage and show me the content. + +ASSISTANT: +Sure! Let me browse the server's homepage at http://127.0.0.1:5000: + +goto("http://127.0.0.1:5000") + + +USER: +Observation: +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + +ASSISTANT: +The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask! + USER: Now kill the server, make it display the numbers in a table format. ASSISTANT: @@ -228,29 +245,33 @@ ENVIRONMENT REMINDER: You have 9 turns left to complete the task. ---------- -Certainly! I will start by opening the file `bad.txt` to see the content and then proceed to fix the typos. +Let's start by examining the contents of `bad.txt` to identify the typos. I'll read the file first. -open bad.txt +cat bad.txt ---------- OBSERVATION: -[File: /workspace/bad.txt (4 lines total)] -1:This is a stupid typoo. -2:Really? -3:No mor typos! -4:Enjoy! +This is a stupid typoo. +Really? +No mor typos! +Enjoy! [Command -1 finished with exit code 0]] ENVIRONMENT REMINDER: You have 8 turns left to complete the task. ---------- -I will now edit the file to correct the typos. +I'll correct the typos in the file `bad.txt`. Here are the corrections: +- "typoo" should be "typo" +- "mor" should be "more" + +Let me update the file accordingly. +open bad.txt edit 1:1 This is a stupid typo. end_of_edit @@ -262,17 +283,22 @@ end_of_edit ---------- OBSERVATION: -> > [File: /workspace/bad.txt (4 lines total)] -1:This is a stupid typo. -2:Really? -3:No mor typos! -4:Enjoy! -File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. 
-> > [File: /workspace/bad.txt (4 lines total)] -1:This is a stupid typo. -2:Really? -3:No more typos! -4:Enjoy! +[File: /workspace/bad.txt (4 lines total)] +1:This is a stupid typoo. +2:Really? +3:No mor typos! +4:Enjoy! +> > [File: /workspace/bad.txt (4 lines total)] +1:This is a stupid typo. +2:Really? +3:No mor typos! +4:Enjoy! +File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. +> > [File: /workspace/bad.txt (4 lines total)] +1:This is a stupid typo. +2:Really? +3:No more typos! +4:Enjoy! File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. [Command -1 finished with exit code 0]] diff --git a/tests/integration/mock/CodeActAgent/test_edits/response_001.log b/tests/integration/mock/CodeActAgent/test_edits/response_001.log index 9a251342e9..3425276c53 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/response_001.log +++ b/tests/integration/mock/CodeActAgent/test_edits/response_001.log @@ -1,4 +1,4 @@ -Certainly! I will start by opening the file `bad.txt` to see the content and then proceed to fix the typos. +Let's start by examining the contents of `bad.txt` to identify the typos. I'll read the file first. -open bad.txt +cat bad.txt diff --git a/tests/integration/mock/CodeActAgent/test_edits/response_002.log b/tests/integration/mock/CodeActAgent/test_edits/response_002.log index 8016c2db85..b728304347 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/response_002.log +++ b/tests/integration/mock/CodeActAgent/test_edits/response_002.log @@ -1,6 +1,11 @@ -I will now edit the file to correct the typos. +I'll correct the typos in the file `bad.txt`. Here are the corrections: +- "typoo" should be "typo" +- "mor" should be "more" + +Let me update the file accordingly. +open bad.txt edit 1:1 This is a stupid typo. 
end_of_edit diff --git a/tests/integration/mock/CodeActAgent/test_edits/response_003.log b/tests/integration/mock/CodeActAgent/test_edits/response_003.log index b515529ad1..649ff9273f 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/response_003.log +++ b/tests/integration/mock/CodeActAgent/test_edits/response_003.log @@ -1 +1,8 @@ -The typos in `bad.txt` have been fixed. The task is complete. If you need further assistance, feel free to ask! +The typos in `bad.txt` have been corrected successfully. The file now reads correctly as follows: + +1. This is a stupid typo. +2. Really? +3. No more typos! +4. Enjoy! + +The task is completed. If you have any more requests or need further assistance, feel free to let me know! diff --git a/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log b/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log index 094adb0963..1456af2d67 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log @@ -9,6 +9,8 @@ print("Hello World!") The assistant can execute bash commands on behalf of the user by wrapping them with and . For example, you can list the files in the current directory by ls . +The assistant can browse the Internet with commands on behalf of the user by wrapping them with and . +For example, you can browse a given URL by goto("") . The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages through bash by pip install [package needed] and should always import packages and define variables before starting to use them. The assistant should stop and provide an answer when they have already obtained the answer from the execution result. @@ -38,8 +40,8 @@ end_of_edit - replaces lines through (inclusive) with th Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. 
If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. -You should include or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. -IMPORTANT: Whenever possible, execute the code for the user using or instead of providing it. +You should include or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. ---------- @@ -146,6 +148,21 @@ Press CTRL+C to quit ASSISTANT: The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask! +USER: Now browse the newly started server's homepage and show me the content. + +ASSISTANT: +Sure! Let me browse the server's homepage at http://127.0.0.1:5000: + +goto("http://127.0.0.1:5000") + + +USER: +Observation: +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + +ASSISTANT: +The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask! + USER: Now kill the server, make it display the numbers in a table format. ASSISTANT: diff --git a/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log b/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log index 857e6c825c..68c3d43080 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log @@ -9,6 +9,8 @@ print("Hello World!") The assistant can execute bash commands on behalf of the user by wrapping them with and . 
For example, you can list the files in the current directory by ls . +The assistant can browse the Internet with commands on behalf of the user by wrapping them with and . +For example, you can browse a given URL by goto("") . The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages through bash by pip install [package needed] and should always import packages and define variables before starting to use them. The assistant should stop and provide an answer when they have already obtained the answer from the execution result. @@ -38,8 +40,8 @@ end_of_edit - replaces lines through (inclusive) with th Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. -You should include or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. -IMPORTANT: Whenever possible, execute the code for the user using or instead of providing it. +You should include or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. ---------- @@ -146,6 +148,21 @@ Press CTRL+C to quit ASSISTANT: The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask! +USER: Now browse the newly started server's homepage and show me the content. + +ASSISTANT: +Sure! 
Let me browse the server's homepage at http://127.0.0.1:5000: +<execute_browse> +goto("http://127.0.0.1:5000") +</execute_browse> + +USER: +Observation: +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + +ASSISTANT: +The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask! + USER: Now kill the server, make it display the numbers in a table format. ASSISTANT: @@ -228,10 +245,9 @@ ENVIRONMENT REMINDER: You have 9 turns left to complete the task. ---------- -Certainly! I will write the text 'hello world' to the file `/workspace/test.txt` using Jupyter IPython. +I will write the text 'hello world' to the file '/workspace/test.txt' using Jupyter IPython. Let's proceed: <execute_ipython> -# Write 'hello world' to the specified text file with open('/workspace/test.txt', 'w') as file: file.write('hello world') </execute_ipython> diff --git a/tests/integration/mock/CodeActAgent/test_ipython/response_001.log b/tests/integration/mock/CodeActAgent/test_ipython/response_001.log index 7525ffbf6d..268969342a 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython/response_001.log +++ b/tests/integration/mock/CodeActAgent/test_ipython/response_001.log @@ -1,6 +1,5 @@ -Certainly! I will write the text 'hello world' to the file `/workspace/test.txt` using Jupyter IPython. +I will write the text 'hello world' to the file '/workspace/test.txt' using Jupyter IPython. Let's proceed: <execute_ipython> -# Write 'hello world' to the specified text file with open('/workspace/test.txt', 'w') as file: file.write('hello world') diff --git a/tests/integration/mock/CodeActAgent/test_ipython/response_002.log b/tests/integration/mock/CodeActAgent/test_ipython/response_002.log index f0a4ebd49a..5a011a3c8e 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython/response_002.log +++ b/tests/integration/mock/CodeActAgent/test_ipython/response_002.log @@ -1 +1 @@ -The text 'hello world' has been successfully written to `/workspace/test.txt`. If you need any further assistance, feel free to ask!
+The text 'hello world' has been successfully written to the file '/workspace/test.txt'. If you need any further assistance or another task, feel free to let me know! diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log index c0d9297fa8..4d6660b3f3 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log @@ -9,6 +9,8 @@ print("Hello World!") </execute_ipython> The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>. For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>. +The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>. +For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>. The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them. The assistant should stop and provide an answer when they have already obtained the answer from the execution result. @@ -38,8 +40,8 @@ end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with th Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. -You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. -IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
+You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it. ---------- @@ -146,6 +148,21 @@ Press CTRL+C to quit ASSISTANT: The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask! +USER: Now browse the newly started server's homepage and show me the content. + +ASSISTANT: +Sure! Let me browse the server's homepage at http://127.0.0.1:5000: +<execute_browse> +goto("http://127.0.0.1:5000") +</execute_browse> + +USER: +Observation: +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + +ASSISTANT: +The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask! + USER: Now kill the server, make it display the numbers in a table format. ASSISTANT: diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log index 8347f7f305..f93246fb8f 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log @@ -9,6 +9,8 @@ print("Hello World!") </execute_ipython> The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>. For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>. +The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>. +For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>. The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
The assistant should stop and provide an answer when they have already obtained the answer from the execution result. @@ -38,8 +40,8 @@ end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with th Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. -You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. -IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it. +You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it. ---------- @@ -146,6 +148,21 @@ Press CTRL+C to quit ASSISTANT: The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask! +USER: Now browse the newly started server's homepage and show me the content. + +ASSISTANT: +Sure! Let me browse the server's homepage at http://127.0.0.1:5000: +<execute_browse> +goto("http://127.0.0.1:5000") +</execute_browse> + +USER: +Observation: +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + +ASSISTANT: +The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask! + USER: Now kill the server, make it display the numbers in a table format. ASSISTANT: @@ -228,10 +245,10 @@ ENVIRONMENT REMINDER: You have 9 turns left to complete the task. ---------- -Certainly! I will create a shell script named `hello.sh` that prints 'hello'.
+I will create and write the shell script `hello.sh` that prints "hello". Let's proceed with that. <execute_bash> -echo "echo 'hello'" > hello.sh && chmod +x hello.sh +echo "echo hello" > hello.sh </execute_bash> ---------- diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/response_001.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/response_001.log index fb26597bad..11e8b1e07a 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/response_001.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/response_001.log @@ -1,4 +1,4 @@ -Certainly! I will create a shell script named `hello.sh` that prints 'hello'. +I will create and write the shell script `hello.sh` that prints "hello". Let's proceed with that. <execute_bash> -echo "echo 'hello'" > hello.sh && chmod +x hello.sh +echo "echo hello" > hello.sh diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/response_002.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/response_002.log index 966062535b..0221f5cd7b 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/response_002.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/response_002.log @@ -1 +1 @@ -The shell script `hello.sh` has been created and made executable. You can now run it to print 'hello'. +The shell script `hello.sh` has been successfully created and written. If you need anything else, feel free to ask!
diff --git a/tests/unit/test_action_serialization.py b/tests/unit/test_action_serialization.py index 3b7aba31b9..2a93c8be53 100644 --- a/tests/unit/test_action_serialization.py +++ b/tests/unit/test_action_serialization.py @@ -4,6 +4,7 @@ from opendevin.events.action import ( AgentFinishAction, AgentRecallAction, AgentRejectAction, + BrowseInteractiveAction, BrowseURLAction, CmdKillAction, CmdRunAction, @@ -91,6 +92,14 @@ def test_browse_url_action_serialization_deserialization(): serialization_deserialization(original_action_dict, BrowseURLAction) +def test_browse_interactive_action_serialization_deserialization(): + original_action_dict = { + 'action': 'browse_interactive', + 'args': {'thought': '', 'browser_actions': 'goto("https://www.example.com")'}, + } + serialization_deserialization(original_action_dict, BrowseInteractiveAction) + + def test_file_read_action_serialization_deserialization(): original_action_dict = { 'action': 'read',