Enable CodeAct agents with browsing, and also enable arbitrary BrowserGym action support (#1807)

* enable browsing in codeact, and arbitrary browsergym DSL support
* fix
* fix unit test case
* update frontend for the new interactive browsing action
* bump ver
* Fix integration tests

---------

Co-authored-by: OpenDevinBot <bot@opendevin.com>
2025-12-26 05:48:36 +08:00 · 2024-05-15 11:59:58 -04:00 · 2024-05-15 11:59:58 -04:00 · a84d19f03c
commit a84d19f03c
parent 76abca361c
26 changed files with 293 additions and 67 deletions
--- a/agenthub/codeact_agent/codeact_agent.py
+++ b/agenthub/codeact_agent/codeact_agent.py
@ -13,11 +13,13 @@ from opendevin.core.logger import opendevin_logger as logger
 from opendevin.events.action import (
    Action,
    AgentFinishAction,
+    BrowseInteractiveAction,
    CmdRunAction,
    IPythonRunCellAction,
    MessageAction,
 )
 from opendevin.events.observation import (
+    BrowserOutputObservation,
    CmdOutputObservation,
    IPythonRunCellObservation,
 )
@ -33,7 +35,7 @@ ENABLE_GITHUB = True

 def parse_response(response) -> str:
    action = response.choices[0].message.content
-    for lang in ['bash', 'ipython']:
+    for lang in ['bash', 'ipython', 'browse']:
        if f'<execute_{lang}>' in action and f'</execute_{lang}>' not in action:
            action += f'</execute_{lang}>'
    return action
@ -85,7 +87,7 @@ def swe_agent_edit_hack(bash_command: str) -> str:


 class CodeActAgent(Agent):
-    VERSION = '1.2'
+    VERSION = '1.3'
    """
    The Code Act Agent is a minimalist agent.
    The agent works by passing the model a list of action-observation pairs and prompting the model to take the next step.
@ -171,6 +173,7 @@ class CodeActAgent(Agent):
        Returns:
        - CmdRunAction(command) - bash command to run
        - IPythonRunCellAction(code) - IPython code to run
+        - BrowseInteractiveAction(browsergym_command) - BrowserGym commands to run
        - MessageAction(content) - Message action to run (e.g. ask for clarification)
        - AgentFinishAction() - end the interaction
        """
@ -205,6 +208,9 @@ class CodeActAgent(Agent):
                    content = '\n'.join(splitted)
                    content = truncate_observation(content)
                    self.messages.append({'role': 'user', 'content': content})
+                elif isinstance(obs, BrowserOutputObservation):
+                    content = 'OBSERVATION:\n' + truncate_observation(obs.content)
+                    self.messages.append({'role': 'user', 'content': content})

        latest_user_message = [m for m in self.messages if m['role'] == 'user'][-1]
        if latest_user_message:
@ -217,6 +223,7 @@ class CodeActAgent(Agent):
            stop=[
                '</execute_ipython>',
                '</execute_bash>',
+                '</execute_browse>',
            ],
            temperature=0.0,
        )
@ -251,6 +258,15 @@ class CodeActAgent(Agent):
            code_group = python_code.group(1).strip()
            thought = action_str.replace(python_code.group(0), '').strip()
            return IPythonRunCellAction(code=code_group, thought=thought)
+        elif browse_command := re.search(
+            r'<execute_browse>(.*)</execute_browse>', action_str, re.DOTALL
+        ):
+            # BrowserGym actions were found
+            browse_actions = browse_command.group(1).strip()
+            thought = action_str.replace(browse_command.group(0), '').strip()
+            return BrowseInteractiveAction(
+                browser_actions=browse_actions, thought=thought
+            )
        else:
            # We assume the LLM is GOOD enough that when it returns pure natural language
            # it want to talk to the user
--- a/agenthub/codeact_agent/prompt.py
+++ b/agenthub/codeact_agent/prompt.py
@ -34,6 +34,8 @@ print("Hello World!")
 </execute_ipython>
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
+The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
+For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
 The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
@ -49,8 +51,8 @@ For instance, to push a local branch `my_branch` to the github repo `owner/repo`
 If you require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it for you."""

 SYSTEM_SUFFIX = """The assistant's response should be concise.
-You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
-IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
+You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
+IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.
 """

 EXAMPLES = """
@ -154,6 +156,21 @@ Press CTRL+C to quit
 ASSISTANT:
 The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!

+USER: Now browse the newly started server's homepage and show me the content.
+
+ASSISTANT:
+Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
+<execute_browse>
+goto("http://127.0.0.1:5000")
+</execute_browse>
+
+USER:
+Observation:
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+ASSISTANT:
+The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
+
 USER: Now kill the server, make it display the numbers in a table format.

 ASSISTANT:
@ -230,4 +247,5 @@ INVALID_INPUT_MESSAGE = (
    "I don't understand your input. \n"
    'If you want to execute a bash command, please use <execute_bash> YOUR_COMMAND_HERE </execute_bash>.\n'
    'If you want to execute a block of Python code, please use <execute_ipython> YOUR_COMMAND_HERE </execute_ipython>.\n'
+    'If you want to browse the Internet, please use <execute_browse> YOUR_COMMAND_HERE </execute_browse>.\n'
 )
--- a/frontend/src/services/actions.ts
+++ b/frontend/src/services/actions.ts
@ -17,6 +17,11 @@ const messageActions = {
    store.dispatch(setUrl(url));
    store.dispatch(setScreenshotSrc(screenshotSrc));
  },
+  [ActionType.BROWSE_INTERACTIVE]: (message: ActionMessage) => {
+    const { url, screenshotSrc } = message.args;
+    store.dispatch(setUrl(url));
+    store.dispatch(setScreenshotSrc(screenshotSrc));
+  },
  [ActionType.WRITE]: (message: ActionMessage) => {
    const { path, content } = message.args;
    store.dispatch(updatePath(path));
--- a/frontend/src/types/ActionType.tsx
+++ b/frontend/src/types/ActionType.tsx
@ -23,6 +23,9 @@ enum ActionType {
  // Opens a web page.
  BROWSE = "browse",

+  // Interact with the browser instance.
+  BROWSE_INTERACTIVE = "browse_interactive",
+
  // Searches long-term memory.
  RECALL = "recall",

--- a/opendevin/core/schema/action.py
+++ b/opendevin/core/schema/action.py
@ -40,6 +40,10 @@ class ActionTypeSchema(BaseModel):
    """Opens a web page.
    """

+    BROWSE_INTERACTIVE: str = Field(default='browse_interactive')
+    """Interact with the browser instance.
+    """
+
    RECALL: str = Field(default='recall')
    """Searches long-term memory
    """
--- a/opendevin/events/action/init.py
+++ b/opendevin/events/action/init.py
@ -7,7 +7,7 @@ from .agent import (
    AgentSummarizeAction,
    ChangeAgentStateAction,
 )
-from .browse import BrowseURLAction
+from .browse import BrowseInteractiveAction, BrowseURLAction
 from .commands import CmdKillAction, CmdRunAction, IPythonRunCellAction
 from .empty import NullAction
 from .files import FileReadAction, FileWriteAction
@ -20,6 +20,7 @@ __all__ = [
    'CmdRunAction',
    'CmdKillAction',
    'BrowseURLAction',
+    'BrowseInteractiveAction',
    'FileReadAction',
    'FileWriteAction',
    'AgentRecallAction',
--- a/opendevin/events/action/browse.py
+++ b/opendevin/events/action/browse.py
@ -16,3 +16,15 @@ class BrowseURLAction(Action):
    @property
    def message(self) -> str:
        return f'Browsing URL: {self.url}'
+
+
+@dataclass
+class BrowseInteractiveAction(Action):
+    browser_actions: str
+    thought: str = ''
+    action: str = ActionType.BROWSE_INTERACTIVE
+    runnable: ClassVar[bool] = True
+
+    @property
+    def message(self) -> str:
+        return f'Executing browser actions: {self.browser_actions}'
--- a/opendevin/events/serialization/action.py
+++ b/opendevin/events/serialization/action.py
@ -7,7 +7,7 @@ from opendevin.events.action.agent import (
    AgentRejectAction,
    ChangeAgentStateAction,
 )
-from opendevin.events.action.browse import BrowseURLAction
+from opendevin.events.action.browse import BrowseInteractiveAction, BrowseURLAction
 from opendevin.events.action.commands import (
    CmdKillAction,
    CmdRunAction,
@ -22,6 +22,7 @@ actions = (
    CmdRunAction,
    IPythonRunCellAction,
    BrowseURLAction,
+    BrowseInteractiveAction,
    FileReadAction,
    FileWriteAction,
    AgentRecallAction,
--- a/opendevin/runtime/runtime.py
+++ b/opendevin/runtime/runtime.py
@ -5,6 +5,7 @@ from opendevin.core.config import config
 from opendevin.events.action import (
    Action,
    AgentRecallAction,
+    BrowseInteractiveAction,
    BrowseURLAction,
    CmdKillAction,
    CmdRunAction,
@ -154,6 +155,10 @@ class Runtime:
    async def browse(self, action: BrowseURLAction) -> Observation:
        pass

+    @abstractmethod
+    async def browse_interactive(self, action: BrowseInteractiveAction) -> Observation:
+        pass
+
    @abstractmethod
    async def recall(self, action: AgentRecallAction) -> Observation:
        pass
--- a/opendevin/runtime/server/browse.py
+++ b/opendevin/runtime/server/browse.py
@ -1,15 +1,23 @@
 import os

+from opendevin.core.schema import ActionType
 from opendevin.events.observation import BrowserOutputObservation


 async def browse(action, browser) -> BrowserOutputObservation:  # type: ignore
-    asked_url = action.url
-    if not asked_url.startswith('http'):
-        asked_url = os.path.abspath(os.curdir) + action.url
-    try:
-        # action in BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/action/functions.py
+    if action.action == ActionType.BROWSE:
+        # legacy BrowseURLAction
+        asked_url = action.url
+        if not asked_url.startswith('http'):
+            asked_url = os.path.abspath(os.curdir) + action.url
        action_str = f'goto("{asked_url}")'
+    elif action.action == ActionType.BROWSE_INTERACTIVE:
+        # new BrowseInteractiveAction, supports full featured BrowserGym actions
+        # action in BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/action/functions.py
+        action_str = action.browser_actions
+    else:
+        raise ValueError(f'Invalid action type: {action.action}')
+    try:
        # obs provided by BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/env.py#L396
        obs = browser.step(action_str)
        return BrowserOutputObservation(
@ -21,9 +29,12 @@ async def browse(action, browser) -> BrowserOutputObservation:  # type: ignore
            last_browser_action=obs['last_action'],  # last browser env action performed
            focused_element_bid=obs['focused_element_bid'],  # focused element bid
            screenshot=obs['screenshot'],  # base64-encoded screenshot, png
-            url=asked_url,
+            url=obs['url'],  # URL of the page
        )
    except Exception as e:
        return BrowserOutputObservation(
-            content=str(e), screenshot='', error=True, url=asked_url
+            content=str(e),
+            screenshot='',
+            error=True,
+            url=asked_url if action.action == ActionType.BROWSE else '',
        )
--- a/opendevin/runtime/server/runtime.py
+++ b/opendevin/runtime/server/runtime.py
@ -1,5 +1,6 @@
 from opendevin.events.action import (
    AgentRecallAction,
+    BrowseInteractiveAction,
    BrowseURLAction,
    CmdKillAction,
    CmdRunAction,
@ -58,6 +59,9 @@ class ServerRuntime(Runtime):
    async def browse(self, action: BrowseURLAction) -> Observation:
        return await browse(action, self.browser)

+    async def browse_interactive(self, action: BrowseInteractiveAction) -> Observation:
+        return await browse(action, self.browser)
+
    async def recall(self, action: AgentRecallAction) -> Observation:
        return NullObservation('')

--- a/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log
+++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log
@ -9,6 +9,8 @@ print("Hello World!")
 </execute_ipython>
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
+The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
+For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
 The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
@ -38,8 +40,8 @@ end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with th
 Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line '        print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.

 The assistant's response should be concise.
-You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
-IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
+You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
+IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.


 ----------
@ -146,6 +148,21 @@ Press CTRL+C to quit
 ASSISTANT:
 The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!

+USER: Now browse the newly started server's homepage and show me the content.
+
+ASSISTANT:
+Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
+<execute_browse>
+goto("http://127.0.0.1:5000")
+</execute_browse>
+
+USER:
+Observation:
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+ASSISTANT:
+The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
+
 USER: Now kill the server, make it display the numbers in a table format.

 ASSISTANT:
--- a/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log
+++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log
@ -9,6 +9,8 @@ print("Hello World!")
 </execute_ipython>
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
+The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
+For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
 The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
@ -38,8 +40,8 @@ end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with th
 Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line '        print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.

 The assistant's response should be concise.
-You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
-IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
+You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
+IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.


 ----------
@ -146,6 +148,21 @@ Press CTRL+C to quit
 ASSISTANT:
 The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!

+USER: Now browse the newly started server's homepage and show me the content.
+
+ASSISTANT:
+Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
+<execute_browse>
+goto("http://127.0.0.1:5000")
+</execute_browse>
+
+USER:
+Observation:
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+ASSISTANT:
+The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
+
 USER: Now kill the server, make it display the numbers in a table format.

 ASSISTANT:
@ -228,20 +245,19 @@ ENVIRONMENT REMINDER: You have 9 turns left to complete the task.

 ----------

-Certainly! I will start by opening the file `bad.txt` to see the content and then proceed to fix the typos.
+Let's start by examining the contents of `bad.txt` to identify the typos. I'll read the file first.

 <execute_bash>
-open bad.txt
+cat bad.txt
 </execute_bash>

 ----------

 OBSERVATION:
-[File: /workspace/bad.txt (4 lines total)]
-1:This is a stupid typoo.
-2:Really?
-3:No mor typos!
-4:Enjoy!
+This is a stupid typoo.
+Really?
+No mor typos!
+Enjoy!
 [Command -1 finished with exit code 0]]

 ENVIRONMENT REMINDER: You have 8 turns left to complete the task.
--- a/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log
+++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log
@ -9,6 +9,8 @@ print("Hello World!")
 </execute_ipython>
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
+The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
+For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
 The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
@ -38,8 +40,8 @@ end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with th
 Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line '        print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.

 The assistant's response should be concise.
-You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
-IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
+You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
+IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.


 ----------
@ -146,6 +148,21 @@ Press CTRL+C to quit
 ASSISTANT:
 The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!

+USER: Now browse the newly started server's homepage and show me the content.
+
+ASSISTANT:
+Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
+<execute_browse>
+goto("http://127.0.0.1:5000")
+</execute_browse>
+
+USER:
+Observation:
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+ASSISTANT:
+The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
+
 USER: Now kill the server, make it display the numbers in a table format.

 ASSISTANT:
@ -228,29 +245,33 @@ ENVIRONMENT REMINDER: You have 9 turns left to complete the task.

 ----------

-Certainly! I will start by opening the file `bad.txt` to see the content and then proceed to fix the typos.
+Let's start by examining the contents of `bad.txt` to identify the typos. I'll read the file first.

 <execute_bash>
-open bad.txt
+cat bad.txt
 </execute_bash>

 ----------

 OBSERVATION:
-[File: /workspace/bad.txt (4 lines total)]
-1:This is a stupid typoo.
-2:Really?
-3:No mor typos!
-4:Enjoy!
+This is a stupid typoo.
+Really?
+No mor typos!
+Enjoy!
 [Command -1 finished with exit code 0]]

 ENVIRONMENT REMINDER: You have 8 turns left to complete the task.

 ----------

-I will now edit the file to correct the typos.
+I'll correct the typos in the file `bad.txt`. Here are the corrections:
+- "typoo" should be "typo"
+- "mor" should be "more"
+
+Let me update the file accordingly.

 <execute_bash>
+open bad.txt
 edit 1:1
 This is a stupid typo.
 end_of_edit
@ -262,17 +283,22 @@ end_of_edit
 ----------

 OBSERVATION:
-> > [File: /workspace/bad.txt (4 lines total)]
-1:This is a stupid typo.
-2:Really?
-3:No mor typos!
-4:Enjoy!
-File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.
-> > [File: /workspace/bad.txt (4 lines total)]
-1:This is a stupid typo.
-2:Really?
-3:No more typos!
-4:Enjoy!
+[File: /workspace/bad.txt (4 lines total)]
+1:This is a stupid typoo.
+2:Really?
+3:No mor typos!
+4:Enjoy!
+> > [File: /workspace/bad.txt (4 lines total)]
+1:This is a stupid typo.
+2:Really?
+3:No mor typos!
+4:Enjoy!
+File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.
+> > [File: /workspace/bad.txt (4 lines total)]
+1:This is a stupid typo.
+2:Really?
+3:No more typos!
+4:Enjoy!
 File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.
 [Command -1 finished with exit code 0]]

--- a/tests/integration/mock/CodeActAgent/test_edits/response_001.log
+++ b/tests/integration/mock/CodeActAgent/test_edits/response_001.log
@ -1,4 +1,4 @@
-Certainly! I will start by opening the file `bad.txt` to see the content and then proceed to fix the typos.
+Let's start by examining the contents of `bad.txt` to identify the typos. I'll read the file first.

 <execute_bash>
-open bad.txt
+cat bad.txt
--- a/tests/integration/mock/CodeActAgent/test_edits/response_002.log
+++ b/tests/integration/mock/CodeActAgent/test_edits/response_002.log
@ -1,6 +1,11 @@
-I will now edit the file to correct the typos.
+I'll correct the typos in the file `bad.txt`. Here are the corrections:
+- "typoo" should be "typo"
+- "mor" should be "more"
+
+Let me update the file accordingly.

 <execute_bash>
+open bad.txt
 edit 1:1
 This is a stupid typo.
 end_of_edit
--- a/tests/integration/mock/CodeActAgent/test_edits/response_003.log
+++ b/tests/integration/mock/CodeActAgent/test_edits/response_003.log
@ -1 +1,8 @@
-The typos in `bad.txt` have been fixed. The task is complete. If you need further assistance, feel free to ask!
+The typos in `bad.txt` have been corrected successfully. The file now reads correctly as follows:
+
+1. This is a stupid typo.
+2. Really?
+3. No more typos!
+4. Enjoy!
+
+The task is completed. If you have any more requests or need further assistance, feel free to let me know!
--- a/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log
+++ b/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log
@ -9,6 +9,8 @@ print("Hello World!")
 </execute_ipython>
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
+The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
+For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
 The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
@ -38,8 +40,8 @@ end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with th
 Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line '        print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.

 The assistant's response should be concise.
-You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
-IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
+You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
+IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.


 ----------
@ -146,6 +148,21 @@ Press CTRL+C to quit
 ASSISTANT:
 The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!

+USER: Now browse the newly started server's homepage and show me the content.
+
+ASSISTANT:
+Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
+<execute_browse>
+goto("http://127.0.0.1:5000")
+</execute_browse>
+
+USER:
+Observation:
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+ASSISTANT:
+The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
+
 USER: Now kill the server, make it display the numbers in a table format.

 ASSISTANT:
--- a/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log
+++ b/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log
@ -9,6 +9,8 @@ print("Hello World!")
 </execute_ipython>
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
+The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
+For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
 The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
@ -38,8 +40,8 @@ end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with th
 Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line '        print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.

 The assistant's response should be concise.
-You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
-IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
+You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
+IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.


 ----------
@ -146,6 +148,21 @@ Press CTRL+C to quit
 ASSISTANT:
 The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!

+USER: Now browse the newly started server's homepage and show me the content.
+
+ASSISTANT:
+Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
+<execute_browse>
+goto("http://127.0.0.1:5000")
+</execute_browse>
+
+USER:
+Observation:
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+ASSISTANT:
+The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
+
 USER: Now kill the server, make it display the numbers in a table format.

 ASSISTANT:
@ -228,10 +245,9 @@ ENVIRONMENT REMINDER: You have 9 turns left to complete the task.

 ----------

-Certainly! I will write the text 'hello world' to the file `/workspace/test.txt` using Jupyter IPython.
+I will write the text 'hello world' to the file '/workspace/test.txt' using Jupyter IPython. Let's proceed:

 <execute_ipython>
-# Write 'hello world' to the specified text file
 with open('/workspace/test.txt', 'w') as file:
    file.write('hello world')
 </execute_ipython>
--- a/tests/integration/mock/CodeActAgent/test_ipython/response_001.log
+++ b/tests/integration/mock/CodeActAgent/test_ipython/response_001.log
@ -1,6 +1,5 @@
-Certainly! I will write the text 'hello world' to the file `/workspace/test.txt` using Jupyter IPython.
+I will write the text 'hello world' to the file '/workspace/test.txt' using Jupyter IPython. Let's proceed:

 <execute_ipython>
-# Write 'hello world' to the specified text file
 with open('/workspace/test.txt', 'w') as file:
    file.write('hello world')
--- a/tests/integration/mock/CodeActAgent/test_ipython/response_002.log
+++ b/tests/integration/mock/CodeActAgent/test_ipython/response_002.log
@ -1 +1 @@
-The text 'hello world' has been successfully written to `/workspace/test.txt`. If you need any further assistance, feel free to ask!
+The text 'hello world' has been successfully written to the file '/workspace/test.txt'. If you need any further assistance or another task, feel free to let me know!
--- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log
+++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log
@ -9,6 +9,8 @@ print("Hello World!")
 </execute_ipython>
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
+The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
+For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
 The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
@ -38,8 +40,8 @@ end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with th
 Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line '        print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.

 The assistant's response should be concise.
-You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
-IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
+You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
+IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.


 ----------
@ -146,6 +148,21 @@ Press CTRL+C to quit
 ASSISTANT:
 The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!

+USER: Now browse the newly started server's homepage and show me the content.
+
+ASSISTANT:
+Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
+<execute_browse>
+goto("http://127.0.0.1:5000")
+</execute_browse>
+
+USER:
+Observation:
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+ASSISTANT:
+The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
+
 USER: Now kill the server, make it display the numbers in a table format.

 ASSISTANT:
--- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log
+++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log
@ -9,6 +9,8 @@ print("Hello World!")
 </execute_ipython>
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
+The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
+For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
 The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
@ -38,8 +40,8 @@ end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with th
 Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line '        print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.

 The assistant's response should be concise.
-You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
-IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
+You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
+IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.


 ----------
@ -146,6 +148,21 @@ Press CTRL+C to quit
 ASSISTANT:
 The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!

+USER: Now browse the newly started server's homepage and show me the content.
+
+ASSISTANT:
+Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
+<execute_browse>
+goto("http://127.0.0.1:5000")
+</execute_browse>
+
+USER:
+Observation:
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+ASSISTANT:
+The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
+
 USER: Now kill the server, make it display the numbers in a table format.

 ASSISTANT:
@ -228,10 +245,10 @@ ENVIRONMENT REMINDER: You have 9 turns left to complete the task.

 ----------

-Certainly! I will create a shell script named `hello.sh` that prints 'hello'.
+I will create and write the shell script `hello.sh` that prints "hello". Let's proceed with that.

 <execute_bash>
-echo "echo 'hello'" > hello.sh && chmod +x hello.sh
+echo "echo hello" > hello.sh
 </execute_bash>

 ----------
--- a/tests/integration/mock/CodeActAgent/test_write_simple_script/response_001.log
+++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/response_001.log
@ -1,4 +1,4 @@
-Certainly! I will create a shell script named `hello.sh` that prints 'hello'.
+I will create and write the shell script `hello.sh` that prints "hello". Let's proceed with that.

 <execute_bash>
-echo "echo 'hello'" > hello.sh && chmod +x hello.sh
+echo "echo hello" > hello.sh
--- a/tests/integration/mock/CodeActAgent/test_write_simple_script/response_002.log
+++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/response_002.log
@ -1 +1 @@
-The shell script `hello.sh` has been created and made executable. You can now run it to print 'hello'.
+The shell script `hello.sh` has been successfully created and written. If you need anything else, feel free to ask!
--- a/tests/unit/test_action_serialization.py
+++ b/tests/unit/test_action_serialization.py
@ -4,6 +4,7 @@ from opendevin.events.action import (
    AgentFinishAction,
    AgentRecallAction,
    AgentRejectAction,
+    BrowseInteractiveAction,
    BrowseURLAction,
    CmdKillAction,
    CmdRunAction,
@ -91,6 +92,14 @@ def test_browse_url_action_serialization_deserialization():
    serialization_deserialization(original_action_dict, BrowseURLAction)


+def test_browse_interactive_action_serialization_deserialization():  # covers the 'browse_interactive' action type
+    original_action_dict = {
+        'action': 'browse_interactive',
+        'args': {'thought': '', 'browser_actions': 'goto("https://www.example.com")'},  # empty thought plus a single goto(...) command string
+    }
+    serialization_deserialization(original_action_dict, BrowseInteractiveAction)  # helper defined outside this hunk; presumably checks the dict <-> action round trip -- TODO confirm
+
+
 def test_file_read_action_serialization_deserialization():
    original_action_dict = {
        'action': 'read',