CodeActAgent: Delegate to BrowsingAgent for browsing tasks (#2103)

2025-12-26 05:48:36 +08:00 · 2024-06-07 00:53:47 -07:00 · 2024-06-07 00:53:47 -07:00 · 45ce09d70e
commit 45ce09d70e
parent 001cc33664
25 changed files with 1081 additions and 37 deletions
--- a/agenthub/browsing_agent/browsing_agent.py
+++ b/agenthub/browsing_agent/browsing_agent.py
@@ -109,6 +109,8 @@ class BrowsingAgent(Agent):
        - AgentFinishAction() - end the interaction
        """
        goal = state.get_current_user_intent()
+        if goal is None:
+            goal = state.inputs['task']
        messages = []
        prev_actions = []
        cur_axtree_txt = ''
@@ -129,7 +131,7 @@ class BrowsingAgent(Agent):
                and prev_action.source == EventSource.AGENT
            ):
                # agent has responded, task finish.
-                return AgentFinishAction()
+                return AgentFinishAction(outputs={'content': prev_action.content})

        prev_action_str = '\n'.join(prev_actions[1:])
        # if the final BrowserInteractiveAction exec BrowserGym's send_msg_to_user,
--- a/agenthub/codeact_agent/codeact_agent.py
+++ b/agenthub/codeact_agent/codeact_agent.py
@@ -11,6 +11,7 @@ from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
 from opendevin.events.action import (
    Action,
+    AgentDelegateAction,
    AgentFinishAction,
    BrowseInteractiveAction,
    CmdRunAction,
@@ -18,6 +19,7 @@ from opendevin.events.action import (
    MessageAction,
 )
 from opendevin.events.observation import (
+    AgentDelegateObservation,
    BrowserOutputObservation,
    CmdOutputObservation,
    IPythonRunCellObservation,
@@ -89,6 +91,9 @@ def get_observation_message(obs) -> dict[str, str] | None:
    elif isinstance(obs, BrowserOutputObservation):
        content = 'OBSERVATION:\n' + truncate_observation(obs.content)
        return {'role': 'user', 'content': content}
+    elif isinstance(obs, AgentDelegateObservation):
+        content = 'OBSERVATION:\n' + truncate_observation(str(obs.outputs))
+        return {'role': 'user', 'content': content}
    return None


@@ -119,7 +124,7 @@ def get_in_context_example() -> str:


 class CodeActAgent(Agent):
-    VERSION = '1.5'
+    VERSION = '1.6'
    """
    The Code Act Agent is a minimalist agent.
    The agent works by passing the model a list of action-observation pairs and prompting the model to take the next step.
@@ -199,7 +204,7 @@ class CodeActAgent(Agent):
        Returns:
        - CmdRunAction(command) - bash command to run
        - IPythonRunCellAction(code) - IPython code to run
-        - BrowseInteractiveAction(browsergym_command) - BrowserGym commands to run
+        - AgentDelegateAction(agent, inputs) - delegate action for (sub)task
        - MessageAction(content) - Message action to run (e.g. ask for clarification)
        - AgentFinishAction() - end the interaction
        """
@@ -268,12 +273,10 @@ class CodeActAgent(Agent):
        elif browse_command := re.search(
            r'<execute_browse>(.*)</execute_browse>', action_str, re.DOTALL
        ):
-            # BrowserGym actions was found
-            browse_actions = browse_command.group(1).strip()
            thought = action_str.replace(browse_command.group(0), '').strip()
-            return BrowseInteractiveAction(
-                browser_actions=browse_actions, thought=thought
-            )
+            browse_actions = browse_command.group(1).strip()
+            task = f'{thought}. I should start with: {browse_actions}'
+            return AgentDelegateAction(agent='BrowsingAgent', inputs={'task': task})
        else:
            # We assume the LLM is GOOD enough that when it returns pure natural language
            # it want to talk to the user
--- a/agenthub/codeact_agent/prompt.py
+++ b/agenthub/codeact_agent/prompt.py
@@ -19,7 +19,7 @@ For example, you can list the files in the current directory by <execute_bash> l
 """

 BROWSING_PREFIX = """The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
-For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
+For example, you can browse a given URL by <execute_browse> Tell me the usa's president using google search </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 """
 PIP_INSTALL_PREFIX = """The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them."""
@@ -163,7 +163,7 @@ USER: Now browse the newly started server's homepage and show me the content.
 ASSISTANT:
 Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
 <execute_browse>
-goto("http://127.0.0.1:5000")
+Get the content on "http://127.0.0.1:5000"
 </execute_browse>

 USER:
--- a/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_001.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_001.log
@@ -0,0 +1,314 @@
+
+
+----------
+
+A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
+The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute_ipython>" tag, for example:
+<execute_ipython>
+print("Hello World!")
+</execute_ipython>
+The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
+For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
+The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
+For example, you can browse a given URL by <execute_browse> Tell me the usa's president using google search </execute_browse>.
+The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
+The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
+To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable.
+For instance, to push a local branch `my_branch` to the github repo `owner/repo`, the assistant can use the following four commands:
+<execute_bash> git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch </execute_bash>
+If the assistant require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it.
+
+
+Apart from the standard Python library, the assistant can also use the following functions (already imported) in <execute_ipython> environment:
+open_file(path: str, line_number: Optional[int] = None) -> None:
+    Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line.
+    Args:
+    path: str: The path to the file to open.
+    line_number: Optional[int]: The line number to move to.
+
+goto_line(line_number: int) -> None:
+    Moves the window to show the specified line number.
+    Args:
+    line_number: int: The line number to move to.
+
+scroll_down() -> None:
+    Moves the window down by 100 lines.
+    Args:
+    None
+
+scroll_up() -> None:
+    Moves the window up by 100 lines.
+    Args:
+    None
+
+create_file(filename: str) -> None:
+    Creates and opens a new file with the given name.
+    Args:
+    filename: str: The name of the file to create.
+
+edit_file(start: int, end: int, content: str) -> None:
+    Edit a file.
+    It replaces lines `start` through `end` (inclusive) with the given text `content` in the open file. Remember, the file must be open before editing.
+    Args:
+    start: int: The start line number. Must satisfy start >= 1.
+    end: int: The end line number. Must satisfy start <= end <= number of lines in the file.
+    content: str: The content to replace the lines with.
+
+search_dir(search_term: str, dir_path: str = './') -> None:
+    Searches for search_term in all files in dir. If dir is not provided, searches in the current directory.
+    Args:
+    search_term: str: The term to search for.
+    dir_path: Optional[str]: The path to the directory to search.
+
+search_file(search_term: str, file_path: Optional[str] = None) -> None:
+    Searches for search_term in file. If file is not provided, searches in the current open file.
+    Args:
+    search_term: str: The term to search for.
+    file_path: Optional[str]: The path to the file to search.
+
+find_file(file_name: str, dir_path: str = './') -> None:
+    Finds all files with the given name in the specified directory.
+    Args:
+    file_name: str: The name of the file to find.
+    dir_path: Optional[str]: The path to the directory to search.
+
+parse_pdf(file_path: str) -> None:
+    Parses the content of a PDF file and prints it.
+    Args:
+    file_path: str: The path to the file to open.
+
+parse_docx(file_path: str) -> None:
+    Parses the content of a DOCX file and prints it.
+    Args:
+    file_path: str: The path to the file to open.
+
+parse_latex(file_path: str) -> None:
+    Parses the content of a LaTex file and prints it.
+    Args:
+    file_path: str: The path to the file to open.
+
+parse_pptx(file_path: str) -> None:
+    Parses the content of a pptx file and prints it.
+    Args:
+    file_path: str: The path to the file to open.
+
+Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line '        print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
+
+The assistant's response should be concise.
+The assistant should include ONLY ONE <execute_ipython> or <execute_bash> or <execute_browse> in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed.
+IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.
+
+
+----------
+
+Here is an example of how you can interact with the environment for task solving:
+
+--- START OF EXAMPLE ---
+
+USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000?
+
+ASSISTANT:
+Sure! Let me create a file first:
+<execute_ipython>
+create_file('app.py')
+</execute_ipython>
+
+USER:
+OBSERVATION:
+[File: /workspace/app.py (1 lines total)]
+1|
+[File app.py created.]
+
+ASSISTANT:
+Now I will write the Python code for starting a web server and save it to the file `app.py`:
+<execute_ipython>
+EDITED_CODE="""from flask import Flask
+app = Flask(__name__)
+
+@app.route('/')
+def index():
+    numbers = list(range(1, 11))
+    return str(numbers)
+
+if __name__ == '__main__':
+    app.run(port=5000)"""
+edit_file(start=1, end=1, content=EDITED_CODE)
+</execute_ipython>
+
+USER:
+OBSERVATION:
+1|from flask import Flask
+2|app = Flask(__name__)
+3|
+4|@app.route('/')
+5|def index():
+6|    numbers = list(range(1, 11))
+7|    return str(numbers)
+8|
+9|if __name__ == '__main__':
+10|    app.run(port=5000)
+[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
+
+ASSISTANT:
+I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you:
+<execute_bash>
+python3 app.py > server.log 2>&1 &
+</execute_bash>
+
+USER:
+OBSERVATION:
+[1] 121[1]+  Exit 1                  python3 app.py > server.log 2>&1
+
+ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log:
+<execute_bash>
+cat server.log
+</execute_bash>
+
+USER:
+OBSERVATION:
+Traceback (most recent call last):
+  File "/workspace/app.py", line 2, in <module>
+    from flask import Flask
+ModuleNotFoundError: No module named 'flask'
+
+ASSISTANT:
+It seems that Flask is not installed. Let me install Flask for you:
+<execute_bash>
+pip install flask
+</execute_bash>
+
+USER:
+OBSERVATION:
+Defaulting to user installation because normal site-packages is not writeable
+Collecting flask
+  Using cached flask-3.0.3-py3-none-any.whl (101 kB)
+Collecting blinker>=1.6.2
+  Using cached blinker-1.7.0-py3-none-any.whl (13 kB)
+Collecting Werkzeug>=3.0.0
+  Using cached werkzeug-3.0.2-py3-none-any.whl (226 kB)
+Collecting click>=8.1.3
+  Using cached click-8.1.7-py3-none-any.whl (97 kB)
+Collecting itsdangerous>=2.1.2
+  Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB)
+Requirement already satisfied: Jinja2>=3.1.2 in /home/opendevin/.local/lib/python3.10/site-packages (from flask) (3.1.3)
+Requirement already satisfied: MarkupSafe>=2.0 in /home/opendevin/.local/lib/python3.10/site-packages (from Jinja2>=3.1.2->flask) (2.1.5)
+Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask
+Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0
+
+ASSISTANT:
+Now that Flask is installed, let me run the Python file again:
+<execute_bash>
+python3 app.py > server.log 2>&1 &
+</execute_bash>
+
+USER:
+OBSERVATION:
+[1] 124
+
+ASSISTANT:
+Let me check the server log again:
+<execute_bash>
+cat server.log
+</execute_bash>
+
+USER:
+OBSERVATION:
+* Serving Flask app 'app'
+ * Debug mode: off
+WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
+ * Running on http://127.0.0.1:5000
+Press CTRL+C to quit
+
+ASSISTANT:
+The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
+
+USER: Now browse the newly started server's homepage and show me the content.
+
+ASSISTANT:
+Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
+<execute_browse>
+Get the content on "http://127.0.0.1:5000"
+</execute_browse>
+
+USER:
+OBSERVATION:
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+ASSISTANT:
+The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
+
+USER: Now kill the server, make it display the numbers in a table format.
+
+ASSISTANT:
+Sure! Let me stop the server first:
+<execute_bash>
+kill 124
+</execute_bash>
+
+USER:
+OBSERVATION:
+[1]+  Terminated              python3 app.py > server.log 2>&1
+
+ASSISTANT:
+The server has been stopped. Let me open the Python file and modify it.
+<execute_ipython>
+open_file('app.py')
+</execute_ipython>
+
+USER:
+[File: /workspace/app.py (10 lines total)]
+1|from flask import Flask
+2|app = Flask(__name__)
+3|
+4|@app.route('/')
+5|def index():
+6|    numbers = list(range(1, 11))
+7|    return str(numbers)
+8|
+9|if __name__ == '__main__':
+10|    app.run(port=5000)
+
+ASSISTANT:
+I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file:
+<execute_ipython>
+edit_file(start=7, end=7, content="    return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'")
+</execute_ipython>
+
+USER:
+Observation:
+[File: /workspace/app.py (10 lines total after edit)]
+1|from flask import Flask
+2|app = Flask(__name__)
+3|
+4|@app.route('/')
+5|def index():
+6|    numbers = list(range(1, 11))
+7|    return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
+8|
+9|if __name__ == '__main__':
+10|    app.run(port=5000)
+[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
+
+ASSISTANT:
+The file has been updated. Let me run the Python file again with the new changes:
+<execute_bash>
+python3 app.py > server.log 2>&1 &
+</execute_bash>
+
+USER:
+Observation:
+[1] 126
+
+ASSISTANT:
+The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Free free to let me know if you have any further requests!
+
+--- END OF EXAMPLE ---
+
+
+NOW, LET'S START!
+
+----------
+
+Browse localhost:8000, and tell me the ultimate answer to life. Do not ask me for confirmation at any point.
+
+ENVIRONMENT REMINDER: You have 9 turns left to complete the task.
--- a/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_002.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_002.log
@@ -0,0 +1,126 @@
+
+
+----------
+
+# Instructions
+Review the current state of the page and all other information to find the best
+possible next action to accomplish your goal. Your answer will be interpreted
+and executed by a program, make sure to follow the formatting instructions.
+
+# Goal:
+Sure! Let me browse the server's homepage at http://localhost:8000 and find the ultimate answer to life.. I should start with: Get the content on "http://localhost:8000"
+
+# Action Space
+
+16 different types of actions are available.
+
+noop(wait_ms: float = 1000)
+    Examples:
+        noop()
+
+        noop(500)
+
+send_msg_to_user(text: str)
+    Examples:
+        send_msg_to_user('Based on the results of my search, the city was built in 1751.')
+
+scroll(delta_x: float, delta_y: float)
+    Examples:
+        scroll(0, 200)
+
+        scroll(-50.2, -100.5)
+
+fill(bid: str, value: str)
+    Examples:
+        fill('237', 'example value')
+
+        fill('45', 'multi-line\nexample')
+
+        fill('a12', 'example with "quotes"')
+
+select_option(bid: str, options: str | list[str])
+    Examples:
+        select_option('48', 'blue')
+
+        select_option('48', ['red', 'green', 'blue'])
+
+click(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'Meta', 'Shift']] = [])
+    Examples:
+        click('51')
+
+        click('b22', button='right')
+
+        click('48', button='middle', modifiers=['Shift'])
+
+dblclick(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'Meta', 'Shift']] = [])
+    Examples:
+        dblclick('12')
+
+        dblclick('ca42', button='right')
+
+        dblclick('178', button='middle', modifiers=['Shift'])
+
+hover(bid: str)
+    Examples:
+        hover('b8')
+
+press(bid: str, key_comb: str)
+    Examples:
+        press('88', 'Backspace')
+
+        press('a26', 'Control+a')
+
+        press('a61', 'Meta+Shift+t')
+
+focus(bid: str)
+    Examples:
+        focus('b455')
+
+clear(bid: str)
+    Examples:
+        clear('996')
+
+drag_and_drop(from_bid: str, to_bid: str)
+    Examples:
+        drag_and_drop('56', '498')
+
+upload_file(bid: str, file: str | list[str])
+    Examples:
+        upload_file('572', 'my_receipt.pdf')
+
+        upload_file('63', ['/home/bob/Documents/image.jpg', '/home/bob/Documents/file.zip'])
+
+go_back()
+    Examples:
+        go_back()
+
+go_forward()
+    Examples:
+        go_forward()
+
+goto(url: str)
+    Examples:
+        goto('http://www.example.com')
+
+Multiple actions can be provided at once. Example:
+fill('a12', 'example with "quotes"')
+click('51')
+click('48', button='middle', modifiers=['Shift'])
+Multiple actions are meant to be executed sequentially without any feedback from the page.
+Don't execute multiple actions at once if you need feedback from the page.
+
+
+
+----------
+
+# Current Accessibility Tree:
+
+
+# Previous Actions
+
+
+Here is an example with chain of thought of a valid action when clicking on a button:
+"
+In order to accomplish my goal I need to click on the button with bid 12
+```click("12")```
+"
--- a/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_003.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_003.log
@@ -0,0 +1,131 @@
+
+
+----------
+
+# Instructions
+Review the current state of the page and all other information to find the best
+possible next action to accomplish your goal. Your answer will be interpreted
+and executed by a program, make sure to follow the formatting instructions.
+
+# Goal:
+Sure! Let me browse the server's homepage at http://localhost:8000 and find the ultimate answer to life.. I should start with: Get the content on "http://localhost:8000"
+
+# Action Space
+
+16 different types of actions are available.
+
+noop(wait_ms: float = 1000)
+    Examples:
+        noop()
+
+        noop(500)
+
+send_msg_to_user(text: str)
+    Examples:
+        send_msg_to_user('Based on the results of my search, the city was built in 1751.')
+
+scroll(delta_x: float, delta_y: float)
+    Examples:
+        scroll(0, 200)
+
+        scroll(-50.2, -100.5)
+
+fill(bid: str, value: str)
+    Examples:
+        fill('237', 'example value')
+
+        fill('45', 'multi-line\nexample')
+
+        fill('a12', 'example with "quotes"')
+
+select_option(bid: str, options: str | list[str])
+    Examples:
+        select_option('48', 'blue')
+
+        select_option('48', ['red', 'green', 'blue'])
+
+click(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'Meta', 'Shift']] = [])
+    Examples:
+        click('51')
+
+        click('b22', button='right')
+
+        click('48', button='middle', modifiers=['Shift'])
+
+dblclick(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'Meta', 'Shift']] = [])
+    Examples:
+        dblclick('12')
+
+        dblclick('ca42', button='right')
+
+        dblclick('178', button='middle', modifiers=['Shift'])
+
+hover(bid: str)
+    Examples:
+        hover('b8')
+
+press(bid: str, key_comb: str)
+    Examples:
+        press('88', 'Backspace')
+
+        press('a26', 'Control+a')
+
+        press('a61', 'Meta+Shift+t')
+
+focus(bid: str)
+    Examples:
+        focus('b455')
+
+clear(bid: str)
+    Examples:
+        clear('996')
+
+drag_and_drop(from_bid: str, to_bid: str)
+    Examples:
+        drag_and_drop('56', '498')
+
+upload_file(bid: str, file: str | list[str])
+    Examples:
+        upload_file('572', 'my_receipt.pdf')
+
+        upload_file('63', ['/home/bob/Documents/image.jpg', '/home/bob/Documents/file.zip'])
+
+go_back()
+    Examples:
+        go_back()
+
+go_forward()
+    Examples:
+        go_forward()
+
+goto(url: str)
+    Examples:
+        goto('http://www.example.com')
+
+Multiple actions can be provided at once. Example:
+fill('a12', 'example with "quotes"')
+click('51')
+click('48', button='middle', modifiers=['Shift'])
+Multiple actions are meant to be executed sequentially without any feedback from the page.
+Don't execute multiple actions at once if you need feedback from the page.
+
+
+
+----------
+
+# Current Accessibility Tree:
+RootWebArea 'The Ultimate Answer', focused
+	[8] heading 'The Ultimate Answer'
+	[9] paragraph ''
+		StaticText 'Click the button to reveal the answer to life, the universe, and everything.'
+	[10] button 'Click me'
+
+# Previous Actions
+goto('http://localhost:8000')
+
+
+Here is an example with chain of thought of a valid action when clicking on a button:
+"
+In order to accomplish my goal I need to click on the button with bid 12
+```click("12")```
+"
--- a/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_004.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_004.log
@@ -0,0 +1,133 @@
+
+
+----------
+
+# Instructions
+Review the current state of the page and all other information to find the best
+possible next action to accomplish your goal. Your answer will be interpreted
+and executed by a program, make sure to follow the formatting instructions.
+
+# Goal:
+Sure! Let me browse the server's homepage at http://localhost:8000 and find the ultimate answer to life.. I should start with: Get the content on "http://localhost:8000"
+
+# Action Space
+
+16 different types of actions are available.
+
+noop(wait_ms: float = 1000)
+    Examples:
+        noop()
+
+        noop(500)
+
+send_msg_to_user(text: str)
+    Examples:
+        send_msg_to_user('Based on the results of my search, the city was built in 1751.')
+
+scroll(delta_x: float, delta_y: float)
+    Examples:
+        scroll(0, 200)
+
+        scroll(-50.2, -100.5)
+
+fill(bid: str, value: str)
+    Examples:
+        fill('237', 'example value')
+
+        fill('45', 'multi-line\nexample')
+
+        fill('a12', 'example with "quotes"')
+
+select_option(bid: str, options: str | list[str])
+    Examples:
+        select_option('48', 'blue')
+
+        select_option('48', ['red', 'green', 'blue'])
+
+click(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'Meta', 'Shift']] = [])
+    Examples:
+        click('51')
+
+        click('b22', button='right')
+
+        click('48', button='middle', modifiers=['Shift'])
+
+dblclick(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'Meta', 'Shift']] = [])
+    Examples:
+        dblclick('12')
+
+        dblclick('ca42', button='right')
+
+        dblclick('178', button='middle', modifiers=['Shift'])
+
+hover(bid: str)
+    Examples:
+        hover('b8')
+
+press(bid: str, key_comb: str)
+    Examples:
+        press('88', 'Backspace')
+
+        press('a26', 'Control+a')
+
+        press('a61', 'Meta+Shift+t')
+
+focus(bid: str)
+    Examples:
+        focus('b455')
+
+clear(bid: str)
+    Examples:
+        clear('996')
+
+drag_and_drop(from_bid: str, to_bid: str)
+    Examples:
+        drag_and_drop('56', '498')
+
+upload_file(bid: str, file: str | list[str])
+    Examples:
+        upload_file('572', 'my_receipt.pdf')
+
+        upload_file('63', ['/home/bob/Documents/image.jpg', '/home/bob/Documents/file.zip'])
+
+go_back()
+    Examples:
+        go_back()
+
+go_forward()
+    Examples:
+        go_forward()
+
+goto(url: str)
+    Examples:
+        goto('http://www.example.com')
+
+Multiple actions can be provided at once. Example:
+fill('a12', 'example with "quotes"')
+click('51')
+click('48', button='middle', modifiers=['Shift'])
+Multiple actions are meant to be executed sequentially without any feedback from the page.
+Don't execute multiple actions at once if you need feedback from the page.
+
+
+
+----------
+
+# Current Accessibility Tree:
+RootWebArea 'The Ultimate Answer', focused
+	[8] heading 'The Ultimate Answer'
+	[9] paragraph ''
+		StaticText 'Click the button to reveal the answer to life, the universe, and everything.'
+	[10] button 'Click me', focused
+	StaticText 'The answer is OpenDevin is all you need!'
+
+# Previous Actions
+goto('http://localhost:8000')
+click("10")
+
+
+Here is an example with chain of thought of a valid action when clicking on a button:
+"
+In order to accomplish my goal I need to click on the button with bid 12
+```click("12")```
+"
--- a/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_005.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/prompt_005.log
@ -0,0 +1,319 @@
+
+
+----------
+
+A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
+The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute_ipython>" tag, for example:
+<execute_ipython>
+print("Hello World!")
+</execute_ipython>
+The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
+For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
+The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
+For example, you can browse a given URL by <execute_browse> Tell me the usa's president using google search </execute_browse>.
+The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
+The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
+To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable.
+For instance, to push a local branch `my_branch` to the github repo `owner/repo`, the assistant can use the following four commands:
+<execute_bash> git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch </execute_bash>
+If the assistant requires access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it.
+
+
+Apart from the standard Python library, the assistant can also use the following functions (already imported) in <execute_ipython> environment:
+open_file(path: str, line_number: Optional[int] = None) -> None:
+    Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line.
+    Args:
+    path: str: The path to the file to open.
+    line_number: Optional[int]: The line number to move to.
+
+goto_line(line_number: int) -> None:
+    Moves the window to show the specified line number.
+    Args:
+    line_number: int: The line number to move to.
+
+scroll_down() -> None:
+    Moves the window down by 100 lines.
+    Args:
+    None
+
+scroll_up() -> None:
+    Moves the window up by 100 lines.
+    Args:
+    None
+
+create_file(filename: str) -> None:
+    Creates and opens a new file with the given name.
+    Args:
+    filename: str: The name of the file to create.
+
+edit_file(start: int, end: int, content: str) -> None:
+    Edit a file.
+    It replaces lines `start` through `end` (inclusive) with the given text `content` in the open file. Remember, the file must be open before editing.
+    Args:
+    start: int: The start line number. Must satisfy start >= 1.
+    end: int: The end line number. Must satisfy start <= end <= number of lines in the file.
+    content: str: The content to replace the lines with.
+
+search_dir(search_term: str, dir_path: str = './') -> None:
+    Searches for search_term in all files in dir. If dir is not provided, searches in the current directory.
+    Args:
+    search_term: str: The term to search for.
+    dir_path: Optional[str]: The path to the directory to search.
+
+search_file(search_term: str, file_path: Optional[str] = None) -> None:
+    Searches for search_term in file. If file is not provided, searches in the current open file.
+    Args:
+    search_term: str: The term to search for.
+    file_path: Optional[str]: The path to the file to search.
+
+find_file(file_name: str, dir_path: str = './') -> None:
+    Finds all files with the given name in the specified directory.
+    Args:
+    file_name: str: The name of the file to find.
+    dir_path: Optional[str]: The path to the directory to search.
+
+parse_pdf(file_path: str) -> None:
+    Parses the content of a PDF file and prints it.
+    Args:
+    file_path: str: The path to the file to open.
+
+parse_docx(file_path: str) -> None:
+    Parses the content of a DOCX file and prints it.
+    Args:
+    file_path: str: The path to the file to open.
+
+parse_latex(file_path: str) -> None:
+    Parses the content of a LaTeX file and prints it.
+    Args:
+    file_path: str: The path to the file to open.
+
+parse_pptx(file_path: str) -> None:
+    Parses the content of a pptx file and prints it.
+    Args:
+    file_path: str: The path to the file to open.
+
+Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line '        print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
+
+The assistant's response should be concise.
+The assistant should include ONLY ONE <execute_ipython> or <execute_bash> or <execute_browse> in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed.
+IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.
+
+
+----------
+
+Here is an example of how you can interact with the environment for task solving:
+
+--- START OF EXAMPLE ---
+
+USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000?
+
+ASSISTANT:
+Sure! Let me create a file first:
+<execute_ipython>
+create_file('app.py')
+</execute_ipython>
+
+USER:
+OBSERVATION:
+[File: /workspace/app.py (1 lines total)]
+1|
+[File app.py created.]
+
+ASSISTANT:
+Now I will write the Python code for starting a web server and save it to the file `app.py`:
+<execute_ipython>
+EDITED_CODE="""from flask import Flask
+app = Flask(__name__)
+
+@app.route('/')
+def index():
+    numbers = list(range(1, 11))
+    return str(numbers)
+
+if __name__ == '__main__':
+    app.run(port=5000)"""
+edit_file(start=1, end=1, content=EDITED_CODE)
+</execute_ipython>
+
+USER:
+OBSERVATION:
+1|from flask import Flask
+2|app = Flask(__name__)
+3|
+4|@app.route('/')
+5|def index():
+6|    numbers = list(range(1, 11))
+7|    return str(numbers)
+8|
+9|if __name__ == '__main__':
+10|    app.run(port=5000)
+[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
+
+ASSISTANT:
+I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you:
+<execute_bash>
+python3 app.py > server.log 2>&1 &
+</execute_bash>
+
+USER:
+OBSERVATION:
+[1] 121[1]+  Exit 1                  python3 app.py > server.log 2>&1
+
+ASSISTANT: It looks like the server started with PID 121 and then crashed. Let me check the server log:
+<execute_bash>
+cat server.log
+</execute_bash>
+
+USER:
+OBSERVATION:
+Traceback (most recent call last):
+  File "/workspace/app.py", line 2, in <module>
+    from flask import Flask
+ModuleNotFoundError: No module named 'flask'
+
+ASSISTANT:
+It seems that Flask is not installed. Let me install Flask for you:
+<execute_bash>
+pip install flask
+</execute_bash>
+
+USER:
+OBSERVATION:
+Defaulting to user installation because normal site-packages is not writeable
+Collecting flask
+  Using cached flask-3.0.3-py3-none-any.whl (101 kB)
+Collecting blinker>=1.6.2
+  Using cached blinker-1.7.0-py3-none-any.whl (13 kB)
+Collecting Werkzeug>=3.0.0
+  Using cached werkzeug-3.0.2-py3-none-any.whl (226 kB)
+Collecting click>=8.1.3
+  Using cached click-8.1.7-py3-none-any.whl (97 kB)
+Collecting itsdangerous>=2.1.2
+  Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB)
+Requirement already satisfied: Jinja2>=3.1.2 in /home/opendevin/.local/lib/python3.10/site-packages (from flask) (3.1.3)
+Requirement already satisfied: MarkupSafe>=2.0 in /home/opendevin/.local/lib/python3.10/site-packages (from Jinja2>=3.1.2->flask) (2.1.5)
+Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask
+Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0
+
+ASSISTANT:
+Now that Flask is installed, let me run the Python file again:
+<execute_bash>
+python3 app.py > server.log 2>&1 &
+</execute_bash>
+
+USER:
+OBSERVATION:
+[1] 124
+
+ASSISTANT:
+Let me check the server log again:
+<execute_bash>
+cat server.log
+</execute_bash>
+
+USER:
+OBSERVATION:
+* Serving Flask app 'app'
+ * Debug mode: off
+WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
+ * Running on http://127.0.0.1:5000
+Press CTRL+C to quit
+
+ASSISTANT:
+The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
+
+USER: Now browse the newly started server's homepage and show me the content.
+
+ASSISTANT:
+Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
+<execute_browse>
+Get the content on "http://127.0.0.1:5000"
+</execute_browse>
+
+USER:
+OBSERVATION:
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+ASSISTANT:
+The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
+
+USER: Now kill the server, make it display the numbers in a table format.
+
+ASSISTANT:
+Sure! Let me stop the server first:
+<execute_bash>
+kill 124
+</execute_bash>
+
+USER:
+OBSERVATION:
+[1]+  Terminated              python3 app.py > server.log 2>&1
+
+ASSISTANT:
+The server has been stopped. Let me open the Python file and modify it.
+<execute_ipython>
+open_file('app.py')
+</execute_ipython>
+
+USER:
+[File: /workspace/app.py (10 lines total)]
+1|from flask import Flask
+2|app = Flask(__name__)
+3|
+4|@app.route('/')
+5|def index():
+6|    numbers = list(range(1, 11))
+7|    return str(numbers)
+8|
+9|if __name__ == '__main__':
+10|    app.run(port=5000)
+
+ASSISTANT:
+I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file:
+<execute_ipython>
+edit_file(start=7, end=7, content="    return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'")
+</execute_ipython>
+
+USER:
+Observation:
+[File: /workspace/app.py (10 lines total after edit)]
+1|from flask import Flask
+2|app = Flask(__name__)
+3|
+4|@app.route('/')
+5|def index():
+6|    numbers = list(range(1, 11))
+7|    return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
+8|
+9|if __name__ == '__main__':
+10|    app.run(port=5000)
+[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
+
+ASSISTANT:
+The file has been updated. Let me run the Python file again with the new changes:
+<execute_bash>
+python3 app.py > server.log 2>&1 &
+</execute_bash>
+
+USER:
+Observation:
+[1] 126
+
+ASSISTANT:
+The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Feel free to let me know if you have any further requests!
+
+--- END OF EXAMPLE ---
+
+
+NOW, LET'S START!
+
+----------
+
+Browse localhost:8000, and tell me the ultimate answer to life. Do not ask me for confirmation at any point.
+
+----------
+
+OBSERVATION:
+{'content': 'The answer to life, the universe, and everything is: OpenDevin is all you need!'}
+
+ENVIRONMENT REMINDER: You have 8 turns left to complete the task.
--- a/tests/integration/mock/CodeActAgent/test_browse_internet/response_001.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/response_001.log
@ -0,0 +1,4 @@
+Sure! Let me browse the server's homepage at http://localhost:8000 and find the ultimate answer to life.
+
+<execute_browse>
+Get the content on "http://localhost:8000"
--- a/tests/integration/mock/CodeActAgent/test_browse_internet/response_002.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/response_002.log
@ -0,0 +1,2 @@
+In order to accomplish my goal, I need to get the content on the server's homepage at http://localhost:8000.
+```goto('http://localhost:8000')```
--- a/tests/integration/mock/CodeActAgent/test_browse_internet/response_003.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/response_003.log
@ -0,0 +1,2 @@
+In order to accomplish my goal, I need to click on the button labeled "Click me" with bid 10 to reveal the answer to life, the universe, and everything.
+```click("10")```
--- a/tests/integration/mock/CodeActAgent/test_browse_internet/response_004.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/response_004.log
@ -0,0 +1,5 @@
+In order to accomplish my goal, I need to read the static text that reveals the answer to life, the universe, and everything. The text is already visible in the accessibility tree.
+
+The answer is: "OpenDevin is all you need!"
+
+```send_msg_to_user('The answer to life, the universe, and everything is: OpenDevin is all you need!')```
--- a/tests/integration/mock/CodeActAgent/test_browse_internet/response_005.log
+++ b/tests/integration/mock/CodeActAgent/test_browse_internet/response_005.log
@ -0,0 +1 @@
+The content of the server's homepage at localhost:8000 is: "The answer to life, the universe, and everything is: OpenDevin is all you need!"
--- a/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log
+++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log
@ -10,7 +10,7 @@ print("Hello World!")
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
 The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
-For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
+For example, you can browse a given URL by <execute_browse> Tell me the usa's president using google search </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
 To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable.
@ -227,7 +227,7 @@ USER: Now browse the newly started server's homepage and show me the content.
 ASSISTANT:
 Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
 <execute_browse>
-goto("http://127.0.0.1:5000")
+Get the content on "http://127.0.0.1:5000"
 </execute_browse>

 USER:
--- a/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log
+++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log
@ -10,7 +10,7 @@ print("Hello World!")
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
 The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
-For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
+For example, you can browse a given URL by <execute_browse> Tell me the usa's president using google search </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
 To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable.
@ -227,7 +227,7 @@ USER: Now browse the newly started server's homepage and show me the content.
 ASSISTANT:
 Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
 <execute_browse>
-goto("http://127.0.0.1:5000")
+Get the content on "http://127.0.0.1:5000"
 </execute_browse>

 USER:
--- a/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log
+++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log
@ -10,7 +10,7 @@ print("Hello World!")
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
 The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
-For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
+For example, you can browse a given URL by <execute_browse> Tell me the usa's president using google search </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
 To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable.
@ -227,7 +227,7 @@ USER: Now browse the newly started server's homepage and show me the content.
 ASSISTANT:
 Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
 <execute_browse>
-goto("http://127.0.0.1:5000")
+Get the content on "http://127.0.0.1:5000"
 </execute_browse>

 USER:
--- a/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log
+++ b/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log
@ -10,7 +10,7 @@ print("Hello World!")
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
 The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
-For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
+For example, you can browse a given URL by <execute_browse> Tell me the usa's president using google search </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
 To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable.
@ -227,7 +227,7 @@ USER: Now browse the newly started server's homepage and show me the content.
 ASSISTANT:
 Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
 <execute_browse>
-goto("http://127.0.0.1:5000")
+Get the content on "http://127.0.0.1:5000"
 </execute_browse>

 USER:
--- a/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log
+++ b/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log
@ -10,7 +10,7 @@ print("Hello World!")
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
 The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
-For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
+For example, you can browse a given URL by <execute_browse> Tell me the usa's president using google search </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
 To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable.
@ -227,7 +227,7 @@ USER: Now browse the newly started server's homepage and show me the content.
 ASSISTANT:
 Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
 <execute_browse>
-goto("http://127.0.0.1:5000")
+Get the content on "http://127.0.0.1:5000"
 </execute_browse>

 USER:
--- a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_001.log
+++ b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_001.log
@ -10,7 +10,7 @@ print("Hello World!")
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
 The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
-For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
+For example, you can browse a given URL by <execute_browse> Tell me the usa's president using google search </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
 To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable.
@ -227,7 +227,7 @@ USER: Now browse the newly started server's homepage and show me the content.
 ASSISTANT:
 Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
 <execute_browse>
-goto("http://127.0.0.1:5000")
+Get the content on "http://127.0.0.1:5000"
 </execute_browse>

 USER:
--- a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_002.log
+++ b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_002.log
@ -10,7 +10,7 @@ print("Hello World!")
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
 The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
-For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
+For example, you can browse a given URL by <execute_browse> Tell me the usa's president using google search </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
 To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable.
@ -227,7 +227,7 @@ USER: Now browse the newly started server's homepage and show me the content.
 ASSISTANT:
 Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
 <execute_browse>
-goto("http://127.0.0.1:5000")
+Get the content on "http://127.0.0.1:5000"
 </execute_browse>

 USER:
--- a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_003.log
+++ b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_003.log
@ -10,7 +10,7 @@ print("Hello World!")
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
 The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
-For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
+For example, you can browse a given URL by <execute_browse> Tell me the usa's president using google search </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
 To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable.
@ -227,7 +227,7 @@ USER: Now browse the newly started server's homepage and show me the content.
 ASSISTANT:
 Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
 <execute_browse>
-goto("http://127.0.0.1:5000")
+Get the content on "http://127.0.0.1:5000"
 </execute_browse>

 USER:
--- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log
+++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log
@ -10,7 +10,7 @@ print("Hello World!")
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
 The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
-For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
+For example, you can browse a given URL by <execute_browse> Tell me the usa's president using google search </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
 To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable.
@ -227,7 +227,7 @@ USER: Now browse the newly started server's homepage and show me the content.
 ASSISTANT:
 Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
 <execute_browse>
-goto("http://127.0.0.1:5000")
+Get the content on "http://127.0.0.1:5000"
 </execute_browse>

 USER:
--- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log
+++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log
@ -10,7 +10,7 @@ print("Hello World!")
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
 The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
-For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
+For example, you can browse a given URL by <execute_browse> Tell me the usa's president using google search </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
 To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable.
@ -227,7 +227,7 @@ USER: Now browse the newly started server's homepage and show me the content.
 ASSISTANT:
 Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
 <execute_browse>
-goto("http://127.0.0.1:5000")
+Get the content on "http://127.0.0.1:5000"
 </execute_browse>

 USER:
--- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_003.log
+++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_003.log
@ -10,7 +10,7 @@ print("Hello World!")
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
 The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
-For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
+For example, you can browse a given URL by <execute_browse> Tell me the usa's president using google search </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
 The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
 To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable.
@ -227,7 +227,7 @@ USER: Now browse the newly started server's homepage and show me the content.
 ASSISTANT:
 Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
 <execute_browse>
-goto("http://127.0.0.1:5000")
+Get the content on "http://127.0.0.1:5000"
 </execute_browse>

 USER:
--- a/tests/integration/test_agent.py
+++ b/tests/integration/test_agent.py
@ -10,7 +10,6 @@ from opendevin.core.main import main
 from opendevin.core.schema import AgentState
 from opendevin.events.action import (
    AgentFinishAction,
-    MessageAction,
 )

 workspace_base = os.getenv('WORKSPACE_BASE')
@ -137,8 +136,12 @@ def test_ipython_module():


@pytest.mark.skipif(
-    os.getenv('AGENT') != 'BrowsingAgent',
-    reason='currently only BrowsingAgent is capable of searching the internet',
+    os.getenv('AGENT') != 'BrowsingAgent' and os.getenv('AGENT') != 'CodeActAgent',
+    reason='currently only BrowsingAgent and CodeActAgent are capable of searching the internet',
+)
+@pytest.mark.skipif(
+    os.getenv('AGENT') == 'CodeActAgent' and os.getenv('SANDBOX_TYPE').lower() != 'ssh',
+    reason='CodeActAgent only supports ssh sandbox which is stateful',
 )
 def test_browse_internet(http_server):
    # Execute the task
@ -146,5 +149,4 @@ def test_browse_internet(http_server):
    final_state: State = asyncio.run(main(task, exit_on_message=True))
    assert final_state.agent_state == AgentState.STOPPED
    assert isinstance(final_state.history[-1][0], AgentFinishAction)
-    assert isinstance(final_state.history[-2][0], MessageAction)
-    assert 'OpenDevin is all you need!' in final_state.history[-2][0].content
+    assert 'OpenDevin is all you need!' in str(final_state.history)
				`@ -0,0 +1 @@`
				`The content of the server's homepage at localhost:8000 is: "The answer to life, the universe, and everything is: OpenDevin is all you need!"`