Enable CodeAct agents with browsing, and also enable arbitrary BrowserGym action support (#1807)

* enable browsing in codeact, and  arbitrary browsergym DSL support

* fix

* fix unit test case

* update frontend for the new interactive browsing action

* bump ver

* Fix integration tests

---------

Co-authored-by: OpenDevinBot <bot@opendevin.com>
This commit is contained in:
Frank Xu 2024-05-15 11:59:58 -04:00 committed by GitHub
parent 76abca361c
commit a84d19f03c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
26 changed files with 293 additions and 67 deletions

View File

@ -13,11 +13,13 @@ from opendevin.core.logger import opendevin_logger as logger
from opendevin.events.action import (
Action,
AgentFinishAction,
BrowseInteractiveAction,
CmdRunAction,
IPythonRunCellAction,
MessageAction,
)
from opendevin.events.observation import (
BrowserOutputObservation,
CmdOutputObservation,
IPythonRunCellObservation,
)
@ -33,7 +35,7 @@ ENABLE_GITHUB = True
def parse_response(response) -> str:
action = response.choices[0].message.content
for lang in ['bash', 'ipython']:
for lang in ['bash', 'ipython', 'browse']:
if f'<execute_{lang}>' in action and f'</execute_{lang}>' not in action:
action += f'</execute_{lang}>'
return action
@ -85,7 +87,7 @@ def swe_agent_edit_hack(bash_command: str) -> str:
class CodeActAgent(Agent):
VERSION = '1.2'
VERSION = '1.3'
"""
The Code Act Agent is a minimalist agent.
The agent works by passing the model a list of action-observation pairs and prompting the model to take the next step.
@ -171,6 +173,7 @@ class CodeActAgent(Agent):
Returns:
- CmdRunAction(command) - bash command to run
- IPythonRunCellAction(code) - IPython code to run
- BrowseInteractiveAction(browsergym_command) - BrowserGym commands to run
- MessageAction(content) - Message action to run (e.g. ask for clarification)
- AgentFinishAction() - end the interaction
"""
@ -205,6 +208,9 @@ class CodeActAgent(Agent):
content = '\n'.join(splitted)
content = truncate_observation(content)
self.messages.append({'role': 'user', 'content': content})
elif isinstance(obs, BrowserOutputObservation):
content = 'OBSERVATION:\n' + truncate_observation(obs.content)
self.messages.append({'role': 'user', 'content': content})
latest_user_message = [m for m in self.messages if m['role'] == 'user'][-1]
if latest_user_message:
@ -217,6 +223,7 @@ class CodeActAgent(Agent):
stop=[
'</execute_ipython>',
'</execute_bash>',
'</execute_browse>',
],
temperature=0.0,
)
@ -251,6 +258,15 @@ class CodeActAgent(Agent):
code_group = python_code.group(1).strip()
thought = action_str.replace(python_code.group(0), '').strip()
return IPythonRunCellAction(code=code_group, thought=thought)
elif browse_command := re.search(
r'<execute_browse>(.*)</execute_browse>', action_str, re.DOTALL
):
# BrowserGym actions was found
browse_actions = browse_command.group(1).strip()
thought = action_str.replace(browse_command.group(0), '').strip()
return BrowseInteractiveAction(
browser_actions=browse_actions, thought=thought
)
else:
# We assume the LLM is GOOD enough that when it returns pure natural language
# it want to talk to the user

View File

@ -34,6 +34,8 @@ print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
@ -49,8 +51,8 @@ For instance, to push a local branch `my_branch` to the github repo `owner/repo`
If you require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it for you."""
SYSTEM_SUFFIX = """The assistant's response should be concise.
You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.
"""
EXAMPLES = """
@ -154,6 +156,21 @@ Press CTRL+C to quit
ASSISTANT:
The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
USER: Now browse the newly started server's homepage and show me the content.
ASSISTANT:
Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
<execute_browse>
goto("http://127.0.0.1:5000")
</execute_browse>
USER:
Observation:
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
ASSISTANT:
The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
USER: Now kill the server, make it display the numbers in a table format.
ASSISTANT:
@ -230,4 +247,5 @@ INVALID_INPUT_MESSAGE = (
"I don't understand your input. \n"
'If you want to execute a bash command, please use <execute_bash> YOUR_COMMAND_HERE </execute_bash>.\n'
'If you want to execute a block of Python code, please use <execute_ipython> YOUR_COMMAND_HERE </execute_ipython>.\n'
'If you want to browse the Internet, please use <execute_browse> YOUR_COMMAND_HERE </execute_browse>.\n'
)

View File

@ -17,6 +17,11 @@ const messageActions = {
store.dispatch(setUrl(url));
store.dispatch(setScreenshotSrc(screenshotSrc));
},
[ActionType.BROWSE_INTERACTIVE]: (message: ActionMessage) => {
const { url, screenshotSrc } = message.args;
store.dispatch(setUrl(url));
store.dispatch(setScreenshotSrc(screenshotSrc));
},
[ActionType.WRITE]: (message: ActionMessage) => {
const { path, content } = message.args;
store.dispatch(updatePath(path));

View File

@ -23,6 +23,9 @@ enum ActionType {
// Opens a web page.
BROWSE = "browse",
// Interact with the browser instance.
BROWSE_INTERACTIVE = "browse_interactive",
// Searches long-term memory.
RECALL = "recall",

View File

@ -40,6 +40,10 @@ class ActionTypeSchema(BaseModel):
"""Opens a web page.
"""
BROWSE_INTERACTIVE: str = Field(default='browse_interactive')
"""Interact with the browser instance.
"""
RECALL: str = Field(default='recall')
"""Searches long-term memory
"""

View File

@ -7,7 +7,7 @@ from .agent import (
AgentSummarizeAction,
ChangeAgentStateAction,
)
from .browse import BrowseURLAction
from .browse import BrowseInteractiveAction, BrowseURLAction
from .commands import CmdKillAction, CmdRunAction, IPythonRunCellAction
from .empty import NullAction
from .files import FileReadAction, FileWriteAction
@ -20,6 +20,7 @@ __all__ = [
'CmdRunAction',
'CmdKillAction',
'BrowseURLAction',
'BrowseInteractiveAction',
'FileReadAction',
'FileWriteAction',
'AgentRecallAction',

View File

@ -16,3 +16,15 @@ class BrowseURLAction(Action):
@property
def message(self) -> str:
return f'Browsing URL: {self.url}'
@dataclass
class BrowseInteractiveAction(Action):
browser_actions: str
thought: str = ''
action: str = ActionType.BROWSE_INTERACTIVE
runnable: ClassVar[bool] = True
@property
def message(self) -> str:
return f'Executing browser actions: {self.browser_actions}'

View File

@ -7,7 +7,7 @@ from opendevin.events.action.agent import (
AgentRejectAction,
ChangeAgentStateAction,
)
from opendevin.events.action.browse import BrowseURLAction
from opendevin.events.action.browse import BrowseInteractiveAction, BrowseURLAction
from opendevin.events.action.commands import (
CmdKillAction,
CmdRunAction,
@ -22,6 +22,7 @@ actions = (
CmdRunAction,
IPythonRunCellAction,
BrowseURLAction,
BrowseInteractiveAction,
FileReadAction,
FileWriteAction,
AgentRecallAction,

View File

@ -5,6 +5,7 @@ from opendevin.core.config import config
from opendevin.events.action import (
Action,
AgentRecallAction,
BrowseInteractiveAction,
BrowseURLAction,
CmdKillAction,
CmdRunAction,
@ -154,6 +155,10 @@ class Runtime:
async def browse(self, action: BrowseURLAction) -> Observation:
pass
@abstractmethod
async def browse_interactive(self, action: BrowseInteractiveAction) -> Observation:
pass
@abstractmethod
async def recall(self, action: AgentRecallAction) -> Observation:
pass

View File

@ -1,15 +1,23 @@
import os
from opendevin.core.schema import ActionType
from opendevin.events.observation import BrowserOutputObservation
async def browse(action, browser) -> BrowserOutputObservation: # type: ignore
asked_url = action.url
if not asked_url.startswith('http'):
asked_url = os.path.abspath(os.curdir) + action.url
try:
# action in BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/action/functions.py
if action.action == ActionType.BROWSE:
# legacy BrowseURLAction
asked_url = action.url
if not asked_url.startswith('http'):
asked_url = os.path.abspath(os.curdir) + action.url
action_str = f'goto("{asked_url}")'
elif action.action == ActionType.BROWSE_INTERACTIVE:
# new BrowseInteractiveAction, supports full featured BrowserGym actions
# action in BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/action/functions.py
action_str = action.browser_actions
else:
raise ValueError(f'Invalid action type: {action.action}')
try:
# obs provided by BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/env.py#L396
obs = browser.step(action_str)
return BrowserOutputObservation(
@ -21,9 +29,12 @@ async def browse(action, browser) -> BrowserOutputObservation: # type: ignore
last_browser_action=obs['last_action'], # last browser env action performed
focused_element_bid=obs['focused_element_bid'], # focused element bid
screenshot=obs['screenshot'], # base64-encoded screenshot, png
url=asked_url,
url=obs['url'], # URL of the page
)
except Exception as e:
return BrowserOutputObservation(
content=str(e), screenshot='', error=True, url=asked_url
content=str(e),
screenshot='',
error=True,
url=asked_url if action.action == ActionType.BROWSE else '',
)

View File

@ -1,5 +1,6 @@
from opendevin.events.action import (
AgentRecallAction,
BrowseInteractiveAction,
BrowseURLAction,
CmdKillAction,
CmdRunAction,
@ -58,6 +59,9 @@ class ServerRuntime(Runtime):
async def browse(self, action: BrowseURLAction) -> Observation:
return await browse(action, self.browser)
async def browse_interactive(self, action: BrowseInteractiveAction) -> Observation:
return await browse(action, self.browser)
async def recall(self, action: AgentRecallAction) -> Observation:
return NullObservation('')

View File

@ -9,6 +9,8 @@ print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
@ -38,8 +40,8 @@ end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with th
Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
The assistant's response should be concise.
You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.
----------
@ -146,6 +148,21 @@ Press CTRL+C to quit
ASSISTANT:
The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
USER: Now browse the newly started server's homepage and show me the content.
ASSISTANT:
Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
<execute_browse>
goto("http://127.0.0.1:5000")
</execute_browse>
USER:
Observation:
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
ASSISTANT:
The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
USER: Now kill the server, make it display the numbers in a table format.
ASSISTANT:

View File

@ -9,6 +9,8 @@ print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
@ -38,8 +40,8 @@ end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with th
Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
The assistant's response should be concise.
You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.
----------
@ -146,6 +148,21 @@ Press CTRL+C to quit
ASSISTANT:
The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
USER: Now browse the newly started server's homepage and show me the content.
ASSISTANT:
Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
<execute_browse>
goto("http://127.0.0.1:5000")
</execute_browse>
USER:
Observation:
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
ASSISTANT:
The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
USER: Now kill the server, make it display the numbers in a table format.
ASSISTANT:
@ -228,20 +245,19 @@ ENVIRONMENT REMINDER: You have 9 turns left to complete the task.
----------
Certainly! I will start by opening the file `bad.txt` to see the content and then proceed to fix the typos.
Let's start by examining the contents of `bad.txt` to identify the typos. I'll read the file first.
<execute_bash>
open bad.txt
cat bad.txt
</execute_bash>
----------
OBSERVATION:
[File: /workspace/bad.txt (4 lines total)]
1:This is a stupid typoo.
2:Really?
3:No mor typos!
4:Enjoy!
This is a stupid typoo.
Really?
No mor typos!
Enjoy!
[Command -1 finished with exit code 0]]
ENVIRONMENT REMINDER: You have 8 turns left to complete the task.

View File

@ -9,6 +9,8 @@ print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
@ -38,8 +40,8 @@ end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with th
Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
The assistant's response should be concise.
You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.
----------
@ -146,6 +148,21 @@ Press CTRL+C to quit
ASSISTANT:
The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
USER: Now browse the newly started server's homepage and show me the content.
ASSISTANT:
Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
<execute_browse>
goto("http://127.0.0.1:5000")
</execute_browse>
USER:
Observation:
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
ASSISTANT:
The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
USER: Now kill the server, make it display the numbers in a table format.
ASSISTANT:
@ -228,29 +245,33 @@ ENVIRONMENT REMINDER: You have 9 turns left to complete the task.
----------
Certainly! I will start by opening the file `bad.txt` to see the content and then proceed to fix the typos.
Let's start by examining the contents of `bad.txt` to identify the typos. I'll read the file first.
<execute_bash>
open bad.txt
cat bad.txt
</execute_bash>
----------
OBSERVATION:
[File: /workspace/bad.txt (4 lines total)]
1:This is a stupid typoo.
2:Really?
3:No mor typos!
4:Enjoy!
This is a stupid typoo.
Really?
No mor typos!
Enjoy!
[Command -1 finished with exit code 0]]
ENVIRONMENT REMINDER: You have 8 turns left to complete the task.
----------
I will now edit the file to correct the typos.
I'll correct the typos in the file `bad.txt`. Here are the corrections:
- "typoo" should be "typo"
- "mor" should be "more"
Let me update the file accordingly.
<execute_bash>
open bad.txt
edit 1:1
This is a stupid typo.
end_of_edit
@ -262,17 +283,22 @@ end_of_edit
----------
OBSERVATION:
> > [File: /workspace/bad.txt (4 lines total)]
1:This is a stupid typo.
2:Really?
3:No mor typos!
4:Enjoy!
File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.
> > [File: /workspace/bad.txt (4 lines total)]
1:This is a stupid typo.
2:Really?
3:No more typos!
4:Enjoy!
[File: /workspace/bad.txt (4 lines total)]
1:This is a stupid typoo.
2:Really?
3:No mor typos!
4:Enjoy!
> > [File: /workspace/bad.txt (4 lines total)]
1:This is a stupid typo.
2:Really?
3:No mor typos!
4:Enjoy!
File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.
> > [File: /workspace/bad.txt (4 lines total)]
1:This is a stupid typo.
2:Really?
3:No more typos!
4:Enjoy!
File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.
[Command -1 finished with exit code 0]]

View File

@ -1,4 +1,4 @@
Certainly! I will start by opening the file `bad.txt` to see the content and then proceed to fix the typos.
Let's start by examining the contents of `bad.txt` to identify the typos. I'll read the file first.
<execute_bash>
open bad.txt
cat bad.txt

View File

@ -1,6 +1,11 @@
I will now edit the file to correct the typos.
I'll correct the typos in the file `bad.txt`. Here are the corrections:
- "typoo" should be "typo"
- "mor" should be "more"
Let me update the file accordingly.
<execute_bash>
open bad.txt
edit 1:1
This is a stupid typo.
end_of_edit

View File

@ -1 +1,8 @@
The typos in `bad.txt` have been fixed. The task is complete. If you need further assistance, feel free to ask!
The typos in `bad.txt` have been corrected successfully. The file now reads correctly as follows:
1. This is a stupid typo.
2. Really?
3. No more typos!
4. Enjoy!
The task is completed. If you have any more requests or need further assistance, feel free to let me know!

View File

@ -9,6 +9,8 @@ print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
@ -38,8 +40,8 @@ end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with th
Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
The assistant's response should be concise.
You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.
----------
@ -146,6 +148,21 @@ Press CTRL+C to quit
ASSISTANT:
The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
USER: Now browse the newly started server's homepage and show me the content.
ASSISTANT:
Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
<execute_browse>
goto("http://127.0.0.1:5000")
</execute_browse>
USER:
Observation:
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
ASSISTANT:
The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
USER: Now kill the server, make it display the numbers in a table format.
ASSISTANT:

View File

@ -9,6 +9,8 @@ print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
@ -38,8 +40,8 @@ end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with th
Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
The assistant's response should be concise.
You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.
----------
@ -146,6 +148,21 @@ Press CTRL+C to quit
ASSISTANT:
The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
USER: Now browse the newly started server's homepage and show me the content.
ASSISTANT:
Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
<execute_browse>
goto("http://127.0.0.1:5000")
</execute_browse>
USER:
Observation:
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
ASSISTANT:
The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
USER: Now kill the server, make it display the numbers in a table format.
ASSISTANT:
@ -228,10 +245,9 @@ ENVIRONMENT REMINDER: You have 9 turns left to complete the task.
----------
Certainly! I will write the text 'hello world' to the file `/workspace/test.txt` using Jupyter IPython.
I will write the text 'hello world' to the file '/workspace/test.txt' using Jupyter IPython. Let's proceed:
<execute_ipython>
# Write 'hello world' to the specified text file
with open('/workspace/test.txt', 'w') as file:
file.write('hello world')
</execute_ipython>

View File

@ -1,6 +1,5 @@
Certainly! I will write the text 'hello world' to the file `/workspace/test.txt` using Jupyter IPython.
I will write the text 'hello world' to the file '/workspace/test.txt' using Jupyter IPython. Let's proceed:
<execute_ipython>
# Write 'hello world' to the specified text file
with open('/workspace/test.txt', 'w') as file:
file.write('hello world')

View File

@ -1 +1 @@
The text 'hello world' has been successfully written to `/workspace/test.txt`. If you need any further assistance, feel free to ask!
The text 'hello world' has been successfully written to the file '/workspace/test.txt'. If you need any further assistance or another task, feel free to let me know!

View File

@ -9,6 +9,8 @@ print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
@ -38,8 +40,8 @@ end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with th
Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
The assistant's response should be concise.
You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.
----------
@ -146,6 +148,21 @@ Press CTRL+C to quit
ASSISTANT:
The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
USER: Now browse the newly started server's homepage and show me the content.
ASSISTANT:
Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
<execute_browse>
goto("http://127.0.0.1:5000")
</execute_browse>
USER:
Observation:
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
ASSISTANT:
The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
USER: Now kill the server, make it display the numbers in a table format.
ASSISTANT:

View File

@ -9,6 +9,8 @@ print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
The assistant can install Python packages through bash by <execute_bash> pip install [package needed] </execute_bash> and should always import packages and define variables before starting to use them.
The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result.
@ -38,8 +40,8 @@ end_of_edit - replaces lines <start_line> through <end_line> (inclusive) with th
Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
The assistant's response should be concise.
You should include <execute_ipython> or <execute_bash> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> instead of providing it.
You should include <execute_ipython> or <execute_bash> or <execute_browse> in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.
----------
@ -146,6 +148,21 @@ Press CTRL+C to quit
ASSISTANT:
The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
USER: Now browse the newly started server's homepage and show me the content.
ASSISTANT:
Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
<execute_browse>
goto("http://127.0.0.1:5000")
</execute_browse>
USER:
Observation:
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
ASSISTANT:
The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
USER: Now kill the server, make it display the numbers in a table format.
ASSISTANT:
@ -228,10 +245,10 @@ ENVIRONMENT REMINDER: You have 9 turns left to complete the task.
----------
Certainly! I will create a shell script named `hello.sh` that prints 'hello'.
I will create and write the shell script `hello.sh` that prints "hello". Let's proceed with that.
<execute_bash>
echo "echo 'hello'" > hello.sh && chmod +x hello.sh
echo "echo hello" > hello.sh
</execute_bash>
----------

View File

@ -1,4 +1,4 @@
Certainly! I will create a shell script named `hello.sh` that prints 'hello'.
I will create and write the shell script `hello.sh` that prints "hello". Let's proceed with that.
<execute_bash>
echo "echo 'hello'" > hello.sh && chmod +x hello.sh
echo "echo hello" > hello.sh

View File

@ -1 +1 @@
The shell script `hello.sh` has been created and made executable. You can now run it to print 'hello'.
The shell script `hello.sh` has been successfully created and written. If you need anything else, feel free to ask!

View File

@ -4,6 +4,7 @@ from opendevin.events.action import (
AgentFinishAction,
AgentRecallAction,
AgentRejectAction,
BrowseInteractiveAction,
BrowseURLAction,
CmdKillAction,
CmdRunAction,
@ -91,6 +92,14 @@ def test_browse_url_action_serialization_deserialization():
serialization_deserialization(original_action_dict, BrowseURLAction)
def test_browse_interactive_action_serialization_deserialization():
original_action_dict = {
'action': 'browse_interactive',
'args': {'thought': '', 'browser_actions': 'goto("https://www.example.com")'},
}
serialization_deserialization(original_action_dict, BrowseInteractiveAction)
def test_file_read_action_serialization_deserialization():
original_action_dict = {
'action': 'read',