Frank Xu 836864fa88
[feat] Integrate BrowserGym (#1452)
* add a single-threaded server serving browsergym

* update poetry

* update browser page content

* add import to make sure browsergym environments are registered properly

* remove flask server, use multiprocess impl and Pipe

* fix

* refactor BrowserEnv

* update browser action and obs to include more complete info

* fix screenshot

* update poetry lock

* add playwright install to workflow

* update

* add better html to text conversion

* update for better text conversion to maintain parity with the current handling of BrowseURLAction

* update

* update poetry

* update multiprocessing mp

* fix multiprocessing

* update

* update github workflow

---------

Co-authored-by: Xingyao Wang <xingyao6@illinois.edu>
2024-05-02 19:52:53 +08:00

48 lines
2.0 KiB
Python

import json
import os
from dataclasses import dataclass
from typing import TYPE_CHECKING

from opendevin.observation import BrowserOutputObservation
from opendevin.schema import ActionType

from .base import ExecutableAction

if TYPE_CHECKING:
    from opendevin.controller import AgentController
@dataclass
class BrowseURLAction(ExecutableAction):
    """Navigate the controller's browser to a URL and report the resulting page.

    The navigation is issued as a BrowserGym ``goto(...)`` action through
    ``controller.browser.step`` and the returned observation is repackaged as
    a ``BrowserOutputObservation``.
    """

    # Target to open. Values that do not start with 'http' are treated as a
    # path below the current working directory (see ``run``).
    url: str
    # Free-form text attached to the action; not used by ``run`` itself.
    thought: str = ''
    action: str = ActionType.BROWSE

    async def run(self, controller: 'AgentController') -> BrowserOutputObservation:  # type: ignore
        """Execute the navigation and return what the browser observed.

        Args:
            controller: Controller whose ``browser`` attribute exposes a
                ``step(action_str)`` method returning an observation mapping.

        Returns:
            A ``BrowserOutputObservation`` populated from the browser
            observation on success. If anything raises while stepping the
            browser or reading the observation, an observation with
            ``error=True`` and the exception text as ``content`` is returned
            instead of propagating the exception.
        """
        import json  # local import keeps this block self-contained

        asked_url = self.url
        if not asked_url.startswith('http'):
            # Resolve against the working directory. Insert the separator when
            # the caller omitted it so 'index.html' does not become
            # '<cwd>index.html'; inputs that already start with '/' (and the
            # empty string) resolve exactly as before.
            local_path = self.url
            if local_path and not local_path.startswith('/'):
                local_path = '/' + local_path
            asked_url = os.path.abspath(os.curdir) + local_path
        try:
            # action in BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/action/functions.py
            # The URL is embedded as a properly escaped, double-quoted string
            # literal so quotes, backslashes or newlines inside it cannot break
            # out of the goto(...) call. Ordinary URLs produce the same action
            # text as plain interpolation would.
            action_str = f'goto({json.dumps(asked_url, ensure_ascii=False)})'
            # obs provided by BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/env.py#L396
            obs = controller.browser.step(action_str)
            return BrowserOutputObservation(
                content=obs['text_content'],  # text content of the page
                open_pages_urls=obs['open_pages_urls'],  # list of open pages
                active_page_index=obs['active_page_index'],  # index of the active page
                dom_object=obs['dom_object'],  # DOM object
                axtree_object=obs['axtree_object'],  # accessibility tree object
                last_browser_action=obs['last_action'],  # last browser env action performed
                focused_element_bid=obs['focused_element_bid'],  # focused element bid
                screenshot=obs['screenshot'],  # base64-encoded screenshot, png
                url=asked_url,
            )
        except Exception as e:
            # Deliberate best-effort: surface the failure to the agent as an
            # error observation rather than aborting the action loop.
            return BrowserOutputObservation(
                content=str(e), screenshot='', error=True, url=asked_url
            )

    @property
    def message(self) -> str:
        """Return a short human-readable description of this action."""
        return f'Browsing URL: {self.url}'