diff --git a/agenthub/monologue_agent/agent.py b/agenthub/monologue_agent/agent.py index b987248622..8db6ce7a54 100644 --- a/agenthub/monologue_agent/agent.py +++ b/agenthub/monologue_agent/agent.py @@ -85,6 +85,8 @@ class MonologueAgent(Agent): self.memory = LongTermMemory() def _add_event(self, event: dict): + if "extras" in event and "screenshot" in event["extras"]: + del event["extras"]["screenshot"] if 'args' in event and 'output' in event['args'] and len(event['args']['output']) > MAX_OUTPUT_LENGTH: event['args']['output'] = event['args']['output'][:MAX_OUTPUT_LENGTH] + "..." @@ -114,7 +116,7 @@ class MonologueAgent(Agent): elif output_type == "recall": observation = AgentRecallObservation(content=thought, memories=[]) elif output_type == "browse": - observation = BrowserOutputObservation(content=thought, url="") + observation = BrowserOutputObservation(content=thought, url="", screenshot="") self._add_event(observation.to_dict()) output_type = "" else: diff --git a/agenthub/planner_agent/prompt.py b/agenthub/planner_agent/prompt.py index 5b56fc3ffe..e34d8752fb 100644 --- a/agenthub/planner_agent/prompt.py +++ b/agenthub/planner_agent/prompt.py @@ -139,7 +139,10 @@ def get_prompt(plan: Plan, history: List[Tuple[Action, Observation]]): history_dicts.append(action.to_dict()) latest_action = action if not isinstance(observation, NullObservation): - history_dicts.append(observation.to_dict()) + observation_dict = observation.to_dict() + if "extras" in observation_dict and "screenshot" in observation_dict["extras"]: + del observation_dict["extras"]["screenshot"] + history_dicts.append(observation_dict) history_str = json.dumps(history_dicts, indent=2) hint = "" diff --git a/frontend/package-lock.json b/frontend/package-lock.json index d0523e4d6e..5c5cefb268 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -3099,7 +3099,6 @@ "version": "0.11.0", "resolved": "https://registry.npmmirror.com/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", 
"integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", - "dev": true, "optional": true, "engines": { "node": ">=14" @@ -10209,6 +10208,12 @@ "node": ">=16 || 14 >=14.17" } }, + "node_modules/monaco-editor": { + "version": "0.47.0", + "resolved": "https://registry.npmjs.org/monaco-editor/-/monaco-editor-0.47.0.tgz", + "integrity": "sha512-VabVvHvQ9QmMwXu4du008ZDuyLnHs9j7ThVFsiJoXSOQk18+LF89N4ADzPbFenm0W4V2bGHnFBztIRQTgBfxzw==", + "peer": true + }, "node_modules/ms": { "version": "2.1.2", "resolved": "https://registry.npmmirror.com/ms/-/ms-2.1.2.tgz", @@ -12961,6 +12966,12 @@ "node": ">=0.4" } }, + "node_modules/xterm": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/xterm/-/xterm-5.3.0.tgz", + "integrity": "sha512-8QqjlekLUFTrU6x7xck1MsPzPA571K5zNqWm0M0oroYEWVOptZ0+ubQSkQ3uxIEhcIHRujJy6emDWX4A7qyFzg==", + "peer": true + }, "node_modules/xterm-addon-fit": { "version": "0.8.0", "resolved": "https://registry.npmmirror.com/xterm-addon-fit/-/xterm-addon-fit-0.8.0.tgz", @@ -15190,7 +15201,6 @@ "version": "0.11.0", "resolved": "https://registry.npmmirror.com/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", - "dev": true, "optional": true }, "@pkgr/core": { @@ -20319,6 +20329,12 @@ "resolved": "https://registry.npmmirror.com/minipass/-/minipass-7.0.4.tgz", "integrity": "sha512-jYofLM5Dam9279rdkWzqHozUo4ybjdZmCsDHePy5V/PbBcVMiSZR97gmAy45aqi8CK1lG2ECd356FU86avfwUQ==" }, + "monaco-editor": { + "version": "0.47.0", + "resolved": "https://registry.npmjs.org/monaco-editor/-/monaco-editor-0.47.0.tgz", + "integrity": "sha512-VabVvHvQ9QmMwXu4du008ZDuyLnHs9j7ThVFsiJoXSOQk18+LF89N4ADzPbFenm0W4V2bGHnFBztIRQTgBfxzw==", + "peer": true + }, "ms": { "version": "2.1.2", "resolved": "https://registry.npmmirror.com/ms/-/ms-2.1.2.tgz", @@ -22149,6 +22165,12 @@ "resolved": 
"https://registry.npmmirror.com/xtend/-/xtend-4.0.2.tgz", "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==" }, + "xterm": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/xterm/-/xterm-5.3.0.tgz", + "integrity": "sha512-8QqjlekLUFTrU6x7xck1MsPzPA571K5zNqWm0M0oroYEWVOptZ0+ubQSkQ3uxIEhcIHRujJy6emDWX4A7qyFzg==", + "peer": true + }, "xterm-addon-fit": { "version": "0.8.0", "resolved": "https://registry.npmmirror.com/xterm-addon-fit/-/xterm-addon-fit-0.8.0.tgz", diff --git a/frontend/src/components/Browser.tsx b/frontend/src/components/Browser.tsx index 261fe14264..7fa5fac242 100644 --- a/frontend/src/components/Browser.tsx +++ b/frontend/src/components/Browser.tsx @@ -3,12 +3,29 @@ import { useSelector } from "react-redux"; import { RootState } from "../store"; function Browser(): JSX.Element { - const url = useSelector((state: RootState) => state.browser.url); + const { url, screenshotSrc } = useSelector( + (state: RootState) => state.browser, + ); + + const imgSrc = + screenshotSrc && screenshotSrc.startsWith("data:image/png;base64,") + ? screenshotSrc + : `data:image/png;base64,${screenshotSrc || ""}`; + return (
{url}
+ {screenshotSrc ? ( + Browser Screenshot + ) : ( +
No screenshot available.
+ )}
); } diff --git a/frontend/src/socket/observations.ts b/frontend/src/socket/observations.ts index ba577065d7..6a264149b8 100644 --- a/frontend/src/socket/observations.ts +++ b/frontend/src/socket/observations.ts @@ -1,7 +1,16 @@ import { appendAssistantMessage } from "../state/chatSlice"; +import { setUrl, setScreenshotSrc } from "../state/browserSlice"; import store from "../store"; import { ObservationMessage } from "../types/Message"; export function handleObservationMessage(message: ObservationMessage) { store.dispatch(appendAssistantMessage(message.message)); + if (message.observation === "browse") { + if (message.extras?.screenshot) { + store.dispatch(setScreenshotSrc(message.extras.screenshot)); + } + if (message.extras?.url) { + store.dispatch(setUrl(message.extras.url)); + } + } } diff --git a/frontend/src/types/Message.tsx b/frontend/src/types/Message.tsx index 1ff375e905..a7397e0528 100644 --- a/frontend/src/types/Message.tsx +++ b/frontend/src/types/Message.tsx @@ -21,4 +21,7 @@ export interface ObservationMessage { // A friendly message that can be put in the chat log message: string; + + // optional screenshot + screenshot?: string; } diff --git a/opendevin/action/browse.py b/opendevin/action/browse.py index 7f60ab73d6..b7a59defd5 100644 --- a/opendevin/action/browse.py +++ b/opendevin/action/browse.py @@ -1,30 +1,45 @@ -import requests - +import base64 from dataclasses import dataclass from opendevin.observation import BrowserOutputObservation +from typing import TYPE_CHECKING +from playwright.async_api import async_playwright from .base import ExecutableAction +if TYPE_CHECKING: + from opendevin.controller import AgentController + @dataclass class BrowseURLAction(ExecutableAction): url: str action: str = "browse" - def run(self, *args, **kwargs) -> BrowserOutputObservation: + async def run(self, controller: "AgentController") -> BrowserOutputObservation: # type: ignore try: - response = requests.get(self.url) - return BrowserOutputObservation( - 
content=response.text, - status_code=response.status_code, - url=self.url - ) - except requests.exceptions.RequestException as e: + async with async_playwright() as p: + browser = await p.chromium.launch() + page = await browser.new_page() + response = await page.goto(self.url) + # content = await page.content() + inner_text = await page.evaluate("() => document.body.innerText") + screenshot_bytes = await page.screenshot(full_page=True) + await browser.close() + + screenshot_base64 = base64.b64encode(screenshot_bytes).decode("utf-8") + return BrowserOutputObservation( + content=inner_text, # Visible text of the page body (innerText), not raw HTML + screenshot=screenshot_base64, # Base64-encoded screenshot + url=self.url, + status_code=response.status if response else 0, # HTTP status code + ) + except Exception as e: return BrowserOutputObservation( content=str(e), + screenshot="", error=True, url=self.url ) - + @property def message(self) -> str: return f"Browsing URL: {self.url}" \ No newline at end of file diff --git a/opendevin/controller/agent_controller.py b/opendevin/controller/agent_controller.py index cde35546ba..466bf86782 100644 --- a/opendevin/controller/agent_controller.py +++ b/opendevin/controller/agent_controller.py @@ -1,7 +1,9 @@ import asyncio +import inspect import traceback -from typing import List, Callable, Literal, Mapping, Any +from typing import List, Callable, Literal, Mapping, Awaitable, Any, cast + from termcolor import colored from opendevin.plan import Plan @@ -144,7 +146,11 @@ class AgentController: if action.executable: try: - observation = action.run(self) + # Call run() exactly once: a second call would repeat the action's side effects and leave the first coroutine un-awaited. + result = action.run(self) + if inspect.isawaitable(result): + result = await cast(Awaitable[Observation], result) + observation = result except Exception as e: observation = AgentErrorObservation(str(e)) print_with_color(observation, "ERROR") diff --git a/opendevin/observation/browse.py b/opendevin/observation/browse.py index 37faed8352..648985bb1c 100644 --- a/opendevin/observation/browse.py 
+++ b/opendevin/observation/browse.py @@ -9,6 +9,7 @@ class BrowserOutputObservation(Observation): """ url: str + screenshot: str status_code: int = 200 error: bool = False observation : str = "browse"