Lazy launching BrowserEnv / making BrowserEnv optional (#2155)

* feat: lazily launch the browser; make the browser optional for different agents.

* style: lint

* fix: integration test failing because the browser was not started.

* fix: running via the CLI and the integration test failed.

* fix: lint

* fix: lint

---------

Co-authored-by: Graham Neubig <neubig@gmail.com>
This commit is contained in:
Aaron Xia 2024-06-01 04:40:42 +08:00 committed by GitHub
parent 8413f147c9
commit 42c6b506b5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 43 additions and 8 deletions

View File

@ -8,6 +8,7 @@ from opendevin.events.action import (
)
from opendevin.events.serialization.event import event_to_memory
from opendevin.llm.llm import LLM
from opendevin.runtime.tools import RuntimeTool
from .parser import parse_command
from .prompts import (
@ -27,6 +28,7 @@ class SWEAgent(Agent):
SWE-agent includes ACI functions like 'goto', 'search_for', 'edit', 'scroll', 'run'
"""
runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
def __init__(self, llm: LLM):
super().__init__(llm)

View File

@ -17,6 +17,7 @@ from opendevin.llm.llm import LLM
from opendevin.runtime.plugins import (
PluginRequirement,
)
from opendevin.runtime.tools import RuntimeTool
def parse_response(response: str) -> Action:
@ -42,6 +43,7 @@ class BrowsingAgent(Agent):
"""
sandbox_plugins: list[PluginRequirement] = []
runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
def __init__(
self,

View File

@ -28,6 +28,7 @@ from opendevin.runtime.plugins import (
JupyterRequirement,
PluginRequirement,
)
from opendevin.runtime.tools import RuntimeTool
ENABLE_GITHUB = True
@ -162,6 +163,7 @@ class CodeActAgent(Agent):
AgentSkillsRequirement(),
JupyterRequirement(),
]
runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
jupyter_kernel_init_code: str = 'from agentskills import *'
system_message: str = get_system_message()

View File

@ -27,6 +27,7 @@ from opendevin.runtime.plugins import (
JupyterRequirement,
PluginRequirement,
)
from opendevin.runtime.tools import RuntimeTool
def parse_response(response) -> str:
@ -127,6 +128,7 @@ class CodeActSWEAgent(Agent):
AgentSkillsRequirement(),
JupyterRequirement(),
]
runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
jupyter_kernel_init_code: str = 'from agentskills import *'
system_message: str = get_system_message()

View File

@ -26,6 +26,7 @@ from opendevin.events.observation import (
from opendevin.events.serialization.event import event_to_memory
from opendevin.llm.llm import LLM
from opendevin.memory.condenser import MemoryCondenser
from opendevin.runtime.tools import RuntimeTool
if config.agent.memory_enabled:
from opendevin.memory.memory import LongTermMemory
@ -46,6 +47,7 @@ class MonologueAgent(Agent):
initial_thoughts: list[dict[str, str]]
memory: 'LongTermMemory | None'
memory_condenser: MemoryCondenser
runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
def __init__(self, llm: LLM):
"""

View File

@ -2,6 +2,7 @@ from opendevin.controller.agent import Agent
from opendevin.controller.state.state import State
from opendevin.events.action import Action, AgentFinishAction
from opendevin.llm.llm import LLM
from opendevin.runtime.tools import RuntimeTool
from .prompt import get_prompt, parse_response
@ -12,6 +13,7 @@ class PlannerAgent(Agent):
The planner agent utilizes a special prompting strategy to create long term plans for solving problems.
The agent is given its previous action-observation pairs, current task, and hint based on last action taken at every step.
"""
runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
def __init__(self, llm: LLM):
"""

View File

@ -10,6 +10,7 @@ from opendevin.core.exceptions import (
)
from opendevin.llm.llm import LLM
from opendevin.runtime.plugins import PluginRequirement
from opendevin.runtime.tools import RuntimeTool
class Agent(ABC):
@ -23,6 +24,7 @@ class Agent(ABC):
_registry: dict[str, Type['Agent']] = {}
sandbox_plugins: list[PluginRequirement] = []
runtime_tools: list[RuntimeTool] = []
def __init__(
self,

View File

@ -92,6 +92,7 @@ async def main(
)
runtime = ServerRuntime(event_stream=event_stream, sandbox=sandbox)
runtime.init_sandbox_plugins(controller.agent.sandbox_plugins)
runtime.init_runtime_tools(controller.agent.runtime_tools, is_async=False)
await event_stream.add_event(MessageAction(content=task), EventSource.USER)

View File

@ -2,6 +2,7 @@ import atexit
import base64
import io
import multiprocessing
import threading
import time
import uuid
@ -17,7 +18,7 @@ from opendevin.core.logger import opendevin_logger as logger
class BrowserEnv:
def __init__(self):
def __init__(self, is_async: bool = True):
self.html_text_converter = html2text.HTML2Text()
# ignore links and images
self.html_text_converter.ignore_links = False
@ -32,12 +33,18 @@ class BrowserEnv:
self.process = multiprocessing.Process(
target=self.browser_process,
)
if is_async:
threading.Thread(target=self.init_browser).start()
else:
self.init_browser()
atexit.register(self.close)
def init_browser(self):
logger.info('Starting browser env...')
self.process.start()
if not self.check_alive():
self.close()
raise BrowserInitException('Failed to start browser environment.')
atexit.register(self.close)
def browser_process(self):
env = gym.make(

View File

@ -33,6 +33,7 @@ from opendevin.runtime import (
)
from opendevin.runtime.browser.browser_env import BrowserEnv
from opendevin.runtime.plugins import PluginRequirement
from opendevin.runtime.tools import RuntimeTool
from opendevin.storage import FileStore, InMemoryFileStore
@ -74,12 +75,6 @@ class Runtime:
self.sandbox = sandbox
self._is_external_sandbox = True
self.browser: BrowserEnv | None = None
try:
self.browser = BrowserEnv()
except BrowserInitException:
logger.warn(
'Failed to start browser environment, web browsing functionality will not work'
)
self.file_store = InMemoryFileStore()
self.event_stream = event_stream
self.event_stream.subscribe(EventStreamSubscriber.RUNTIME, self.on_event)
@ -95,6 +90,18 @@ class Runtime:
def init_sandbox_plugins(self, plugins: list[PluginRequirement]) -> None:
self.sandbox.init_plugins(plugins)
def init_runtime_tools(
    self, runtime_tools: list[RuntimeTool], is_async: bool = True
) -> None:
    """Start the runtime tools requested by an agent.

    Currently the only tool handled is the browser: when
    ``RuntimeTool.BROWSER`` is listed, a ``BrowserEnv`` is created and
    stored on ``self.browser``. If it is not listed, ``self.browser`` is
    left untouched.

    Args:
        runtime_tools: Tools the agent declares it needs.
        is_async: Forwarded to ``BrowserEnv``; selects whether the browser
            is started in a background thread or synchronously.
    """
    if RuntimeTool.BROWSER not in runtime_tools:
        return

    try:
        self.browser = BrowserEnv(is_async)
    except BrowserInitException:
        # NOTE(review): with is_async=True the browser start-up runs in a
        # separate thread, so a BrowserInitException raised there will not
        # reach this handler -- only synchronous start-up failures do.
        # `Logger.warn` is a deprecated alias (removed in Python 3.13);
        # use `warning` instead.
        logger.warning(
            'Failed to start browser environment, web browsing functionality will not work'
        )
async def on_event(self, event: Event) -> None:
if isinstance(event, Action):
observation = await self.run_action(event)

View File

@ -0,0 +1,5 @@
from enum import Enum
class RuntimeTool(Enum):
    """Optional tools that an agent can request from the runtime.

    Agents list the members they need in their ``runtime_tools`` class
    attribute.
    """

    # Requests the browser environment.
    BROWSER = 'browser'

View File

@ -102,6 +102,7 @@ class AgentSession:
'CodeActAgent requires DockerSSHBox as sandbox! Using other sandbox that are not stateful (LocalBox, DockerExecBox) will not work properly.'
)
self.runtime.init_sandbox_plugins(agent.sandbox_plugins)
self.runtime.init_runtime_tools(agent.runtime_tools)
self.controller = AgentController(
sid=self.sid,