add deep research agent

This commit is contained in:
vincent
2025-04-29 22:02:51 +08:00
parent 47b5b55b0d
commit dad8fc990a
10 changed files with 960 additions and 1477 deletions

View File

@@ -5,4 +5,6 @@ json-repair
langchain-mistralai==0.2.4
MainContentExtractor==0.0.4
langchain-ibm==0.3.10
langchain_mcp_adapters==0.0.9
langchain_mcp_adapters==0.0.9
langgraph==0.3.34
langchain-community==0.3.23

File diff suppressed because it is too large Load Diff

View File

@@ -7,6 +7,7 @@ from typing import Any, Dict, Optional
from src.webui.webui_manager import WebuiManager
from src.utils import config
import logging
from functools import partial
logger = logging.getLogger(__name__)
@@ -23,10 +24,15 @@ def update_model_dropdown(llm_provider):
return gr.Dropdown(choices=[], value="", interactive=True, allow_custom_value=True)
def update_mcp_server(mcp_file: str):
def update_mcp_server(mcp_file: str, webui_manager: WebuiManager):
"""
Update the MCP server.
"""
if hasattr(webui_manager, "bu_controller") and webui_manager.bu_controller:
logger.warning("⚠️ Close controller because mcp file has changed!")
webui_manager.bu_controller.close_mcp_client()
webui_manager.bu_controller = None
if not mcp_file or not os.path.exists(mcp_file) or not mcp_file.endswith('.json'):
logger.warning(f"{mcp_file} is not a valid MCP file.")
return None, gr.update(visible=False)
@@ -37,7 +43,7 @@ def update_mcp_server(mcp_file: str):
return json.dumps(mcp_server, indent=2), gr.update(visible=True)
def create_agent_settings_tab(webui_manager: WebuiManager) -> dict[str, Component]:
def create_agent_settings_tab(webui_manager: WebuiManager):
"""
Creates an agent settings tab.
"""
@@ -252,7 +258,7 @@ def create_agent_settings_tab(webui_manager: WebuiManager) -> dict[str, Componen
)
mcp_json_file.change(
update_mcp_server,
partial(update_mcp_server, webui_manager=webui_manager),
inputs=[mcp_json_file],
outputs=[mcp_server_config, mcp_server_config]
)

View File

@@ -14,13 +14,16 @@ async def close_browser(webui_manager: WebuiManager):
if webui_manager.bu_current_task and not webui_manager.bu_current_task.done():
webui_manager.bu_current_task.cancel()
webui_manager.bu_current_task = None
if webui_manager.bu_browser:
await webui_manager.bu_browser.close()
webui_manager.bu_browser = None
if webui_manager.bu_browser_context:
logger.info("⚠️ Closing browser context when changing browser config.")
await webui_manager.bu_browser_context.close()
webui_manager.bu_browser_context = None
if webui_manager.bu_browser:
logger.info("⚠️ Closing browser when changing browser config.")
await webui_manager.bu_browser.close()
webui_manager.bu_browser = None
def create_browser_settings_tab(webui_manager: WebuiManager):
"""
@@ -43,6 +46,7 @@ def create_browser_settings_tab(webui_manager: WebuiManager):
interactive=True,
placeholder="Leave it empty if you use your default user data",
)
with gr.Group():
with gr.Row():
use_own_browser = gr.Checkbox(
label="Use Own Browser",
@@ -64,11 +68,12 @@ def create_browser_settings_tab(webui_manager: WebuiManager):
)
disable_security = gr.Checkbox(
label="Disable Security",
value=True,
info="Disable browser security features",
value=False,
info="Disable browser security",
interactive=True
)
with gr.Group():
with gr.Row():
window_w = gr.Number(
label="Window Width",
@@ -82,7 +87,7 @@ def create_browser_settings_tab(webui_manager: WebuiManager):
info="Browser window height",
interactive=True
)
with gr.Group():
with gr.Row():
cdp_url = gr.Textbox(
label="CDP URL",
@@ -94,7 +99,7 @@ def create_browser_settings_tab(webui_manager: WebuiManager):
info="WSS URL for browser remote debugging",
interactive=True,
)
with gr.Group():
with gr.Row():
save_recording_path = gr.Textbox(
label="Recording Path",

View File

@@ -1,3 +1,5 @@
import pdb
import gradio as gr
from gradio.components import Component
import asyncio
@@ -388,7 +390,6 @@ async def run_agent_task(webui_manager: WebuiManager, components: Dict[gr.compon
extra_args += [f"--user-data-dir={chrome_user_data}"]
else:
browser_binary_path = None
webui_manager.bu_browser = CustomBrowser(
config=BrowserConfig(
headless=headless,
@@ -432,7 +433,6 @@ async def run_agent_task(webui_manager: WebuiManager, components: Dict[gr.compon
logger.info(f"Initializing new agent for task: {task}")
if not webui_manager.bu_browser or not webui_manager.bu_browser_context:
raise ValueError("Browser or Context not initialized, cannot create agent.")
webui_manager.bu_agent = BrowserUseAgent(
task=task,
llm=main_llm,
@@ -456,6 +456,9 @@ async def run_agent_task(webui_manager: WebuiManager, components: Dict[gr.compon
webui_manager.bu_agent.state.agent_id = webui_manager.bu_agent_task_id
webui_manager.bu_agent.add_new_task(task)
webui_manager.bu_agent.settings.generate_gif = gif_path
webui_manager.bu_agent.browser = webui_manager.bu_browser
webui_manager.bu_agent.browser_context = webui_manager.bu_browser_context
webui_manager.bu_agent.controller = webui_manager.bu_controller
# --- 6. Run Agent Task and Stream Updates ---
agent_run_coro = webui_manager.bu_agent.run(max_steps=max_steps)
@@ -832,15 +835,13 @@ def create_browser_use_agent_tab(webui_manager: WebuiManager):
async def submit_wrapper(components_dict: Dict[Component, Any]) -> AsyncGenerator[Dict[Component, Any], None]:
"""Wrapper for handle_submit that yields its results."""
# handle_submit is an async generator, iterate and yield
async for update in handle_submit(webui_manager, components_dict):
yield update
async def stop_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
"""Wrapper for handle_stop."""
# handle_stop is async def but returns a single dict. We yield it once.
update_dict = await handle_stop(webui_manager)
yield update_dict # Yield the final dictionary
yield update_dict
async def pause_resume_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
"""Wrapper for handle_pause_resume."""

View File

@@ -5,7 +5,7 @@ from src.webui.webui_manager import WebuiManager
from src.utils import config
def create_deep_research_agent_tab(webui_manager: WebuiManager) -> dict[str, Component]:
def create_deep_research_agent_tab(webui_manager: WebuiManager):
"""
Creates a deep research agent tab
"""

View File

@@ -5,7 +5,7 @@ from src.webui.webui_manager import WebuiManager
from src.utils import config
def create_load_save_config_tab(webui_manager: WebuiManager) -> dict[str, Component]:
def create_load_save_config_tab(webui_manager: WebuiManager):
"""
Creates a load and save config tab.
"""
@@ -13,7 +13,7 @@ def create_load_save_config_tab(webui_manager: WebuiManager) -> dict[str, Compon
tab_components = {}
config_file = gr.File(
label="Load UI Settings from Config File",
label="Load UI Settings from json",
file_types=[".json"],
interactive=True
)

View File

@@ -194,7 +194,6 @@ async def test_browser_use_parallel():
# api_key=os.getenv("OPENAI_API_KEY", ""),
# )
# llm = utils.get_llm_model(
# provider="google",
# model_name="gemini-2.0-flash",
@@ -335,6 +334,70 @@ async def test_browser_use_parallel():
await browser.close()
async def test_deep_research_agent():
    """Run the deep research agent on a sample topic and print a summary.

    Builds an Azure OpenAI ``gpt-4o`` model from the ``AZURE_OPENAI_ENDPOINT``
    and ``AZURE_OPENAI_API_KEY`` environment variables, creates a
    ``DeepSearchAgent`` with a non-headless browser config and one MCP server
    entry (``desktop-commander`` started through ``npx``), then awaits
    ``agent.run`` and prints the status, message, task id and a short summary
    of the returned ``final_state``.

    This is a manual smoke test: any exception is caught and reported on the
    console (message plus traceback) instead of being re-raised.
    """
    # Local imports, matching the original: the project modules are only
    # needed when this particular test is executed.
    import traceback

    from src.agent.deep_research.deep_research_agent import DeepSearchAgent
    from src.utils import llm_provider

    llm = llm_provider.get_llm_model(
        provider="azure_openai",
        model_name="gpt-4o",
        temperature=0.5,
        base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
        api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
    )

    mcp_server_config = {
        "mcpServers": {
            "desktop-commander": {
                "command": "npx",
                "args": [
                    "-y",
                    "@wonderwhy-er/desktop-commander"
                ]
            },
        }
    }

    browser_config = {"headless": False, "window_width": 1280, "window_height": 1100, "use_own_browser": False}
    agent = DeepSearchAgent(llm=llm, browser_config=browser_config, mcp_server_config=mcp_server_config)

    research_topic = "Impact of Microplastics on Marine Ecosystems"
    task_id_to_resume = None  # Set this to resume a previous task ID

    print(f"Starting research on: {research_topic}")

    try:
        # Call run and wait for the final result dictionary
        result = await agent.run(research_topic, task_id=task_id_to_resume)

        print("\n--- Research Process Ended ---")
        print(f"Status: {result.get('status')}")
        print(f"Message: {result.get('message')}")
        print(f"Task ID: {result.get('task_id')}")

        # Check the final state for the report
        final_state = result.get('final_state', {})
        if final_state:
            completed_steps = sum(
                1
                for item in final_state.get('research_plan', [])
                if item.get('status') == 'completed'
            )
            print("\n--- Final State Summary ---")
            print(f" Plan Steps Completed: {completed_steps}")
            print(f" Total Search Results Logged: {len(final_state.get('search_results', []))}")
            if final_state.get("final_report"):
                print(" Final Report: Generated (content omitted). You can find it in the output directory.")
                # print("\n--- Final Report ---") # Optionally print report
                # print(final_state["final_report"])
            else:
                print(" Final Report: Not generated.")
        else:
            print("Final state information not available.")

    except Exception as e:
        # Deliberately best-effort: report the failure instead of crashing the
        # script, but include the traceback so the failing call can be located.
        print("\n--- An unhandled error occurred outside the agent run ---")
        print(e)
        traceback.print_exc()
if __name__ == "__main__":
# asyncio.run(test_browser_use_agent())
asyncio.run(test_browser_use_parallel())
# asyncio.run(test_browser_use_parallel())
asyncio.run(test_deep_research_agent())

View File

@@ -32,6 +32,7 @@ async def test_mcp_client():
}
mcp_tools, mcp_client = await setup_mcp_client_and_tools(test_server_config)
for tool in mcp_tools:
tool_param_model = create_tool_param_model(tool)
print(tool.name)

1095
webui2.py

File diff suppressed because it is too large Load Diff