Merge pull request #3 from warmshao/dev

add deepseek
This commit is contained in:
warmshao
2025-01-03 19:33:28 +08:00
committed by GitHub
6 changed files with 83 additions and 12 deletions

View File

@@ -6,7 +6,7 @@ This project builds upon the foundation of the [browser-use](https://github.com/
1. **A Brand New WebUI:** We offer a comprehensive web interface that supports a wide range of `browser-use` functionalities. This UI is designed to be user-friendly and enables easy interaction with the browser agent.
2. **Expanded LLM Support:** We've integrated support for various Large Language Models (LLMs), including: Gemini, OpenAI, Azure OpenAI, Anthropic etc. And we plan to add support for even more models in the future.
2. **Expanded LLM Support:** We've integrated support for various Large Language Models (LLMs), including: Gemini, OpenAI, Azure OpenAI, Anthropic, DeepSeek etc. And we plan to add support for even more models in the future.
3. **Custom Browser Support:** You can use your own browser with our tool, eliminating the need to re-login to sites or deal with other authentication challenges. This feature also supports high-definition screen recording.
@@ -43,5 +43,6 @@ This project builds upon the foundation of the [browser-use](https://github.com/
```
2. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
3. **Using Your Own Browser:**
- Close all Chrome windows.
- Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent.
- Check the "Use Own Browser" option within the Browser Settings.

View File

@@ -151,6 +151,20 @@ class CustomAgent(Agent):
if completed_contents and 'None' not in completed_contents:
step_info.task_progress = completed_contents
@time_execution_async('--get_next_action')
async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutput:
    """Query the LLM for the next action(s) to take.

    Args:
        input_messages: Conversation history / state messages sent to the model.

    Returns:
        AgentOutput: Parsed model response, with the action list truncated to
        ``self.max_actions_per_step``.

    Raises:
        json.JSONDecodeError: If the model reply is not valid JSON after
        stripping Markdown code fences.
    """
    # Use the async API: calling the synchronous `invoke` inside an
    # `async def` would block the event loop for the whole LLM round-trip.
    ret = await self.llm.ainvoke(input_messages)
    # Models frequently wrap JSON in ```json ... ``` fences; strip them
    # (and surrounding whitespace) before parsing.
    raw = ret.content.replace('```json', '').replace("```", "").strip()
    parsed_json = json.loads(raw)
    parsed: AgentOutput = self.AgentOutput(**parsed_json)
    # Cap the number of actions executed this step.
    parsed.action = parsed.action[: self.max_actions_per_step]
    self._log_response(parsed)
    self.n_steps += 1
    return parsed
@time_execution_async('--step')
async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None:
"""Execute one step of the task"""

View File

@@ -48,6 +48,23 @@ def get_llm_model(provider: str, **kwargs):
else:
api_key = kwargs.get("api_key")
return ChatOpenAI(
model=kwargs.get("model_name", 'gpt-4o'),
temperature=kwargs.get("temperature", 0.0),
base_url=base_url,
api_key=api_key
)
elif provider == 'deepseek':
if not kwargs.get("base_url", ""):
base_url = os.getenv("DEEPSEEK_ENDPOINT", "")
else:
base_url = kwargs.get("base_url")
if not kwargs.get("api_key", ""):
api_key = os.getenv("DEEPSEEK_API_KEY", "")
else:
api_key = kwargs.get("api_key")
return ChatOpenAI(
model=kwargs.get("model_name", 'gpt-4o'),
temperature=kwargs.get("temperature", 0.0),

View File

@@ -98,16 +98,23 @@ async def test_browser_use_custom():
# api_key=os.getenv("AZURE_OPENAI_API_KEY", "")
# )
# llm = utils.get_llm_model(
# provider="gemini",
# model_name="gemini-2.0-flash-exp",
# temperature=1.0,
# api_key=os.getenv("GOOGLE_API_KEY", "")
# )
llm = utils.get_llm_model(
provider="gemini",
model_name="gemini-2.0-flash-exp",
temperature=1.0,
api_key=os.getenv("GOOGLE_API_KEY", "")
provider="deepseek",
model_name="deepseek-chat",
temperature=0.8
)
controller = CustomController()
use_own_browser = False
disable_security = True
use_vision = False
playwright = None
browser_context_ = None
try:
@@ -156,7 +163,8 @@ async def test_browser_use_custom():
llm=llm,
browser_context=browser_context,
controller=controller,
system_prompt_class=CustomSystemPrompt
system_prompt_class=CustomSystemPrompt,
use_vision=use_vision
)
history: AgentHistoryList = await agent.run(max_steps=10)

View File

@@ -95,7 +95,29 @@ def test_azure_openai_model():
print(ai_msg.content)
def test_deepseek_model():
    """Smoke-test the DeepSeek chat model through the OpenAI-compatible client.

    Requires the DEEPSEEK_ENDPOINT and DEEPSEEK_API_KEY environment
    variables to be set; prints the model's reply to stdout.
    """
    from langchain_core.messages import HumanMessage
    from src.utils import utils
    llm = utils.get_llm_model(
        provider="deepseek",
        model_name="deepseek-chat",
        temperature=0.8,
        base_url=os.getenv("DEEPSEEK_ENDPOINT", ""),
        api_key=os.getenv("DEEPSEEK_API_KEY", "")
    )
    # NOTE: removed a leftover `pdb.set_trace()` debugger breakpoint here —
    # it would hang any non-interactive run of this test.
    message = HumanMessage(
        content=[
            {"type": "text", "text": "who are you?"}
        ]
    )
    ai_msg = llm.invoke([message])
    print(ai_msg.content)
if __name__ == '__main__':
# test_openai_model()
test_gemini_model()
# test_gemini_model()
# test_azure_openai_model()
test_deepseek_model()

View File

@@ -52,7 +52,8 @@ async def run_browser_agent(
save_recording_path,
task,
add_infos,
max_steps
max_steps,
use_vision
):
"""
Runs the browser agent based on user configurations.
@@ -75,6 +76,7 @@ async def run_browser_agent(
save_recording_path=save_recording_path,
task=task,
max_steps=max_steps,
use_vision=use_vision
)
elif agent_type == "custom":
return await run_custom_agent(
@@ -88,6 +90,7 @@ async def run_browser_agent(
task=task,
add_infos=add_infos,
max_steps=max_steps,
use_vision=use_vision
)
else:
raise ValueError(f"Invalid agent type: {agent_type}")
@@ -101,7 +104,8 @@ async def run_org_agent(
window_h,
save_recording_path,
task,
max_steps
max_steps,
use_vision
):
browser = Browser(
config=BrowserConfig(
@@ -121,6 +125,7 @@ async def run_org_agent(
agent = Agent(
task=task,
llm=llm,
use_vision=use_vision,
browser_context=browser_context,
)
history = await agent.run(max_steps=max_steps)
@@ -143,7 +148,8 @@ async def run_custom_agent(
save_recording_path,
task,
add_infos,
max_steps
max_steps,
use_vision
):
controller = CustomController()
playwright = None
@@ -190,6 +196,7 @@ async def run_custom_agent(
agent = CustomAgent(
task=task,
add_infos=add_infos,
use_vision=use_vision,
llm=llm,
browser_context=browser_context,
controller=controller,
@@ -245,9 +252,10 @@ def main():
with gr.Row():
agent_type = gr.Radio(["org", "custom"], label="Agent Type", value="custom")
max_steps = gr.Number(label="max run steps", value=100)
use_vision = gr.Checkbox(label="use vision", value=True)
with gr.Row():
llm_provider = gr.Dropdown(
["anthropic", "openai", "gemini", "azure_openai"], label="LLM Provider", value="gemini"
["anthropic", "openai", "gemini", "azure_openai", "deepseek"], label="LLM Provider", value="gemini"
)
llm_model_name = gr.Textbox(label="LLM Model Name", value="gemini-2.0-flash-exp")
llm_temperature = gr.Number(label="LLM Temperature", value=1.0)
@@ -293,7 +301,8 @@ def main():
save_recording_path,
task,
add_infos,
max_steps
max_steps,
use_vision
],
outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output],
)