From c4b021dcb947d67f53e9405148c59977d1567b7e Mon Sep 17 00:00:00 2001
From: Wendong <w3ndong.fan@gmail.com>
Date: Mon, 10 Mar 2025 12:07:08 +0800
Subject: [PATCH] update deepseek example and fix format

---
 owl/app.py                    | 405 ++++++++++++++++++++--------------
 owl/run_deepseek_zh.py        |   7 +-
 owl/script_adapter.py         | 130 +++++++----
 owl/utils/document_toolkit.py |   7 +-
 run_app.py                    |  28 ++-
 5 files changed, 355 insertions(+), 222 deletions(-)

diff --git a/owl/app.py b/owl/app.py
index d4631a8..92af864 100644
--- a/owl/app.py
+++ b/owl/app.py
@@ -1,3 +1,16 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 import os
 import sys
 import gradio as gr
@@ -25,7 +38,7 @@ SCRIPTS = {
     "Mini": "run_mini.py",
     "DeepSeek （中文）": "run_deepseek_zh.py",
     "Default": "run.py",
-    "GAIA Roleplaying": "run_gaia_roleplaying.py"
+    "GAIA Roleplaying": "run_gaia_roleplaying.py",
 }
 
 # 脚本描述
@@ -35,98 +48,100 @@ SCRIPT_DESCRIPTIONS = {
     "Mini": "轻量级版本，使用OpenAI GPT-4o模型",
     "DeepSeek （中文）": "使用DeepSeek模型，适合非多模态任务",
     "Default": "默认OWL实现，使用OpenAI GPT-4o模型和全套工具",
-    "GAIA Roleplaying": "GAIA基准测试实现，用于评估模型能力"
+    "GAIA Roleplaying": "GAIA基准测试实现，用于评估模型能力",
 }
 
 # 环境变量分组
 ENV_GROUPS = {
     "模型API": [
         {
-            "name": "OPENAI_API_KEY", 
-            "label": "OpenAI API密钥", 
-            "type": "password", 
+            "name": "OPENAI_API_KEY",
+            "label": "OpenAI API密钥",
+            "type": "password",
             "required": False,
-            "help": "OpenAI API密钥，用于访问GPT模型。获取方式：https://platform.openai.com/api-keys"
+            "help": "OpenAI API密钥，用于访问GPT模型。获取方式：https://platform.openai.com/api-keys",
         },
         {
-            "name": "OPENAI_API_BASE_URL", 
-            "label": "OpenAI API基础URL", 
-            "type": "text", 
+            "name": "OPENAI_API_BASE_URL",
+            "label": "OpenAI API基础URL",
+            "type": "text",
             "required": False,
-            "help": "OpenAI API的基础URL，可选。如果使用代理或自定义端点，请设置此项。"
+            "help": "OpenAI API的基础URL，可选。如果使用代理或自定义端点，请设置此项。",
         },
         {
-            "name": "QWEN_API_KEY", 
-            "label": "阿里云Qwen API密钥", 
-            "type": "password", 
+            "name": "QWEN_API_KEY",
+            "label": "阿里云Qwen API密钥",
+            "type": "password",
             "required": False,
-            "help": "阿里云Qwen API密钥，用于访问Qwen模型。获取方式：https://help.aliyun.com/zh/model-studio/developer-reference/get-api-key"
+            "help": "阿里云Qwen API密钥，用于访问Qwen模型。获取方式：https://help.aliyun.com/zh/model-studio/developer-reference/get-api-key",
         },
         {
-            "name": "DEEPSEEK_API_KEY", 
-            "label": "DeepSeek API密钥", 
-            "type": "password", 
+            "name": "DEEPSEEK_API_KEY",
+            "label": "DeepSeek API密钥",
+            "type": "password",
             "required": False,
-            "help": "DeepSeek API密钥，用于访问DeepSeek模型。获取方式：https://platform.deepseek.com/api_keys"
+            "help": "DeepSeek API密钥，用于访问DeepSeek模型。获取方式：https://platform.deepseek.com/api_keys",
         },
     ],
     "搜索工具": [
         {
-            "name": "GOOGLE_API_KEY", 
-            "label": "Google API密钥", 
-            "type": "password", 
+            "name": "GOOGLE_API_KEY",
+            "label": "Google API密钥",
+            "type": "password",
             "required": False,
-            "help": "Google搜索API密钥，用于网络搜索功能。获取方式：https://developers.google.com/custom-search/v1/overview"
+            "help": "Google搜索API密钥，用于网络搜索功能。获取方式：https://developers.google.com/custom-search/v1/overview",
         },
         {
-            "name": "SEARCH_ENGINE_ID", 
-            "label": "搜索引擎ID", 
-            "type": "text", 
+            "name": "SEARCH_ENGINE_ID",
+            "label": "搜索引擎ID",
+            "type": "text",
             "required": False,
-            "help": "Google自定义搜索引擎ID，与Google API密钥配合使用。获取方式：https://developers.google.com/custom-search/v1/overview"
+            "help": "Google自定义搜索引擎ID，与Google API密钥配合使用。获取方式：https://developers.google.com/custom-search/v1/overview",
         },
     ],
     "其他工具": [
         {
-            "name": "HF_TOKEN", 
-            "label": "Hugging Face令牌", 
-            "type": "password", 
+            "name": "HF_TOKEN",
+            "label": "Hugging Face令牌",
+            "type": "password",
             "required": False,
-            "help": "Hugging Face API令牌，用于访问Hugging Face模型和数据集。获取方式：https://huggingface.co/join"
+            "help": "Hugging Face API令牌，用于访问Hugging Face模型和数据集。获取方式：https://huggingface.co/join",
         },
         {
-            "name": "CHUNKR_API_KEY", 
-            "label": "Chunkr API密钥", 
-            "type": "password", 
+            "name": "CHUNKR_API_KEY",
+            "label": "Chunkr API密钥",
+            "type": "password",
             "required": False,
-            "help": "Chunkr API密钥，用于文档处理功能。获取方式：https://chunkr.ai/"
+            "help": "Chunkr API密钥，用于文档处理功能。获取方式：https://chunkr.ai/",
         },
         {
-            "name": "FIRECRAWL_API_KEY", 
-            "label": "Firecrawl API密钥", 
-            "type": "password", 
+            "name": "FIRECRAWL_API_KEY",
+            "label": "Firecrawl API密钥",
+            "type": "password",
             "required": False,
-            "help": "Firecrawl API密钥，用于网页爬取功能。获取方式：https://www.firecrawl.dev/"
+            "help": "Firecrawl API密钥，用于网页爬取功能。获取方式：https://www.firecrawl.dev/",
         },
     ],
-    "自定义环境变量": []  # 用户自定义的环境变量将存储在这里
+    "自定义环境变量": [],  # user-defined environment variables are appended to this list at runtime
 }
 
+
 def get_script_info(script_name):
     """获取脚本的详细信息"""
     return SCRIPT_DESCRIPTIONS.get(script_name, "无描述信息")
 
+
 def load_env_vars():
     """加载环境变量"""
     env_vars = {}
     # 尝试从.env文件加载
     dotenv.load_dotenv()
-    
+
     # 获取所有环境变量
     for group in ENV_GROUPS.values():
         for var in group:
             env_vars[var["name"]] = os.environ.get(var["name"], "")
-    
+
     # 加载.env文件中可能存在的其他环境变量
     if Path(".env").exists():
         with open(".env", "r", encoding="utf-8") as f:
@@ -135,34 +150,37 @@ def load_env_vars():
                 if line and not line.startswith("#") and "=" in line:
                     key, value = line.split("=", 1)
                     key = key.strip()
-                    value = value.strip().strip('"\'')
-                    
+                    value = value.strip().strip("\"'")
+
                     # 检查是否是已知的环境变量
                     known_var = False
                     for group in ENV_GROUPS.values():
                         if any(var["name"] == key for var in group):
                             known_var = True
                             break
-                    
+
                     # 如果不是已知的环境变量，添加到自定义环境变量组
                     if not known_var and key not in env_vars:
-                        ENV_GROUPS["自定义环境变量"].append({
-                            "name": key,
-                            "label": key,
-                            "type": "text",
-                            "required": False,
-                            "help": "用户自定义环境变量"
-                        })
+                        ENV_GROUPS["自定义环境变量"].append(
+                            {
+                                "name": key,
+                                "label": key,
+                                "type": "text",
+                                "required": False,
+                                "help": "用户自定义环境变量",
+                            }
+                        )
                         env_vars[key] = value
-    
+
     return env_vars
 
+
 def save_env_vars(env_vars):
     """保存环境变量到.env文件"""
     # 读取现有的.env文件内容
     env_path = Path(".env")
     existing_content = {}
-    
+
     if env_path.exists():
         with open(env_path, "r", encoding="utf-8") as f:
             for line in f:
@@ -170,105 +188,116 @@ def save_env_vars(env_vars):
                 if line and not line.startswith("#") and "=" in line:
                     key, value = line.split("=", 1)
                     existing_content[key.strip()] = value.strip()
-    
+
     # 更新环境变量
     for key, value in env_vars.items():
         if value:  # 只保存非空值
             # 确保值是字符串形式，并用引号包裹
             value = str(value)  # 确保值是字符串
-            if not (value.startswith('"') and value.endswith('"')) and not (value.startswith("'") and value.endswith("'")):
+            if not (value.startswith('"') and value.endswith('"')) and not (
+                value.startswith("'") and value.endswith("'")
+            ):
                 value = f'"{value}"'
             existing_content[key] = value
             # 同时更新当前进程的环境变量
-            os.environ[key] = value.strip('"\'')
-    
+            os.environ[key] = value.strip("\"'")
+
     # 写入.env文件
     with open(env_path, "w", encoding="utf-8") as f:
         for key, value in existing_content.items():
             f.write(f"{key}={value}\n")
-    
+
     return "✅ 环境变量已保存"
 
+
 def add_custom_env_var(name, value, var_type):
     """添加自定义环境变量"""
     if not name:
         return "❌ 环境变量名不能为空", None
-    
+
     # 检查是否已存在同名环境变量
     for group in ENV_GROUPS.values():
         if any(var["name"] == name for var in group):
             return f"❌ 环境变量 {name} 已存在", None
-    
+
     # 添加到自定义环境变量组
-    ENV_GROUPS["自定义环境变量"].append({
-        "name": name,
-        "label": name,
-        "type": var_type,
-        "required": False,
-        "help": "用户自定义环境变量"
-    })
-    
+    ENV_GROUPS["自定义环境变量"].append(
+        {
+            "name": name,
+            "label": name,
+            "type": var_type,
+            "required": False,
+            "help": "用户自定义环境变量",
+        }
+    )
+
     # 保存环境变量
     env_vars = {name: value}
     save_env_vars(env_vars)
-    
+
     # 返回成功消息和更新后的环境变量组
     return f"✅ 已添加环境变量 {name}", ENV_GROUPS["自定义环境变量"]
 
+
 def terminate_process():
     """终止当前运行的进程"""
     global current_process
-    
+
     with process_lock:
         if current_process is not None and current_process.poll() is None:
             # 在Windows上使用CTRL_BREAK_EVENT，在Unix上使用SIGTERM
-            if os.name == 'nt':
+            if os.name == "nt":
                 current_process.send_signal(signal.CTRL_BREAK_EVENT)
             else:
                 current_process.terminate()
-            
+
             # 等待进程终止
             try:
                 current_process.wait(timeout=5)
             except subprocess.TimeoutExpired:
                 # 如果进程没有在5秒内终止，强制终止
                 current_process.kill()
-            
+
             log_queue.put("进程已终止\n")
             return "✅ 进程已终止"
         else:
             return "❌ 没有正在运行的进程"
 
+
 def run_script(script_dropdown, question, progress=gr.Progress()):
     """运行选定的脚本并返回输出"""
     global current_process
-    
+
     script_name = SCRIPTS.get(script_dropdown)
     if not script_name:
         return "❌ 无效的脚本选择", "", "", "", None
-    
+
     if not question.strip():
         return "请输入问题！", "", "", "", None
-    
+
     # 清空日志队列
     while not log_queue.empty():
         log_queue.get()
-    
+
     # 创建日志目录
     log_dir = Path("logs")
     log_dir.mkdir(exist_ok=True)
-    
+
     # 创建带时间戳的日志文件
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
     log_file = log_dir / f"{script_name.replace('.py', '')}_{timestamp}.log"
-    
+
     # 构建命令
-    cmd = [sys.executable, os.path.join("owl", "script_adapter.py"), os.path.join("owl", script_name)]
-    
+    cmd = [
+        sys.executable,
+        os.path.join("owl", "script_adapter.py"),
+        os.path.join("owl", script_name),
+    ]
+
     # 创建环境变量副本并添加问题
     env = os.environ.copy()
     env["OWL_QUESTION"] = question
-    
+
     # 启动进程
     with process_lock:
         current_process = subprocess.Popen(
@@ -277,9 +306,9 @@ def run_script(script_dropdown, question, progress=gr.Progress()):
             stderr=subprocess.STDOUT,
             text=True,
             bufsize=1,
-            env=env
+            env=env,
         )
-    
+
     # 创建线程来读取输出
     def read_output():
         try:
@@ -293,54 +322,67 @@ def run_script(script_dropdown, question, progress=gr.Progress()):
                         log_queue.put(line)
         except Exception as e:
             log_queue.put(f"读取输出时出错: {str(e)}\n")
-    
+
     # 启动读取线程
     threading.Thread(target=read_output, daemon=True).start()
-    
+
     # 收集日志
     logs = []
     progress(0, desc="正在运行...")
-    
+
     # 等待进程完成或超时
     start_time = time.time()
     timeout = 1800  # 30分钟超时
-    
+
     while current_process.poll() is None:
         # 检查是否超时
         if time.time() - start_time > timeout:
             with process_lock:
                 if current_process.poll() is None:
-                    if os.name == 'nt':
+                    if os.name == "nt":
                         current_process.send_signal(signal.CTRL_BREAK_EVENT)
                     else:
                         current_process.terminate()
                     log_queue.put("执行超时，已终止进程\n")
             break
-        
+
         # 从队列获取日志
         while not log_queue.empty():
             log = log_queue.get()
             logs.append(log)
-        
+
         # 更新进度
         elapsed = time.time() - start_time
         progress(min(elapsed / 300, 0.99), desc="正在运行...")
-        
+
         # 短暂休眠以减少CPU使用
         time.sleep(0.1)
-        
+
         # 每秒更新一次日志显示
-        yield status_message(current_process), extract_answer(logs), "".join(logs), str(log_file), None
-    
+        yield (
+            status_message(current_process),
+            extract_answer(logs),
+            "".join(logs),
+            str(log_file),
+            None,
+        )
+
     # 获取剩余日志
     while not log_queue.empty():
         logs.append(log_queue.get())
-    
+
     # 提取聊天历史（如果有）
     chat_history = extract_chat_history(logs)
-    
+
     # 返回最终状态和日志
-    return status_message(current_process), extract_answer(logs), "".join(logs), str(log_file), chat_history
+    return (
+        status_message(current_process),
+        extract_answer(logs),
+        "".join(logs),
+        str(log_file),
+        chat_history,
+    )
+
 
 def status_message(process):
     """根据进程状态返回状态消息"""
@@ -351,6 +393,7 @@ def status_message(process):
     else:
         return f"❌ 执行失败 (返回码: {process.returncode})"
 
+
 def extract_answer(logs):
     """从日志中提取答案"""
     answer = ""
@@ -360,12 +403,13 @@ def extract_answer(logs):
             break
     return answer
 
+
 def extract_chat_history(logs):
     """尝试从日志中提取聊天历史"""
     try:
         chat_json_str = ""
         capture_json = False
-        
+
         for log in logs:
             if "chat_history" in log:
                 # 开始捕获JSON
@@ -384,7 +428,7 @@ def extract_chat_history(logs):
                             # 清理可能的额外文本
                             json_str = chat_json_str[:end_idx].strip()
                             chat_data = json.loads(json_str)
-                            
+
                             # 格式化为Gradio聊天组件可用的格式
                             formatted_chat = []
                             for msg in chat_data:
@@ -402,11 +446,12 @@ def extract_chat_history(logs):
         pass
     return None
 
+
 def create_ui():
     """创建Gradio界面"""
     # 加载环境变量
     env_vars = load_env_vars()
-    
+
     with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as app:
         gr.Markdown(
             """
@@ -415,7 +460,7 @@ def create_ui():
             选择一个模型并输入您的问题，系统将运行相应的脚本并显示结果。
             """
         )
-        
+
         with gr.Tabs() as tabs:
             with gr.TabItem("运行模式"):
                 with gr.Row():
@@ -425,144 +470,173 @@ def create_ui():
                         script_dropdown = gr.Dropdown(
                             choices=list(SCRIPTS.keys()),
                             value=default_script,
-                            label="选择模式"
+                            label="选择模式",
                         )
-                        
+
                         script_info = gr.Textbox(
-                            value=get_script_info(default_script) if default_script else "",
+                            value=get_script_info(default_script)
+                            if default_script
+                            else "",
                             label="模型描述",
-                            interactive=False
+                            interactive=False,
                         )
-                        
+
                         script_dropdown.change(
                             fn=lambda x: get_script_info(x),
                             inputs=script_dropdown,
-                            outputs=script_info
+                            outputs=script_info,
                         )
-                        
+
                         question_input = gr.Textbox(
-                            lines=5,
-                            placeholder="请输入您的问题...",
-                            label="问题"
+                            lines=5, placeholder="请输入您的问题...", label="问题"
                         )
-                        
+
                         gr.Markdown(
                             """
                             > **注意**: 您输入的问题将替换脚本中的默认问题。系统会自动处理问题的替换，确保您的问题被正确使用。
                             """
                         )
-                        
+
                         with gr.Row():
                             run_button = gr.Button("运行", variant="primary")
                             stop_button = gr.Button("终止", variant="stop")
-                    
+
                     with gr.Column(scale=2):
                         with gr.Tabs():
                             with gr.TabItem("结果"):
                                 status_output = gr.Textbox(label="状态")
                                 answer_output = gr.Textbox(label="回答", lines=10)
                                 log_file_output = gr.Textbox(label="日志文件路径")
-                            
+
                             with gr.TabItem("运行日志"):
                                 log_output = gr.Textbox(label="完整日志", lines=25)
-                            
+
                             with gr.TabItem("聊天历史"):
                                 chat_output = gr.Chatbot(label="对话历史")
-                
+
                 # 示例问题
                 examples = [
-                    ["Qwen Mini (中文)", "浏览亚马逊并找出一款对程序员有吸引力的产品。请提供产品名称和价格"],
-                    ["DeepSeek （中文）", "请分析GitHub上CAMEL-AI项目的最新统计数据。找出该项目的星标数量、贡献者数量和最近的活跃度。然后，创建一个简单的Excel表格来展示这些数据，并生成一个柱状图来可视化这些指标。最后，总结CAMEL项目的受欢迎程度和发展趋势。"],
-                    ["Default", "Navigate to Amazon.com and identify one product that is attractive to coders. Please provide me with the product name and price. No need to verify your answer."]
+                    [
+                        "Qwen Mini (中文)",
+                        "浏览亚马逊并找出一款对程序员有吸引力的产品。请提供产品名称和价格",
+                    ],
+                    [
+                        "DeepSeek （中文）",
+                        "请分析GitHub上CAMEL-AI项目的最新统计数据。找出该项目的星标数量、贡献者数量和最近的活跃度。然后，创建一个简单的Excel表格来展示这些数据，并生成一个柱状图来可视化这些指标。最后，总结CAMEL项目的受欢迎程度和发展趋势。",
+                    ],
+                    [
+                        "Default",
+                        "Navigate to Amazon.com and identify one product that is attractive to coders. Please provide me with the product name and price. No need to verify your answer.",
+                    ],
                 ]
-                
-                gr.Examples(
-                    examples=examples,
-                    inputs=[script_dropdown, question_input]
-                )
-            
+
+                gr.Examples(examples=examples, inputs=[script_dropdown, question_input])
+
             with gr.TabItem("环境变量配置"):
                 env_inputs = {}
                 save_status = gr.Textbox(label="保存状态", interactive=False)
-                
+
                 # 添加自定义环境变量部分
                 with gr.Accordion("添加自定义环境变量", open=True):
                     with gr.Row():
-                        new_var_name = gr.Textbox(label="环境变量名", placeholder="例如：MY_CUSTOM_API_KEY")
-                        new_var_value = gr.Textbox(label="环境变量值", placeholder="输入值")
-                        new_var_type = gr.Dropdown(
-                            choices=["text", "password"],
-                            value="text",
-                            label="类型"
+                        new_var_name = gr.Textbox(
+                            label="环境变量名", placeholder="例如：MY_CUSTOM_API_KEY"
                         )
-                    
+                        new_var_value = gr.Textbox(
+                            label="环境变量值", placeholder="输入值"
+                        )
+                        new_var_type = gr.Dropdown(
+                            choices=["text", "password"], value="text", label="类型"
+                        )
+
                     add_var_button = gr.Button("添加环境变量", variant="primary")
                     add_var_status = gr.Textbox(label="添加状态", interactive=False)
-                    
+
                     # 自定义环境变量列表
                     custom_vars_list = gr.JSON(
                         value=ENV_GROUPS["自定义环境变量"],
                         label="已添加的自定义环境变量",
-                        visible=len(ENV_GROUPS["自定义环境变量"]) > 0
+                        visible=len(ENV_GROUPS["自定义环境变量"]) > 0,
                     )
-                    
+
                     # 添加环境变量按钮点击事件
                     add_var_button.click(
                         fn=add_custom_env_var,
                         inputs=[new_var_name, new_var_value, new_var_type],
-                        outputs=[add_var_status, custom_vars_list]
+                        outputs=[add_var_status, custom_vars_list],
                     )
-                
+
                 # 现有环境变量配置
                 for group_name, vars in ENV_GROUPS.items():
-                    if group_name != "自定义环境变量" or len(vars) > 0:  # 只显示非空的自定义环境变量组
-                        with gr.Accordion(group_name, open=(group_name != "自定义环境变量")):
+                    if (
+                        group_name != "自定义环境变量" or len(vars) > 0
+                    ):  # render the custom-variable group only when it has entries
+                        with gr.Accordion(
+                            group_name, open=(group_name != "自定义环境变量")
+                        ):
                             for var in vars:
                                 # 添加帮助信息
                                 gr.Markdown(f"**{var['help']}**")
-                                
+
                                 if var["type"] == "password":
                                     env_inputs[var["name"]] = gr.Textbox(
                                         value=env_vars.get(var["name"], ""),
                                         label=var["label"],
                                         placeholder=f"请输入{var['label']}",
-                                        type="password"
+                                        type="password",
                                     )
                                 else:
                                     env_inputs[var["name"]] = gr.Textbox(
                                         value=env_vars.get(var["name"], ""),
                                         label=var["label"],
-                                        placeholder=f"请输入{var['label']}"
+                                        placeholder=f"请输入{var['label']}",
                                     )
-                
+
                 save_button = gr.Button("保存环境变量", variant="primary")
-                
+
                 # 保存环境变量
-                save_inputs = [env_inputs[var_name] for group in ENV_GROUPS.values() for var in group for var_name in [var["name"]] if var_name in env_inputs]
+                save_inputs = [
+                    env_inputs[var_name]
+                    for group in ENV_GROUPS.values()
+                    for var in group
+                    for var_name in [var["name"]]
+                    if var_name in env_inputs
+                ]
                 save_button.click(
-                    fn=lambda *values: save_env_vars(dict(zip([var["name"] for group in ENV_GROUPS.values() for var in group if var["name"] in env_inputs], values))),
+                    fn=lambda *values: save_env_vars(
+                        dict(
+                            zip(
+                                [
+                                    var["name"]
+                                    for group in ENV_GROUPS.values()
+                                    for var in group
+                                    if var["name"] in env_inputs
+                                ],
+                                values,
+                            )
+                        )
+                    ),
                     inputs=save_inputs,
-                    outputs=save_status
+                    outputs=save_status,
                 )
-        
+
         # 运行脚本
         run_button.click(
             fn=run_script,
-            inputs=[
-                script_dropdown,
-                question_input
+            inputs=[script_dropdown, question_input],
+            outputs=[
+                status_output,
+                answer_output,
+                log_output,
+                log_file_output,
+                chat_output,
             ],
-            outputs=[status_output, answer_output, log_output, log_file_output, chat_output],
-            show_progress=True
+            show_progress=True,
         )
-        
+
         # 终止运行
-        stop_button.click(
-            fn=terminate_process,
-            inputs=[],
-            outputs=[status_output]
-        )
-        
+        stop_button.click(fn=terminate_process, inputs=[], outputs=[status_output])
+
         # 添加页脚
         gr.Markdown(
             """
@@ -585,10 +659,11 @@ def create_ui():
             - 您输入的问题将替换脚本中的默认问题，确保问题与所选模型兼容
             """
         )
-    
+
     return app
 
+
 if __name__ == "__main__":
     # 创建并启动应用
     app = create_ui()
-    app.queue().launch(share=True) 
\ No newline at end of file
+    app.queue().launch(share=True)  # NOTE(review): share=True requests a public Gradio share link, and this UI displays and saves API keys from .env — confirm that exposure is intended
diff --git a/owl/run_deepseek_zh.py b/owl/run_deepseek_zh.py
index c2df322..e7bad12 100644
--- a/owl/run_deepseek_zh.py
+++ b/owl/run_deepseek_zh.py
@@ -81,6 +81,11 @@ def construct_society(question: str) -> OwlRolePlaying:
             model_type=ModelType.DEEPSEEK_CHAT,
             model_config_dict={"temperature": 0},
         ),
+        "document": ModelFactory.create(
+            model_platform=ModelPlatformType.DEEPSEEK,
+            model_type=ModelType.DEEPSEEK_CHAT,
+            model_config_dict={"temperature": 0},
+        ),
     }
 
     # Configure toolkits
@@ -89,7 +94,7 @@ def construct_society(question: str) -> OwlRolePlaying:
         SearchToolkit().search_duckduckgo,
         SearchToolkit().search_wiki,
         *ExcelToolkit().get_tools(),
-        *DocumentProcessingToolkit().get_tools(),
+        *DocumentProcessingToolkit(model=models["document"]).get_tools(),
     ]
 
     # Configure agent roles and parameters
diff --git a/owl/script_adapter.py b/owl/script_adapter.py
index db285c5..4f796dc 100644
--- a/owl/script_adapter.py
+++ b/owl/script_adapter.py
@@ -1,3 +1,16 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 import os
 import sys
 import importlib.util
@@ -5,6 +18,7 @@ import re
 from pathlib import Path
 import traceback
 
+
 def load_module_from_path(module_name, file_path):
     """从文件路径加载Python模块"""
     try:
@@ -12,7 +26,7 @@ def load_module_from_path(module_name, file_path):
         if spec is None:
             print(f"错误: 无法从 {file_path} 创建模块规范")
             return None
-        
+
         module = importlib.util.module_from_spec(spec)
         sys.modules[module_name] = module
         spec.loader.exec_module(module)
@@ -22,6 +36,7 @@ def load_module_from_path(module_name, file_path):
         traceback.print_exc()
         return None
 
+
 def run_script_with_env_question(script_name):
     """使用环境变量中的问题运行脚本"""
     # 获取环境变量中的问题
@@ -29,16 +44,16 @@ def run_script_with_env_question(script_name):
     if not question:
         print("错误: 未设置OWL_QUESTION环境变量")
         sys.exit(1)
-    
+
     # 脚本路径
     script_path = Path(script_name).resolve()
     if not script_path.exists():
         print(f"错误: 脚本 {script_path} 不存在")
         sys.exit(1)
-    
+
     # 创建临时文件路径
     temp_script_path = script_path.with_name(f"temp_{script_path.name}")
-    
+
     try:
         # 读取脚本内容
         try:
@@ -47,44 +62,55 @@ def run_script_with_env_question(script_name):
         except Exception as e:
             print(f"读取脚本文件时出错: {e}")
             sys.exit(1)
-        
+
         # 检查脚本是否有main函数
-        has_main = re.search(r'def\s+main\s*\(\s*\)\s*:', content) is not None
-        
+        has_main = re.search(r"def\s+main\s*\(\s*\)\s*:", content) is not None
+
         # 转义问题中的特殊字符
-        escaped_question = question.replace("\\", "\\\\").replace("\"", "\\\"").replace("'", "\\'")
-        
+        escaped_question = question.replace("\\", "\\\\").replace('"', '\\"')  # backslashes must be doubled before any other escape is added
+        escaped_question = escaped_question.replace("'", "\\'")
+        escaped_question = escaped_question.replace("\r", "\\r").replace("\n", "\\n")  # multi-line questions would otherwise break the generated one-line string literal
+
         # 查找脚本中所有的question赋值 - 改进的正则表达式
         # 匹配单行和多行字符串赋值
-        question_assignments = re.findall(r'question\s*=\s*(?:["\'].*?["\']|""".*?"""|\'\'\'.*?\'\'\'|\(.*?\))', content, re.DOTALL)
+        question_assignments = re.findall(
+            r'question\s*=\s*(?:["\'].*?["\']|""".*?"""|\'\'\'.*?\'\'\'|\(.*?\))',
+            content,
+            re.DOTALL,
+        )
         print(f"在脚本中找到 {len(question_assignments)} 个question赋值")
-        
+
         # 修改脚本内容，替换所有的question赋值
         modified_content = content
-        
+
         # 如果脚本中有question赋值，替换所有的赋值
         if question_assignments:
             for assignment in question_assignments:
                 modified_content = modified_content.replace(
-                    assignment, 
-                    f'question = "{escaped_question}"'
+                    assignment, f'question = "{escaped_question}"'
                 )
             print(f"已替换脚本中的所有question赋值为: {question}")
         else:
             # 如果没有找到question赋值，尝试在main函数前插入
             if has_main:
-                main_match = re.search(r'def\s+main\s*\(\s*\)\s*:', content)
+                main_match = re.search(r"def\s+main\s*\(\s*\)\s*:", content)
                 if main_match:
                     insert_pos = main_match.start()
-                    modified_content = content[:insert_pos] + f'\n# 用户输入的问题\nquestion = "{escaped_question}"\n\n' + content[insert_pos:]
+                    modified_content = (
+                        content[:insert_pos]
+                        + f'\n# 用户输入的问题\nquestion = "{escaped_question}"\n\n'
+                        + content[insert_pos:]
+                    )
                     print(f"已在main函数前插入问题: {question}")
             else:
                 # 如果没有main函数，在文件开头插入
-                modified_content = f'# 用户输入的问题\nquestion = "{escaped_question}"\n\n' + content
+                modified_content = (
+                    f'# 用户输入的问题\nquestion = "{escaped_question}"\n\n' + content
+                )
                 print(f"已在文件开头插入问题: {question}")
-        
+
         # 添加monkey patch代码，确保construct_society函数使用用户的问题
-        monkey_patch_code = f'''
+        monkey_patch_code = f"""
 # 确保construct_society函数使用用户的问题
 original_construct_society = globals().get('construct_society')
 if original_construct_society:
@@ -95,24 +121,28 @@ if original_construct_society:
     # 替换原始函数
     globals()['construct_society'] = patched_construct_society
     print("已修补construct_society函数，确保使用用户问题")
-'''
-        
+"""
+
         # 在文件末尾添加monkey patch代码
         modified_content += monkey_patch_code
-        
+
         # 如果脚本没有调用main函数，添加调用代码
         if has_main and "__main__" not in content:
-            modified_content += '''
+            modified_content += """
 
 # 确保调用main函数
 if __name__ == "__main__":
     main()
-'''
+"""
             print("已添加main函数调用代码")
-        
+
         # 如果脚本没有construct_society调用，添加调用代码
-        if "construct_society" in content and "run_society" in content and "Answer:" not in content:
-            modified_content += f'''
+        if (
+            "construct_society" in content
+            and "run_society" in content
+            and "Answer:" not in content
+        ):
+            modified_content += f"""
 
 # 确保执行construct_society和run_society
 if "construct_society" in globals() and "run_society" in globals():
@@ -125,16 +155,16 @@ if "construct_society" in globals() and "run_society" in globals():
         print(f"运行时出错: {{e}}")
         import traceback
         traceback.print_exc()
-'''
+"""
             print("已添加construct_society和run_society调用代码")
-        
+
         # 执行修改后的脚本
         try:
             # 将脚本目录添加到sys.path
             script_dir = script_path.parent
             if str(script_dir) not in sys.path:
                 sys.path.insert(0, str(script_dir))
-            
+
             # 创建临时文件
             try:
                 with open(temp_script_path, "w", encoding="utf-8") as f:
@@ -143,32 +173,34 @@ if "construct_society" in globals() and "run_society" in globals():
             except Exception as e:
                 print(f"创建临时脚本文件时出错: {e}")
                 sys.exit(1)
-            
+
             try:
                 # 直接执行临时脚本
-                print(f"开始执行脚本...")
-                
+                print("开始执行脚本...")
+
                 # 如果有main函数，加载模块并调用main
                 if has_main:
                     # 加载临时模块
                     module_name = f"temp_{script_path.stem}"
                     module = load_module_from_path(module_name, temp_script_path)
-                    
+
                     if module is None:
                         print(f"错误: 无法加载模块 {module_name}")
                         sys.exit(1)
-                    
+
                     # 确保模块中有question变量，并且值是用户输入的问题
                     setattr(module, "question", question)
-                    
+
                     # 如果模块中有construct_society函数，修补它
                     if hasattr(module, "construct_society"):
                         original_func = module.construct_society
+
                         def patched_func(*args, **kwargs):
                             return original_func(question)
+
                         module.construct_society = patched_func
                         print("已在模块级别修补construct_society函数")
-                    
+
                     # 调用main函数
                     if hasattr(module, "main"):
                         print("调用main函数...")
@@ -182,34 +214,35 @@ if "construct_society" in globals() and "run_society" in globals():
                     # 使用更安全的方式执行脚本
                     with open(temp_script_path, "r", encoding="utf-8") as f:
                         script_code = f.read()
-                    
+
                     # 创建一个安全的全局命名空间
                     safe_globals = {
                         "__file__": str(temp_script_path),
-                        "__name__": "__main__"
+                        "__name__": "__main__",
                     }
                     # 添加内置函数
-                    safe_globals.update({k: v for k, v in globals().items() 
-                                        if k in ['__builtins__']})
-                    
+                    safe_globals.update(
+                        {k: v for k, v in globals().items() if k in ["__builtins__"]}
+                    )
+
                     # 执行脚本
                     exec(script_code, safe_globals)
-            
+
             except Exception as e:
                 print(f"执行脚本时出错: {e}")
                 traceback.print_exc()
                 sys.exit(1)
-            
+
         except Exception as e:
             print(f"处理脚本时出错: {e}")
             traceback.print_exc()
             sys.exit(1)
-        
+
     except Exception as e:
         print(f"处理脚本时出错: {e}")
         traceback.print_exc()
         sys.exit(1)
-    
+
     finally:
         # 删除临时文件
         if temp_script_path.exists():
@@ -219,11 +252,12 @@ if "construct_society" in globals() and "run_society" in globals():
             except Exception as e:
                 print(f"删除临时脚本文件时出错: {e}")
 
+
 if __name__ == "__main__":
     # 检查命令行参数
     if len(sys.argv) < 2:
         print("用法: python script_adapter.py <script_path>")
         sys.exit(1)
-    
+
     # 运行指定的脚本
-    run_script_with_env_question(sys.argv[1])
\ No newline at end of file
+    run_script_with_env_question(sys.argv[1])
diff --git a/owl/utils/document_toolkit.py b/owl/utils/document_toolkit.py
index 5de6c64..192d1f1 100644
--- a/owl/utils/document_toolkit.py
+++ b/owl/utils/document_toolkit.py
@@ -17,6 +17,7 @@ from camel.toolkits.function_tool import FunctionTool
 from camel.toolkits import ImageAnalysisToolkit, ExcelToolkit
 from camel.utils import retry_on_error
 from camel.logger import get_logger
+from camel.models import BaseModelBackend
 from docx2markdown._docx_to_markdown import docx_to_markdown
 from chunkr_ai import Chunkr
 import requests
@@ -40,8 +41,10 @@ class DocumentProcessingToolkit(BaseToolkit):
     This class provides method for processing docx, pdf, pptx, etc. It cannot process excel files.
     """
 
-    def __init__(self, cache_dir: Optional[str] = None):
-        self.image_tool = ImageAnalysisToolkit()
+    def __init__(
+        self, cache_dir: Optional[str] = None, model: Optional[BaseModelBackend] = None
+    ):
+        self.image_tool = ImageAnalysisToolkit(model=model)
         # self.audio_tool = AudioAnalysisToolkit()
         self.excel_tool = ExcelToolkit()
 
diff --git a/run_app.py b/run_app.py
index 89eee1b..f33f8f6 100644
--- a/run_app.py
+++ b/run_app.py
@@ -1,3 +1,16 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 
@@ -9,26 +22,27 @@ import os
 import sys
 from pathlib import Path
 
+
 def main():
     """主函数，启动OWL智能助手运行平台"""
     # 确保当前目录是项目根目录
     project_root = Path(__file__).resolve().parent
     os.chdir(project_root)
-    
+
     # 创建日志目录
     log_dir = project_root / "logs"
     log_dir.mkdir(exist_ok=True)
-    
+
     # 导入并运行应用
     sys.path.insert(0, str(project_root))
-    
+
     try:
         from owl.app import create_ui
-        
+
         # 创建并启动应用
         app = create_ui()
         app.queue().launch(share=False)
-        
+
     except ImportError as e:
         print(f"错误: 无法导入必要的模块。请确保已安装所有依赖项: {e}")
         print("提示: 运行 'pip install -r requirements.txt' 安装所有依赖项")
@@ -36,8 +50,10 @@ def main():
     except Exception as e:
         print(f"启动应用程序时出错: {e}")
         import traceback
+
         traceback.print_exc()
         sys.exit(1)
 
+
 if __name__ == "__main__":
-    main() 
\ No newline at end of file
+    main()