From c4b021dcb947d67f53e9405148c59977d1567b7e Mon Sep 17 00:00:00 2001 From: Wendong Date: Mon, 10 Mar 2025 12:07:08 +0800 Subject: [PATCH] update deepseek example and fix format --- owl/app.py | 405 ++++++++++++++++++++-------------- owl/run_deepseek_zh.py | 7 +- owl/script_adapter.py | 130 +++++++---- owl/utils/document_toolkit.py | 7 +- run_app.py | 28 ++- 5 files changed, 355 insertions(+), 222 deletions(-) diff --git a/owl/app.py b/owl/app.py index d4631a8..92af864 100644 --- a/owl/app.py +++ b/owl/app.py @@ -1,3 +1,16 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= import os import sys import gradio as gr @@ -25,7 +38,7 @@ SCRIPTS = { "Mini": "run_mini.py", "DeepSeek (中文)": "run_deepseek_zh.py", "Default": "run.py", - "GAIA Roleplaying": "run_gaia_roleplaying.py" + "GAIA Roleplaying": "run_gaia_roleplaying.py", } # 脚本描述 @@ -35,98 +48,100 @@ SCRIPT_DESCRIPTIONS = { "Mini": "轻量级版本,使用OpenAI GPT-4o模型", "DeepSeek (中文)": "使用DeepSeek模型,适合非多模态任务", "Default": "默认OWL实现,使用OpenAI GPT-4o模型和全套工具", - "GAIA Roleplaying": "GAIA基准测试实现,用于评估模型能力" + "GAIA Roleplaying": "GAIA基准测试实现,用于评估模型能力", } # 环境变量分组 ENV_GROUPS = { "模型API": [ { - "name": "OPENAI_API_KEY", - "label": "OpenAI API密钥", - "type": "password", + "name": "OPENAI_API_KEY", + "label": "OpenAI API密钥", + "type": "password", "required": False, - "help": "OpenAI API密钥,用于访问GPT模型。获取方式:https://platform.openai.com/api-keys" + "help": "OpenAI API密钥,用于访问GPT模型。获取方式:https://platform.openai.com/api-keys", }, { - "name": "OPENAI_API_BASE_URL", - "label": "OpenAI API基础URL", - "type": "text", + "name": "OPENAI_API_BASE_URL", + "label": "OpenAI API基础URL", + "type": "text", "required": False, - "help": "OpenAI API的基础URL,可选。如果使用代理或自定义端点,请设置此项。" + "help": "OpenAI API的基础URL,可选。如果使用代理或自定义端点,请设置此项。", }, { - "name": "QWEN_API_KEY", - "label": "阿里云Qwen API密钥", - "type": "password", + "name": "QWEN_API_KEY", + "label": "阿里云Qwen API密钥", + "type": "password", "required": False, - "help": "阿里云Qwen API密钥,用于访问Qwen模型。获取方式:https://help.aliyun.com/zh/model-studio/developer-reference/get-api-key" + "help": "阿里云Qwen API密钥,用于访问Qwen模型。获取方式:https://help.aliyun.com/zh/model-studio/developer-reference/get-api-key", }, { - "name": "DEEPSEEK_API_KEY", - "label": "DeepSeek API密钥", - "type": "password", + "name": "DEEPSEEK_API_KEY", + "label": "DeepSeek API密钥", + "type": "password", "required": False, - "help": "DeepSeek API密钥,用于访问DeepSeek模型。获取方式:https://platform.deepseek.com/api_keys" + "help": "DeepSeek API密钥,用于访问DeepSeek模型。获取方式:https://platform.deepseek.com/api_keys", }, ], "搜索工具": [ { - "name": "GOOGLE_API_KEY", - "label": "Google API密钥", - "type": "password", + "name": "GOOGLE_API_KEY", + "label": "Google API密钥", + "type": "password", "required": False, - "help": "Google搜索API密钥,用于网络搜索功能。获取方式:https://developers.google.com/custom-search/v1/overview" + "help": "Google搜索API密钥,用于网络搜索功能。获取方式:https://developers.google.com/custom-search/v1/overview", }, { - "name": "SEARCH_ENGINE_ID", - "label": "搜索引擎ID", - "type": "text", + "name": "SEARCH_ENGINE_ID", + "label": "搜索引擎ID", + "type": "text", "required": False, - "help": "Google自定义搜索引擎ID,与Google API密钥配合使用。获取方式:https://developers.google.com/custom-search/v1/overview" + "help": "Google自定义搜索引擎ID,与Google API密钥配合使用。获取方式:https://developers.google.com/custom-search/v1/overview", }, ], "其他工具": [ { - "name": "HF_TOKEN", - "label": "Hugging Face令牌", - "type": "password", + "name": "HF_TOKEN", + "label": "Hugging Face令牌", + "type": "password", "required": False, - "help": "Hugging Face API令牌,用于访问Hugging Face模型和数据集。获取方式:https://huggingface.co/join" + "help": "Hugging Face API令牌,用于访问Hugging Face模型和数据集。获取方式:https://huggingface.co/join", }, { - "name": "CHUNKR_API_KEY", - "label": "Chunkr API密钥", - "type": "password", + "name": "CHUNKR_API_KEY", + "label": "Chunkr API密钥", + "type": "password", "required": False, - "help": "Chunkr API密钥,用于文档处理功能。获取方式:https://chunkr.ai/" + "help": "Chunkr API密钥,用于文档处理功能。获取方式:https://chunkr.ai/", }, { - "name": "FIRECRAWL_API_KEY", - "label": "Firecrawl API密钥", - "type": "password", + "name": "FIRECRAWL_API_KEY", + "label": "Firecrawl API密钥", + "type": "password", "required": False, - "help": "Firecrawl API密钥,用于网页爬取功能。获取方式:https://www.firecrawl.dev/" + "help": "Firecrawl API密钥,用于网页爬取功能。获取方式:https://www.firecrawl.dev/", }, ], - "自定义环境变量": [] # 用户自定义的环境变量将存储在这里 + "自定义环境变量": [], # 用户自定义的环境变量将存储在这里 } + def get_script_info(script_name): """获取脚本的详细信息""" return SCRIPT_DESCRIPTIONS.get(script_name, "无描述信息") + def load_env_vars(): """加载环境变量""" env_vars = {} # 尝试从.env文件加载 dotenv.load_dotenv() - + # 获取所有环境变量 for group in ENV_GROUPS.values(): for var in group: env_vars[var["name"]] = os.environ.get(var["name"], "") - + # 加载.env文件中可能存在的其他环境变量 if Path(".env").exists(): with open(".env", "r", encoding="utf-8") as f: @@ -135,34 +150,37 @@ def load_env_vars(): if line and not line.startswith("#") and "=" in line: key, value = line.split("=", 1) key = key.strip() - value = value.strip().strip('"\'') - + value = value.strip().strip("\"'") + # 检查是否是已知的环境变量 known_var = False for group in ENV_GROUPS.values(): if any(var["name"] == key for var in group): known_var = True break - + # 如果不是已知的环境变量,添加到自定义环境变量组 if not known_var and key not in env_vars: - ENV_GROUPS["自定义环境变量"].append({ - "name": key, - "label": key, - "type": "text", - "required": False, - "help": "用户自定义环境变量" - }) + ENV_GROUPS["自定义环境变量"].append( + { + "name": key, + "label": key, + "type": "text", + "required": False, + "help": "用户自定义环境变量", + } + ) env_vars[key] = value - + return env_vars + def save_env_vars(env_vars): """保存环境变量到.env文件""" # 读取现有的.env文件内容 env_path = Path(".env") existing_content = {} - + if env_path.exists(): with open(env_path, "r", encoding="utf-8") as f: for line in f: @@ -170,105 +188,116 @@ def save_env_vars(env_vars): if line and not line.startswith("#") and "=" in line: key, value = line.split("=", 1) existing_content[key.strip()] = value.strip() - + # 更新环境变量 for key, value in env_vars.items(): if value: # 只保存非空值 # 确保值是字符串形式,并用引号包裹 value = str(value) # 确保值是字符串 - if not (value.startswith('"') and value.endswith('"')) and not (value.startswith("'") and value.endswith("'")): + if not (value.startswith('"') and value.endswith('"')) and not ( + value.startswith("'") and value.endswith("'") + ): value = f'"{value}"' existing_content[key] = value # 同时更新当前进程的环境变量 - os.environ[key] = value.strip('"\'') - + os.environ[key] = value.strip("\"'") + # 写入.env文件 with open(env_path, "w", encoding="utf-8") as f: for key, value in existing_content.items(): f.write(f"{key}={value}\n") - + return "✅ 环境变量已保存" + def add_custom_env_var(name, value, var_type): """添加自定义环境变量""" if not name: return "❌ 环境变量名不能为空", None - + # 检查是否已存在同名环境变量 for group in ENV_GROUPS.values(): if any(var["name"] == name for var in group): return f"❌ 环境变量 {name} 已存在", None - + # 添加到自定义环境变量组 - ENV_GROUPS["自定义环境变量"].append({ - "name": name, - "label": name, - "type": var_type, - "required": False, - "help": "用户自定义环境变量" - }) - + ENV_GROUPS["自定义环境变量"].append( + { + "name": name, + "label": name, + "type": var_type, + "required": False, + "help": "用户自定义环境变量", + } + ) + # 保存环境变量 env_vars = {name: value} save_env_vars(env_vars) - + # 返回成功消息和更新后的环境变量组 return f"✅ 已添加环境变量 {name}", ENV_GROUPS["自定义环境变量"] + def terminate_process(): """终止当前运行的进程""" global current_process - + with process_lock: if current_process is not None and current_process.poll() is None: # 在Windows上使用CTRL_BREAK_EVENT,在Unix上使用SIGTERM - if os.name == 'nt': + if os.name == "nt": current_process.send_signal(signal.CTRL_BREAK_EVENT) else: current_process.terminate() - + # 等待进程终止 try: current_process.wait(timeout=5) except subprocess.TimeoutExpired: # 如果进程没有在5秒内终止,强制终止 current_process.kill() - + log_queue.put("进程已终止\n") return "✅ 进程已终止" else: return "❌ 没有正在运行的进程" + def run_script(script_dropdown, question, progress=gr.Progress()): """运行选定的脚本并返回输出""" global current_process - + script_name = SCRIPTS.get(script_dropdown) if not script_name: return "❌ 无效的脚本选择", "", "", "", None - + if not question.strip(): return "请输入问题!", "", "", "", None - + # 清空日志队列 while not log_queue.empty(): log_queue.get() - + # 创建日志目录 log_dir = Path("logs") log_dir.mkdir(exist_ok=True) - + # 创建带时间戳的日志文件 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") log_file = log_dir / f"{script_name.replace('.py', '')}_{timestamp}.log" - + # 构建命令 - cmd = [sys.executable, os.path.join("owl", "script_adapter.py"), os.path.join("owl", script_name)] - + cmd = [ + sys.executable, + os.path.join("owl", "script_adapter.py"), + os.path.join("owl", script_name), + ] + # 创建环境变量副本并添加问题 env = os.environ.copy() env["OWL_QUESTION"] = question - + # 启动进程 with process_lock: current_process = subprocess.Popen( @@ -277,9 +306,9 @@ def run_script(script_dropdown, question, progress=gr.Progress()): stderr=subprocess.STDOUT, text=True, bufsize=1, - env=env + env=env, ) - + # 创建线程来读取输出 def read_output(): try: @@ -293,54 +322,67 @@ def run_script(script_dropdown, question, progress=gr.Progress()): log_queue.put(line) except Exception as e: log_queue.put(f"读取输出时出错: {str(e)}\n") - + # 启动读取线程 threading.Thread(target=read_output, daemon=True).start() - + # 收集日志 logs = [] progress(0, desc="正在运行...") - + # 等待进程完成或超时 start_time = time.time() timeout = 1800 # 30分钟超时 - + while current_process.poll() is None: # 检查是否超时 if time.time() - start_time > timeout: with process_lock: if current_process.poll() is None: - if os.name == 'nt': + if os.name == "nt": current_process.send_signal(signal.CTRL_BREAK_EVENT) else: current_process.terminate() log_queue.put("执行超时,已终止进程\n") break - + # 从队列获取日志 while not log_queue.empty(): log = log_queue.get() logs.append(log) - + # 更新进度 elapsed = time.time() - start_time progress(min(elapsed / 300, 0.99), desc="正在运行...") - + # 短暂休眠以减少CPU使用 time.sleep(0.1) - + # 每秒更新一次日志显示 - yield status_message(current_process), extract_answer(logs), "".join(logs), str(log_file), None - + yield ( + status_message(current_process), + extract_answer(logs), + "".join(logs), + str(log_file), + None, + ) + # 获取剩余日志 while not log_queue.empty(): logs.append(log_queue.get()) - + # 提取聊天历史(如果有) chat_history = extract_chat_history(logs) - + # 返回最终状态和日志 - return status_message(current_process), extract_answer(logs), "".join(logs), str(log_file), chat_history + return ( + status_message(current_process), + extract_answer(logs), + "".join(logs), + str(log_file), + chat_history, + ) + def status_message(process): """根据进程状态返回状态消息""" @@ -351,6 +393,7 @@ def status_message(process): else: return f"❌ 执行失败 (返回码: {process.returncode})" + def extract_answer(logs): """从日志中提取答案""" answer = "" @@ -360,12 +403,13 @@ def extract_answer(logs): break return answer + def extract_chat_history(logs): """尝试从日志中提取聊天历史""" try: chat_json_str = "" capture_json = False - + for log in logs: if "chat_history" in log: # 开始捕获JSON @@ -384,7 +428,7 @@ def extract_chat_history(logs): # 清理可能的额外文本 json_str = chat_json_str[:end_idx].strip() chat_data = json.loads(json_str) - + # 格式化为Gradio聊天组件可用的格式 formatted_chat = [] for msg in chat_data: @@ -402,11 +446,12 @@ def extract_chat_history(logs): pass return None + def create_ui(): """创建Gradio界面""" # 加载环境变量 env_vars = load_env_vars() - + with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as app: gr.Markdown( """ @@ -415,7 +460,7 @@ def create_ui(): 选择一个模型并输入您的问题,系统将运行相应的脚本并显示结果。 """ ) - + with gr.Tabs() as tabs: with gr.TabItem("运行模式"): with gr.Row(): @@ -425,144 +470,173 @@ def create_ui(): script_dropdown = gr.Dropdown( choices=list(SCRIPTS.keys()), value=default_script, - label="选择模式" + label="选择模式", ) - + script_info = gr.Textbox( - value=get_script_info(default_script) if default_script else "", + value=get_script_info(default_script) + if default_script + else "", label="模型描述", - interactive=False + interactive=False, ) - + script_dropdown.change( fn=lambda x: get_script_info(x), inputs=script_dropdown, - outputs=script_info + outputs=script_info, ) - + question_input = gr.Textbox( - lines=5, - placeholder="请输入您的问题...", - label="问题" + lines=5, placeholder="请输入您的问题...", label="问题" ) - + gr.Markdown( """ > **注意**: 您输入的问题将替换脚本中的默认问题。系统会自动处理问题的替换,确保您的问题被正确使用。 """ ) - + with gr.Row(): run_button = gr.Button("运行", variant="primary") stop_button = gr.Button("终止", variant="stop") - + with gr.Column(scale=2): with gr.Tabs(): with gr.TabItem("结果"): status_output = gr.Textbox(label="状态") answer_output = gr.Textbox(label="回答", lines=10) log_file_output = gr.Textbox(label="日志文件路径") - + with gr.TabItem("运行日志"): log_output = gr.Textbox(label="完整日志", lines=25) - + with gr.TabItem("聊天历史"): chat_output = gr.Chatbot(label="对话历史") - + # 示例问题 examples = [ - ["Qwen Mini (中文)", "浏览亚马逊并找出一款对程序员有吸引力的产品。请提供产品名称和价格"], - ["DeepSeek (中文)", "请分析GitHub上CAMEL-AI项目的最新统计数据。找出该项目的星标数量、贡献者数量和最近的活跃度。然后,创建一个简单的Excel表格来展示这些数据,并生成一个柱状图来可视化这些指标。最后,总结CAMEL项目的受欢迎程度和发展趋势。"], - ["Default", "Navigate to Amazon.com and identify one product that is attractive to coders. Please provide me with the product name and price. No need to verify your answer."] + [ + "Qwen Mini (中文)", + "浏览亚马逊并找出一款对程序员有吸引力的产品。请提供产品名称和价格", + ], + [ + "DeepSeek (中文)", + "请分析GitHub上CAMEL-AI项目的最新统计数据。找出该项目的星标数量、贡献者数量和最近的活跃度。然后,创建一个简单的Excel表格来展示这些数据,并生成一个柱状图来可视化这些指标。最后,总结CAMEL项目的受欢迎程度和发展趋势。", + ], + [ + "Default", + "Navigate to Amazon.com and identify one product that is attractive to coders. Please provide me with the product name and price. No need to verify your answer.", + ], ] - - gr.Examples( - examples=examples, - inputs=[script_dropdown, question_input] - ) - + + gr.Examples(examples=examples, inputs=[script_dropdown, question_input]) + with gr.TabItem("环境变量配置"): env_inputs = {} save_status = gr.Textbox(label="保存状态", interactive=False) - + # 添加自定义环境变量部分 with gr.Accordion("添加自定义环境变量", open=True): with gr.Row(): - new_var_name = gr.Textbox(label="环境变量名", placeholder="例如:MY_CUSTOM_API_KEY") - new_var_value = gr.Textbox(label="环境变量值", placeholder="输入值") - new_var_type = gr.Dropdown( - choices=["text", "password"], - value="text", - label="类型" + new_var_name = gr.Textbox( + label="环境变量名", placeholder="例如:MY_CUSTOM_API_KEY" ) - + new_var_value = gr.Textbox( + label="环境变量值", placeholder="输入值" + ) + new_var_type = gr.Dropdown( + choices=["text", "password"], value="text", label="类型" + ) + add_var_button = gr.Button("添加环境变量", variant="primary") add_var_status = gr.Textbox(label="添加状态", interactive=False) - + # 自定义环境变量列表 custom_vars_list = gr.JSON( value=ENV_GROUPS["自定义环境变量"], label="已添加的自定义环境变量", - visible=len(ENV_GROUPS["自定义环境变量"]) > 0 + visible=len(ENV_GROUPS["自定义环境变量"]) > 0, ) - + # 添加环境变量按钮点击事件 add_var_button.click( fn=add_custom_env_var, inputs=[new_var_name, new_var_value, new_var_type], - outputs=[add_var_status, custom_vars_list] + outputs=[add_var_status, custom_vars_list], ) - + # 现有环境变量配置 for group_name, vars in ENV_GROUPS.items(): - if group_name != "自定义环境变量" or len(vars) > 0: # 只显示非空的自定义环境变量组 - with gr.Accordion(group_name, open=(group_name != "自定义环境变量")): + if ( + group_name != "自定义环境变量" or len(vars) > 0 + ): # 只显示非空的自定义环境变量组 + with gr.Accordion( + group_name, open=(group_name != "自定义环境变量") + ): for var in vars: # 添加帮助信息 gr.Markdown(f"**{var['help']}**") - + if var["type"] == "password": env_inputs[var["name"]] = gr.Textbox( value=env_vars.get(var["name"], ""), label=var["label"], placeholder=f"请输入{var['label']}", - type="password" + type="password", ) else: env_inputs[var["name"]] = gr.Textbox( value=env_vars.get(var["name"], ""), label=var["label"], - placeholder=f"请输入{var['label']}" + placeholder=f"请输入{var['label']}", ) - + save_button = gr.Button("保存环境变量", variant="primary") - + # 保存环境变量 - save_inputs = [env_inputs[var_name] for group in ENV_GROUPS.values() for var in group for var_name in [var["name"]] if var_name in env_inputs] + save_inputs = [ + env_inputs[var_name] + for group in ENV_GROUPS.values() + for var in group + for var_name in [var["name"]] + if var_name in env_inputs + ] save_button.click( - fn=lambda *values: save_env_vars(dict(zip([var["name"] for group in ENV_GROUPS.values() for var in group if var["name"] in env_inputs], values))), + fn=lambda *values: save_env_vars( + dict( + zip( + [ + var["name"] + for group in ENV_GROUPS.values() + for var in group + if var["name"] in env_inputs + ], + values, + ) + ) + ), inputs=save_inputs, - outputs=save_status + outputs=save_status, ) - + # 运行脚本 run_button.click( fn=run_script, - inputs=[ - script_dropdown, - question_input + inputs=[script_dropdown, question_input], + outputs=[ + status_output, + answer_output, + log_output, + log_file_output, + chat_output, ], - outputs=[status_output, answer_output, log_output, log_file_output, chat_output], - show_progress=True + show_progress=True, ) - + # 终止运行 - stop_button.click( - fn=terminate_process, - inputs=[], - outputs=[status_output] - ) - + stop_button.click(fn=terminate_process, inputs=[], outputs=[status_output]) + # 添加页脚 gr.Markdown( """ @@ -585,10 +659,11 @@ def create_ui(): - 您输入的问题将替换脚本中的默认问题,确保问题与所选模型兼容 """ ) - + return app + if __name__ == "__main__": # 创建并启动应用 app = create_ui() - app.queue().launch(share=True) \ No newline at end of file + app.queue().launch(share=True) diff --git a/owl/run_deepseek_zh.py b/owl/run_deepseek_zh.py index c2df322..e7bad12 100644 --- a/owl/run_deepseek_zh.py +++ b/owl/run_deepseek_zh.py @@ -81,6 +81,11 @@ def construct_society(question: str) -> OwlRolePlaying: model_type=ModelType.DEEPSEEK_CHAT, model_config_dict={"temperature": 0}, ), + "document": ModelFactory.create( + model_platform=ModelPlatformType.DEEPSEEK, + model_type=ModelType.DEEPSEEK_CHAT, + model_config_dict={"temperature": 0}, + ), } # Configure toolkits @@ -89,7 +94,7 @@ def construct_society(question: str) -> OwlRolePlaying: SearchToolkit().search_duckduckgo, SearchToolkit().search_wiki, *ExcelToolkit().get_tools(), - *DocumentProcessingToolkit().get_tools(), + *DocumentProcessingToolkit(model=models["document"]).get_tools(), ] # Configure agent roles and parameters diff --git a/owl/script_adapter.py b/owl/script_adapter.py index db285c5..4f796dc 100644 --- a/owl/script_adapter.py +++ b/owl/script_adapter.py @@ -1,3 +1,16 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= import os import sys import importlib.util @@ -5,6 +18,7 @@ import re from pathlib import Path import traceback + def load_module_from_path(module_name, file_path): """从文件路径加载Python模块""" try: @@ -12,7 +26,7 @@ def load_module_from_path(module_name, file_path): if spec is None: print(f"错误: 无法从 {file_path} 创建模块规范") return None - + module = importlib.util.module_from_spec(spec) sys.modules[module_name] = module spec.loader.exec_module(module) @@ -22,6 +36,7 @@ def load_module_from_path(module_name, file_path): traceback.print_exc() return None + def run_script_with_env_question(script_name): """使用环境变量中的问题运行脚本""" # 获取环境变量中的问题 @@ -29,16 +44,16 @@ def run_script_with_env_question(script_name): if not question: print("错误: 未设置OWL_QUESTION环境变量") sys.exit(1) - + # 脚本路径 script_path = Path(script_name).resolve() if not script_path.exists(): print(f"错误: 脚本 {script_path} 不存在") sys.exit(1) - + # 创建临时文件路径 temp_script_path = script_path.with_name(f"temp_{script_path.name}") - + try: # 读取脚本内容 try: @@ -47,44 +62,55 @@ def run_script_with_env_question(script_name): except Exception as e: print(f"读取脚本文件时出错: {e}") sys.exit(1) - + # 检查脚本是否有main函数 - has_main = re.search(r'def\s+main\s*\(\s*\)\s*:', content) is not None - + has_main = re.search(r"def\s+main\s*\(\s*\)\s*:", content) is not None + # 转义问题中的特殊字符 - escaped_question = question.replace("\\", "\\\\").replace("\"", "\\\"").replace("'", "\\'") - + escaped_question = ( + question.replace("\\", "\\\\").replace('"', '\\"').replace("'", "\\'") + ) + # 查找脚本中所有的question赋值 - 改进的正则表达式 # 匹配单行和多行字符串赋值 - question_assignments = re.findall(r'question\s*=\s*(?:["\'].*?["\']|""".*?"""|\'\'\'.*?\'\'\'|\(.*?\))', content, re.DOTALL) + question_assignments = re.findall( + r'question\s*=\s*(?:["\'].*?["\']|""".*?"""|\'\'\'.*?\'\'\'|\(.*?\))', + content, + re.DOTALL, + ) print(f"在脚本中找到 {len(question_assignments)} 个question赋值") - + # 修改脚本内容,替换所有的question赋值 modified_content = content - + # 如果脚本中有question赋值,替换所有的赋值 if question_assignments: for assignment in question_assignments: modified_content = modified_content.replace( - assignment, - f'question = "{escaped_question}"' + assignment, f'question = "{escaped_question}"' ) print(f"已替换脚本中的所有question赋值为: {question}") else: # 如果没有找到question赋值,尝试在main函数前插入 if has_main: - main_match = re.search(r'def\s+main\s*\(\s*\)\s*:', content) + main_match = re.search(r"def\s+main\s*\(\s*\)\s*:", content) if main_match: insert_pos = main_match.start() - modified_content = content[:insert_pos] + f'\n# 用户输入的问题\nquestion = "{escaped_question}"\n\n' + content[insert_pos:] + modified_content = ( + content[:insert_pos] + + f'\n# 用户输入的问题\nquestion = "{escaped_question}"\n\n' + + content[insert_pos:] + ) print(f"已在main函数前插入问题: {question}") else: # 如果没有main函数,在文件开头插入 - modified_content = f'# 用户输入的问题\nquestion = "{escaped_question}"\n\n' + content + modified_content = ( + f'# 用户输入的问题\nquestion = "{escaped_question}"\n\n' + content + ) print(f"已在文件开头插入问题: {question}") - + # 添加monkey patch代码,确保construct_society函数使用用户的问题 - monkey_patch_code = f''' + monkey_patch_code = f""" # 确保construct_society函数使用用户的问题 original_construct_society = globals().get('construct_society') if original_construct_society: @@ -95,24 +121,28 @@ if original_construct_society: # 替换原始函数 globals()['construct_society'] = patched_construct_society print("已修补construct_society函数,确保使用用户问题") -''' - +""" + # 在文件末尾添加monkey patch代码 modified_content += monkey_patch_code - + # 如果脚本没有调用main函数,添加调用代码 if has_main and "__main__" not in content: - modified_content += ''' + modified_content += """ # 确保调用main函数 if __name__ == "__main__": main() -''' +""" print("已添加main函数调用代码") - + # 如果脚本没有construct_society调用,添加调用代码 - if "construct_society" in content and "run_society" in content and "Answer:" not in content: - modified_content += f''' + if ( + "construct_society" in content + and "run_society" in content + and "Answer:" not in content + ): + modified_content += f""" # 确保执行construct_society和run_society if "construct_society" in globals() and "run_society" in globals(): @@ -125,16 +155,16 @@ if "construct_society" in globals() and "run_society" in globals(): print(f"运行时出错: {{e}}") import traceback traceback.print_exc() -''' +""" print("已添加construct_society和run_society调用代码") - + # 执行修改后的脚本 try: # 将脚本目录添加到sys.path script_dir = script_path.parent if str(script_dir) not in sys.path: sys.path.insert(0, str(script_dir)) - + # 创建临时文件 try: with open(temp_script_path, "w", encoding="utf-8") as f: @@ -143,32 +173,34 @@ if "construct_society" in globals() and "run_society" in globals(): except Exception as e: print(f"创建临时脚本文件时出错: {e}") sys.exit(1) - + try: # 直接执行临时脚本 - print(f"开始执行脚本...") - + print("开始执行脚本...") + # 如果有main函数,加载模块并调用main if has_main: # 加载临时模块 module_name = f"temp_{script_path.stem}" module = load_module_from_path(module_name, temp_script_path) - + if module is None: print(f"错误: 无法加载模块 {module_name}") sys.exit(1) - + # 确保模块中有question变量,并且值是用户输入的问题 setattr(module, "question", question) - + # 如果模块中有construct_society函数,修补它 if hasattr(module, "construct_society"): original_func = module.construct_society + def patched_func(*args, **kwargs): return original_func(question) + module.construct_society = patched_func print("已在模块级别修补construct_society函数") - + # 调用main函数 if hasattr(module, "main"): print("调用main函数...") @@ -182,34 +214,35 @@ if "construct_society" in globals() and "run_society" in globals(): # 使用更安全的方式执行脚本 with open(temp_script_path, "r", encoding="utf-8") as f: script_code = f.read() - + # 创建一个安全的全局命名空间 safe_globals = { "__file__": str(temp_script_path), - "__name__": "__main__" + "__name__": "__main__", } # 添加内置函数 - safe_globals.update({k: v for k, v in globals().items() - if k in ['__builtins__']}) - + safe_globals.update( + {k: v for k, v in globals().items() if k in ["__builtins__"]} + ) + # 执行脚本 exec(script_code, safe_globals) - + except Exception as e: print(f"执行脚本时出错: {e}") traceback.print_exc() sys.exit(1) - + except Exception as e: print(f"处理脚本时出错: {e}") traceback.print_exc() sys.exit(1) - + except Exception as e: print(f"处理脚本时出错: {e}") traceback.print_exc() sys.exit(1) - + finally: # 删除临时文件 if temp_script_path.exists(): @@ -219,11 +252,12 @@ if "construct_society" in globals() and "run_society" in globals(): except Exception as e: print(f"删除临时脚本文件时出错: {e}") + if __name__ == "__main__": # 检查命令行参数 if len(sys.argv) < 2: print("用法: python script_adapter.py ") sys.exit(1) - + # 运行指定的脚本 - run_script_with_env_question(sys.argv[1]) \ No newline at end of file + run_script_with_env_question(sys.argv[1]) diff --git a/owl/utils/document_toolkit.py b/owl/utils/document_toolkit.py index 5de6c64..192d1f1 100644 --- a/owl/utils/document_toolkit.py +++ b/owl/utils/document_toolkit.py @@ -17,6 +17,7 @@ from camel.toolkits.function_tool import FunctionTool from camel.toolkits import ImageAnalysisToolkit, ExcelToolkit from camel.utils import retry_on_error from camel.logger import get_logger +from camel.models import BaseModelBackend from docx2markdown._docx_to_markdown import docx_to_markdown from chunkr_ai import Chunkr import requests @@ -40,8 +41,10 @@ class DocumentProcessingToolkit(BaseToolkit): This class provides method for processing docx, pdf, pptx, etc. It cannot process excel files. """ - def __init__(self, cache_dir: Optional[str] = None): - self.image_tool = ImageAnalysisToolkit() + def __init__( + self, cache_dir: Optional[str] = None, model: Optional[BaseModelBackend] = None + ): + self.image_tool = ImageAnalysisToolkit(model=model) # self.audio_tool = AudioAnalysisToolkit() self.excel_tool = ExcelToolkit() diff --git a/run_app.py b/run_app.py index 89eee1b..f33f8f6 100644 --- a/run_app.py +++ b/run_app.py @@ -1,3 +1,16 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= #!/usr/bin/env python # -*- coding: utf-8 -*- @@ -9,26 +22,27 @@ import os import sys from pathlib import Path + def main(): """主函数,启动OWL智能助手运行平台""" # 确保当前目录是项目根目录 project_root = Path(__file__).resolve().parent os.chdir(project_root) - + # 创建日志目录 log_dir = project_root / "logs" log_dir.mkdir(exist_ok=True) - + # 导入并运行应用 sys.path.insert(0, str(project_root)) - + try: from owl.app import create_ui - + # 创建并启动应用 app = create_ui() app.queue().launch(share=False) - + except ImportError as e: print(f"错误: 无法导入必要的模块。请确保已安装所有依赖项: {e}") print("提示: 运行 'pip install -r requirements.txt' 安装所有依赖项") @@ -36,8 +50,10 @@ def main(): except Exception as e: print(f"启动应用程序时出错: {e}") import traceback + traceback.print_exc() sys.exit(1) + if __name__ == "__main__": - main() \ No newline at end of file + main()