add community-use-cae:excel_analyzer

2025-12-26 10:07:51 +08:00 · 2025-03-31 18:31:41 +08:00 · 2025-03-31 18:31:41 +08:00 · 99cd01db8f
commit 99cd01db8f
parent df2b688e2d
6 changed files with 629 additions and 0 deletions
--- a/community_usecase/excel_analyzer/README.md
+++ b/community_usecase/excel_analyzer/README.md
@ -0,0 +1,25 @@
+# Excel Analyzer  
+This project uses **Owl** for data analysis and visualization.
+
+## Features
+
+- Provides both English and Chinese versions of the raw data and prompts
+- Utilizes **CodeExecutionToolkit**, **ExcelToolkit**, and **FileWriteToolkit** to complete related tasks  
+- Implements **ExcelRolePlaying** based on **OwlRolePlaying**, which overrides the `system_prompt` with a cleaner, more focused version tailored for data analysis scenarios  
+
+## How to Use  
+1. Set up the environment according to Owl's official instructions
+2. Run the following commands:  
+    ```bash
+    cd community_usecase/excel_analyzer
+
+    # Chinese version
+    python data_analyzer_zh.py
+
+    # English version
+    python data_analyzer_en.py
+    ```
+3. The analysis results will be saved in the current directory
+
+## Demo Video
+
--- a/community_usecase/excel_analyzer/README_zh.md
+++ b/community_usecase/excel_analyzer/README_zh.md
@ -0,0 +1,30 @@
+# Excel Analyzer
+这个项目使用owl来做数据分析和可视化
+
+
+## Features
+
+- 提供了英文，中文两个版本的原始数据和prompt，方便理解
+- 使用**CodeExecutionToolkit**，**ExcelToolkit**，**FileWriteToolkit**来完成相关工作
+- 在**OwlRolePlaying**基础之上实现了**ExcelRolePalying**，它重写了system_prompt，更简洁，聚焦在数据分析场景
+
+
+## How to use 
+1. 按照owl的官方流程搭建好环境
+2. 运行
+    ```
+    cd community_usecase/excel_analyzer
+
+    # Chinese version
+    python data_analyzer_zh.py
+
+    # English version
+    python data_analyzer_en.py
+    ```
+3. 数据分析的结果将会保存在当前目录下
+
+
+## Demo
+视频结果：[link]
+
+
--- a/community_usecase/excel_analyzer/data/admission_en.xlsx
+++ b/community_usecase/excel_analyzer/data/admission_en.xlsx
--- a/community_usecase/excel_analyzer/data/admission_zh.xlsx
+++ b/community_usecase/excel_analyzer/data/admission_zh.xlsx
--- a/community_usecase/excel_analyzer/data_analyzer_en.py
+++ b/community_usecase/excel_analyzer/data_analyzer_en.py
@ -0,0 +1,270 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+import os
+import sys
+
+
+from dotenv import load_dotenv
+from camel.configs import ChatGPTConfig
+from camel.models import ModelFactory
+from camel.messages.base import BaseMessage
+
+from camel.toolkits import (
+    CodeExecutionToolkit,
+    ExcelToolkit,
+    FileWriteToolkit,
+)
+from camel.types import ModelPlatformType
+
+from owl.utils import OwlRolePlaying
+from typing import Dict, List, Optional, Tuple
+from camel.logger import set_log_level, set_log_file, get_logger
+
+import pathlib
+
+# Module-level logger, named after this module.
+logger = get_logger(__name__)
+
+# Walk three ``parent`` steps up from this file; given the path
+# community_usecase/excel_analyzer/<this file>, that is the repository root.
+base_dir = pathlib.Path(__file__).parent.parent.parent
+# Environment variables are loaded from ``<base_dir>/owl/.env``.
+env_path = base_dir / "owl" / ".env"
+load_dotenv(dotenv_path=str(env_path))
+
+# Set the log level to DEBUG at import time.
+set_log_level(level="DEBUG")
+
+class ExcelRolePalying(OwlRolePlaying):
+    r"""Role-playing society whose system prompts target Excel data analysis.
+
+    Subclasses ``OwlRolePlaying`` and supplies its own
+    ``_construct_gaia_sys_msgs``.
+    NOTE(review): presumably this replaces a base-class hook of the same
+    name -- confirm against ``owl.utils.OwlRolePlaying``.
+    """
+    def _construct_gaia_sys_msgs(self):
+        r"""Build the system messages for the user and assistant agents.
+
+        Both prompts are English f-strings that embed ``self.task_prompt``.
+
+        Returns:
+            tuple: ``(user_sys_msg, assistant_sys_msg)``, created with
+                ``BaseMessage.make_user_message`` and
+                ``BaseMessage.make_assistant_message`` using
+                ``self.user_role_name`` and ``self.assistant_role_name``.
+        """
+        # Prompt for the instructing ("user") agent; the text is sent verbatim,
+        # so whitespace inside the literal is significant.
+        user_system_prompt = f"""
+===== RULES OF USER =====
+Never forget you are a user and I am a assistant. Never flip roles! You will always instruct me. We share a common interest in collaborating to successfully complete a task.
+I must help you to complete a difficult task.
+You must instruct me based on my expertise and your needs to solve the task step by step. The format of your instruction is: `Instruction: [YOUR INSTRUCTION]`, where "Instruction" describes a sub-task or question.
+You must give me one instruction at a time.
+I must write a response that appropriately solves the requested instruction.
+You should instruct me not ask me questions.
+
+Please note that the task may be very complicated. Do not attempt to solve the task by single step. You must instruct me to find the answer step by step.
+Here are some tips that will help you to give more valuable instructions about our task to me:
+<tips>
+- I can use various tools, such as Excel Toolkit and Code Execution Toolkit.  
+
+- Although the task may be complex, the answer exists.  
+  If you find that the current approach does not lead to the answer, reconsider the task, and use alternative methods or tools to achieve the same goal.  
+
+- Always remind me to verify whether the final answer is correct!  
+  This can be done in multiple ways, such as screenshots, web analysis, etc.  
+
+- If I have written code, remind me to run the code and obtain the results.  
+
+- Flexibly use code to solve problems, especially for Excel-related tasks.  
+
+</tips>
+
+Now, here is the overall task: <task>{self.task_prompt}</task>. Never forget our task!
+
+Now you must start to instruct me to solve the task step-by-step. Do not add anything else other than your instruction!
+Keep giving me instructions until you think the task is completed.
+When the task is completed, you must only reply with a single word <TASK_DONE>.
+Never say <TASK_DONE> unless my responses have solved your task.
+        """
+
+        # Prompt for the tool-using ("assistant") agent.
+        assistant_system_prompt = f"""
+===== RULES OF ASSISTANT =====
+Never forget you are a assistant and I am a user. Never flip roles! Never instruct me! You have to utilize your available tools to solve the task I assigned.
+We share a common interest in collaborating to successfully complete a complex task.
+You must help me to complete the task.
+
+Here is our overall task: {self.task_prompt}. Never forget our task!
+
+I must instruct you based on your expertise and my needs to complete the task. An instruction is typically a sub-task or question.
+
+You must leverage your available tools, try your best to solve the problem, and explain your solutions.
+Unless I say the task is completed, you should always start with:
+Solution: [YOUR_SOLUTION]
+[YOUR_SOLUTION] should be specific, including detailed explanations and provide preferable detailed implementations and examples and lists for task-solving.
+
+Please note that our overall task may be very complicated. Here are some tips that may help you solve the task:
+<tips>
+- If one method fails, try another. The answer exists!  
+- When it comes to viewing information in an Excel file, you can always start by writing Python code to read the Excel file and check sheet names, column names, and similar details.  
+- When providing Python code, always remember to import the necessary libraries at the beginning, such as the commonly used libraries for Excel analysis below:  
+```
+import pandas as pd
+```
+- Always verify whether your final answer is correct!  
+- Always write complete code from scratch. After writing the code, be sure to run it and obtain the results!  
+  If you encounter errors, try debugging the code.  
+  Note that the code execution environment does not support interactive input.  
+- If the tool fails to run or the code does not execute correctly,  
+  never assume that it has returned the correct result and continue reasoning based on it!  
+  The correct approach is to analyze the cause of the error and try to fix it!  
+</tips>
+
+        """
+
+        # Wrap each prompt in a message object tagged with its role name.
+        user_sys_msg = BaseMessage.make_user_message(
+            role_name=self.user_role_name, content=user_system_prompt
+        )
+
+        assistant_sys_msg = BaseMessage.make_assistant_message(
+            role_name=self.assistant_role_name, content=assistant_system_prompt
+        )
+
+        return user_sys_msg, assistant_sys_msg
+
+def run_society(
+    society: ExcelRolePalying,
+    round_limit: int = 15,
+) -> Tuple[str, List[dict], dict]:
+    overall_completion_token_count = 0
+    overall_prompt_token_count = 0
+
+    chat_history = []
+    init_prompt = """
+    Now please give me instructions to solve over overall task step by step. If the task requires some specific knowledge, please instruct me to use tools to complete the task.
+        """
+    input_msg = society.init_chat(init_prompt)
+    for _round in range(round_limit):
+        assistant_response, user_response = society.step(input_msg)
+        # Check if usage info is available before accessing it
+        if assistant_response.info.get("usage") and user_response.info.get("usage"):
+            overall_completion_token_count += assistant_response.info["usage"].get(
+                "completion_tokens", 0
+            ) + user_response.info["usage"].get("completion_tokens", 0)
+            overall_prompt_token_count += assistant_response.info["usage"].get(
+                "prompt_tokens", 0
+            ) + user_response.info["usage"].get("prompt_tokens", 0)
+
+        # convert tool call to dict
+        tool_call_records: List[dict] = []
+        if assistant_response.info.get("tool_calls"):
+            for tool_call in assistant_response.info["tool_calls"]:
+                tool_call_records.append(tool_call.as_dict())
+
+        _data = {
+            "user": user_response.msg.content
+            if hasattr(user_response, "msg") and user_response.msg
+            else "",
+            "assistant": assistant_response.msg.content
+            if hasattr(assistant_response, "msg") and assistant_response.msg
+            else "",
+            "tool_calls": tool_call_records,
+        }
+
+        chat_history.append(_data)
+        logger.info(
+            f"Round #{_round} user_response:\n {user_response.msgs[0].content if user_response.msgs and len(user_response.msgs) > 0 else ''}"
+        )
+        logger.info(
+            f"Round #{_round} assistant_response:\n {assistant_response.msgs[0].content if assistant_response.msgs and len(assistant_response.msgs) > 0 else ''}"
+        )
+
+        if (
+            assistant_response.terminated
+            or user_response.terminated
+            or "TASK_DONE" in user_response.msg.content
+        ):
+            break
+
+        input_msg = assistant_response.msg
+
+    answer = chat_history[-1]["assistant"]
+    token_info = {
+        "completion_token_count": overall_completion_token_count,
+        "prompt_token_count": overall_prompt_token_count,
+    }
+
+    return answer, chat_history, token_info
+
+def construct_society(question: str) -> ExcelRolePalying:
+    r"""Construct a society of agents based on the given question.
+
+    Args:
+        question (str): The task or question to be addressed by the society.
+
+    Returns:
+        OwlRolePlaying: A configured society of agents ready to address the question.
+    """
+
+    # base_model_config = {
+    #     "model_platform": ModelPlatformType.DEEPSEEK,
+    #     "model_type": 'deepseek-chat',
+    #     "model_config_dict": ChatGPTConfig(temperature=0.1, max_tokens=8192).as_dict(),
+    # }
+
+    # Create models for different components using Azure OpenAI
+    base_model_config = {
+        "model_platform": ModelPlatformType.AZURE,
+        "model_type": os.getenv("AZURE_OPENAI_MODEL_TYPE"),
+        "model_config_dict": ChatGPTConfig(temperature=0.01, max_tokens=4096).as_dict(),
+    }
+
+
+    models = {
+        "user": ModelFactory.create(**base_model_config),
+        "assistant": ModelFactory.create(**base_model_config),
+    }
+
+    # Configure toolkits
+    tools = [
+        *CodeExecutionToolkit(sandbox="subprocess", verbose=True).get_tools(),
+        *ExcelToolkit().get_tools(),
+        *FileWriteToolkit(output_dir="./").get_tools(),
+    ]
+
+    # Configure agent roles and parameters
+    user_agent_kwargs = {"model": models["user"]}
+    assistant_agent_kwargs = {"model": models["assistant"], "tools": tools}
+
+    # Configure task parameters
+    task_kwargs = {
+        "task_prompt": question,
+        "with_task_specify": False,
+    }
+
+    # Create and return the society
+    society = ExcelRolePalying(
+        **task_kwargs,
+        user_role_name="user",
+        user_agent_kwargs=user_agent_kwargs,
+        assistant_role_name="assistant",
+        assistant_agent_kwargs=assistant_agent_kwargs,
+        output_language="English"
+    )
+
+    return society
+
+
+def main():
+    r"""Main function to run the OWL system with Azure OpenAI."""
+    # Example question
+
+
+    default_task = "Please help analyze the number of admitted students, as well as the highest and lowest scores for each college in this file. Visualize this information in a single chart and save it in the current directory. The file path is `./data/admission_en.xlsx.`"
+
+
+    set_log_file('log.txt')
+
+    # Override default task if command line argument is provided
+    task = sys.argv[1] if len(sys.argv) > 1 else default_task
+
+    # Construct and run the society
+    society = construct_society(task)
+
+    answer, chat_history, token_count = run_society(society)
+
+    # Output the result
+    print(f"\033[94mAnswer: {answer}\033[0m")
+
+
+if __name__ == "__main__":
+    main()
--- a/community_usecase/excel_analyzer/data_analyzer_zh.py
+++ b/community_usecase/excel_analyzer/data_analyzer_zh.py
@ -0,0 +1,304 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+import os
+import sys
+
+
+from dotenv import load_dotenv
+from camel.configs import ChatGPTConfig
+from camel.models import ModelFactory
+from camel.messages.base import BaseMessage
+
+from camel.toolkits import (
+    CodeExecutionToolkit,
+    ExcelToolkit,
+    FileWriteToolkit,
+)
+from camel.types import ModelPlatformType
+
+from owl.utils import OwlRolePlaying
+from typing import Dict, List, Optional, Tuple
+from camel.logger import set_log_level, set_log_file, get_logger
+
+import pathlib
+
+# Module-level logger, named after this module.
+logger = get_logger(__name__)
+
+# Walk three ``parent`` steps up from this file; given the path
+# community_usecase/excel_analyzer/<this file>, that is the repository root.
+base_dir = pathlib.Path(__file__).parent.parent.parent
+# Environment variables are loaded from ``<base_dir>/owl/.env``.
+env_path = base_dir / "owl" / ".env"
+load_dotenv(dotenv_path=str(env_path))
+
+# Set the log level to DEBUG at import time.
+set_log_level(level="DEBUG")
+
+class ExcelRolePalying(OwlRolePlaying):
+    r"""Role-playing society whose system prompts target Excel data analysis.
+
+    Subclasses ``OwlRolePlaying`` and supplies its own
+    ``_construct_gaia_sys_msgs`` with Chinese-language prompts.
+    NOTE(review): presumably this replaces a base-class hook of the same
+    name -- confirm against ``owl.utils.OwlRolePlaying``.
+    """
+    def _construct_gaia_sys_msgs(self):
+        r"""Build the system messages for the user and assistant agents.
+
+        Both prompts are Chinese f-strings that embed ``self.task_prompt``.
+
+        Returns:
+            tuple: ``(user_sys_msg, assistant_sys_msg)``, created with
+                ``BaseMessage.make_user_message`` and
+                ``BaseMessage.make_assistant_message`` using
+                ``self.user_role_name`` and ``self.assistant_role_name``.
+        """
+        # Prompt for the instructing ("user") agent; the text is sent verbatim,
+        # so whitespace inside the literal is significant.
+        user_system_prompt = f"""
+### ===== 用户规则 =====  
+永远不要忘记，你是用户，而我是助手。绝对不能互换角色！ 你必须始终指导我，我们的共同目标是合作完成任务。  
+我的职责是帮助你完成一个复杂的任务。  
+
+你必须根据我的专业能力和你的需求逐步指导我解决任务。  
+你的指令格式必须为：  
+`Instruction: [你的指令]`  
+其中，"Instruction" 代表一个子任务或问题。  
+
+- 你每次只能给出一个指令。  
+- 我必须依据你的指令提供适当的解决方案。  
+- 你只能指导我，而不能向我提问。  
+
+---
+
+### 请注意  
+任务可能会非常复杂，不要试图一次性解决整个任务！  
+你必须让我一步一步地寻找答案。  
+
+以下是一些能帮助你给出更有价值指令的提示：  
+
+#### <tips>
+- 我可以使用各种工具，比如：excel Toolkit 和 code Execution Toolkit 等。
+
+- 尽管任务复杂，但答案是存在的。  
+  如果你发现当前方案无法找到答案，请重新规划任务，使用其他方法或工具来达到相同的目标。  
+
+- 务必提醒我验证最终答案是否正确！  
+  这可以通过多种方式完成，例如截图、网页分析等。  
+
+- 如果我编写了代码，请提醒我运行代码并获取结果。  
+
+- 请灵活使用代码解决问题，尤其是涉及 Excel 相关任务时。  
+
+</tips>
+
+---
+
+### 任务描述  
+当前任务如下：  
+<task>{self.task_prompt}</task>  
+永远不要忘记这个任务！  
+
+### 任务执行规则  
+你现在必须开始 逐步指导我完成任务。  
+- 不要添加任何额外的内容！  
+- 继续给出指令，直到你认为任务完成。  
+
+### 任务完成规则  
+当任务完成时，你只能回复一个单词：  
+`<TASK_DONE>`  
+
+在我的回答完全解决你的任务之前，绝对不要说 `<TASK_DONE>`！
+        """
+
+        # Prompt for the tool-using ("assistant") agent.
+        assistant_system_prompt = f"""
+===== 助手规则 =====  
+永远不要忘记，你是助手，而我是用户。绝对不能互换角色！ 绝对不能指挥我！ 你必须利用你的工具来解决我分配的任务。  
+我们的共同目标是合作完成一个复杂的任务。  
+你的职责是帮助我完成任务。  
+
+当前任务如下：  
+{self.task_prompt}  
+永远不要忘记这个任务！  
+
+我会根据你的专业能力和我的需求指导你完成任务。  
+每条指令通常是一个子任务或问题。  
+
+你必须充分利用你的工具，尽力解决问题，并详细解释你的解决方案。  
+除非我宣布任务完成，你的回答必须以以下格式开始：  
+
+Solution: [你的解决方案]  
+
+[你的解决方案] 必须具体，包含详细的解释，并提供可行的实现方案、示例或清单来解决任务。  
+
+---
+
+### 请注意：整体任务可能会非常复杂！  
+以下是一些可能帮助你解决任务的重要提示：  
+
+#### <tips>  
+- 如果一种方法失败了，尝试其他方法。答案是存在的！  
+- 当涉及到查看某个excel信息的时候，你可以总是以编写python代码读入excel文件查看sheet名，列名之类的信息开始。
+- 当你尝试给出python代码的时候，始终记得在最开头import相关的库，比如下面这些excel分析常见的库
+```
+import pandas as pd
+```
+- 始终验证你的最终答案是否正确！  
+- 请每次都从头开始编写完整代码，编写代码后，务必运行代码并获取结果！  
+  如果遇到错误，尝试调试代码。  
+  请注意，代码执行环境不支持交互式输入。  
+- 如果工具运行失败，或者代码无法正确运行，  
+  绝对不要假设其返回了正确结果，并在此基础上继续推理！  
+  正确的做法是分析错误原因，并尝试修正！  
+- 如果你写的代码涉及到用matplotlib画图，请始终在代码开头下面这段代码：
+```
+import matplotlib
+matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # 支持中文
+matplotlib.rcParams['axes.unicode_minus'] = False  # 解决负号显示问题
+```
+- 请始终使用英文来画图，比如title, xlabel, ylabel以及其他均使用英文。
+</tips>
+        """
+
+        # Wrap each prompt in a message object tagged with its role name.
+        user_sys_msg = BaseMessage.make_user_message(
+            role_name=self.user_role_name, content=user_system_prompt
+        )
+
+        assistant_sys_msg = BaseMessage.make_assistant_message(
+            role_name=self.assistant_role_name, content=assistant_system_prompt
+        )
+
+        return user_sys_msg, assistant_sys_msg
+
+def run_society(
+    society: OwlRolePlaying,
+    round_limit: int = 15,
+) -> Tuple[str, List[dict], dict]:
+    overall_completion_token_count = 0
+    overall_prompt_token_count = 0
+
+    chat_history = []
+    init_prompt = """
+现在请给我逐步解决整个任务的指令。如果任务需要一些特定的知识，请指示我使用工具来完成任务。
+        """
+    input_msg = society.init_chat(init_prompt)
+    for _round in range(round_limit):
+        assistant_response, user_response = society.step(input_msg)
+        # Check if usage info is available before accessing it
+        if assistant_response.info.get("usage") and user_response.info.get("usage"):
+            overall_completion_token_count += assistant_response.info["usage"].get(
+                "completion_tokens", 0
+            ) + user_response.info["usage"].get("completion_tokens", 0)
+            overall_prompt_token_count += assistant_response.info["usage"].get(
+                "prompt_tokens", 0
+            ) + user_response.info["usage"].get("prompt_tokens", 0)
+
+        # convert tool call to dict
+        tool_call_records: List[dict] = []
+        if assistant_response.info.get("tool_calls"):
+            for tool_call in assistant_response.info["tool_calls"]:
+                tool_call_records.append(tool_call.as_dict())
+
+        _data = {
+            "user": user_response.msg.content
+            if hasattr(user_response, "msg") and user_response.msg
+            else "",
+            "assistant": assistant_response.msg.content
+            if hasattr(assistant_response, "msg") and assistant_response.msg
+            else "",
+            "tool_calls": tool_call_records,
+        }
+
+        chat_history.append(_data)
+        logger.info(
+            f"Round #{_round} user_response:\n {user_response.msgs[0].content if user_response.msgs and len(user_response.msgs) > 0 else ''}"
+        )
+        logger.info(
+            f"Round #{_round} assistant_response:\n {assistant_response.msgs[0].content if assistant_response.msgs and len(assistant_response.msgs) > 0 else ''}"
+        )
+
+        if (
+            assistant_response.terminated
+            or user_response.terminated
+            or "TASK_DONE" in user_response.msg.content
+        ):
+            break
+
+        input_msg = assistant_response.msg
+
+    answer = chat_history[-1]["assistant"]
+    token_info = {
+        "completion_token_count": overall_completion_token_count,
+        "prompt_token_count": overall_prompt_token_count,
+    }
+
+    return answer, chat_history, token_info
+
+def construct_society(question: str) -> ExcelRolePalying:
+    r"""Construct a society of agents based on the given question.
+
+    Args:
+        question (str): The task or question to be addressed by the society.
+
+    Returns:
+        OwlRolePlaying: A configured society of agents ready to address the question.
+    """
+
+    # base_model_config = {
+    #     "model_platform": ModelPlatformType.DEEPSEEK,
+    #     "model_type": 'deepseek-chat',
+    #     "model_config_dict": ChatGPTConfig(temperature=0.1, max_tokens=8192).as_dict(),
+    # }
+
+    # Create models for different components using Azure OpenAI
+    base_model_config = {
+        "model_platform": ModelPlatformType.AZURE,
+        "model_type": os.getenv("AZURE_OPENAI_MODEL_TYPE"),
+        "model_config_dict": ChatGPTConfig(temperature=0.4, max_tokens=4096).as_dict(),
+    }
+
+
+    models = {
+        "user": ModelFactory.create(**base_model_config),
+        "assistant": ModelFactory.create(**base_model_config),
+    }
+
+    # Configure toolkits
+    tools = [
+        *CodeExecutionToolkit(sandbox="subprocess", verbose=True).get_tools(),
+        *ExcelToolkit().get_tools(),
+        *FileWriteToolkit(output_dir="./").get_tools(),
+    ]
+
+    # Configure agent roles and parameters
+    user_agent_kwargs = {"model": models["user"]}
+    assistant_agent_kwargs = {"model": models["assistant"], "tools": tools}
+
+    # Configure task parameters
+    task_kwargs = {
+        "task_prompt": question,
+        "with_task_specify": False,
+    }
+
+    # Create and return the society
+    society = ExcelRolePalying(
+        **task_kwargs,
+        user_role_name="user",
+        user_agent_kwargs=user_agent_kwargs,
+        assistant_role_name="assistant",
+        assistant_agent_kwargs=assistant_agent_kwargs,
+        output_language="中文"
+    )
+
+    return society
+
+
+def main():
+    r"""Main function to run the OWL system with Azure OpenAI."""
+    # Example question
+    default_task = "帮忙分析一下这个文件中各个学院的录取人数以及最高分最低分，把这些信息画到一张图上，并存到当前目录下。文件路径是`./data/admission_zh.xlsx`"
+
+    set_log_file('log.txt')
+
+    # Override default task if command line argument is provided
+    task = sys.argv[1] if len(sys.argv) > 1 else default_task
+
+    # Construct and run the society
+    society = construct_society(task)
+
+    answer, chat_history, token_count = run_society(society)
+
+    # Output the result
+    print(f"\033[94mAnswer: {answer}\033[0m")
+
+
+if __name__ == "__main__":
+    main()