diff --git a/community_usecase/excel_analyzer/README.md b/community_usecase/excel_analyzer/README.md index 3c14607..fd7e87e 100644 --- a/community_usecase/excel_analyzer/README.md +++ b/community_usecase/excel_analyzer/README.md @@ -6,6 +6,12 @@ This project uses **Owl** for data analysis and visualization. - Provides both English and Chinese versions of the raw data and prompts - Utilizes **CodeExecutionToolkit**, **ExcelToolkit**, and **FileWriteToolkit** to complete related tasks - Implements **ExcelRolePlaying** based on **OwlRolePlaying**, which overrides the `system_prompt` with a cleaner, more focused version tailored for data analysis scenarios + +- The analysis and visualization of this Excel file involve: + - Complex headers (merged rows) + - NaN value handling + - Complex group calculations + - Visualization ## How to Use 1. Set up the environment according to Owl's official instructions @@ -13,13 +19,12 @@ This project uses **Owl** for data analysis and visualization. ```bash cd community_usecase/excel_analyzer - # Chinese version - python data_insights_deepseek_zh.py + # Chinese version, using deepseek-v3 + python data_analyzer_zh.py - # English version - python data_insights_gpt4o_zh.py + # English version, using gpt-4o + python data_analyzer_en.py ``` 3. The analysis results will be saved in the current directory -## Demo Video diff --git a/community_usecase/excel_analyzer/README_zh.md b/community_usecase/excel_analyzer/README_zh.md index 9fb1361..eeb85a9 100644 --- a/community_usecase/excel_analyzer/README_zh.md +++ b/community_usecase/excel_analyzer/README_zh.md @@ -3,11 +3,15 @@ ## Features - - 提供了英文,中文两个版本的原始数据和prompt,方便理解 - 使用**CodeExecutionToolkit**,**ExcelToolkit**,**FileWriteToolkit**来完成相关工作 - 在**OwlRolePlaying**基础之上实现了**ExcelRolePalying**,它重写了system_prompt,更简洁,聚焦在数据分析场景 - +- 经过测试,在`gpt-4o`和`deepseek-v3`下均可以达到预期效果 +- 对该excel进行分析和可视化时涉及到的内容有: + - 复杂表头(合并行) + - 缺失值处理 + - 复杂的分组计算 + - 可视化 ## How to use 1. 
按照owl的官方流程搭建好环境 @@ -15,16 +19,12 @@ ``` cd community_usecase/excel_analyzer - # Chinese version + # Chinese version, using deepseek-v3 - python excel_analyzer_zh.py + python data_analyzer_zh.py - # English version + # English version, using gpt-4o - python excel_analyzer_zh.py + python data_analyzer_en.py ``` - 3. 数据集分析的结果将会在出存在当前目录下 + 3. 数据集分析的结果将会保存在当前目录下 -## Demo -视频结果:[link] - - diff --git a/community_usecase/excel_analyzer/data_analyzer_en.py b/community_usecase/excel_analyzer/data_analyzer_en.py index 57d15e6..d3458e3 100644 --- a/community_usecase/excel_analyzer/data_analyzer_en.py +++ b/community_usecase/excel_analyzer/data_analyzer_en.py @@ -195,12 +195,6 @@ def construct_society(question: str) -> ExcelRolePalying: OwlRolePlaying: A configured society of agents ready to address the question. """ - # base_model_config = { - # "model_platform": ModelPlatformType.DEEPSEEK, - # "model_type": 'deepseek-chat', - # "model_config_dict": ChatGPTConfig(temperature=0.1, max_tokens=8192).as_dict(), - # } - # Create models for different components using Azure OpenAI base_model_config = { "model_platform": ModelPlatformType.AZURE, @@ -245,12 +239,12 @@ def construct_society(question: str) -> ExcelRolePalying: def main(): - r"""Main function to run the OWL system with Azure OpenAI.""" # Example question - - default_task = "Please help analyze the number of admitted students, as well as the highest and lowest scores for each college in this file. Visualize this information in a single chart and save it in the current directory. 
The file path is `./data/admission_en.xlsx.`" - + default_task = """Please help analyze the file `./data/admission_en.xlsx` by: + - Calculating the number of admitted students, as well as the highest and lowest scores for each college + - Plotting this information in a single chart: use a bar chart for the number of admitted students, and line charts for the highest and lowest scores + - Saving the generated chart as `vis_en.png` in the current directory""" set_log_file('log.txt') diff --git a/community_usecase/excel_analyzer/data_analyzer_zh.py b/community_usecase/excel_analyzer/data_analyzer_zh.py index cdb342f..96f1e92 100644 --- a/community_usecase/excel_analyzer/data_analyzer_zh.py +++ b/community_usecase/excel_analyzer/data_analyzer_zh.py @@ -134,11 +134,10 @@ import pandas as pd - 始终验证你的最终答案是否正确! - 请每次都从头开始编写完整代码,编写代码后,务必运行代码并获取结果! 如果遇到错误,尝试调试代码。 - 请注意,代码执行环境不支持交互式输入。 - 如果工具运行失败,或者代码无法正确运行, 绝对不要假设其返回了正确结果,并在此基础上继续推理! 正确的做法是分析错误原因,并尝试修正! -- 如果你写的代码涉及到用matplotlib画图,请始终在代码开头下面这段代码: +- [重要!!!]如果你写的代码涉及到用matplotlib画图,请始终在代码开头下面这段代码: ``` import matplotlib matplotlib.rcParams['font.sans-serif'] = ['SimHei'] # 支持中文 @@ -232,18 +231,18 @@ def construct_society(question: str) -> ExcelRolePalying: OwlRolePlaying: A configured society of agents ready to address the question. 
""" - # base_model_config = { - # "model_platform": ModelPlatformType.DEEPSEEK, - # "model_type": 'deepseek-chat', - # "model_config_dict": ChatGPTConfig(temperature=0.1, max_tokens=8192).as_dict(), - # } + base_model_config = { + "model_platform": ModelPlatformType.DEEPSEEK, + "model_type": 'deepseek-chat', + "model_config_dict": ChatGPTConfig(temperature=0.1, max_tokens=8192).as_dict(), + } # Create models for different components using Azure OpenAI - base_model_config = { - "model_platform": ModelPlatformType.AZURE, - "model_type": os.getenv("AZURE_OPENAI_MODEL_TYPE"), - "model_config_dict": ChatGPTConfig(temperature=0.4, max_tokens=4096).as_dict(), - } + # base_model_config = { + # "model_platform": ModelPlatformType.AZURE, + # "model_type": os.getenv("AZURE_OPENAI_MODEL_TYPE"), + # "model_config_dict": ChatGPTConfig(temperature=0.4, max_tokens=4096).as_dict(), + # } models = { @@ -284,7 +283,13 @@ def construct_society(question: str) -> ExcelRolePalying: def main(): r"""Main function to run the OWL system with Azure OpenAI.""" # Example question - default_task = "帮忙分析一下这个文件中各个学院的录取人数以及最高分最低分,把这些信息画到一张图上,并存到当前目录下。文件路径是`./data/admission_zh.xlsx`" + # default_task = """帮忙分析一下这个文件中各个学院的录取人数以及最高分最低分,把这些信息画到一张图上,并存到当前路径下。文件路径是`./data/admission_zh.xlsx`""" + + default_task = """帮忙分析一下`./data/admission_zh.xlsx`这个文件,请你: + - 统计各个学院的录取人数以及最高分最低分 + - 把这些信息画到一张图上,录取人数使用柱状图,最高分最低分使用折线图 + - 把画完的图`vis_zh.png`存到当前目录下""" + set_log_file('log.txt')