update wendong

2026-03-22 05:57:17 +08:00 · 2025-03-15 12:24:31 +08:00
parent c7c94a233a
commit 05aebff028
18 changed files with 1398 additions and 115 deletions
--- a/examples/run_gaia_roleplaying.py
+++ b/examples/run_gaia_roleplaying.py
@@ -0,0 +1,139 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+from dotenv import load_dotenv
+
+
+import os
+
+from camel.models import ModelFactory
+from camel.logger import get_logger
+from camel.toolkits import (
+    AudioAnalysisToolkit,
+    CodeExecutionToolkit,
+    ExcelToolkit,
+    ImageAnalysisToolkit,
+    SearchToolkit,
+    VideoAnalysisToolkit,
+    BrowserToolkit,
+    FileWriteToolkit,
+)
+from camel.types import ModelPlatformType, ModelType
+from camel.configs import ChatGPTConfig
+
+from owl.utils import GAIABenchmark
+from camel.logger import set_log_level
+
+# Switch the camel logger to DEBUG verbosity for this run.
+set_log_level(level="DEBUG")
+
+# Read environment variables from a .env file; presumably this is where the
+# OpenAI key needed by the models/toolkits below comes from — confirm
+# against the project's setup instructions.
+load_dotenv()
+
+# Logger named after this module; used by main() to report the final score.
+logger = get_logger(__name__)
+
+# Configuration
+# LEVEL is forwarded to benchmark.run(level=...).
+LEVEL = 1
+# SAVE_RESULT is forwarded to benchmark.run(save_result=...).
+SAVE_RESULT = True
+# test_idx is forwarded to benchmark.run(idx=...); presumably the indices of
+# the examples to evaluate — verify against GAIABenchmark.run.
+test_idx = [0]
+
+
+def main():
+    """Run the GAIA benchmark once with a user/assistant role-playing pair.
+
+    Ensures the ``tmp/`` and ``results/`` directories exist, creates one
+    GPT-4o model per component, gathers the assistant's tools from the
+    toolkits below, runs the ``valid`` split filtered by the module-level
+    ``LEVEL`` and ``test_idx`` settings, and logs the correct/total counts
+    together with the accuracy reported by the benchmark.
+    """
+    # Cache and result directories; exist_ok=True keeps reruns from failing.
+    for directory in ("tmp/", "results/"):
+        os.makedirs(directory, exist_ok=True)
+
+    # One ModelFactory.create call per component, all with the same settings.
+    component_names = ("user", "assistant", "web", "planning", "video", "image")
+    models = {
+        name: ModelFactory.create(
+            model_platform=ModelPlatformType.OPENAI,
+            model_type=ModelType.GPT_4O,
+            model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
+        )
+        for name in component_names
+    }
+
+    # Gather the assistant's tools toolkit by toolkit; each toolkit is built
+    # and queried before the next one is constructed.
+    tools = []
+    tools.extend(
+        BrowserToolkit(
+            headless=False,  # Set to True for headless mode (e.g., on remote servers)
+            web_agent_model=models["web"],
+            planning_agent_model=models["planning"],
+        ).get_tools()
+    )
+    # The video and audio toolkits require an OpenAI key.
+    tools.extend(VideoAnalysisToolkit(model=models["video"]).get_tools())
+    tools.extend(AudioAnalysisToolkit().get_tools())
+    tools.extend(CodeExecutionToolkit(sandbox="subprocess", verbose=True).get_tools())
+    tools.extend(ImageAnalysisToolkit(model=models["image"]).get_tools())
+    tools.extend(SearchToolkit().get_tools())
+    tools.extend(ExcelToolkit().get_tools())
+    tools.extend(FileWriteToolkit(output_dir="./").get_tools())
+
+    # Load the benchmark data and tell it where to store results.
+    benchmark = GAIABenchmark(data_dir="data/gaia", save_to="results/result.json")
+
+    # Report how many examples each split contains.
+    print(f"Number of validation examples: {len(benchmark.valid)}")
+    print(f"Number of test examples: {len(benchmark.test)}")
+
+    # Evaluate the selected validation examples; only the assistant gets tools.
+    result = benchmark.run(
+        on="valid",
+        level=LEVEL,
+        idx=test_idx,
+        save_result=SAVE_RESULT,
+        user_role_name="user",
+        user_agent_kwargs={"model": models["user"]},
+        assistant_role_name="assistant",
+        assistant_agent_kwargs={"model": models["assistant"], "tools": tools},
+    )
+
+    # Log the summary figures returned by the benchmark.
+    logger.info(f"Correct: {result['correct']}, Total: {result['total']}")
+    logger.info(f"Accuracy: {result['accuracy']}")
+
+
+# Run the benchmark only when this file is executed as a script, not when it
+# is imported.
+if __name__ == "__main__":
+    main()