mirror of
https://github.com/camel-ai/owl.git
synced 2026-03-22 05:57:17 +08:00
update wendong
This commit is contained in:
139
examples/run_gaia_roleplaying.py
Normal file
139
examples/run_gaia_roleplaying.py
Normal file
@@ -0,0 +1,139 @@
|
||||
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
|
||||
import os
|
||||
|
||||
from camel.models import ModelFactory
|
||||
from camel.logger import get_logger
|
||||
from camel.toolkits import (
|
||||
AudioAnalysisToolkit,
|
||||
CodeExecutionToolkit,
|
||||
ExcelToolkit,
|
||||
ImageAnalysisToolkit,
|
||||
SearchToolkit,
|
||||
VideoAnalysisToolkit,
|
||||
BrowserToolkit,
|
||||
FileWriteToolkit,
|
||||
)
|
||||
from camel.types import ModelPlatformType, ModelType
|
||||
from camel.configs import ChatGPTConfig
|
||||
|
||||
from owl.utils import GAIABenchmark
|
||||
from camel.logger import set_log_level
|
||||
|
||||
set_log_level(level="DEBUG")
|
||||
|
||||
load_dotenv()
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# Configuration
|
||||
LEVEL = 1
|
||||
SAVE_RESULT = True
|
||||
test_idx = [0]
|
||||
|
||||
|
||||
def main():
|
||||
"""Main function to run the GAIA benchmark."""
|
||||
# Create cache directory
|
||||
cache_dir = "tmp/"
|
||||
os.makedirs(cache_dir, exist_ok=True)
|
||||
result_dir = "results/"
|
||||
os.makedirs(result_dir, exist_ok=True)
|
||||
|
||||
# Create models for different components
|
||||
models = {
|
||||
"user": ModelFactory.create(
|
||||
model_platform=ModelPlatformType.OPENAI,
|
||||
model_type=ModelType.GPT_4O,
|
||||
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
|
||||
),
|
||||
"assistant": ModelFactory.create(
|
||||
model_platform=ModelPlatformType.OPENAI,
|
||||
model_type=ModelType.GPT_4O,
|
||||
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
|
||||
),
|
||||
"web": ModelFactory.create(
|
||||
model_platform=ModelPlatformType.OPENAI,
|
||||
model_type=ModelType.GPT_4O,
|
||||
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
|
||||
),
|
||||
"planning": ModelFactory.create(
|
||||
model_platform=ModelPlatformType.OPENAI,
|
||||
model_type=ModelType.GPT_4O,
|
||||
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
|
||||
),
|
||||
"video": ModelFactory.create(
|
||||
model_platform=ModelPlatformType.OPENAI,
|
||||
model_type=ModelType.GPT_4O,
|
||||
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
|
||||
),
|
||||
"image": ModelFactory.create(
|
||||
model_platform=ModelPlatformType.OPENAI,
|
||||
model_type=ModelType.GPT_4O,
|
||||
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
|
||||
),
|
||||
}
|
||||
|
||||
# Configure toolkits
|
||||
tools = [
|
||||
*BrowserToolkit(
|
||||
headless=False, # Set to True for headless mode (e.g., on remote servers)
|
||||
web_agent_model=models["web"],
|
||||
planning_agent_model=models["planning"],
|
||||
).get_tools(),
|
||||
*VideoAnalysisToolkit(
|
||||
model=models["video"]
|
||||
).get_tools(), # This requires OpenAI Key
|
||||
*AudioAnalysisToolkit().get_tools(), # This requires OpenAI Key
|
||||
*CodeExecutionToolkit(sandbox="subprocess", verbose=True).get_tools(),
|
||||
*ImageAnalysisToolkit(model=models["image"]).get_tools(),
|
||||
*SearchToolkit().get_tools(),
|
||||
*ExcelToolkit().get_tools(),
|
||||
*FileWriteToolkit(output_dir="./").get_tools(),
|
||||
]
|
||||
|
||||
# Configure agent roles and parameters
|
||||
user_agent_kwargs = {"model": models["user"]}
|
||||
assistant_agent_kwargs = {"model": models["assistant"], "tools": tools}
|
||||
|
||||
# Initialize benchmark
|
||||
benchmark = GAIABenchmark(data_dir="data/gaia", save_to="results/result.json")
|
||||
|
||||
# Print benchmark information
|
||||
print(f"Number of validation examples: {len(benchmark.valid)}")
|
||||
print(f"Number of test examples: {len(benchmark.test)}")
|
||||
|
||||
# Run benchmark
|
||||
result = benchmark.run(
|
||||
on="valid",
|
||||
level=LEVEL,
|
||||
idx=test_idx,
|
||||
save_result=SAVE_RESULT,
|
||||
user_role_name="user",
|
||||
user_agent_kwargs=user_agent_kwargs,
|
||||
assistant_role_name="assistant",
|
||||
assistant_agent_kwargs=assistant_agent_kwargs,
|
||||
)
|
||||
|
||||
# Output results
|
||||
logger.info(f"Correct: {result['correct']}, Total: {result['total']}")
|
||||
logger.info(f"Accuracy: {result['accuracy']}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user