mirror of
https://github.com/camel-ai/owl.git
synced 2026-03-22 14:07:17 +08:00
feat: add vllm for local models
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -40,6 +40,7 @@ owl/data
|
||||
owl/tmp
|
||||
owl/.env
|
||||
owl/utils/__pycache__/
|
||||
owl/results
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
|
||||
@@ -193,6 +193,10 @@ python owl/run_deepseek.py
|
||||
|
||||
# Run with other OpenAI-compatible models
|
||||
python owl/run_openai_compatiable_model.py
|
||||
|
||||
# Run with VLLM backends (using Qwen2.5-VL-7B-Instruct w/ 4 GPUs as an example)
|
||||
bash owl/scripts/serve.sh # run this under another terminal or screen
|
||||
python owl/run_vllm.py
|
||||
```
|
||||
|
||||
For a simpler version that only requires an LLM API key, you can try our minimal example:
|
||||
@@ -239,7 +243,7 @@ We provided a script to reproduce the results on GAIA.
|
||||
You can check the `run_gaia_roleplaying.py` file and run the following command:
|
||||
|
||||
```bash
|
||||
python run_gaia_roleplaying.py
|
||||
python owl/run_gaia_roleplaying.py
|
||||
```
|
||||
|
||||
# ⏱️ Future Plans
|
||||
|
||||
134
owl/run_gaia_roleplaying_vllm.py
Normal file
134
owl/run_gaia_roleplaying_vllm.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""
|
||||
This script demonstrates how to run the OWL system with an open-source model
|
||||
on VLLM as the user agent.
|
||||
|
||||
Pre-requisites: bash scripts/serve.sh (4 GPUs for qwen2.5-vl-7b-instruct).
|
||||
"""
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
import os
|
||||
from loguru import logger
|
||||
|
||||
from camel.models import ModelFactory
|
||||
from camel.toolkits import (
|
||||
AudioAnalysisToolkit,
|
||||
CodeExecutionToolkit,
|
||||
DocumentProcessingToolkit,
|
||||
ExcelToolkit,
|
||||
ImageAnalysisToolkit,
|
||||
SearchToolkit,
|
||||
VideoAnalysisToolkit,
|
||||
WebToolkit,
|
||||
)
|
||||
from camel.types import ModelPlatformType, ModelType
|
||||
from camel.configs import ChatGPTConfig
|
||||
|
||||
from utils import GAIABenchmark
|
||||
|
||||
|
||||
# Configuration
LEVEL = 1  # GAIA difficulty level to evaluate (passed to benchmark.run)
SAVE_RESULT = True  # whether benchmark.run persists results
test_idx = [0]  # indices of validation examples to run
VLLM_MODEL_TYPE = "Qwen/Qwen2.5-VL-7B-Instruct" # set the VLLM model type
PORT = 8964 # set the port for the VLLM model
SAVE_TO = "results/result_vllm.json"  # output path for benchmark results
|
||||
|
||||
|
||||
def main():
    """Run the GAIA benchmark with a VLLM-served model as the user agent.

    The user agent talks to a local VLLM OpenAI-compatible endpoint at
    ``http://localhost:{PORT}/v1``; the assistant and all toolkit models
    are OpenAI GPT-4o.  Results are written to ``SAVE_TO`` when
    ``SAVE_RESULT`` is True.
    """
    # Create cache directory for intermediate artifacts.
    cache_dir = "tmp/"
    os.makedirs(cache_dir, exist_ok=True)

    def _gpt4o_model():
        """Build one GPT-4o model; every non-user role shares this config."""
        return ModelFactory.create(
            model_platform=ModelPlatformType.OPENAI,
            model_type=ModelType.GPT_4O,
            model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
        )

    # Create models for different components.  Only "user" runs on VLLM;
    # the remaining roles are identical GPT-4o instances.
    models = {
        "user": ModelFactory.create(
            model_platform=ModelPlatformType.VLLM,
            model_type=VLLM_MODEL_TYPE,
            model_config_dict={"temperature": 0., "top_p": 1.},
            url=f"http://localhost:{PORT}/v1",
        ),
    }
    for role in ("assistant", "web", "planning", "video", "image", "search"):
        models[role] = _gpt4o_model()

    # Configure toolkits available to the assistant agent.
    tools = [
        *WebToolkit(
            headless=True,  # Set to True for headless mode (e.g., on remote servers)
            web_agent_model=models["web"],
            planning_agent_model=models["planning"],
        ).get_tools(),
        *DocumentProcessingToolkit().get_tools(),
        *VideoAnalysisToolkit(model=models["video"]).get_tools(),  # This requires OpenAI Key
        *AudioAnalysisToolkit().get_tools(),  # This requires OpenAI Key
        *CodeExecutionToolkit(sandbox="subprocess", verbose=True).get_tools(),
        *ImageAnalysisToolkit(model=models["image"]).get_tools(),
        *SearchToolkit(model=models["search"]).get_tools(),
        *ExcelToolkit().get_tools(),
    ]

    # Configure agent roles and parameters.
    user_agent_kwargs = {"model": models["user"]}
    assistant_agent_kwargs = {"model": models["assistant"], "tools": tools}

    # Initialize benchmark.
    benchmark = GAIABenchmark(
        data_dir="data/gaia",
        save_to=SAVE_TO,
    )

    # Print benchmark information.
    print(f"Number of validation examples: {len(benchmark.valid)}")
    print(f"Number of test examples: {len(benchmark.test)}")

    # Run benchmark on the validation split.
    result = benchmark.run(
        on="valid",
        level=LEVEL,
        idx=test_idx,
        save_result=SAVE_RESULT,
        user_role_name="user",
        user_agent_kwargs=user_agent_kwargs,
        assistant_role_name="assistant",
        assistant_agent_kwargs=assistant_agent_kwargs,
    )

    # Output results.
    logger.success(f"Correct: {result['correct']}, Total: {result['total']}")
    logger.success(f"Accuracy: {result['accuracy']}")


if __name__ == "__main__":
    main()
|
||||
136
owl/run_vllm.py
Normal file
136
owl/run_vllm.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""
|
||||
This script demonstrates how to run the OWL system with an open-source model
|
||||
on VLLM as the user agent.
|
||||
|
||||
Pre-requisites: bash scripts/serve.sh (4 GPUs for qwen2.5-vl-7b-instruct).
|
||||
"""
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
from camel.models import ModelFactory
|
||||
from camel.toolkits import (
|
||||
AudioAnalysisToolkit,
|
||||
CodeExecutionToolkit,
|
||||
DocumentProcessingToolkit,
|
||||
ExcelToolkit,
|
||||
ImageAnalysisToolkit,
|
||||
SearchToolkit,
|
||||
VideoAnalysisToolkit,
|
||||
WebToolkit,
|
||||
)
|
||||
from camel.types import ModelPlatformType, ModelType
|
||||
|
||||
from utils import OwlRolePlaying, run_society
|
||||
|
||||
VLLM_MODEL_TYPE = "Qwen/Qwen2.5-VL-7B-Instruct" # set the VLLM model type
PORT = 8964 # set the port for the VLLM model (matches the serve.sh default)
|
||||
|
||||
|
||||
def construct_society(question: str) -> OwlRolePlaying:
    r"""Construct a society of agents based on the given question.

    The user agent runs on a local VLLM OpenAI-compatible endpoint at
    ``http://localhost:{PORT}/v1``; the assistant and all toolkit models
    are OpenAI GPT-4o.

    Args:
        question (str): The task or question to be addressed by the society.

    Returns:
        OwlRolePlaying: A configured society of agents ready to address the question.
    """

    def _gpt4o_model():
        """Build one GPT-4o model; every non-user role shares this config."""
        return ModelFactory.create(
            model_platform=ModelPlatformType.OPENAI,
            model_type=ModelType.GPT_4O,
            model_config_dict={"temperature": 0},
        )

    # Create models for different components.  Only "user" runs on VLLM;
    # the remaining roles are identical GPT-4o instances.
    models = {
        "user": ModelFactory.create(
            model_platform=ModelPlatformType.VLLM,
            model_type=VLLM_MODEL_TYPE,
            model_config_dict={"temperature": 0.},
            url=f"http://localhost:{PORT}/v1",
        ),
    }
    for role in ("assistant", "web", "planning", "video", "image", "search"):
        models[role] = _gpt4o_model()

    # Configure toolkits available to the assistant agent.
    tools = [
        *WebToolkit(
            headless=True,  # Set to True for headless mode (e.g., on remote servers)
            web_agent_model=models["web"],
            planning_agent_model=models["planning"],
        ).get_tools(),
        *DocumentProcessingToolkit().get_tools(),
        *VideoAnalysisToolkit(model=models["video"]).get_tools(),  # This requires OpenAI Key
        *AudioAnalysisToolkit().get_tools(),  # This requires OpenAI Key
        *CodeExecutionToolkit(sandbox="subprocess", verbose=True).get_tools(),
        *ImageAnalysisToolkit(model=models["image"]).get_tools(),
        *SearchToolkit(model=models["search"]).get_tools(),
        *ExcelToolkit().get_tools(),
    ]

    # Configure agent roles and parameters.
    user_agent_kwargs = {"model": models["user"]}
    assistant_agent_kwargs = {"model": models["assistant"], "tools": tools}

    # Configure task parameters.
    task_kwargs = {
        "task_prompt": question,
        "with_task_specify": False,
    }

    # Create and return the society.
    society = OwlRolePlaying(
        **task_kwargs,
        user_role_name="user",
        user_agent_kwargs=user_agent_kwargs,
        assistant_role_name="assistant",
        assistant_agent_kwargs=assistant_agent_kwargs,
    )

    return society
||||
|
||||
|
||||
def main():
    r"""Entry point: run the OWL system on one example research question."""
    example_question = (
        "What was the volume in m^3 of the fish bag that was calculated in "
        "the University of Leicester paper `Can Hiccup Supply Enough Fish "
        "to Maintain a Dragon's Diet?`"
    )

    # Build the agent society for this question and execute it; only the
    # final answer is reported (history and token usage are discarded).
    society = construct_society(example_question)
    answer, _chat_history, _token_count = run_society(society)

    print(f"Answer: {answer}")


if __name__ == "__main__":
    main()
|
||||
19
owl/scripts/serve.sh
Normal file
19
owl/scripts/serve.sh
Normal file
@@ -0,0 +1,19 @@
|
||||
#!/bin/bash
# Serve a model with VLLM's OpenAI-compatible API server.
#
# Usage: serve.sh [model] [max_model_len] [gpu_memory_utilization] [devices] [port]
# tested vllm version: 0.7.3

export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1

model=${1:-Qwen/Qwen2.5-VL-7B-Instruct}
max_model_len=${2:-32768}
gpu_memory_utilization=${3:-0.9}
devices=${4:-"0,1,2,3"}
# Tensor-parallel size = number of comma-separated device ids.
tp_size=$(awk -F',' '{print NF}' <<< "$devices")
port=${5:-8964}

# NOTE: no trailing backslash on the last argument — a dangling line
# continuation at EOF silently swallows any line appended later.
CUDA_VISIBLE_DEVICES=${devices} python -m vllm.entrypoints.openai.api_server \
    --model "${model}" \
    --tensor_parallel_size "${tp_size}" \
    --gpu_memory_utilization "${gpu_memory_utilization}" \
    --port "${port}" \
    --max_model_len "${max_model_len}"
|
||||
@@ -252,6 +252,7 @@ class GAIABenchmark(BaseBenchmark):
|
||||
|
||||
|
||||
if save_result:
|
||||
os.makedirs(os.path.dirname(self.save_to), exist_ok=True)
|
||||
with open(self.save_to, 'w') as f:
|
||||
json.dump(self._results, f, indent=4, ensure_ascii=False)
|
||||
f.close()
|
||||
@@ -370,6 +371,10 @@ class GAIABenchmark(BaseBenchmark):
|
||||
|
||||
|
||||
def normalize_number_str(self, number_str: str) -> float:
|
||||
if number_str is None:
|
||||
logger.error("Received None as number string, returning infinity.")
|
||||
return float("inf")
|
||||
|
||||
for char in ["$", "%", ","]:
|
||||
number_str = number_str.replace(char, "")
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user