feat: add vllm for local models

This commit is contained in:
Ziyu
2025-03-24 16:56:15 -05:00
parent 7887585e8e
commit 883fd6f774
6 changed files with 300 additions and 1 deletions

1
.gitignore vendored
View File

@@ -40,6 +40,7 @@ owl/data
owl/tmp
owl/.env
owl/utils/__pycache__/
owl/results
# Logs
*.log

View File

@@ -193,6 +193,10 @@ python owl/run_deepseek.py
# Run with other OpenAI-compatible models
python owl/run_openai_compatiable_model.py
# Run with VLLM backends (using Qwen2.5-VL-7B-Instruct w/ 4 GPUs as an example)
bash owl/scripts/serve.sh # run this under another terminal or screen
python owl/run_vllm.py
```
For a simpler version that only requires an LLM API key, you can try our minimal example:
@@ -239,7 +243,7 @@ We provided a script to reproduce the results on GAIA.
You can check the `run_gaia_roleplaying.py` file and run the following command:
```bash
python run_gaia_roleplaying.py
python owl/run_gaia_roleplaying.py
```
# ⏱️ Future Plans

View File

@@ -0,0 +1,134 @@
"""
This script demonstrates how to run the OWL system with an open-source model
on VLLM as the user agent.
Pre-requisites: bash scripts/serve.sh (4 GPUs for qwen2.5-vl-7b-instruct).
"""
from dotenv import load_dotenv
load_dotenv()
import os
from loguru import logger
from camel.models import ModelFactory
from camel.toolkits import (
AudioAnalysisToolkit,
CodeExecutionToolkit,
DocumentProcessingToolkit,
ExcelToolkit,
ImageAnalysisToolkit,
SearchToolkit,
VideoAnalysisToolkit,
WebToolkit,
)
from camel.types import ModelPlatformType, ModelType
from camel.configs import ChatGPTConfig
from utils import GAIABenchmark
# Configuration for the GAIA benchmark run.
LEVEL = 1  # GAIA difficulty level to evaluate
SAVE_RESULT = True  # persist results to SAVE_TO after the run
test_idx = [0]  # indices of the validation examples to run
VLLM_MODEL_TYPE = "Qwen/Qwen2.5-VL-7B-Instruct" # set the VLLM model type (must match scripts/serve.sh)
PORT = 8964 # set the port for the VLLM model (default used by scripts/serve.sh)
SAVE_TO = "results/result_vllm.json"  # output path for the benchmark results
def main():
    """Run the GAIA benchmark with a locally served VLLM user model.

    The user agent talks to a VLLM OpenAI-compatible endpoint (start it
    with ``bash scripts/serve.sh`` first); the assistant and every
    tool-backing model use OpenAI GPT-4o with deterministic sampling.
    """
    # Create cache directory used for intermediate artifacts.
    cache_dir = "tmp/"
    os.makedirs(cache_dir, exist_ok=True)

    def _gpt4o_model():
        """Create one deterministic GPT-4o model (temperature 0, top_p 1)."""
        return ModelFactory.create(
            model_platform=ModelPlatformType.OPENAI,
            model_type=ModelType.GPT_4O,
            model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
        )

    # Create models for the different components. Only the user agent runs
    # on VLLM; all other roles share the same GPT-4o configuration.
    models = {
        "user": ModelFactory.create(
            model_platform=ModelPlatformType.VLLM,
            model_type=VLLM_MODEL_TYPE,
            model_config_dict={"temperature": 0., "top_p": 1.},
            url=f"http://localhost:{PORT}/v1",
        ),
    }
    for role in ("assistant", "web", "planning", "video", "image", "search"):
        models[role] = _gpt4o_model()

    # Configure toolkits. Audio/video analysis still require an OpenAI key
    # even though the user agent is served locally.
    tools = [
        *WebToolkit(
            headless=True,  # Set to True for headless mode (e.g., on remote servers)
            web_agent_model=models["web"],
            planning_agent_model=models["planning"],
        ).get_tools(),
        *DocumentProcessingToolkit().get_tools(),
        *VideoAnalysisToolkit(model=models["video"]).get_tools(),  # This requires OpenAI Key
        *AudioAnalysisToolkit().get_tools(),  # This requires OpenAI Key
        *CodeExecutionToolkit(sandbox="subprocess", verbose=True).get_tools(),
        *ImageAnalysisToolkit(model=models["image"]).get_tools(),
        *SearchToolkit(model=models["search"]).get_tools(),
        *ExcelToolkit().get_tools(),
    ]

    # Configure agent roles and parameters: tools are attached to the
    # assistant only.
    user_agent_kwargs = {"model": models["user"]}
    assistant_agent_kwargs = {"model": models["assistant"], "tools": tools}

    # Initialize the benchmark; results are written to SAVE_TO.
    benchmark = GAIABenchmark(
        data_dir="data/gaia",
        save_to=SAVE_TO,
    )

    # Print benchmark information.
    print(f"Number of validation examples: {len(benchmark.valid)}")
    print(f"Number of test examples: {len(benchmark.test)}")

    # Run the benchmark on the validation split at the configured level.
    result = benchmark.run(
        on="valid",
        level=LEVEL,
        idx=test_idx,
        save_result=SAVE_RESULT,
        user_role_name="user",
        user_agent_kwargs=user_agent_kwargs,
        assistant_role_name="assistant",
        assistant_agent_kwargs=assistant_agent_kwargs,
    )

    # Output results.
    logger.success(f"Correct: {result['correct']}, Total: {result['total']}")
    logger.success(f"Accuracy: {result['accuracy']}")


if __name__ == "__main__":
    main()

136
owl/run_vllm.py Normal file
View File

@@ -0,0 +1,136 @@
"""
This script demonstrates how to run the OWL system with an open-source model
on VLLM as the user agent.
Pre-requisites: bash scripts/serve.sh (4 GPUs for qwen2.5-vl-7b-instruct).
"""
from dotenv import load_dotenv
load_dotenv()
from camel.models import ModelFactory
from camel.toolkits import (
AudioAnalysisToolkit,
CodeExecutionToolkit,
DocumentProcessingToolkit,
ExcelToolkit,
ImageAnalysisToolkit,
SearchToolkit,
VideoAnalysisToolkit,
WebToolkit,
)
from camel.types import ModelPlatformType, ModelType
from utils import OwlRolePlaying, run_society
VLLM_MODEL_TYPE = "Qwen/Qwen2.5-VL-7B-Instruct" # set the VLLM model type (must match the model served by scripts/serve.sh)
PORT = 8964 # set the port for the VLLM model (default used by scripts/serve.sh)
def construct_society(question: str) -> OwlRolePlaying:
    r"""Construct a society of agents based on the given question.

    The user agent runs on a locally served VLLM endpoint (see
    ``scripts/serve.sh``); every other role uses OpenAI GPT-4o with
    temperature 0.

    Args:
        question (str): The task or question to be addressed by the society.

    Returns:
        OwlRolePlaying: A configured society of agents ready to address
            the question.
    """

    def _gpt4o_model():
        # All non-user roles share this identical deterministic GPT-4o setup.
        return ModelFactory.create(
            model_platform=ModelPlatformType.OPENAI,
            model_type=ModelType.GPT_4O,
            model_config_dict={"temperature": 0},
        )

    # Create models for different components; only "user" is VLLM-backed.
    models = {
        "user": ModelFactory.create(
            model_platform=ModelPlatformType.VLLM,
            model_type=VLLM_MODEL_TYPE,
            model_config_dict={"temperature": 0.},
            url=f"http://localhost:{PORT}/v1",
        ),
    }
    for role in ("assistant", "web", "planning", "video", "image", "search"):
        models[role] = _gpt4o_model()

    # Configure toolkits. Audio/video analysis still need an OpenAI key.
    tools = [
        *WebToolkit(
            headless=True,  # Set to True for headless mode (e.g., on remote servers)
            web_agent_model=models["web"],
            planning_agent_model=models["planning"],
        ).get_tools(),
        *DocumentProcessingToolkit().get_tools(),
        *VideoAnalysisToolkit(model=models["video"]).get_tools(),  # This requires OpenAI Key
        *AudioAnalysisToolkit().get_tools(),  # This requires OpenAI Key
        *CodeExecutionToolkit(sandbox="subprocess", verbose=True).get_tools(),
        *ImageAnalysisToolkit(model=models["image"]).get_tools(),
        *SearchToolkit(model=models["search"]).get_tools(),
        *ExcelToolkit().get_tools(),
    ]

    # Configure agent roles and parameters: tools go to the assistant only.
    user_agent_kwargs = {"model": models["user"]}
    assistant_agent_kwargs = {"model": models["assistant"], "tools": tools}

    # Configure task parameters; the raw question is used as the prompt.
    task_kwargs = {
        "task_prompt": question,
        "with_task_specify": False,
    }

    # Create and return the society.
    society = OwlRolePlaying(
        **task_kwargs,
        user_role_name="user",
        user_agent_kwargs=user_agent_kwargs,
        assistant_role_name="assistant",
        assistant_agent_kwargs=assistant_agent_kwargs,
    )
    return society
def main():
    r"""Entry point: run the OWL system on one example research question."""
    # Example research question (GAIA-style factual lookup task).
    question = (
        "What was the volume in m^3 of the fish bag that was calculated in "
        "the University of Leicester paper `Can Hiccup Supply Enough Fish "
        "to Maintain a Dragon's Diet?`"
    )

    # Build the agent society for this task and execute it; the chat
    # history and token count are not needed for this demo.
    answer, _chat_history, _token_count = run_society(construct_society(question))

    # Report the final answer.
    print(f"Answer: {answer}")


if __name__ == "__main__":
    main()

19
owl/scripts/serve.sh Normal file
View File

@@ -0,0 +1,19 @@
#!/bin/bash
# Launch a VLLM OpenAI-compatible API server for local OWL runs.
# Tested vllm version: 0.7.3
#
# Usage: serve.sh [model] [max_model_len] [gpu_memory_utilization] [devices] [port]

# Permit max_model_len values larger than the model's default limit.
export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1

model=${1:-Qwen/Qwen2.5-VL-7B-Instruct}
max_model_len=${2:-32768}
gpu_memory_utilization=${3:-0.9}
devices=${4:-"0,1,2,3"}
# Tensor-parallel size is derived from the number of comma-separated devices.
tp_size=$(awk -F',' '{print NF}' <<< "$devices")
port=${5:-8964}

# Fix: the original ended with a trailing line-continuation backslash after
# the last argument; expansions are now quoted to survive unusual values.
CUDA_VISIBLE_DEVICES="${devices}" python -m vllm.entrypoints.openai.api_server \
    --model "${model}" \
    --tensor_parallel_size "${tp_size}" \
    --gpu_memory_utilization "${gpu_memory_utilization}" \
    --port "${port}" \
    --max_model_len "${max_model_len}"

View File

@@ -252,6 +252,7 @@ class GAIABenchmark(BaseBenchmark):
if save_result:
os.makedirs(os.path.dirname(self.save_to), exist_ok=True)
with open(self.save_to, 'w') as f:
json.dump(self._results, f, indent=4, ensure_ascii=False)
f.close()
@@ -370,6 +371,10 @@ class GAIABenchmark(BaseBenchmark):
def normalize_number_str(self, number_str: str) -> float:
if number_str is None:
logger.error("Received None as number string, returning infinity.")
return float("inf")
for char in ["$", "%", ","]:
number_str = number_str.replace(char, "")
try: