mirror of
https://github.com/camel-ai/owl.git
synced 2026-03-22 05:57:17 +08:00
fix bugs and add requirements
This commit is contained in:
@@ -9,8 +9,7 @@ This repository contains inference part code for the OWL framework (Workforce).
|
||||
|
||||
## Inference
|
||||
|
||||
The camel version we use is `0.2.46`. To reproduce Workforce inference performance (69.70% - Claude-3.7 accuracy on GAIA benchmark and 60.61% - GPT-4o
|
||||
accuracy on GAIA benchmark), follow the steps below:
|
||||
We use camel version `0.2.46`. To reproduce the Workforce inference performance on the GAIA benchmark (69.70% accuracy with Claude-3.7 at pass@1, and 60.61% accuracy with GPT-4o at pass@3), follow the steps below:
|
||||
|
||||
### Installation and Setup
|
||||
|
||||
|
||||
142
requirements.txt
Normal file
142
requirements.txt
Normal file
@@ -0,0 +1,142 @@
|
||||
# Core dependencies
|
||||
numpy>=1.26.0
|
||||
openai>=1.59.7
|
||||
tiktoken>=0.7.0
|
||||
colorama>=0.4.6
|
||||
jsonschema>=4.0.0
|
||||
protobuf>=5.0.0
|
||||
docstring-parser>=0.15.0
|
||||
pydantic>=1.9.0,<2.10.0
|
||||
eval-type-backport==0.2.0
|
||||
curl_cffi==0.6.2
|
||||
httpx>=0.28.0,<1.0.0
|
||||
psutil>=5.9.8
|
||||
pillow>=10.1.0,<11.0.0
|
||||
retry>=0.9.2
|
||||
loguru>=0.7.3
|
||||
scenedetect>=0.6.5.2
|
||||
openpyxl>=3.1.5
|
||||
tabulate>=0.9.0
|
||||
xls2xlsx>=0.2.0
|
||||
docx2markdown>=0.1.1
|
||||
chunkr_ai>=0.0.41
|
||||
playwright>=1.50.0
|
||||
html2text>=2024.2.26
|
||||
|
||||
# Optional dependencies - Model platforms
|
||||
litellm>=1.38.1
|
||||
mistralai>=1.1.0
|
||||
reka-api>=3.0.8
|
||||
anthropic>=0.42.0
|
||||
cohere>=5.11.0
|
||||
fish-audio-sdk>=2024.12.5
|
||||
|
||||
# Optional dependencies - Huggingface ecosystem
|
||||
transformers>=4.0.0
|
||||
diffusers>=0.25.0
|
||||
accelerate>=0.26.0
|
||||
datasets>=3.0.0
|
||||
soundfile>=0.13.0
|
||||
sentencepiece>=0.2.0
|
||||
opencv-python>=4.0.0
|
||||
|
||||
# Optional dependencies - Core RAG components
|
||||
sentence-transformers>=3.0.1
|
||||
qdrant-client>=1.9.0
|
||||
pymilvus>=2.4.0
|
||||
rank-bm25>=0.2.2
|
||||
|
||||
# Optional dependencies - Storage solutions
|
||||
neo4j>=5.18.0
|
||||
nebula3-python==3.8.2
|
||||
redis>=5.0.6
|
||||
azure-storage-blob>=12.21.0
|
||||
google-cloud-storage>=2.18.0
|
||||
botocore>=1.35.3
|
||||
|
||||
# Optional dependencies - Document processing tools
|
||||
beautifulsoup4>=4.0.0
|
||||
docx2txt>=0.8.0
|
||||
PyMuPDF>=1.22.5
|
||||
unstructured==0.16.20
|
||||
prance>=23.6.21.0
|
||||
openapi-spec-validator>=0.7.1
|
||||
pandasai>=2.3.0
|
||||
|
||||
# Optional dependencies - Media processing tools
|
||||
imageio[pyav]>=2.34.2
|
||||
pydub>=0.25.1
|
||||
yt-dlp>=2024.11.4
|
||||
ffmpeg-python>=0.2.0
|
||||
|
||||
# Optional dependencies - Web and API tools
|
||||
wikipedia>=1.0.0
|
||||
linkup-sdk>=0.2.1
|
||||
duckduckgo-search>=6.3.5
|
||||
newspaper3k>=0.2.8
|
||||
wolframalpha>=5.0.0
|
||||
pyowm>=3.3.0
|
||||
googlemaps>=4.10.0
|
||||
requests_oauthlib>=1.3.1
|
||||
firecrawl-py>=1.0.0
|
||||
apify_client>=1.8.1
|
||||
tavily-python>=0.5.0
|
||||
dappier>=0.3.3
|
||||
sympy>=1.13.3
|
||||
|
||||
# Optional dependencies - Communication platform tools
|
||||
slack-sdk>=3.27.2
|
||||
slack-bolt>=1.20.1
|
||||
pygithub>=2.3.0
|
||||
pyTelegramBotAPI>=4.18.0
|
||||
discord.py>=2.3.2
|
||||
notion-client>=2.2.1
|
||||
praw>=7.7.1
|
||||
|
||||
# Optional dependencies - Data science and analytics tools
|
||||
rouge>=1.0.1
|
||||
aiosqlite>=0.20.0
|
||||
textblob>=0.17.1
|
||||
datacommons>=1.4.3
|
||||
datacommons_pandas>=0.0.3
|
||||
pandas>=1.5.3
|
||||
stripe>=11.3.0
|
||||
networkx>=3.4.2
|
||||
|
||||
# Optional dependencies - Research tools
|
||||
scholarly[tor]==1.7.11
|
||||
arxiv>=2.1.3
|
||||
arxiv2text>=0.1.14
|
||||
|
||||
# Optional dependencies - Development tools
|
||||
outlines>=0.1.7
|
||||
docker>=7.1.0
|
||||
jupyter_client>=8.6.2
|
||||
ipykernel>=6.0.0
|
||||
agentops>=0.3.21
|
||||
e2b-code-interpreter>=1.0.3
|
||||
tree-sitter-python>=0.23.6
|
||||
tree-sitter>=0.23.2
|
||||
pyyaml>=6.0.2
|
||||
|
||||
# Development and testing tools
|
||||
pytest>=7.0.0
|
||||
pytest-asyncio>=0.23.0
|
||||
mock>=5.0.0
|
||||
pytest-cov>=4.0.0
|
||||
ruff>=0.7.0
|
||||
mypy>=1.5.1
|
||||
toml>=0.10.2
|
||||
pre-commit>=3.0.0
|
||||
gradio>=3.0.0
|
||||
|
||||
# Type stubs
|
||||
types-Pillow
|
||||
types-Pygments
|
||||
types-mock
|
||||
types-regex
|
||||
types-setuptools
|
||||
types-tqdm
|
||||
types-colorama>=0.0.0
|
||||
types-requests>=2.0.0
|
||||
types-PyYAML>=6.0.0
|
||||
@@ -215,7 +215,7 @@ def evaluate_on_gaia():
|
||||
MAX_TRIES = 1
|
||||
|
||||
SAVE_RESULT_PATH = f"results/workforce/workforce_{LEVEL}_pass{MAX_TRIES}_gpt4o.json"
|
||||
test_idx = [0, 1, 2]
|
||||
test_idx = [1]
|
||||
|
||||
if os.path.exists(f"tmp/"):
|
||||
shutil.rmtree(f"tmp/")
|
||||
|
||||
@@ -137,6 +137,10 @@ class GAIABenchmark(BaseBenchmark):
|
||||
|
||||
|
||||
def _save_results_to_file(self, results: List[Dict[str, Any]], file_path: str):
    r"""Write benchmark results to ``file_path`` as indented UTF-8 JSON.

    Args:
        results (List[Dict[str, Any]]): Result records to serialize.
        file_path (str): Destination file. Any missing parent directories
            are created before the file is written.
    """
    base_dir = os.path.dirname(file_path)
    # A bare file name has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    if base_dir:
        os.makedirs(base_dir, exist_ok=True)

    # The context manager closes the file; no explicit close() is needed.
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=4, ensure_ascii=False)
|
||||
@@ -315,7 +319,7 @@ Please output with the final answer according to the requirements without any ot
|
||||
return self._generate_summary()
|
||||
|
||||
|
||||
def run_single_agent_with_retry(
|
||||
def run(
|
||||
self,
|
||||
agent: ChatAgent,
|
||||
on: Literal["valid", "test"],
|
||||
@@ -327,6 +331,7 @@ Please output with the final answer according to the requirements without any ot
|
||||
save_result: bool = False,
|
||||
|
||||
) -> Dict[str, Any]:
|
||||
r"""Run the benchmark with a single agent."""
|
||||
|
||||
datas = self._load_tasks(on, level, randomize, subset, idx)
|
||||
|
||||
@@ -424,7 +429,6 @@ Please output with the final answer according to the requirements without any ot
|
||||
subset: Optional[int] = None,
|
||||
idx: Optional[List[int]] = None,
|
||||
save_result: bool = False,
|
||||
filtered_tasks_file_path: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
r"""Run the benchmark with retry mechanism.
|
||||
|
||||
@@ -439,11 +443,10 @@ Please output with the final answer according to the requirements without any ot
|
||||
subset (Optional[int]): Number of tasks to run. Defaults to None (all tasks).
|
||||
idx (Optional[List[int]]): Specific task indices to run. Defaults to None.
|
||||
save_result (bool): Whether to save results to file. Defaults to False.
|
||||
filtered_tasks_file_path (Optional[str]): Path to the file containing filtered tasks. Defaults to None.
|
||||
Returns:
|
||||
Dict[str, Any]: Summary of benchmark results.
|
||||
"""
|
||||
tasks = self._load_tasks(on, level, randomize, subset, idx, filtered_tasks_file_path)
|
||||
tasks = self._load_tasks(on, level, randomize, subset, idx)
|
||||
|
||||
self._results = []
|
||||
|
||||
|
||||
Reference in New Issue
Block a user