feat: Support Tau-Bench and BFCL evaluation benchmarks (#11953)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Aaron Sequeira
2025-12-31 06:12:50 +03:00
committed by GitHub
parent 82e0aa7924
commit 4c0f0a1e9b
6 changed files with 469 additions and 2 deletions

View File

@@ -192,6 +192,9 @@ datasets = "*"
joblib = "*"
swebench = { git = "https://github.com/ryanhoangt/SWE-bench.git", rev = "fix-modal-patch-eval" }
multi-swe-bench = "0.1.2"
pandas = "*"
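# Optional benchmark dependencies, currently disabled (commented out).
# tau-bench = { git = "https://github.com/sierra-research/tau-bench.git" }  # TODO: pin a rev before enabling
# bfcl-eval = "*"  # TODO: verify the exact package name/source before enabling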
[tool.poetry.group.testgeneval.dependencies]
fuzzywuzzy = "^0.18.0"