Minor SWE-Bench inference config tweak (#2381)

* save infer logs to infer_logs
* set max budget for swebench eval
2025-12-26 05:48:36 +08:00 · 2024-06-11 04:14:22 +08:00 · 2024-06-11 04:14:22 +08:00 · 11a2d1682d
commit 11a2d1682d
parent e4145aef66
2 changed files with 2 additions and 1 deletions
--- a/evaluation/swe_bench/README.md
+++ b/evaluation/swe_bench/README.md
@@ -51,6 +51,7 @@ sandbox_timeout = 120
 use_host_network = false
 run_as_devin = false
 enable_auto_lint = true
+max_budget_per_task = 4 # 4 USD

 # TODO: Change these to the model you want to evaluate
 [eval_gpt4_1106_preview]
--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@@ -209,7 +209,7 @@ def process_instance(
    if reset_logger:
        # Set up logger
        log_file = os.path.join(
-            eval_output_dir, 'logs', f'instance_{instance.instance_id}.log'
+            eval_output_dir, 'infer_logs', f'instance_{instance.instance_id}.log'
        )
        # Remove all existing handlers from logger
        for handler in logger.handlers[:]: