Update run_infer.py to incorporate selection of task based on repo (#8509)

This commit is contained in:
Yueqi Song 2025-05-15 00:27:28 -04:00 committed by GitHub
parent 7e88d4195f
commit 3ca585b79f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -714,6 +714,19 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
subset = dataset[dataset[filter_column].isin(selected_ids)]
logger.info(f'Retained {subset.shape[0]} tasks after filtering')
return subset
if 'selected_repos' in data:
# repos for the swe-bench instances:
# ['astropy/astropy', 'django/django', 'matplotlib/matplotlib', 'mwaskom/seaborn', 'pallets/flask', 'psf/requests', 'pydata/xarray', 'pylint-dev/pylint', 'pytest-dev/pytest', 'scikit-learn/scikit-learn', 'sphinx-doc/sphinx', 'sympy/sympy']
selected_repos = data['selected_repos']
if isinstance(selected_repos, str): selected_repos = [selected_repos]
assert isinstance(selected_repos, list)
logger.info(
f'Filtering {selected_repos} tasks from "selected_repos"...'
)
subset = dataset[dataset["repo"].isin(selected_repos)]
logger.info(f'Retained {subset.shape[0]} tasks after filtering')
return subset
skip_ids = os.environ.get('SKIP_IDS', '').split(',')
if len(skip_ids) > 0:
logger.info(f'Filtering {len(skip_ids)} tasks from "SKIP_IDS"...')