diff --git a/evaluation/benchmarks/mint/tasks/reasoning.py b/evaluation/benchmarks/mint/tasks/reasoning.py index 08cf320c35..64fb18a6b0 100644 --- a/evaluation/benchmarks/mint/tasks/reasoning.py +++ b/evaluation/benchmarks/mint/tasks/reasoning.py @@ -307,7 +307,7 @@ class TheoremqaTask(Task): # Converting the string answer to a number/list/bool/option try: - prediction = eval(prediction) + prediction = ast.literal_eval(prediction) except Exception: LOGGER.warning( f'[TASK] Failed to convert the answer: {prediction}\n{traceback.format_exc()}'