diff --git a/owl/utils/document_toolkit.py b/owl/utils/document_toolkit.py index 192d1f1..5d81ce4 100644 --- a/owl/utils/document_toolkit.py +++ b/owl/utils/document_toolkit.py @@ -144,12 +144,11 @@ class DocumentProcessingToolkit(BaseToolkit): return True, extracted_text try: result = asyncio.run(self._extract_content_with_chunkr(document_path)) - raise ValueError("Chunkr is not available.") return True, result except Exception as e: logger.warning( - f"Error occurred while using chunkr to process document: {e}" + f"Error occurred while using Chunkr to process document: {e}" ) if document_path.endswith(".pdf"): # try using pypdf to extract text from pdf @@ -226,7 +225,7 @@ class DocumentProcessingToolkit(BaseToolkit): if result.status == "Failed": logger.error( - f"Error while processing document {document_path}: {result.message}" + f"Error while processing document {document_path}: {result.message} using Chunkr." ) return f"Error while processing document: {result.message}" diff --git a/owl/utils/gaia.py b/owl/utils/gaia.py index 07f1827..a133a26 100644 --- a/owl/utils/gaia.py +++ b/owl/utils/gaia.py @@ -191,15 +191,10 @@ class GAIABenchmark(BaseBenchmark): except Exception as e: logger.warning(e) # raise FileNotFoundError(f"{self.save_to} does not exist.") - + datas = [data for data in datas if not self._check_task_completed(data["task_id"])] + logger.info(f"Number of tasks to be processed: {len(datas)}") # Process tasks for task in tqdm(datas, desc="Running"): - if self._check_task_completed(task["task_id"]): - logger.info( - f"The following task is already completed:\n task id: {task['task_id']}, question: {task['Question']}" - ) - continue - if_prepared_task, info = self._prepare_task(task) if not if_prepared_task: _result_info = {