From 65636fbcdffbe70e06b1f55ed675413cc657dc49 Mon Sep 17 00:00:00 2001
From: Sha Zhou
Date: Fri, 22 Aug 2025 15:37:52 +0800
Subject: [PATCH] fix encoding error for embeddings

---
 src/tools/embeddings.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/tools/embeddings.ts b/src/tools/embeddings.ts
index b9dbae5..5be37cc 100644
--- a/src/tools/embeddings.ts
+++ b/src/tools/embeddings.ts
@@ -89,7 +89,7 @@ async function getBatchEmbeddingsWithRetry(
       const key = Object.keys(item)[0];
       return key === 'text' ? { text: trimSymbols(item[key]) } : item;
     }
-  }); // Copy the original texts
+  }).filter(item => typeof item !== 'string' || item.trim()); // Copy the original texts
   let indexMap = new Map(); // Map to keep track of original indices
 
   // Initialize indexMap with original indices
@@ -251,7 +251,7 @@ function truncateInputString(input: string | Record): string {
   }
 }
 
-function trimSymbols(str: string): string {
+export function trimSymbols(str: string): string {
   const regex = /[\p{S}\p{P}\p{Z}\p{C}\p{Emoji}]+/gu;
   return str.replace(regex, ' ');
 }
\ No newline at end of file