mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
fix encoding error for embeddings
This commit is contained in:
parent
a4cb9139ed
commit
65636fbcdf
@@ -89,7 +89,7 @@ async function getBatchEmbeddingsWithRetry(
|
|||||||
const key = Object.keys(item)[0];
|
const key = Object.keys(item)[0];
|
||||||
return key === 'text' ? { text: trimSymbols(item[key]) } : item;
|
return key === 'text' ? { text: trimSymbols(item[key]) } : item;
|
||||||
}
|
}
|
||||||
}); // Copy the original texts
|
}).filter(item => typeof item !== 'string' || item.trim()); // Copy the original texts
|
||||||
let indexMap = new Map<number, number>(); // Map to keep track of original indices
|
let indexMap = new Map<number, number>(); // Map to keep track of original indices
|
||||||
|
|
||||||
// Initialize indexMap with original indices
|
// Initialize indexMap with original indices
|
||||||
@@ -251,7 +251,7 @@ function truncateInputString(input: string | Record<string, string>): string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function trimSymbols(str: string): string {
|
export function trimSymbols(str: string): string {
|
||||||
const regex = /[\p{S}\p{P}\p{Z}\p{C}\p{Emoji}]+/gu;
|
const regex = /[\p{S}\p{P}\p{Z}\p{C}\p{Emoji}]+/gu;
|
||||||
return str.replace(regex, ' ');
|
return str.replace(regex, ' ');
|
||||||
}
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user