mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-25 22:16:49 +08:00
fix late chunk error
This commit is contained in:
parent
65636fbcdf
commit
c2dbfc65a3
@ -89,7 +89,7 @@ async function getBatchEmbeddingsWithRetry(
|
||||
const key = Object.keys(item)[0];
|
||||
return key === 'text' ? { text: trimSymbols(item[key]) } : item;
|
||||
}
|
||||
}).filter(item => typeof item !== 'string' || item.trim()); // Copy the original texts
|
||||
}); // Copy the original texts
|
||||
let indexMap = new Map<number, number>(); // Map to keep track of original indices
|
||||
|
||||
// Initialize indexMap with original indices
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
import { TrackerContext } from "../types";
|
||||
import { Schemas } from "../utils/schemas";
|
||||
import { cosineSimilarity } from "./cosine";
|
||||
import { getEmbeddings } from "./embeddings";
|
||||
import { getEmbeddings, trimSymbols } from "./embeddings";
|
||||
import { logError, logDebug } from '../logging';
|
||||
|
||||
// Refactored cherryPick function
|
||||
@ -21,7 +21,12 @@ export async function cherryPick(question: string, longContext: string, options:
|
||||
// Split the longContext into chunks of chunkSize
|
||||
const chunks: string[] = [];
|
||||
for (let i = 0; i < longContext.length; i += chunkSize) {
|
||||
chunks.push(longContext.substring(i, Math.min(i + chunkSize, longContext.length)));
|
||||
const str = longContext.substring(i, Math.min(i + chunkSize, longContext.length));
|
||||
const trimmedStr = trimSymbols(str);
|
||||
if (trimmedStr.trim().length === 0) {
|
||||
continue; // Skip empty chunks
|
||||
}
|
||||
chunks.push(str);
|
||||
}
|
||||
|
||||
logDebug(`late chunking enabled! num chunks: ${chunks.length}`);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user