mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
Fix late chunking error: skip chunks that are empty after symbol trimming
This commit is contained in:
parent
65636fbcdf
commit
c2dbfc65a3
@@ -89,7 +89,7 @@ async function getBatchEmbeddingsWithRetry(
|
|||||||
const key = Object.keys(item)[0];
|
const key = Object.keys(item)[0];
|
||||||
return key === 'text' ? { text: trimSymbols(item[key]) } : item;
|
return key === 'text' ? { text: trimSymbols(item[key]) } : item;
|
||||||
}
|
}
|
||||||
}).filter(item => typeof item !== 'string' || item.trim()); // Copy the original texts
|
}); // Copy the original texts
|
||||||
let indexMap = new Map<number, number>(); // Map to keep track of original indices
|
let indexMap = new Map<number, number>(); // Map to keep track of original indices
|
||||||
|
|
||||||
// Initialize indexMap with original indices
|
// Initialize indexMap with original indices
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import { TrackerContext } from "../types";
|
import { TrackerContext } from "../types";
|
||||||
import { Schemas } from "../utils/schemas";
|
import { Schemas } from "../utils/schemas";
|
||||||
import { cosineSimilarity } from "./cosine";
|
import { cosineSimilarity } from "./cosine";
|
||||||
import { getEmbeddings } from "./embeddings";
|
import { getEmbeddings, trimSymbols } from "./embeddings";
|
||||||
import { logError, logDebug } from '../logging';
|
import { logError, logDebug } from '../logging';
|
||||||
|
|
||||||
// Refactored cherryPick function
|
// Refactored cherryPick function
|
||||||
@@ -21,7 +21,12 @@ export async function cherryPick(question: string, longContext: string, options:
|
|||||||
// Split the longContext into chunks of chunkSize
|
// Split the longContext into chunks of chunkSize
|
||||||
const chunks: string[] = [];
|
const chunks: string[] = [];
|
||||||
for (let i = 0; i < longContext.length; i += chunkSize) {
|
for (let i = 0; i < longContext.length; i += chunkSize) {
|
||||||
chunks.push(longContext.substring(i, Math.min(i + chunkSize, longContext.length)));
|
const str = longContext.substring(i, Math.min(i + chunkSize, longContext.length));
|
||||||
|
const trimmedStr = trimSymbols(str);
|
||||||
|
if (trimmedStr.trim().length === 0) {
|
||||||
|
continue; // Skip empty chunks
|
||||||
|
}
|
||||||
|
chunks.push(str);
|
||||||
}
|
}
|
||||||
|
|
||||||
logDebug(`late chunking enabled! num chunks: ${chunks.length}`);
|
logDebug(`late chunking enabled! num chunks: ${chunks.length}`);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user