Fix late chunking error

This commit is contained in:
Sha Zhou 2025-08-22 16:04:54 +08:00
parent 65636fbcdf
commit c2dbfc65a3
2 changed files with 8 additions and 3 deletions

View File

@ -89,7 +89,7 @@ async function getBatchEmbeddingsWithRetry(
const key = Object.keys(item)[0];
return key === 'text' ? { text: trimSymbols(item[key]) } : item;
}
}).filter(item => typeof item !== 'string' || item.trim()); // Copy the original texts
}); // Copy the original texts
let indexMap = new Map<number, number>(); // Map to keep track of original indices
// Initialize indexMap with original indices

View File

@ -1,7 +1,7 @@
import { TrackerContext } from "../types";
import { Schemas } from "../utils/schemas";
import { cosineSimilarity } from "./cosine";
import { getEmbeddings } from "./embeddings";
import { getEmbeddings, trimSymbols } from "./embeddings";
import { logError, logDebug } from '../logging';
// Refactored cherryPick function
@ -21,7 +21,12 @@ export async function cherryPick(question: string, longContext: string, options:
// Split the longContext into chunks of chunkSize
const chunks: string[] = [];
for (let i = 0; i < longContext.length; i += chunkSize) {
chunks.push(longContext.substring(i, Math.min(i + chunkSize, longContext.length)));
const str = longContext.substring(i, Math.min(i + chunkSize, longContext.length));
const trimmedStr = trimSymbols(str);
if (trimmedStr.trim().length === 0) {
continue; // Skip empty chunks
}
chunks.push(str);
}
logDebug(`late chunking enabled! num chunks: ${chunks.length}`);