refactor: build ref with embeddings

This commit is contained in:
Han Xiao 2025-04-14 22:27:51 +08:00
parent de35fe37cb
commit 858289a25d

View File

@@ -7,9 +7,9 @@ import {cosineSimilarity} from "./cosine";
export async function cherryPick(question: string, longContext: string, options: any = {}, trackers: TrackerContext, schemaGen: Schemas, url: string) {
const {
snippetLength = 5000, // char length of each snippet
snippetLength = 6000, // char length of each snippet
numSnippets = Math.max(2, Math.min(5, Math.floor(longContext.length / snippetLength))),
chunkSize = 500, // char length of each chunk
chunkSize = 300, // char length of each chunk
} = options;
const maxTokensPerRequest = 8192 // Maximum tokens per embedding request
@@ -52,8 +52,12 @@ export async function cherryPick(question: string, longContext: string, options:
console.log(`Total length ${longContext.length} split ${chunks.length} chunks into ${chunkBatches.length} batches of ~${chunksPerBatch} chunks each`);
// Process all batches in parallel
const batchPromises = chunkBatches.map(async (batch, batchIndex) => {
// Process each batch and collect the embeddings
const allChunkEmbeddings: number[][] = [];
let totalTokensUsed = 0;
for (let batchIndex = 0; batchIndex < chunkBatches.length; batchIndex++) {
const batch = chunkBatches[batchIndex];
console.log(`Processing batch ${batchIndex + 1}/${chunkBatches.length} with ${batch.length} chunks`);
// Get embeddings for the current batch
@@ -87,25 +91,12 @@ export async function cherryPick(question: string, longContext: string, options:
// Extract embeddings from this batch
const batchEmbeddings = batchEmbeddingResponse.data.data.map((item: any) => item.embedding);
allChunkEmbeddings.push(...batchEmbeddings);
// Return both embeddings and token usage
return {
embeddings: batchEmbeddings,
tokens: batchEmbeddingResponse.data.usage?.total_tokens || 0
};
});
// Wait for all batch processing to complete
const batchResults = await Promise.all(batchPromises);
// Collect all embeddings and total token usage
const allChunkEmbeddings: number[][] = [];
let totalTokensUsed = 0;
batchResults.forEach(result => {
allChunkEmbeddings.push(...result.embeddings);
totalTokensUsed += result.tokens;
});
// Track token usage
const batchTokens = batchEmbeddingResponse.data.usage?.total_tokens || 0;
totalTokensUsed += batchTokens;
}
// Get embedding for the question
const questionEmbeddingResponse = await axios.post(
@@ -208,4 +199,4 @@ ${snippet}
// Fallback: just return the beginning of the context up to the desired length
return longContext.substring(0, snippetLength * numSnippets);
}
}
}