refactor: build ref with embeddings

2026-03-22 07:29:35 +08:00 · 2025-04-14 22:27:51 +08:00
parent de35fe37cb
commit 858289a25d
1 changed file with 14 additions and 23 deletions
--- a/src/tools/jina-latechunk.ts
+++ b/src/tools/jina-latechunk.ts
@@ -7,9 +7,9 @@ import {cosineSimilarity} from "./cosine";
 export async function cherryPick(question: string, longContext: string, options: any = {}, trackers: TrackerContext, schemaGen: Schemas, url: string) {
  const {
-    snippetLength = 5000,  // char length of each snippet
+    snippetLength = 6000,  // char length of each snippet
    numSnippets = Math.max(2, Math.min(5, Math.floor(longContext.length / snippetLength))),
-    chunkSize = 500,  // char length of each chunk
+    chunkSize = 300,  // char length of each chunk
  } = options;
  const maxTokensPerRequest = 8192 // Maximum tokens per embedding request
@@ -52,8 +52,12 @@ export async function cherryPick(question: string, longContext: string, options:
    console.log(`Total length ${longContext.length} split ${chunks.length} chunks into ${chunkBatches.length} batches of ~${chunksPerBatch} chunks each`);
-    // Process all batches in parallel
+    // Process each batch and collect the embeddings
-    const batchPromises = chunkBatches.map(async (batch, batchIndex) => {
+    const allChunkEmbeddings: number[][] = [];
    let totalTokensUsed = 0;
    for (let batchIndex = 0; batchIndex < chunkBatches.length; batchIndex++) {
      const batch = chunkBatches[batchIndex];
      console.log(`Processing batch ${batchIndex + 1}/${chunkBatches.length} with ${batch.length} chunks`);
      // Get embeddings for the current batch
@@ -87,25 +91,12 @@ export async function cherryPick(question: string, longContext: string, options:
      // Extract embeddings from this batch
      const batchEmbeddings = batchEmbeddingResponse.data.data.map((item: any) => item.embedding);
      allChunkEmbeddings.push(...batchEmbeddings);
-      // Return both embeddings and token usage
+      // Track token usage
-      return {
+      const batchTokens = batchEmbeddingResponse.data.usage?.total_tokens || 0;
-        embeddings: batchEmbeddings,
+      totalTokensUsed += batchTokens;
-        tokens: batchEmbeddingResponse.data.usage?.total_tokens || 0
+    }
      };
    });
    // Wait for all batch processing to complete
    const batchResults = await Promise.all(batchPromises);
    // Collect all embeddings and total token usage
    const allChunkEmbeddings: number[][] = [];
    let totalTokensUsed = 0;
    batchResults.forEach(result => {
      allChunkEmbeddings.push(...result.embeddings);
      totalTokensUsed += result.tokens;
    });
    // Get embedding for the question
    const questionEmbeddingResponse = await axios.post(
@@ -208,4 +199,4 @@ ${snippet}
    // Fallback: just return the beginning of the context up to the desired length
    return longContext.substring(0, snippetLength * numSnippets);
  }
-}
+}