From c5900ac767678161eea27ccc3347777cec4d13dd Mon Sep 17 00:00:00 2001
From: Han Xiao <han.xiao@jina.ai>
Date: Wed, 19 Mar 2025 10:07:45 +0800
Subject: [PATCH] fix: remove unnecessary attribution eval prompt

---
 src/tools/evaluator.ts      | 18 ------------------
 src/tools/jina-latechunk.ts | 21 ++++++++++++++-------
 2 files changed, 14 insertions(+), 25 deletions(-)
diff --git a/src/tools/evaluator.ts b/src/tools/evaluator.ts
index 630df60..3b0298f 100644
--- a/src/tools/evaluator.ts
+++ b/src/tools/evaluator.ts
@@ -38,24 +38,6 @@ Could you please evaluate my answer based on your knowledge and strict standards
   }
 }
 
-function getAttributionPrompt(question: string, answer: string, allKnowledge: KnowledgeItem[]): PromptPair {
-  return {
-    system: `You are an evaluator that verifies if answer content is properly attributed to and supported by the provided context.`,
-    user: `
-<context>
-${JSON.stringify(allKnowledge)}
-</context>    
-<question>
-${question}
-</question>
-<answer>
-${answer}
-</answer>
-
-Please read and think.
-`
-  }
-}
 
 function getDefinitivePrompt(question: string, answer: string): PromptPair {
   return {
diff --git a/src/tools/jina-latechunk.ts b/src/tools/jina-latechunk.ts
index dfcd16f..cfdceb5 100644
--- a/src/tools/jina-latechunk.ts
+++ b/src/tools/jina-latechunk.ts
@@ -6,16 +6,19 @@ import {Schemas} from "../utils/schemas";
 export async function cherryPick(question: string, longContext: string, options: any = {}, trackers: TrackerContext, schemaGen: Schemas, url: string) {
 
   const {
-    snippetLength = 3000,
+    snippetLength = 3000,  // char length of each snippet
     numSnippets = Math.max(2, Math.min(5, Math.floor(longContext.length / snippetLength))),
-    chunkSize = 300,
-    maxTokensPerRequest = 8192, // Maximum tokens per embedding request
-    // Rough estimate of tokens per character (can be adjusted based on your text)
-    tokensPerCharacter = 0.4
+    chunkSize = 300,  // char length of each chunk
   } = options;
 
+  const maxTokensPerRequest = 8192 // Maximum tokens per embedding request
+
+  // Rough estimate of tokens per character (can be adjusted based on your text)
+  const tokensPerCharacter = 0.4
+
   if (longContext.length < snippetLength * 2) {
     // If the context is shorter than the snippet length, return the whole context
+    console.log('content is too short, dont bother')
     return longContext;
   }
 
@@ -30,6 +33,10 @@ export async function cherryPick(question: string, longContext: string, options:
   trackers.actionTracker.trackThink('late_chunk', schemaGen.languageCode, {url});
 
   try {
+    if (question.trim().length === 0) {
+      throw new Error('Empty question, returning full context');
+    }
+
     // Estimate the number of tokens per chunk
     const estimatedTokensPerChunk = Math.ceil(chunkSize * tokensPerCharacter);
 
@@ -180,11 +187,11 @@ export async function cherryPick(question: string, longContext: string, options:
 
     // wrap with <snippet-index> tag
     return snippets.map((snippet, index) => `
-<snippet-${index+1}>
+<snippet-${index + 1}>
 
 ${snippet}
 
-</snippet-${index+1}>`.trim()).join("\n\n");
+</snippet-${index + 1}>`.trim()).join("\n\n");
 
   } catch (error) {
     console.error('Error in late chunking:', error);