From c5900ac767678161eea27ccc3347777cec4d13dd Mon Sep 17 00:00:00 2001 From: Han Xiao Date: Wed, 19 Mar 2025 10:07:45 +0800 Subject: [PATCH] fix: unnecessary eval --- src/tools/evaluator.ts | 18 ------------------ src/tools/jina-latechunk.ts | 21 ++++++++++++++------- 2 files changed, 14 insertions(+), 25 deletions(-) diff --git a/src/tools/evaluator.ts b/src/tools/evaluator.ts index 630df60..3b0298f 100644 --- a/src/tools/evaluator.ts +++ b/src/tools/evaluator.ts @@ -38,24 +38,6 @@ Could you please evaluate my answer based on your knowledge and strict standards } } -function getAttributionPrompt(question: string, answer: string, allKnowledge: KnowledgeItem[]): PromptPair { - return { - system: `You are an evaluator that verifies if answer content is properly attributed to and supported by the provided context.`, - user: ` - -${JSON.stringify(allKnowledge)} - - -${question} - - -${answer} - - -Please read and think. -` - } -} function getDefinitivePrompt(question: string, answer: string): PromptPair { return { diff --git a/src/tools/jina-latechunk.ts b/src/tools/jina-latechunk.ts index dfcd16f..cfdceb5 100644 --- a/src/tools/jina-latechunk.ts +++ b/src/tools/jina-latechunk.ts @@ -6,16 +6,19 @@ import {Schemas} from "../utils/schemas"; export async function cherryPick(question: string, longContext: string, options: any = {}, trackers: TrackerContext, schemaGen: Schemas, url: string) { const { - snippetLength = 3000, + snippetLength = 3000, // char length of each snippet numSnippets = Math.max(2, Math.min(5, Math.floor(longContext.length / snippetLength))), - chunkSize = 300, - maxTokensPerRequest = 8192, // Maximum tokens per embedding request - // Rough estimate of tokens per character (can be adjusted based on your text) - tokensPerCharacter = 0.4 + chunkSize = 300, // char length of each chunk } = options; + const maxTokensPerRequest = 8192 // Maximum tokens per embedding request + + // Rough estimate of tokens per character (can be adjusted based on your text) + const tokensPerCharacter = 0.4 + if (longContext.length < snippetLength * 2) { // If the context is shorter than the snippet length, return the whole context + console.log('content is too short, dont bother') return longContext; } @@ -30,6 +33,10 @@ export async function cherryPick(question: string, longContext: string, options: trackers.actionTracker.trackThink('late_chunk', schemaGen.languageCode, {url}); try { + if (question.trim().length === 0) { + throw new Error('Empty question, returning full context'); + } + // Estimate the number of tokens per chunk const estimatedTokensPerChunk = Math.ceil(chunkSize * tokensPerCharacter); @@ -180,11 +187,11 @@ export async function cherryPick(question: string, longContext: string, options: // wrap with tag return snippets.map((snippet, index) => ` - + ${snippet} -`.trim()).join("\n\n"); +`.trim()).join("\n\n"); } catch (error) { console.error('Error in late chunking:', error);