From 71157a74682dcd2cd43d68813023d9eea4b5071c Mon Sep 17 00:00:00 2001 From: Han Xiao Date: Wed, 19 Mar 2025 08:06:29 +0800 Subject: [PATCH] fix: unnecessary eval --- src/agent.ts | 10 +++++----- src/tools/error-analyzer.ts | 2 -- src/tools/evaluator.ts | 22 +++++++++++----------- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/agent.ts b/src/agent.ts index 1050b2c..1b1174e 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -226,7 +226,7 @@ ${actionSections.join('\n\n')} `); // Add footer - sections.push(`Think step by step, choose the action, and respond in valid JSON format matching exact JSON schema of that action.`); + sections.push(`Think step by step, choose the action, then respond by matching the schema of that action.`); return removeExtraLineBreaks(sections.join('\n\n')); } @@ -408,7 +408,7 @@ export async function getResponse(question?: string, let allowReflect = true; let allowCoding = true; let system = ''; - let maxStrictEvals = 2; + let maxStrictEvals = Math.max(1, Math.min(3, maxBadAttempts - 1)); let msgWithKnowledge: CoreMessage[] = []; let thisStep: StepAction = {action: 'answer', answer: '', references: [], think: '', isFinal: false}; @@ -417,7 +417,7 @@ export async function getResponse(question?: string, const badURLs: string[] = []; const evaluationMetrics: Record = {}; // reserve the 10% final budget for the beast mode - const regularBudget = tokenBudget * 0.9; + const regularBudget = tokenBudget * 0.85; const finalAnswerPIP: string[] = []; let trivialQuestion = false; while (context.tokenTracker.getTotalUsage().totalTokens < regularBudget && badAttempts <= maxBadAttempts) { @@ -920,8 +920,8 @@ But unfortunately, you failed to solve the issue. You need to think out of the b ); } else { (thisStep as AnswerAction).mdAnswer = fixCodeBlockIndentation( - buildMdFromAnswer((thisStep as AnswerAction)) - ); + buildMdFromAnswer((thisStep as AnswerAction)) + ); } console.log(thisStep) diff --git a/src/tools/error-analyzer.ts b/src/tools/error-analyzer.ts index c416d24..e29ebd6 100644 --- a/src/tools/error-analyzer.ts +++ b/src/tools/error-analyzer.ts @@ -19,8 +19,6 @@ Analyze the steps and provide detailed feedback following these guidelines: - In the recap: Summarize key actions chronologically, highlight patterns, and identify where the process started to go wrong - In the blame: Point to specific steps or patterns that led to the inadequate answer - In the improvement: Provide actionable suggestions that could have led to a better outcome - -Generate a JSON response following JSON schema. diff --git a/src/tools/evaluator.ts b/src/tools/evaluator.ts index bb3decc..630df60 100644 --- a/src/tools/evaluator.ts +++ b/src/tools/evaluator.ts @@ -646,17 +646,17 @@ export async function evaluateAnswer( for (const evaluationType of evaluationTypes) { let prompt: { system: string; user: string } | undefined switch (evaluationType) { - case 'attribution': { - if (allKnowledge.length === 0) { - return { - pass: false, - think: `The knowledge is completely empty and the answer can not be derived from it. Need to found some other references and URLs`, - type: 'attribution', - }; - } - prompt = getAttributionPrompt(question, action.answer, allKnowledge); - break; - } + // case 'attribution': { + // if (allKnowledge.length === 0) { + // return { + // pass: false, + // think: `The knowledge is completely empty and the answer can not be derived from it. Need to found some other references and URLs`, + // type: 'attribution', + // }; + // } + // prompt = getAttributionPrompt(question, action.answer, allKnowledge); + // break; + // } case 'definitive': prompt = getDefinitivePrompt(question, action.answer);