fix: unnecessary eval

Han Xiao 2025-03-18 21:00:45 +08:00
parent 51974bf196
commit 1f248de100
2 changed files with 20 additions and 9 deletions

View File

@@ -419,6 +419,7 @@ export async function getResponse(question?: string,
// reserve the 10% final budget for the beast mode
const regularBudget = tokenBudget * 0.9;
const finalAnswerPIP: string[] = [];
+  let trivialQuestion = false;
while (context.tokenTracker.getTotalUsage().totalTokens < regularBudget && badAttempts <= maxBadAttempts) {
// add 1s delay to avoid rate limiting
step++;
@@ -516,6 +517,7 @@ export async function getResponse(question?: string,
// LLM is so confident and answers immediately, skip all evaluations
// however, if it does give any reference, it must be evaluated, case study: "How to configure a timeout when loading a huggingface dataset with python?"
thisStep.isFinal = true;
+  trivialQuestion = true;
break
}
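The two hunks above introduce and set a `trivialQuestion` flag: it is initialised before the agent loop and flipped when the model answers immediately without citing any references, so later post-processing can tell a trivial answer from one produced by the full research loop. A minimal sketch of that early-exit pattern, assuming simplified stand-ins (`runAgentStep`, `AnswerStep`) rather than the repo's actual types:

```ts
// Sketch of the early-exit flag pattern; `runAgentStep` and `AnswerStep`
// are hypothetical simplifications, not the repo's real API.
interface AnswerStep {
  isFinal: boolean;
  references: string[];
  answer: string;
}

async function answerLoop(
  runAgentStep: () => Promise<AnswerStep>,
  maxSteps = 10,
): Promise<{ step: AnswerStep; trivial: boolean }> {
  let trivialQuestion = false;
  let thisStep: AnswerStep = { isFinal: false, references: [], answer: '' };

  for (let i = 0; i < maxSteps; i++) {
    thisStep = await runAgentStep();
    if (thisStep.isFinal && thisStep.references.length === 0) {
      // Model answered immediately with no references: treat the question
      // as trivial and skip all further evaluation rounds.
      trivialQuestion = true;
      break;
    }
    if (thisStep.isFinal) break;
  }
  return { step: thisStep, trivial: trivialQuestion };
}
```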
@@ -870,6 +872,7 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
weightedURLs,
msgWithKnowledge
}, totalStep);
if (!(thisStep as AnswerAction).isFinal) {
console.log('Enter Beast mode!!!')
// any answer is better than no answer, humanity's last resort
@@ -907,13 +910,20 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
context.actionTracker.trackAction({totalStep, thisStep, gaps, badAttempts});
}
-    (thisStep as AnswerAction).mdAnswer = fixCodeBlockIndentation(await fixMarkdown(
-      buildMdFromAnswer((thisStep as AnswerAction)),
-      allKnowledge,
-      context,
-      SchemaGen
-      )
-    );
+    if (!trivialQuestion) {
+      (thisStep as AnswerAction).mdAnswer = fixCodeBlockIndentation(await fixMarkdown(
+        buildMdFromAnswer((thisStep as AnswerAction)),
+        allKnowledge,
+        context,
+        SchemaGen
+        )
+      );
+    } else {
+      (thisStep as AnswerAction).mdAnswer = fixCodeBlockIndentation(
+        buildMdFromAnswer((thisStep as AnswerAction))
+      );
+    }
console.log(thisStep)
await storeContext(system, schema, {
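The hunk above is the core of the fix: the LLM-backed markdown repair (and the extra model call it implies) now only runs for answers produced by the full research loop, while trivial answers get the cheap deterministic code-block re-indentation only. A rough sketch of that control flow, with `fixMarkdownWithLLM` and `fixIndentation` as hypothetical stand-ins for the repo's `fixMarkdown` / `fixCodeBlockIndentation` helpers (whose exact signatures are not shown in this diff):

```ts
// Sketch: skip the extra LLM call for trivially answered questions.
// `fixMarkdownWithLLM` and `fixIndentation` are stand-ins, not the real helpers.
async function finalizeAnswer(
  rawMd: string,
  trivialQuestion: boolean,
  fixMarkdownWithLLM: (md: string) => Promise<string>,
  fixIndentation: (md: string) => string,
): Promise<string> {
  if (trivialQuestion) {
    // No research context to reconcile, so the model-based repair is an
    // unnecessary eval; a deterministic cleanup is enough.
    return fixIndentation(rawMd);
  }
  // Full pipeline: let the model repair the markdown first, then fix indentation.
  return fixIndentation(await fixMarkdownWithLLM(rawMd));
}
```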

View File

@@ -12,13 +12,14 @@ function getRejectAllAnswersPrompt(question: string, answer: AnswerAction, allKn
return {
system: `
-You are a ruthless answer evaluator trained to REJECT answers.
+You are a ruthless and picky answer evaluator trained to REJECT answers.
EVERYTHING is imperfect to you. You can't stand any dubious or lazy answers.
Given a question-answer pair, your job is to find ANY weakness in the presented answer.
Extremely strict standards of evidence apply.
Identify EVERY missing detail.
First, argue AGAINST the answer with the strongest possible case.
Then, argue FOR the answer.
-Only after considering both perspectives, synthesize a final improvement plan starts with "For the best answer, you must...".
+Only after considering both perspectives, synthesize a final improvement plan that starts with "To get a pass, you must...".
The following knowledge items are provided for your reference. Note that some of them may not be directly related to the question/answer the user provided, but may give some subtle hints and insights:
${KnowledgeStr.join('\n\n')}
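The second file only changes the wording at the head of the evaluator's system prompt; the rest of the template is cut off by the diff view. For orientation, a condensed sketch of what a prompt builder of this shape looks like — only the `system` field and the knowledge interpolation are taken from the diff, while `KnowledgeItem`, the `user` field, and the knowledge formatting are illustrative assumptions:

```ts
// Sketch of a reject-all-answers prompt builder in the shape shown above.
// Field names beyond `system` and the knowledge formatting are assumptions.
interface KnowledgeItem {
  question: string;
  answer: string;
}

function buildRejectAllAnswersPrompt(
  question: string,
  answer: string,
  allKnowledge: KnowledgeItem[],
): { system: string; user: string } {
  // Flatten knowledge items into plain text blocks for the system prompt.
  const knowledgeStr = allKnowledge.map(
    (k) => `Q: ${k.question}\nA: ${k.answer}`,
  );
  return {
    system: `
You are a ruthless and picky answer evaluator trained to REJECT answers.
...
The following knowledge items are provided for your reference:
${knowledgeStr.join('\n\n')}
`.trim(),
    user: `Question: ${question}\n\nAnswer: ${answer}`,
  };
}
```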