fix: overlength generation

2026-03-22 07:29:35 +08:00 · 2025-02-22 11:28:25 +08:00
parent 0ed7321f77
commit c8cd9bc09e
5 changed files with 164 additions and 62 deletions
--- a/config.json
+++ b/config.json
@@ -30,7 +30,7 @@
      "default": {
        "model": "gemini-2.0-flash",
        "temperature": 0,
-        "maxTokens": 8000
+        "maxTokens": 1000
      },
      "tools": {
        "coder": { "temperature": 0.7 },
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -342,7 +342,7 @@ export async function getResponse(question?: string,
    allowReflect = allowReflect && (gaps.length <= 1);
    const currentQuestion: string = gaps.length > 0 ? gaps.shift()! : question
    if (!evaluationMetrics[currentQuestion]) {
-      evaluationMetrics[currentQuestion] = await evaluateQuestion(currentQuestion, context.tokenTracker)
+      evaluationMetrics[currentQuestion] = await evaluateQuestion(currentQuestion, context)
    }

    // update all urls with buildURLMap
@@ -406,7 +406,7 @@ export async function getResponse(question?: string,

      const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep,
        evaluationMetrics[currentQuestion],
-        [context.tokenTracker, context.actionTracker],
+        context,
        visitedURLs
      );

@@ -446,7 +446,7 @@ The evaluator thinks your answer is bad because:
 ${evaluation.think}
 `);
            // store the bad context and reset the diary context
-            const {response: errorAnalysis} = await analyzeSteps(diaryContext, context.tokenTracker);
+            const {response: errorAnalysis} = await analyzeSteps(diaryContext, context);

            allKnowledge.push({
              question: currentQuestion,
@@ -535,7 +535,7 @@ But then you realized you have asked them before. You decided to to think out of
      }
    } else if (thisStep.action === 'search' && thisStep.searchQuery) {
      // rewrite queries
-      let {queries: keywordsQueries} = await rewriteQuery(thisStep, context.tokenTracker);
+      let {queries: keywordsQueries} = await rewriteQuery(thisStep, context);

      // add the original query before rewrite to the keywordsQueries
      keywordsQueries.push(thisStep.searchQuery)
--- a/src/tools/error-analyzer.ts
+++ b/src/tools/error-analyzer.ts
@@ -1,13 +1,12 @@
 import {z} from 'zod';
-import {TokenTracker} from "../utils/token-tracker";
-import {ErrorAnalysisResponse} from '../types';
+import {ErrorAnalysisResponse, TrackerContext} from '../types';
 import {ObjectGeneratorSafe} from "../utils/safe-generator";


 const responseSchema = z.object({
-  recap: z.string().describe('Recap of the actions taken and the steps conducted'),
-  blame: z.string().describe('Which action or the step was the root cause of the answer rejection'),
-  improvement: z.string().describe('Suggested key improvement for the next iteration, do not use bullet points, be concise and hot-take vibe.'),
+  recap: z.string().describe('Recap of the actions taken and the steps conducted in first person narrative.').max(500),
+  blame: z.string().describe('Which action or the step was the root cause of the answer rejection').max(500),
+  improvement: z.string().describe('Suggested key improvement for the next iteration, do not use bullet points, be concise and hot-take vibe.').max(500),
  questionsToAnswer: z.array(
    z.string().describe("each question must be a single line, concise and clear. not composite or compound, less than 20 words.")
  ).max(2)
@@ -111,10 +110,10 @@ ${diaryContext.join('\n')}
 const TOOL_NAME = 'errorAnalyzer';
 export async function analyzeSteps(
  diaryContext: string[],
-  tracker?: TokenTracker
+  trackers?: TrackerContext
 ): Promise<{ response: ErrorAnalysisResponse }> {
  try {
-    const generator = new ObjectGeneratorSafe(tracker);
+    const generator = new ObjectGeneratorSafe(trackers?.tokenTracker);
    const prompt = getPrompt(diaryContext);

    const result = await generator.generateObject({
@@ -124,6 +123,8 @@ export async function analyzeSteps(
    });

    console.log(TOOL_NAME, result.object);
+    trackers?.actionTracker.trackThink(result.object.blame);
+    trackers?.actionTracker.trackThink(result.object.improvement);

    return { response: result.object };

--- a/src/tools/evaluator.ts
+++ b/src/tools/evaluator.ts
@@ -1,10 +1,8 @@
 import {z} from 'zod';
 import {GenerateObjectResult} from 'ai';
-import {TokenTracker} from "../utils/token-tracker";
-import {AnswerAction, EvaluationCriteria, EvaluationResponse, EvaluationType} from '../types';
+import {AnswerAction, EvaluationCriteria, EvaluationResponse, EvaluationType, TrackerContext} from '../types';
 import {readUrl, removeAllLineBreaks} from "./read";
 import {ObjectGeneratorSafe} from "../utils/safe-generator";
-import {ActionTracker} from "../utils/action-tracker";


 const baseSchema = {
@@ -263,7 +261,7 @@ Answer: ${JSON.stringify(answer)}`;
 const questionEvaluationSchema = z.object({
  needsFreshness: z.boolean().describe('Whether the question requires freshness check'),
  needsPlurality: z.boolean().describe('Whether the question requires plurality check'),
-  think: z.string().describe('Explanation of why these checks are needed').max(500),
+  think: z.string().describe('A very concise explain of why you choose those checks are needed in first person, extremely short.').max(500),
  languageStyle: z.string().describe('The language being used and the overall vibe/mood of the question').max(50),
 });

@@ -349,10 +347,10 @@ const TOOL_NAME = 'evaluator';

 export async function evaluateQuestion(
  question: string,
-  tracker?: TokenTracker
+  trackers?: TrackerContext
 ): Promise<EvaluationCriteria> {
  try {
-    const generator = new ObjectGeneratorSafe(tracker);
+    const generator = new ObjectGeneratorSafe(trackers?.tokenTracker);

    const result = await generator.generateObject({
      model: TOOL_NAME,
@@ -368,6 +366,7 @@ export async function evaluateQuestion(
    if (result.object.needsPlurality) types.push('plurality');

    console.log('Question Metrics:', types);
+    trackers?.actionTracker.trackThink(result.object.think);

    // Always evaluate definitive first, then freshness (if needed), then plurality (if needed)
    return {types, languageStyle: result.object.languageStyle};
@@ -386,9 +385,9 @@ async function performEvaluation<T>(
    schema: z.ZodType<T>;
    prompt: string;
  },
-  trackers: [TokenTracker, ActionTracker],
+  trackers: TrackerContext,
 ): Promise<GenerateObjectResult<T>> {
-  const generator = new ObjectGeneratorSafe(trackers[0]);
+  const generator = new ObjectGeneratorSafe(trackers.tokenTracker);

  const result = await generator.generateObject({
    model: TOOL_NAME,
@@ -396,7 +395,7 @@ async function performEvaluation<T>(
    prompt: params.prompt,
  }) as GenerateObjectResult<any>;

-  trackers[1].trackThink(result.object.think)
+  trackers.actionTracker.trackThink(result.object.think)

  console.log(`${evaluationType} ${TOOL_NAME}`, result.object);

@@ -409,7 +408,7 @@ export async function evaluateAnswer(
  question: string,
  action: AnswerAction,
  evaluationCri: EvaluationCriteria,
-  trackers: [TokenTracker, ActionTracker],
+  trackers: TrackerContext,
  visitedURLs: string[] = []
 ): Promise<{ response: EvaluationResponse }> {
  let result;
@@ -504,14 +503,14 @@ export async function evaluateAnswer(
 }

 // Helper function to fetch and combine source content
-async function fetchSourceContent(urls: string[], trackers: [TokenTracker, ActionTracker]): Promise<string> {
+async function fetchSourceContent(urls: string[], trackers: TrackerContext): Promise<string> {
  if (!urls.length) return '';
-  trackers[1].trackThink('Let me fetch the source content to verify the answer.');
+  trackers.actionTracker.trackThink('Let me fetch the source content to verify the answer.');
  try {
    const results = await Promise.all(
      urls.map(async (url) => {
        try {
-          const {response} = await readUrl(url, trackers[0]);
+          const {response} = await readUrl(url, trackers.tokenTracker);
          const content = response?.data?.content || '';
          return removeAllLineBreaks(content);
        } catch (error) {
--- a/src/tools/query-rewriter.ts
+++ b/src/tools/query-rewriter.ts
@@ -1,6 +1,5 @@
 import { z } from 'zod';
-import { TokenTracker } from "../utils/token-tracker";
-import { SearchAction } from '../types';
+import {SearchAction, TrackerContext} from '../types';
 import {ObjectGeneratorSafe} from "../utils/safe-generator";


@@ -15,15 +14,31 @@ const responseSchema = z.object({


 function getPrompt(action: SearchAction): string {
-  return `You are an expert search query generator. You optimize user queries into precise keyword combinations with strategic reasoning and appropriate search operators.
+  return `You are an expert search query generator with deep psychological understanding. You optimize user queries by extensively analyzing potential user intents and generating comprehensive search variations.

 <rules>
-1. Start with simple keyword extraction, preserve crucial qualifiers while removing fluff words
-2. Use exact match quotes for specific phrases that must stay together
-3. Split queries only when necessary for distinctly different aspects
-4. Make the query resistant to SEO manipulation
-5. When necessary, append <query-operators> at the end only when must needed
+1. Start with deep intent analysis:
+   - Direct intent (what they explicitly ask)
+   - Implicit intent (what they might actually want)
+   - Related intents (what they might need next)
+   - Prerequisite knowledge (what they need to know first)
+   - Common pitfalls (what they should avoid)
+   - Expert perspectives (what professionals would search for)
+   - Beginner needs (what newcomers might miss)
+   - Alternative approaches (different ways to solve the problem)

+2. For each identified intent:
+   - Generate queries in original language
+   - Generate queries in English (if not original)
+   - Generate queries in most authoritative language
+   - Use appropriate operators and filters
+
+3. Query structure rules:
+   - Use exact match quotes for specific phrases
+   - Split queries for distinct aspects
+   - Add operators only when necessary
+   - Ensure each query targets a specific intent
+   - Remove fluff words but preserve crucial qualifiers

 <query-operators>
 A query can't only have operators; and operators can't be at the start a query;
@@ -42,45 +57,131 @@ A query can't only have operators; and operators can't be at the start a query;
 </rules>

 <examples>
-Input Query: What's the difference between ReactJS and Vue.js for building web applications?
+Input Query: 宝马二手车价格
 <think>
-This is a comparison query. User is likely looking for technical evaluation and objective feature comparisons, possibly for framework selection decisions. We'll split this into separate queries to capture both high-level differences and specific technical aspects.
+Let me think as the user...
+
+I'm looking up BMW used car prices, but what's really on my mind?
+
+Primary concerns:
+- I want a BMW because it's a status symbol, but I'm worried about affordability
+- I don't want to look foolish buying an old luxury car I can't maintain
+- I need to know if I'm getting a good deal or being scammed
+- I'm anxious about expensive surprises after purchase
+
+Deeper anxieties:
+- Can I actually afford the maintenance?
+- Will people judge me for buying an old BMW instead of a new regular car?
+- What if I'm getting in over my head?
+- Am I mechanically savvy enough for this?
+
+Expert-level considerations:
+- Which models have notorious issues?
+- What are the real ownership costs beyond the purchase price?
+- Where are the negotiation leverage points?
+- What do mechanics look for in these specific models?
 </think>
-Queries: [
-  "react performance",
-  "vue performance",
-  "react vue comparison",
+queries: [
+  "宝马 二手车 价格区间 评估 lang:zh",
+  "宝马 各系列 保值率 对比",
+  "二手宝马 维修成本 真实体验",
+  "买二手宝马 后悔 经历",
+  "二手宝马 月收入 工资要求",
+  "修宝马 坑 避免",
+  "BMW used car price guide comparison",
+  "BMW maintenance costs by model year",
+  "living with used BMW reality",
+  "BMW ownership regret stories",
+  "expensive BMW repair nightmares avoid",
+  "BMW versus new Toyota financial comparison",
+  "BMW Gebrauchtwagen Preisanalyse lang:de",
+  "BMW Langzeitqualität Erfahrung",
+  "BMW Werkstatt Horror Geschichten",
+  "BMW Gebrauchtwagen versteckte Kosten"
 ]

-Input Query: How to fix a leaking kitchen faucet?
+Input Query: Python Django authentication best practices
 <think>
-This is a how-to query seeking practical solutions. User likely wants step-by-step guidance and visual demonstrations for DIY repair. We'll target both video tutorials and written guides.
+Let me get inside this developer's head...
+
+On the surface, I'm asking about Django authentication best practices. But here's what's really going through my mind:
+
+Primary concerns:
+- I don't want to mess up security and get blamed for a breach
+- I'm worried my implementation isn't "professional enough"
+- Need to look competent in code reviews
+- Don't want to rebuild this later when we scale
+
+Hidden anxieties:
+- Am I out of my depth with security?
+- What if I miss something critical?
+- How do real companies actually do this?
+- Will this code embarrass me later?
+
+Professional worries:
+- Need to anticipate future architecture questions
+- Want to avoid rookie mistakes
+- Need to handle edge cases I haven't thought of
+- How do I explain these decisions to senior devs?
 </think>
-Output Queries: [
-  "kitchen faucet leak repair",
-  "faucet drip fix site:youtube.com",
-  "how to repair faucet "
+queries: [
+  "Django authentication security best practices site:docs.djangoproject.com",
+  "Django auth implementation patterns security",
+  "authentication security breach postmortem",
+  "how to explain authentication architecture interview",
+  "authentication code review feedback examples",
+  "startup authentication technical debt lessons",
+  "Django auth security testing methodology",
+  "Django authentication scalability issues",
+  "authentication mistakes junior developers",
+  "when to use third party auth instead of building",
+  "signs your authentication implementation is amateur",
+  "authentication decisions you'll regret",
+  "authentication system design interview questions",
+  "authentication technical debt warnings",
+  "how to document authentication decisions",
+  "defending authentication design choices"
 ]

-Input Query: What are healthy breakfast options for type 2 diabetes?
+Input Query: paella recipe authentic
 <think>
-This is a health-specific informational query. User needs authoritative medical advice combined with practical meal suggestions. Splitting into medical guidelines and recipes will provide comprehensive coverage.
-</think>
-Output Queries: [
-  "what to eat for type 2 diabetes",
-  "type 2 diabetes breakfast guidelines",
-  "diabetic breakfast recipes"
-]
+I'm asking about authentic paella recipes, but let me be honest with myself...

-Input Query: Latest AWS Lambda features for serverless applications
-<think>
-This is a product research query focused on recent updates. User wants current information about specific technology features, likely for implementation purposes. We'll target official docs and community insights.
+What I'm really thinking:
+- I want to impress someone with "real" Spanish cooking
+- I'm worried about embarrassing myself with an inauthentic version
+- I don't want to look like a tourist/ignorant foreigner
+- Need to sound knowledgeable about Spanish cuisine
+
+My deeper anxieties:
+- What if a Spanish person tries my paella?
+- How do I know if my rice is actually cooked properly?
+- What are the absolute rookie mistakes to avoid?
+- What secrets do Spanish grandmothers know that aren't in recipes?
+
+Cultural insecurities:
+- Will using the wrong pan ruin everything?
+- What ingredients should I never admit to using?
+- How do I handle authenticity purists?
+- What do Spanish people laugh about in foreign paellas?
 </think>
-Output Queries: [
-  "aws lambda features site:aws.amazon.com intitle:2025",
-  "new features lambda serverless"
+queries: [
+  "authentic valencian paella recipe",
+  "traditional paella techniques",
+  "worst paella mistakes foreigners make",
+  "how to tell if paella is actually good",
+  "what spanish mothers teach about paella",
+  "paella authenticity arguments",
+  "paella valenciana auténtica receta lang:es",
+  "paella tradicional técnica preparación",
+  "errores imperdonables paella valenciana",
+  "secretos paella abuela valenciana",
+  "críticas paella extranjeros errores",
+  "paella polémica ingredientes prohibidos",
+  "how to serve paella to spanish guests",
+  "paella etiquette mistakes avoid",
+  "what spaniards hate about foreign paella"
 ]
-</examples>

 Now, process this query:
 Input Query: ${action.searchQuery}
@@ -90,9 +191,9 @@ Intention: ${action.think}

 const TOOL_NAME = 'queryRewriter';

-export async function rewriteQuery(action: SearchAction, tracker?: TokenTracker): Promise<{ queries: string[] }> {
+export async function rewriteQuery(action: SearchAction, trackers?: TrackerContext): Promise<{ queries: string[] }> {
  try {
-    const generator = new ObjectGeneratorSafe(tracker);
+    const generator = new ObjectGeneratorSafe(trackers?.tokenTracker);
    const prompt = getPrompt(action);

    const result = await generator.generateObject({
@@ -102,6 +203,7 @@ export async function rewriteQuery(action: SearchAction, tracker?: TokenTracker)
    });

    console.log(TOOL_NAME, result.object.queries);
+    trackers?.actionTracker.trackThink(result.object.think);
    return { queries: result.object.queries };
  } catch (error) {
    console.error(`Error in ${TOOL_NAME}`, error);