diff --git a/config.json b/config.json index d9f9628..e39d93e 100644 --- a/config.json +++ b/config.json @@ -30,7 +30,7 @@ "default": { "model": "gemini-2.0-flash", "temperature": 0, - "maxTokens": 8000 + "maxTokens": 1000 }, "tools": { "coder": { "temperature": 0.7 }, diff --git a/src/agent.ts b/src/agent.ts index f499f4d..3905538 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -342,7 +342,7 @@ export async function getResponse(question?: string, allowReflect = allowReflect && (gaps.length <= 1); const currentQuestion: string = gaps.length > 0 ? gaps.shift()! : question if (!evaluationMetrics[currentQuestion]) { - evaluationMetrics[currentQuestion] = await evaluateQuestion(currentQuestion, context.tokenTracker) + evaluationMetrics[currentQuestion] = await evaluateQuestion(currentQuestion, context) } // update all urls with buildURLMap @@ -406,7 +406,7 @@ export async function getResponse(question?: string, const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep, evaluationMetrics[currentQuestion], - [context.tokenTracker, context.actionTracker], + context, visitedURLs ); @@ -446,7 +446,7 @@ The evaluator thinks your answer is bad because: ${evaluation.think} `); // store the bad context and reset the diary context - const {response: errorAnalysis} = await analyzeSteps(diaryContext, context.tokenTracker); + const {response: errorAnalysis} = await analyzeSteps(diaryContext, context); allKnowledge.push({ question: currentQuestion, @@ -535,7 +535,7 @@ But then you realized you have asked them before. You decided to to think out of } } else if (thisStep.action === 'search' && thisStep.searchQuery) { // rewrite queries - let {queries: keywordsQueries} = await rewriteQuery(thisStep, context.tokenTracker); + let {queries: keywordsQueries} = await rewriteQuery(thisStep, context); // add the original query before rewrite to the keywordsQueries keywordsQueries.push(thisStep.searchQuery) diff --git a/src/tools/error-analyzer.ts b/src/tools/error-analyzer.ts index f08bece..2ccdc87 100644 --- a/src/tools/error-analyzer.ts +++ b/src/tools/error-analyzer.ts @@ -1,13 +1,12 @@ import {z} from 'zod'; -import {TokenTracker} from "../utils/token-tracker"; -import {ErrorAnalysisResponse} from '../types'; +import {ErrorAnalysisResponse, TrackerContext} from '../types'; import {ObjectGeneratorSafe} from "../utils/safe-generator"; const responseSchema = z.object({ - recap: z.string().describe('Recap of the actions taken and the steps conducted'), - blame: z.string().describe('Which action or the step was the root cause of the answer rejection'), - improvement: z.string().describe('Suggested key improvement for the next iteration, do not use bullet points, be concise and hot-take vibe.'), + recap: z.string().describe('Recap of the actions taken and the steps conducted in first person narrative.').max(500), + blame: z.string().describe('Which action or the step was the root cause of the answer rejection').max(500), + improvement: z.string().describe('Suggested key improvement for the next iteration, do not use bullet points, be concise and hot-take vibe.').max(500), questionsToAnswer: z.array( z.string().describe("each question must be a single line, concise and clear. not composite or compound, less than 20 words.") ).max(2) @@ -111,10 +110,10 @@ ${diaryContext.join('\n')} const TOOL_NAME = 'errorAnalyzer'; export async function analyzeSteps( diaryContext: string[], - tracker?: TokenTracker + trackers?: TrackerContext ): Promise<{ response: ErrorAnalysisResponse }> { try { - const generator = new ObjectGeneratorSafe(tracker); + const generator = new ObjectGeneratorSafe(trackers?.tokenTracker); const prompt = getPrompt(diaryContext); const result = await generator.generateObject({ @@ -124,6 +123,8 @@ export async function analyzeSteps( }); console.log(TOOL_NAME, result.object); + trackers?.actionTracker.trackThink(result.object.blame); + trackers?.actionTracker.trackThink(result.object.improvement); return { response: result.object }; diff --git a/src/tools/evaluator.ts b/src/tools/evaluator.ts index 5603af4..1ba420a 100644 --- a/src/tools/evaluator.ts +++ b/src/tools/evaluator.ts @@ -1,10 +1,8 @@ import {z} from 'zod'; import {GenerateObjectResult} from 'ai'; -import {TokenTracker} from "../utils/token-tracker"; -import {AnswerAction, EvaluationCriteria, EvaluationResponse, EvaluationType} from '../types'; +import {AnswerAction, EvaluationCriteria, EvaluationResponse, EvaluationType, TrackerContext} from '../types'; import {readUrl, removeAllLineBreaks} from "./read"; import {ObjectGeneratorSafe} from "../utils/safe-generator"; -import {ActionTracker} from "../utils/action-tracker"; const baseSchema = { @@ -263,7 +261,7 @@ Answer: ${JSON.stringify(answer)}`; const questionEvaluationSchema = z.object({ needsFreshness: z.boolean().describe('Whether the question requires freshness check'), needsPlurality: z.boolean().describe('Whether the question requires plurality check'), - think: z.string().describe('Explanation of why these checks are needed').max(500), + think: z.string().describe('A very concise explain of why you choose those checks are needed in first person, extremely short.').max(500), languageStyle: z.string().describe('The language being used and the overall vibe/mood of the question').max(50), }); @@ -349,10 +347,10 @@ const TOOL_NAME = 'evaluator'; export async function evaluateQuestion( question: string, - tracker?: TokenTracker + trackers?: TrackerContext ): Promise { try { - const generator = new ObjectGeneratorSafe(tracker); + const generator = new ObjectGeneratorSafe(trackers?.tokenTracker); const result = await generator.generateObject({ model: TOOL_NAME, @@ -368,6 +366,7 @@ export async function evaluateQuestion( if (result.object.needsPlurality) types.push('plurality'); console.log('Question Metrics:', types); + trackers?.actionTracker.trackThink(result.object.think); // Always evaluate definitive first, then freshness (if needed), then plurality (if needed) return {types, languageStyle: result.object.languageStyle}; @@ -386,9 +385,9 @@ async function performEvaluation( schema: z.ZodType; prompt: string; }, - trackers: [TokenTracker, ActionTracker], + trackers: TrackerContext, ): Promise> { - const generator = new ObjectGeneratorSafe(trackers[0]); + const generator = new ObjectGeneratorSafe(trackers.tokenTracker); const result = await generator.generateObject({ model: TOOL_NAME, @@ -396,7 +395,7 @@ async function performEvaluation( prompt: params.prompt, }) as GenerateObjectResult; - trackers[1].trackThink(result.object.think) + trackers.actionTracker.trackThink(result.object.think) console.log(`${evaluationType} ${TOOL_NAME}`, result.object); @@ -409,7 +408,7 @@ export async function evaluateAnswer( question: string, action: AnswerAction, evaluationCri: EvaluationCriteria, - trackers: [TokenTracker, ActionTracker], + trackers: TrackerContext, visitedURLs: string[] = [] ): Promise<{ response: EvaluationResponse }> { let result; @@ -504,14 +503,14 @@ export async function evaluateAnswer( } // Helper function to fetch and combine source content -async function fetchSourceContent(urls: string[], trackers: [TokenTracker, ActionTracker]): Promise { +async function fetchSourceContent(urls: string[], trackers: TrackerContext): Promise { if (!urls.length) return ''; - trackers[1].trackThink('Let me fetch the source content to verify the answer.'); + trackers.actionTracker.trackThink('Let me fetch the source content to verify the answer.'); try { const results = await Promise.all( urls.map(async (url) => { try { - const {response} = await readUrl(url, trackers[0]); + const {response} = await readUrl(url, trackers.tokenTracker); const content = response?.data?.content || ''; return removeAllLineBreaks(content); } catch (error) { diff --git a/src/tools/query-rewriter.ts b/src/tools/query-rewriter.ts index 5584b23..c4fc8d9 100644 --- a/src/tools/query-rewriter.ts +++ b/src/tools/query-rewriter.ts @@ -1,6 +1,5 @@ import { z } from 'zod'; -import { TokenTracker } from "../utils/token-tracker"; -import { SearchAction } from '../types'; +import {SearchAction, TrackerContext} from '../types'; import {ObjectGeneratorSafe} from "../utils/safe-generator"; @@ -15,15 +14,31 @@ const responseSchema = z.object({ function getPrompt(action: SearchAction): string { - return `You are an expert search query generator. You optimize user queries into precise keyword combinations with strategic reasoning and appropriate search operators. + return `You are an expert search query generator with deep psychological understanding. You optimize user queries by extensively analyzing potential user intents and generating comprehensive search variations. -1. Start with simple keyword extraction, preserve crucial qualifiers while removing fluff words -2. Use exact match quotes for specific phrases that must stay together -3. Split queries only when necessary for distinctly different aspects -4. Make the query resistant to SEO manipulation -5. When necessary, append at the end only when must needed +1. Start with deep intent analysis: + - Direct intent (what they explicitly ask) + - Implicit intent (what they might actually want) + - Related intents (what they might need next) + - Prerequisite knowledge (what they need to know first) + - Common pitfalls (what they should avoid) + - Expert perspectives (what professionals would search for) + - Beginner needs (what newcomers might miss) + - Alternative approaches (different ways to solve the problem) +2. For each identified intent: + - Generate queries in original language + - Generate queries in English (if not original) + - Generate queries in most authoritative language + - Use appropriate operators and filters + +3. Query structure rules: + - Use exact match quotes for specific phrases + - Split queries for distinct aspects + - Add operators only when necessary + - Ensure each query targets a specific intent + - Remove fluff words but preserve crucial qualifiers A query can't only have operators; and operators can't be at the start a query; @@ -42,45 +57,131 @@ A query can't only have operators; and operators can't be at the start a query; -Input Query: What's the difference between ReactJS and Vue.js for building web applications? +Input Query: 宝马二手车价格 -This is a comparison query. User is likely looking for technical evaluation and objective feature comparisons, possibly for framework selection decisions. We'll split this into separate queries to capture both high-level differences and specific technical aspects. +Let me think as the user... + +I'm looking up BMW used car prices, but what's really on my mind? + +Primary concerns: +- I want a BMW because it's a status symbol, but I'm worried about affordability +- I don't want to look foolish buying an old luxury car I can't maintain +- I need to know if I'm getting a good deal or being scammed +- I'm anxious about expensive surprises after purchase + +Deeper anxieties: +- Can I actually afford the maintenance? +- Will people judge me for buying an old BMW instead of a new regular car? +- What if I'm getting in over my head? +- Am I mechanically savvy enough for this? + +Expert-level considerations: +- Which models have notorious issues? +- What are the real ownership costs beyond the purchase price? +- Where are the negotiation leverage points? +- What do mechanics look for in these specific models? -Queries: [ - "react performance", - "vue performance", - "react vue comparison", +queries: [ + "宝马 二手车 价格区间 评估 lang:zh", + "宝马 各系列 保值率 对比", + "二手宝马 维修成本 真实体验", + "买二手宝马 后悔 经历", + "二手宝马 月收入 工资要求", + "修宝马 坑 避免", + "BMW used car price guide comparison", + "BMW maintenance costs by model year", + "living with used BMW reality", + "BMW ownership regret stories", + "expensive BMW repair nightmares avoid", + "BMW versus new Toyota financial comparison", + "BMW Gebrauchtwagen Preisanalyse lang:de", + "BMW Langzeitqualität Erfahrung", + "BMW Werkstatt Horror Geschichten", + "BMW Gebrauchtwagen versteckte Kosten" ] -Input Query: How to fix a leaking kitchen faucet? +Input Query: Python Django authentication best practices -This is a how-to query seeking practical solutions. User likely wants step-by-step guidance and visual demonstrations for DIY repair. We'll target both video tutorials and written guides. +Let me get inside this developer's head... + +On the surface, I'm asking about Django authentication best practices. But here's what's really going through my mind: + +Primary concerns: +- I don't want to mess up security and get blamed for a breach +- I'm worried my implementation isn't "professional enough" +- Need to look competent in code reviews +- Don't want to rebuild this later when we scale + +Hidden anxieties: +- Am I out of my depth with security? +- What if I miss something critical? +- How do real companies actually do this? +- Will this code embarrass me later? + +Professional worries: +- Need to anticipate future architecture questions +- Want to avoid rookie mistakes +- Need to handle edge cases I haven't thought of +- How do I explain these decisions to senior devs? -Output Queries: [ - "kitchen faucet leak repair", - "faucet drip fix site:youtube.com", - "how to repair faucet " +queries: [ + "Django authentication security best practices site:docs.djangoproject.com", + "Django auth implementation patterns security", + "authentication security breach postmortem", + "how to explain authentication architecture interview", + "authentication code review feedback examples", + "startup authentication technical debt lessons", + "Django auth security testing methodology", + "Django authentication scalability issues", + "authentication mistakes junior developers", + "when to use third party auth instead of building", + "signs your authentication implementation is amateur", + "authentication decisions you'll regret", + "authentication system design interview questions", + "authentication technical debt warnings", + "how to document authentication decisions", + "defending authentication design choices" ] -Input Query: What are healthy breakfast options for type 2 diabetes? +Input Query: paella recipe authentic -This is a health-specific informational query. User needs authoritative medical advice combined with practical meal suggestions. Splitting into medical guidelines and recipes will provide comprehensive coverage. - -Output Queries: [ - "what to eat for type 2 diabetes", - "type 2 diabetes breakfast guidelines", - "diabetic breakfast recipes" -] +I'm asking about authentic paella recipes, but let me be honest with myself... -Input Query: Latest AWS Lambda features for serverless applications - -This is a product research query focused on recent updates. User wants current information about specific technology features, likely for implementation purposes. We'll target official docs and community insights. +What I'm really thinking: +- I want to impress someone with "real" Spanish cooking +- I'm worried about embarrassing myself with an inauthentic version +- I don't want to look like a tourist/ignorant foreigner +- Need to sound knowledgeable about Spanish cuisine + +My deeper anxieties: +- What if a Spanish person tries my paella? +- How do I know if my rice is actually cooked properly? +- What are the absolute rookie mistakes to avoid? +- What secrets do Spanish grandmothers know that aren't in recipes? + +Cultural insecurities: +- Will using the wrong pan ruin everything? +- What ingredients should I never admit to using? +- How do I handle authenticity purists? +- What do Spanish people laugh about in foreign paellas? -Output Queries: [ - "aws lambda features site:aws.amazon.com intitle:2025", - "new features lambda serverless" +queries: [ + "authentic valencian paella recipe", + "traditional paella techniques", + "worst paella mistakes foreigners make", + "how to tell if paella is actually good", + "what spanish mothers teach about paella", + "paella authenticity arguments", + "paella valenciana auténtica receta lang:es", + "paella tradicional técnica preparación", + "errores imperdonables paella valenciana", + "secretos paella abuela valenciana", + "críticas paella extranjeros errores", + "paella polémica ingredientes prohibidos", + "how to serve paella to spanish guests", + "paella etiquette mistakes avoid", + "what spaniards hate about foreign paella" ] - Now, process this query: Input Query: ${action.searchQuery} @@ -90,9 +191,9 @@ Intention: ${action.think} const TOOL_NAME = 'queryRewriter'; -export async function rewriteQuery(action: SearchAction, tracker?: TokenTracker): Promise<{ queries: string[] }> { +export async function rewriteQuery(action: SearchAction, trackers?: TrackerContext): Promise<{ queries: string[] }> { try { - const generator = new ObjectGeneratorSafe(tracker); + const generator = new ObjectGeneratorSafe(trackers?.tokenTracker); const prompt = getPrompt(action); const result = await generator.generateObject({ @@ -102,6 +203,7 @@ export async function rewriteQuery(action: SearchAction, tracker?: TokenTracker) }); console.log(TOOL_NAME, result.object.queries); + trackers?.actionTracker.trackThink(result.object.think); return { queries: result.object.queries }; } catch (error) { console.error(`Error in ${TOOL_NAME}`, error);