fix: overlength gen

This commit is contained in:
Han Xiao
2025-02-22 11:28:25 +08:00
parent 0ed7321f77
commit c8cd9bc09e
5 changed files with 164 additions and 62 deletions

View File

@@ -30,7 +30,7 @@
"default": {
"model": "gemini-2.0-flash",
"temperature": 0,
"maxTokens": 8000
"maxTokens": 1000
},
"tools": {
"coder": { "temperature": 0.7 },

View File

@@ -342,7 +342,7 @@ export async function getResponse(question?: string,
allowReflect = allowReflect && (gaps.length <= 1);
const currentQuestion: string = gaps.length > 0 ? gaps.shift()! : question
if (!evaluationMetrics[currentQuestion]) {
evaluationMetrics[currentQuestion] = await evaluateQuestion(currentQuestion, context.tokenTracker)
evaluationMetrics[currentQuestion] = await evaluateQuestion(currentQuestion, context)
}
// update all urls with buildURLMap
@@ -406,7 +406,7 @@ export async function getResponse(question?: string,
const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep,
evaluationMetrics[currentQuestion],
[context.tokenTracker, context.actionTracker],
context,
visitedURLs
);
@@ -446,7 +446,7 @@ The evaluator thinks your answer is bad because:
${evaluation.think}
`);
// store the bad context and reset the diary context
const {response: errorAnalysis} = await analyzeSteps(diaryContext, context.tokenTracker);
const {response: errorAnalysis} = await analyzeSteps(diaryContext, context);
allKnowledge.push({
question: currentQuestion,
@@ -535,7 +535,7 @@ But then you realized you have asked them before. You decided to to think out of
}
} else if (thisStep.action === 'search' && thisStep.searchQuery) {
// rewrite queries
let {queries: keywordsQueries} = await rewriteQuery(thisStep, context.tokenTracker);
let {queries: keywordsQueries} = await rewriteQuery(thisStep, context);
// add the original query before rewrite to the keywordsQueries
keywordsQueries.push(thisStep.searchQuery)

View File

@@ -1,13 +1,12 @@
import {z} from 'zod';
import {TokenTracker} from "../utils/token-tracker";
import {ErrorAnalysisResponse} from '../types';
import {ErrorAnalysisResponse, TrackerContext} from '../types';
import {ObjectGeneratorSafe} from "../utils/safe-generator";
const responseSchema = z.object({
recap: z.string().describe('Recap of the actions taken and the steps conducted'),
blame: z.string().describe('Which action or the step was the root cause of the answer rejection'),
improvement: z.string().describe('Suggested key improvement for the next iteration, do not use bullet points, be concise and hot-take vibe.'),
recap: z.string().describe('Recap of the actions taken and the steps conducted in first person narrative.').max(500),
blame: z.string().describe('Which action or the step was the root cause of the answer rejection').max(500),
improvement: z.string().describe('Suggested key improvement for the next iteration, do not use bullet points, be concise and hot-take vibe.').max(500),
questionsToAnswer: z.array(
z.string().describe("each question must be a single line, concise and clear. not composite or compound, less than 20 words.")
).max(2)
@@ -111,10 +110,10 @@ ${diaryContext.join('\n')}
const TOOL_NAME = 'errorAnalyzer';
export async function analyzeSteps(
diaryContext: string[],
tracker?: TokenTracker
trackers?: TrackerContext
): Promise<{ response: ErrorAnalysisResponse }> {
try {
const generator = new ObjectGeneratorSafe(tracker);
const generator = new ObjectGeneratorSafe(trackers?.tokenTracker);
const prompt = getPrompt(diaryContext);
const result = await generator.generateObject({
@@ -124,6 +123,8 @@ export async function analyzeSteps(
});
console.log(TOOL_NAME, result.object);
trackers?.actionTracker.trackThink(result.object.blame);
trackers?.actionTracker.trackThink(result.object.improvement);
return { response: result.object };

View File

@@ -1,10 +1,8 @@
import {z} from 'zod';
import {GenerateObjectResult} from 'ai';
import {TokenTracker} from "../utils/token-tracker";
import {AnswerAction, EvaluationCriteria, EvaluationResponse, EvaluationType} from '../types';
import {AnswerAction, EvaluationCriteria, EvaluationResponse, EvaluationType, TrackerContext} from '../types';
import {readUrl, removeAllLineBreaks} from "./read";
import {ObjectGeneratorSafe} from "../utils/safe-generator";
import {ActionTracker} from "../utils/action-tracker";
const baseSchema = {
@@ -263,7 +261,7 @@ Answer: ${JSON.stringify(answer)}`;
const questionEvaluationSchema = z.object({
needsFreshness: z.boolean().describe('Whether the question requires freshness check'),
needsPlurality: z.boolean().describe('Whether the question requires plurality check'),
think: z.string().describe('Explanation of why these checks are needed').max(500),
think: z.string().describe('A very concise explanation, in first person, of why you chose these checks; keep it extremely short.').max(500),
languageStyle: z.string().describe('The language being used and the overall vibe/mood of the question').max(50),
});
@@ -349,10 +347,10 @@ const TOOL_NAME = 'evaluator';
export async function evaluateQuestion(
question: string,
tracker?: TokenTracker
trackers?: TrackerContext
): Promise<EvaluationCriteria> {
try {
const generator = new ObjectGeneratorSafe(tracker);
const generator = new ObjectGeneratorSafe(trackers?.tokenTracker);
const result = await generator.generateObject({
model: TOOL_NAME,
@@ -368,6 +366,7 @@ export async function evaluateQuestion(
if (result.object.needsPlurality) types.push('plurality');
console.log('Question Metrics:', types);
trackers?.actionTracker.trackThink(result.object.think);
// Always evaluate definitive first, then freshness (if needed), then plurality (if needed)
return {types, languageStyle: result.object.languageStyle};
@@ -386,9 +385,9 @@ async function performEvaluation<T>(
schema: z.ZodType<T>;
prompt: string;
},
trackers: [TokenTracker, ActionTracker],
trackers: TrackerContext,
): Promise<GenerateObjectResult<T>> {
const generator = new ObjectGeneratorSafe(trackers[0]);
const generator = new ObjectGeneratorSafe(trackers.tokenTracker);
const result = await generator.generateObject({
model: TOOL_NAME,
@@ -396,7 +395,7 @@ async function performEvaluation<T>(
prompt: params.prompt,
}) as GenerateObjectResult<any>;
trackers[1].trackThink(result.object.think)
trackers.actionTracker.trackThink(result.object.think)
console.log(`${evaluationType} ${TOOL_NAME}`, result.object);
@@ -409,7 +408,7 @@ export async function evaluateAnswer(
question: string,
action: AnswerAction,
evaluationCri: EvaluationCriteria,
trackers: [TokenTracker, ActionTracker],
trackers: TrackerContext,
visitedURLs: string[] = []
): Promise<{ response: EvaluationResponse }> {
let result;
@@ -504,14 +503,14 @@ export async function evaluateAnswer(
}
// Helper function to fetch and combine source content
async function fetchSourceContent(urls: string[], trackers: [TokenTracker, ActionTracker]): Promise<string> {
async function fetchSourceContent(urls: string[], trackers: TrackerContext): Promise<string> {
if (!urls.length) return '';
trackers[1].trackThink('Let me fetch the source content to verify the answer.');
trackers.actionTracker.trackThink('Let me fetch the source content to verify the answer.');
try {
const results = await Promise.all(
urls.map(async (url) => {
try {
const {response} = await readUrl(url, trackers[0]);
const {response} = await readUrl(url, trackers.tokenTracker);
const content = response?.data?.content || '';
return removeAllLineBreaks(content);
} catch (error) {

View File

@@ -1,6 +1,5 @@
import { z } from 'zod';
import { TokenTracker } from "../utils/token-tracker";
import { SearchAction } from '../types';
import {SearchAction, TrackerContext} from '../types';
import {ObjectGeneratorSafe} from "../utils/safe-generator";
@@ -15,15 +14,31 @@ const responseSchema = z.object({
function getPrompt(action: SearchAction): string {
return `You are an expert search query generator. You optimize user queries into precise keyword combinations with strategic reasoning and appropriate search operators.
return `You are an expert search query generator with deep psychological understanding. You optimize user queries by extensively analyzing potential user intents and generating comprehensive search variations.
<rules>
1. Start with simple keyword extraction, preserve crucial qualifiers while removing fluff words
2. Use exact match quotes for specific phrases that must stay together
3. Split queries only when necessary for distinctly different aspects
4. Make the query resistant to SEO manipulation
5. When necessary, append <query-operators> at the end only when must needed
1. Start with deep intent analysis:
- Direct intent (what they explicitly ask)
- Implicit intent (what they might actually want)
- Related intents (what they might need next)
- Prerequisite knowledge (what they need to know first)
- Common pitfalls (what they should avoid)
- Expert perspectives (what professionals would search for)
- Beginner needs (what newcomers might miss)
- Alternative approaches (different ways to solve the problem)
2. For each identified intent:
- Generate queries in original language
- Generate queries in English (if not original)
- Generate queries in most authoritative language
- Use appropriate operators and filters
3. Query structure rules:
- Use exact match quotes for specific phrases
- Split queries for distinct aspects
- Add operators only when necessary
- Ensure each query targets a specific intent
- Remove fluff words but preserve crucial qualifiers
<query-operators>
A query can't consist only of operators, and operators can't be at the start of a query;
@@ -42,45 +57,131 @@ A query can't only have operators; and operators can't be at the start a query;
</rules>
<examples>
Input Query: What's the difference between ReactJS and Vue.js for building web applications?
Input Query: 宝马二手车价格
<think>
This is a comparison query. User is likely looking for technical evaluation and objective feature comparisons, possibly for framework selection decisions. We'll split this into separate queries to capture both high-level differences and specific technical aspects.
Let me think as the user...
I'm looking up BMW used car prices, but what's really on my mind?
Primary concerns:
- I want a BMW because it's a status symbol, but I'm worried about affordability
- I don't want to look foolish buying an old luxury car I can't maintain
- I need to know if I'm getting a good deal or being scammed
- I'm anxious about expensive surprises after purchase
Deeper anxieties:
- Can I actually afford the maintenance?
- Will people judge me for buying an old BMW instead of a new regular car?
- What if I'm getting in over my head?
- Am I mechanically savvy enough for this?
Expert-level considerations:
- Which models have notorious issues?
- What are the real ownership costs beyond the purchase price?
- Where are the negotiation leverage points?
- What do mechanics look for in these specific models?
</think>
Queries: [
"react performance",
"vue performance",
"react vue comparison",
queries: [
"宝马 二手车 价格区间 评估 lang:zh",
"宝马 各系列 保值率 对比",
"二手宝马 维修成本 真实体验",
"买二手宝马 后悔 经历",
"二手宝马 月收入 工资要求",
"修宝马 坑 避免",
"BMW used car price guide comparison",
"BMW maintenance costs by model year",
"living with used BMW reality",
"BMW ownership regret stories",
"expensive BMW repair nightmares avoid",
"BMW versus new Toyota financial comparison",
"BMW Gebrauchtwagen Preisanalyse lang:de",
"BMW Langzeitqualität Erfahrung",
"BMW Werkstatt Horror Geschichten",
"BMW Gebrauchtwagen versteckte Kosten"
]
Input Query: How to fix a leaking kitchen faucet?
Input Query: Python Django authentication best practices
<think>
This is a how-to query seeking practical solutions. User likely wants step-by-step guidance and visual demonstrations for DIY repair. We'll target both video tutorials and written guides.
Let me get inside this developer's head...
On the surface, I'm asking about Django authentication best practices. But here's what's really going through my mind:
Primary concerns:
- I don't want to mess up security and get blamed for a breach
- I'm worried my implementation isn't "professional enough"
- Need to look competent in code reviews
- Don't want to rebuild this later when we scale
Hidden anxieties:
- Am I out of my depth with security?
- What if I miss something critical?
- How do real companies actually do this?
- Will this code embarrass me later?
Professional worries:
- Need to anticipate future architecture questions
- Want to avoid rookie mistakes
- Need to handle edge cases I haven't thought of
- How do I explain these decisions to senior devs?
</think>
Output Queries: [
"kitchen faucet leak repair",
"faucet drip fix site:youtube.com",
"how to repair faucet "
queries: [
"Django authentication security best practices site:docs.djangoproject.com",
"Django auth implementation patterns security",
"authentication security breach postmortem",
"how to explain authentication architecture interview",
"authentication code review feedback examples",
"startup authentication technical debt lessons",
"Django auth security testing methodology",
"Django authentication scalability issues",
"authentication mistakes junior developers",
"when to use third party auth instead of building",
"signs your authentication implementation is amateur",
"authentication decisions you'll regret",
"authentication system design interview questions",
"authentication technical debt warnings",
"how to document authentication decisions",
"defending authentication design choices"
]
Input Query: What are healthy breakfast options for type 2 diabetes?
Input Query: paella recipe authentic
<think>
This is a health-specific informational query. User needs authoritative medical advice combined with practical meal suggestions. Splitting into medical guidelines and recipes will provide comprehensive coverage.
</think>
Output Queries: [
"what to eat for type 2 diabetes",
"type 2 diabetes breakfast guidelines",
"diabetic breakfast recipes"
]
I'm asking about authentic paella recipes, but let me be honest with myself...
Input Query: Latest AWS Lambda features for serverless applications
<think>
This is a product research query focused on recent updates. User wants current information about specific technology features, likely for implementation purposes. We'll target official docs and community insights.
What I'm really thinking:
- I want to impress someone with "real" Spanish cooking
- I'm worried about embarrassing myself with an inauthentic version
- I don't want to look like a tourist/ignorant foreigner
- Need to sound knowledgeable about Spanish cuisine
My deeper anxieties:
- What if a Spanish person tries my paella?
- How do I know if my rice is actually cooked properly?
- What are the absolute rookie mistakes to avoid?
- What secrets do Spanish grandmothers know that aren't in recipes?
Cultural insecurities:
- Will using the wrong pan ruin everything?
- What ingredients should I never admit to using?
- How do I handle authenticity purists?
- What do Spanish people laugh about in foreign paellas?
</think>
Output Queries: [
"aws lambda features site:aws.amazon.com intitle:2025",
"new features lambda serverless"
queries: [
"authentic valencian paella recipe",
"traditional paella techniques",
"worst paella mistakes foreigners make",
"how to tell if paella is actually good",
"what spanish mothers teach about paella",
"paella authenticity arguments",
"paella valenciana auténtica receta lang:es",
"paella tradicional técnica preparación",
"errores imperdonables paella valenciana",
"secretos paella abuela valenciana",
"críticas paella extranjeros errores",
"paella polémica ingredientes prohibidos",
"how to serve paella to spanish guests",
"paella etiquette mistakes avoid",
"what spaniards hate about foreign paella"
]
</examples>
Now, process this query:
Input Query: ${action.searchQuery}
@@ -90,9 +191,9 @@ Intention: ${action.think}
const TOOL_NAME = 'queryRewriter';
export async function rewriteQuery(action: SearchAction, tracker?: TokenTracker): Promise<{ queries: string[] }> {
export async function rewriteQuery(action: SearchAction, trackers?: TrackerContext): Promise<{ queries: string[] }> {
try {
const generator = new ObjectGeneratorSafe(tracker);
const generator = new ObjectGeneratorSafe(trackers?.tokenTracker);
const prompt = getPrompt(action);
const result = await generator.generateObject({
@@ -102,6 +203,7 @@ export async function rewriteQuery(action: SearchAction, tracker?: TokenTracker)
});
console.log(TOOL_NAME, result.object.queries);
trackers?.actionTracker.trackThink(result.object.think);
return { queries: result.object.queries };
} catch (error) {
console.error(`Error in ${TOOL_NAME}`, error);