chore: first commit

This commit is contained in:
Han Xiao
2025-02-02 17:14:28 +08:00
parent ff2727f6a7
commit 193d982ed9
5 changed files with 287 additions and 230 deletions

View File

@@ -6,10 +6,10 @@ import {braveSearch} from "./tools/brave-search";
import {rewriteQuery} from "./tools/query-rewriter"; import {rewriteQuery} from "./tools/query-rewriter";
import {dedupQueries} from "./tools/dedup"; import {dedupQueries} from "./tools/dedup";
import {evaluateAnswer} from "./tools/evaluator"; import {evaluateAnswer} from "./tools/evaluator";
import {StepData} from "./tools/getURLIndex";
import {analyzeSteps} from "./tools/error-analyzer"; import {analyzeSteps} from "./tools/error-analyzer";
import {GEMINI_API_KEY, JINA_API_KEY, MODEL_NAME, SEARCH_PROVIDER, STEP_SLEEP} from "./config"; import {GEMINI_API_KEY, JINA_API_KEY, MODEL_NAME, SEARCH_PROVIDER, STEP_SLEEP} from "./config";
import {tokenTracker} from "./utils/token-tracker"; import {tokenTracker} from "./utils/token-tracker";
import {StepAction} from "./types";
async function sleep(ms: number) { async function sleep(ms: number) {
const seconds = Math.ceil(ms / 1000); const seconds = Math.ceil(ms / 1000);
@@ -17,136 +17,107 @@ async function sleep(ms: number) {
return new Promise(resolve => setTimeout(resolve, ms)); return new Promise(resolve => setTimeout(resolve, ms));
} }
type ResponseSchema = { type SchemaProperty = {
type: SchemaType.OBJECT; type: SchemaType;
properties: { description: string;
action: { enum?: string[];
type: SchemaType.STRING; items?: {
enum: string[]; type: SchemaType;
description: string; description?: string;
}; properties?: Record<string, SchemaProperty>;
searchQuery: { required?: string[];
type: SchemaType.STRING;
description: string;
};
URLTargets?: {
type: SchemaType.ARRAY;
items: {
type: SchemaType.STRING;
};
maxItems: number;
description: string;
};
answer: {
type: SchemaType.STRING;
description: string;
};
references: {
type: SchemaType.ARRAY;
items: {
type: SchemaType.OBJECT;
properties: {
exactQuote: {
type: SchemaType.STRING;
description: string;
};
url: {
type: SchemaType.STRING;
description: string;
};
};
required: string[];
};
description: string;
};
thoughts: {
type: SchemaType.STRING;
description: string;
};
questionsToAnswer?: {
type: SchemaType.ARRAY;
items: {
type: SchemaType.STRING;
description: string;
};
description: string;
maxItems: number;
};
}; };
properties?: Record<string, SchemaProperty>;
required?: string[];
maxItems?: number;
};
type ResponseSchema = {
type: SchemaType;
properties: Record<string, SchemaProperty>;
required: string[]; required: string[];
}; };
function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean): ResponseSchema { function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean): ResponseSchema {
const actions = []; const actions: string[] = [];
const properties: Record<string, SchemaProperty> = {
action: {
type: SchemaType.STRING,
enum: actions,
description: "Must match exactly one action type"
},
thoughts: {
type: SchemaType.STRING,
description: "Explain why choose this action, what's the thought process behind choosing this action"
}
};
if (allowSearch) { if (allowSearch) {
actions.push("search"); actions.push("search");
properties.searchQuery = {
type: SchemaType.STRING,
description: "Only required when choosing 'search' action, must be a short, keyword-based query that BM25, tf-idf based search engines can understand."
};
} }
if (allowAnswer) { if (allowAnswer) {
actions.push("answer"); actions.push("answer");
properties.answer = {
type: SchemaType.STRING,
description: "Only required when choosing 'answer' action, must be the final answer in natural language"
};
properties.references = {
type: SchemaType.ARRAY,
items: {
type: SchemaType.OBJECT,
properties: {
exactQuote: {
type: SchemaType.STRING,
description: "Exact relevant quote from the document"
},
url: {
type: SchemaType.STRING,
description: "URL of the document; must be directly from the context"
}
},
required: ["exactQuote", "url"]
},
description: "Must be an array of references that support the answer, each reference must contain an exact quote and the URL of the document"
};
} }
if (allowReflect) { if (allowReflect) {
actions.push("reflect"); actions.push("reflect");
properties.questionsToAnswer = {
type: SchemaType.ARRAY,
items: {
type: SchemaType.STRING,
description: "each question must be a single line, concise and clear. not composite or compound, less than 20 words."
},
description: "List of most important questions to fill the knowledge gaps of finding the answer to the original question",
maxItems: 2
};
} }
if (allowRead) { if (allowRead) {
actions.push("visit"); actions.push("visit");
properties.URLTargets = {
type: SchemaType.ARRAY,
items: {
type: SchemaType.STRING
},
maxItems: 2,
description: "Must be an array of URLs, choose up the most relevant 2 URLs to visit"
};
} }
// Update the enum values after collecting all actions
properties.action.enum = actions;
return { return {
type: SchemaType.OBJECT, type: SchemaType.OBJECT,
properties: { properties,
action: { required: ["action", "thoughts"]
type: SchemaType.STRING,
enum: actions,
description: "Must match exactly one action type"
},
questionsToAnswer: allowReflect ? {
type: SchemaType.ARRAY,
items: {
type: SchemaType.STRING,
description: "each question must be a single line, concise and clear. not composite or compound, less than 20 words.",
},
description: "Only required when choosing 'reflect' action, list of most important questions to answer to fill the knowledge gaps.",
maxItems: 2
} : undefined,
searchQuery: {
type: SchemaType.STRING,
description: "Only required when choosing 'search' action, must be a short, keyword-based query that BM25, tf-idf based search engines can understand.",
},
URLTargets: allowRead ? {
type: SchemaType.ARRAY,
items: {
type: SchemaType.STRING
},
maxItems: 2,
description: "Only required when choosing 'deep dive' action, must be an array of URLs, choose up the most relevant 3 URLs to deep dive into"
} : undefined,
answer: {
type: SchemaType.STRING,
description: "Only required when choosing 'answer' action, must be the final answer in natural language"
},
references: {
type: SchemaType.ARRAY,
items: {
type: SchemaType.OBJECT,
properties: {
exactQuote: {
type: SchemaType.STRING,
description: "Exact relevant quote from the document",
},
url: {
type: SchemaType.STRING,
description: "URL of the document; must be directly from the context"
},
},
required: ["exactQuote", "url"]
},
description: "Only required when choosing 'answer' action, must be an array of references"
},
thoughts: {
type: SchemaType.STRING,
description: "Explain why choose this action, what's the thought process behind this action"
},
},
required: ["action", "thoughts"],
}; };
} }
@@ -197,15 +168,20 @@ ${knowledgeItems}`);
- Question: ${c.question} - Question: ${c.question}
- Answer: ${c.answer} - Answer: ${c.answer}
- Reject Reason: ${c.evaluation} - Reject Reason: ${c.evaluation}
- Steps Recap: ${c.recap} - Actions Recap: ${c.recap}
- Steps Blame: ${c.blame} - Actions Blame: ${c.blame}`)
- Improvement Plan: ${c.improvement}`)
.join('\n\n'); .join('\n\n');
const learnedStrategy = badContext.map(c => c.improvement).join('\n');
sections.push(`## Unsuccessful Attempts sections.push(`## Unsuccessful Attempts
Your have tried the following actions but failed to find the answer to the question. Your have tried the following actions but failed to find the answer to the question.
${attempts}`); ${attempts}
## Learned Strategy
${learnedStrategy}
`);
} }
// Build actions section // Build actions section
@@ -263,7 +239,7 @@ Critical Requirements:
return sections.join('\n\n'); return sections.join('\n\n');
} }
const allContext: StepData[] = []; // all steps in the current session, including those leads to wrong results const allContext: StepAction[] = []; // all steps in the current session, including those leads to wrong results
function updateContext(step: any) { function updateContext(step: any) {
allContext.push(step) allContext.push(step)
@@ -309,9 +285,6 @@ async function getResponse(question: string, tokenBudget: number = 1000000, maxB
allKnowledge, allKnowledge,
allURLs); allURLs);
// reset allowAnswer to true
allowAnswer = true;
const model = genAI.getGenerativeModel({ const model = genAI.getGenerativeModel({
model: MODEL_NAME, model: MODEL_NAME,
generationConfig: { generationConfig: {
@@ -327,18 +300,24 @@ async function getResponse(question: string, tokenBudget: number = 1000000, maxB
tokenTracker.trackUsage('agent', usage?.totalTokenCount || 0); tokenTracker.trackUsage('agent', usage?.totalTokenCount || 0);
const action = JSON.parse(response.text()); const thisStep = JSON.parse(response.text());
console.log('Action:', action); // print allowed and chose action
const actionsStr = [allowSearch, allowRead, allowAnswer, allowReflect].map((a, i) => a ? ['search', 'read', 'answer', 'reflect'][i] : null).filter(a => a).join(', ');
console.log(`${thisStep.action} <- [${actionsStr}]`);
console.log(thisStep)
// reset allowAnswer to true
allowAnswer = true;
if (action.action === 'answer') { // execute the step and action
if (thisStep.action === 'answer') {
updateContext({ updateContext({
totalStep, totalStep,
question: currentQuestion, question: currentQuestion,
...action, ...thisStep,
}); });
const {response: evaluation} = await evaluateAnswer(currentQuestion, action.answer); const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep.answer);
if (currentQuestion === question) { if (currentQuestion === question) {
@@ -351,20 +330,20 @@ Original question:
${currentQuestion} ${currentQuestion}
Your answer: Your answer:
${action.answer} ${thisStep.answer}
The evaluator thinks your answer is good because: The evaluator thinks your answer is good because:
${evaluation.reasoning} ${evaluation.reasoning}
Your journey ends here. Your journey ends here.
`); `);
console.log('Final Answer:', action.answer); console.log('Final Answer:', thisStep.answer);
tokenTracker.printSummary(); tokenTracker.printSummary();
await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep); await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
return action; return thisStep;
} }
if (evaluation.is_valid_answer) { if (evaluation.is_valid_answer) {
if (action.references.length > 0 || Object.keys(allURLs).length === 0) { if (thisStep.references.length > 0 || Object.keys(allURLs).length === 0) {
// EXIT POINT OF THE PROGRAM!!!! // EXIT POINT OF THE PROGRAM!!!!
diaryContext.push(` diaryContext.push(`
At step ${step}, you took **answer** action and finally found the answer to the original question: At step ${step}, you took **answer** action and finally found the answer to the original question:
@@ -373,17 +352,17 @@ Original question:
${currentQuestion} ${currentQuestion}
Your answer: Your answer:
${action.answer} ${thisStep.answer}
The evaluator thinks your answer is good because: The evaluator thinks your answer is good because:
${evaluation.reasoning} ${evaluation.reasoning}
Your journey ends here. You have successfully answered the original question. Congratulations! 🎉 Your journey ends here. You have successfully answered the original question. Congratulations! 🎉
`); `);
console.log('Final Answer:', action.answer); console.log('Final Answer:', thisStep.answer);
tokenTracker.printSummary(); tokenTracker.printSummary();
await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep); await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
return action; return thisStep;
} else { } else {
diaryContext.push(` diaryContext.push(`
At step ${step}, you took **answer** action and finally found the answer to the original question: At step ${step}, you took **answer** action and finally found the answer to the original question:
@@ -392,7 +371,7 @@ Original question:
${currentQuestion} ${currentQuestion}
Your answer: Your answer:
${action.answer} ${thisStep.answer}
Unfortunately, you did not provide any references to support your answer. Unfortunately, you did not provide any references to support your answer.
You need to find more URL references to support your answer.`); You need to find more URL references to support your answer.`);
@@ -406,7 +385,7 @@ Original question:
${currentQuestion} ${currentQuestion}
Your answer: Your answer:
${action.answer} ${thisStep.answer}
The evaluator thinks your answer is bad because: The evaluator thinks your answer is bad because:
${evaluation.reasoning} ${evaluation.reasoning}
@@ -415,7 +394,7 @@ ${evaluation.reasoning}
const {response: errorAnalysis} = await analyzeSteps(diaryContext); const {response: errorAnalysis} = await analyzeSteps(diaryContext);
badContext.push({question: currentQuestion, badContext.push({question: currentQuestion,
answer: action.answer, answer: thisStep.answer,
evaluation: evaluation.reasoning, evaluation: evaluation.reasoning,
...errorAnalysis}); ...errorAnalysis});
badAttempts++; badAttempts++;
@@ -431,17 +410,17 @@ Sub-question:
${currentQuestion} ${currentQuestion}
Your answer: Your answer:
${action.answer} ${thisStep.answer}
The evaluator thinks your answer is good because: The evaluator thinks your answer is good because:
${evaluation.reasoning} ${evaluation.reasoning}
Although you solved a sub-question, you still need to find the answer to the original question. You need to keep going. Although you solved a sub-question, you still need to find the answer to the original question. You need to keep going.
`); `);
allKnowledge.push({question: currentQuestion, answer: action.answer}); allKnowledge.push({question: currentQuestion, answer: thisStep.answer});
} }
} else if (action.action === 'reflect' && action.questionsToAnswer) { } else if (thisStep.action === 'reflect' && thisStep.questionsToAnswer) {
let newGapQuestions = action.questionsToAnswer let newGapQuestions = thisStep.questionsToAnswer
const oldQuestions = newGapQuestions; const oldQuestions = newGapQuestions;
if (allQuestions.length) { if (allQuestions.length) {
newGapQuestions = (await dedupQueries(newGapQuestions, allQuestions)).unique_queries; newGapQuestions = (await dedupQueries(newGapQuestions, allQuestions)).unique_queries;
@@ -465,13 +444,13 @@ But then you realized you have asked them before. You decided to to think out of
`); `);
updateContext({ updateContext({
totalStep, totalStep,
...action, ...thisStep,
result: 'I have tried all possible questions and found no useful information. I must think out of the box or different angle!!!' result: 'I have tried all possible questions and found no useful information. I must think out of the box or different angle!!!'
}); });
} }
} else if (action.action === 'search' && action.searchQuery) { } else if (thisStep.action === 'search' && thisStep.searchQuery) {
// rewrite queries // rewrite queries
let {keywords: keywordsQueries} = await rewriteQuery(action.searchQuery); let {queries: keywordsQueries} = await rewriteQuery(thisStep);
const oldKeywords = keywordsQueries; const oldKeywords = keywordsQueries;
// avoid existing searched queries // avoid existing searched queries
@@ -490,6 +469,7 @@ But then you realized you have asked them before. You decided to to think out of
}); });
} else { } else {
const {response} = await braveSearch(query); const {response} = await braveSearch(query);
await sleep(STEP_SLEEP);
results = { results = {
results: response.web.results.map(r => ({ results: response.web.results.map(r => ({
title: r.title, title: r.title,
@@ -518,7 +498,7 @@ You found quite some information and add them to your URL list and **visit** the
updateContext({ updateContext({
totalStep, totalStep,
question: currentQuestion, question: currentQuestion,
...action, ...thisStep,
result: searchResults result: searchResults
}); });
} else { } else {
@@ -532,13 +512,13 @@ You decided to think out of the box or cut from a completely different angle.
updateContext({ updateContext({
totalStep, totalStep,
...action, ...thisStep,
result: 'I have tried all possible queries and found no new information. I must think out of the box or different angle!!!' result: 'I have tried all possible queries and found no new information. I must think out of the box or different angle!!!'
}); });
} }
} else if (action.action === 'visit' && action.URLTargets?.length) { } else if (thisStep.action === 'visit' && thisStep.URLTargets?.length) {
const urlResults = await Promise.all( const urlResults = await Promise.all(
action.URLTargets.map(async (url: string) => { thisStep.URLTargets.map(async (url: string) => {
const {response, tokens} = await readUrl(url, JINA_API_KEY); const {response, tokens} = await readUrl(url, JINA_API_KEY);
allKnowledge.push({ allKnowledge.push({
question: `What is in ${response.data.url}?`, question: `What is in ${response.data.url}?`,
@@ -551,13 +531,13 @@ You decided to think out of the box or cut from a completely different angle.
); );
diaryContext.push(` diaryContext.push(`
At step ${step}, you took the **visit** action and deep dive into the following URLs: At step ${step}, you took the **visit** action and deep dive into the following URLs:
${action.URLTargets.join('\n')} ${thisStep.URLTargets.join('\n')}
You found some useful information on the web and add them to your knowledge for future reference. You found some useful information on the web and add them to your knowledge for future reference.
`); `);
updateContext({ updateContext({
totalStep, totalStep,
question: currentQuestion, question: currentQuestion,
...action, ...thisStep,
result: urlResults result: urlResults
}); });

View File

@@ -41,34 +41,71 @@ function getPrompt(newQueries: string[], existingQueries: string[]): string {
Core Rules: Core Rules:
1. Consider semantic meaning and query intent, not just lexical similarity 1. Consider semantic meaning and query intent, not just lexical similarity
2. Account for different phrasings of the same information need 2. Account for different phrasings of the same information need
3. A query is considered duplicate if its core information need is already covered by: 3. A query is considered duplicate ONLY if:
- any query in set A - It has identical base keywords AND identical operators to another query in set A
- OR any query in set B - OR it has identical base keywords AND identical operators to a query in set B
4. Be aggressive - mark as duplicate as long as they are reasonably similar 4. Queries with same base keywords but different operators are NOT duplicates
5. Different aspects or perspectives of the same object are not duplicates 5. Different aspects or perspectives of the same topic are not duplicates
6. Consider query specificity - a more specific query might not be a duplicate of a general one 6. Consider query specificity - a more specific query is not a duplicate of a general one
7. Search operators that make queries behave differently:
- Different site: filters (e.g., site:youtube.com vs site:github.com)
- Different file types (e.g., filetype:pdf vs filetype:doc)
- Different language/location filters (e.g., lang:en vs lang:es)
- Different exact match phrases (e.g., "exact phrase" vs no quotes)
- Different inclusion/exclusion (+/- operators)
- Different title/body filters (intitle: vs inbody:)
Examples: Examples:
Set A: [ Set A: [
"how to install python on windows", "python tutorial site:youtube.com",
"what's the best pizza in brooklyn heights", "python tutorial site:udemy.com",
"windows python installation guide", "python tutorial filetype:pdf",
"recommend good pizza places brooklyn heights" "best restaurants brooklyn",
"best restaurants brooklyn site:yelp.com",
"python tutorial site:youtube.com -beginner"
] ]
Set B: [ Set B: [
"macbook setup guide", "python programming guide",
"restaurant recommendations manhattan" "brooklyn dining recommendations"
] ]
Thought: Let's analyze set A both internally and against B: Thought: Let's analyze each query in set A considering operators:
1. The first python installation query is unique 1. First query targets YouTube tutorials - unique
2. The first pizza query is unique 2. Second query targets Udemy - different site operator, so unique
3. The second python query is a duplicate of the first 3. Third query targets PDF files - different filetype operator, so unique
4. The second pizza query is a duplicate of the earlier one 4. Fourth query is basic restaurant search - unique
Neither query in set B is similar enough to affect our decisions. 5. Fifth query adds Yelp filter - different site operator, so unique
6. Sixth query has same site as first but adds exclusion - different operator combo, so unique
None of the queries in set B have matching operators, so they don't cause duplicates.
Unique Queries: [ Unique Queries: [
"how to install python on windows", "python tutorial site:youtube.com",
"what's the best pizza in brooklyn heights" "python tutorial site:udemy.com",
"python tutorial filetype:pdf",
"best restaurants brooklyn",
"best restaurants brooklyn site:yelp.com",
"python tutorial site:youtube.com -beginner"
]
Set A: [
"machine learning +tensorflow filetype:pdf",
"machine learning +pytorch filetype:pdf",
"machine learning tutorial lang:en",
"machine learning tutorial lang:es"
]
Set B: [
"machine learning guide"
]
Thought: Analyzing queries with attention to operators:
1. First query specifies tensorflow PDFs - unique
2. Second query targets pytorch PDFs - different inclusion operator, so unique
3. Third query targets English content - unique due to language filter
4. Fourth query targets Spanish content - different language filter, so unique
The query in set B has no operators and different base terms, so it doesn't affect our decisions.
Unique Queries: [
"machine learning +tensorflow filetype:pdf",
"machine learning +pytorch filetype:pdf",
"machine learning tutorial lang:en",
"machine learning tutorial lang:es"
] ]
Now, analyze these sets: Now, analyze these sets:
@@ -93,8 +130,7 @@ export async function dedupQueries(newQueries: string[], existingQueries: string
} }
} }
// Example usage export async function main() {
async function main() {
const newQueries = process.argv[2] ? JSON.parse(process.argv[2]) : []; const newQueries = process.argv[2] ? JSON.parse(process.argv[2]) : [];
const existingQueries = process.argv[3] ? JSON.parse(process.argv[3]) : []; const existingQueries = process.argv[3] ? JSON.parse(process.argv[3]) : [];
@@ -107,4 +143,4 @@ async function main() {
if (require.main === module) { if (require.main === module) {
main().catch(console.error); main().catch(console.error);
} }

View File

@@ -17,7 +17,7 @@ const responseSchema = {
}, },
blame: { blame: {
type: SchemaType.STRING, type: SchemaType.STRING,
description: "Which step or action was the root cause of the answer rejection" description: "Which action or the step was the root cause of the answer rejection"
}, },
improvement: { improvement: {
type: SchemaType.STRING, type: SchemaType.STRING,

View File

@@ -1,9 +1,11 @@
import {GoogleGenerativeAI, SchemaType} from "@google/generative-ai"; import { GoogleGenerativeAI, SchemaType } from "@google/generative-ai";
import { GEMINI_API_KEY, MODEL_NAME } from "../config"; import { GEMINI_API_KEY, MODEL_NAME } from "../config";
import { tokenTracker } from "../utils/token-tracker"; import { tokenTracker } from "../utils/token-tracker";
import { SearchAction } from "../types";
type KeywordsResponse = { type KeywordsResponse = {
keywords: string[]; thought: string;
queries: string[];
}; };
const responseSchema = { const responseSchema = {
@@ -13,18 +15,18 @@ const responseSchema = {
type: SchemaType.STRING, type: SchemaType.STRING,
description: "Strategic reasoning about query complexity and search approach" description: "Strategic reasoning about query complexity and search approach"
}, },
keywords: { queries: {
type: SchemaType.ARRAY, type: SchemaType.ARRAY,
items: { items: {
type: SchemaType.STRING, type: SchemaType.STRING,
description: "Space-separated keywords (2-4 words) optimized for search" description: "Search query with integrated operators"
}, },
description: "Array of keyword combinations, each targeting a specific aspect", description: "Array of search queries with appropriate operators",
minItems: 1, minItems: 1,
maxItems: 3 maxItems: 3
} }
}, },
required: ["thought", "keywords"] required: ["thought", "queries"]
}; };
const genAI = new GoogleGenerativeAI(GEMINI_API_KEY); const genAI = new GoogleGenerativeAI(GEMINI_API_KEY);
@@ -37,81 +39,90 @@ const model = genAI.getGenerativeModel({
} }
}); });
function getPrompt(query: string): string { function getPrompt(action: SearchAction): string {
return `You are an expert Information Retrieval Assistant. Transform user queries into precise keyword combinations, with strategic reasoning. return `You are an expert Information Retrieval Assistant. Transform user queries into precise keyword combinations with strategic reasoning and appropriate search operators.
Core Rules: Core Rules:
1. Always return keywords in array format, even for single queries 1. Generate search queries that directly include appropriate operators
2. Keep keywords minimal: 2-4 words preferred 2. Keep base keywords minimal: 2-4 words preferred
3. Split only when necessary for distinctly different aspects, but a comparison query may need multiple searches for each aspect 3. Use exact match quotes for specific phrases that must stay together
4. Remove fluff words (question words, modals, qualifiers) 4. Apply + operator for critical terms that must appear
5. Preserve crucial qualifiers (brands, versions, dates) 5. Use - operator to exclude irrelevant or ambiguous terms
6. The generated query should not be easily "captured" by those malicious SEO articles 6. Add appropriate filters (filetype:, site:, lang:, loc:) when context suggests
7. Split queries only when necessary for distinctly different aspects
8. Preserve crucial qualifiers while removing fluff words
9. Make the query resistant to SEO manipulation
Available Operators:
- "phrase" : exact match for phrases
- +term : must include term
- -term : exclude term
- filetype:pdf/doc : specific file type
- site:example.com : limit to specific site
- lang:xx : language filter (ISO 639-1 code)
- loc:xx : location filter (ISO 3166-1 code)
- intitle:term : term must be in title
- inbody:term : term must be in body text
Examples with Strategic Reasoning: Examples with Strategic Reasoning:
Input Query: What's the best pizza place in Brooklyn Heights? Input Query: What's the difference between ReactJS and Vue.js for building web applications?
Thought: This is a straightforward location-based query. Since it's just about finding pizza places in a specific neighborhood, a single focused search should suffice. No need to complicate it by splitting into multiple searches. Thought: This is a comparison query. User is likely looking for technical evaluation and objective feature comparisons, possibly for framework selection decisions. We'll split this into separate queries to capture both high-level differences and specific technical aspects.
Output Keywords: ["brooklyn heights pizza"] Queries: [
"react vue comparison +advantages +disadvantages",
Input Query: Why does my MacBook M1 Pro battery drain so fast after the latest OS update? "react vue performance +benchmark"
Thought: Hmm, this seems simple at first, but we need multiple angles to properly diagnose. First, we should look for M1 specific battery issues. Then check the OS update problems, as it might be a known issue. By combining results from both searches, we should get a comprehensive answer.
Output Keywords: [
"macbook m1 battery drain",
"macos update battery issues"
] ]
Input Query: How does caffeine timing affect athletic performance and post-workout recovery for morning vs evening workouts? Input Query: How to fix a leaking kitchen faucet?
Thought: This is quite complex - it involves caffeine's effects in different contexts. We need to understand: 1) caffeine's impact on performance, 2) its role in recovery, and 3) timing considerations. All three aspects are crucial for a complete answer. By searching these separately, we can piece together a comprehensive understanding. Thought: This is a how-to query seeking practical solutions. User likely wants step-by-step guidance and visual demonstrations for DIY repair. We'll target both video tutorials and written guides.
Output Keywords: [ Queries: [
"caffeine athletic performance timing", "kitchen faucet leak repair site:youtube.com",
"caffeine post workout recovery", "faucet drip fix +diy +steps -professional",
"morning evening workout caffeine" "faucet repair tools +parts +guide"
] ]
Input Query: Need help with my sourdough starter - it's not rising and smells like acetone Input Query: What are healthy breakfast options for type 2 diabetes?
Thought: Initially seems like it needs two searches - one for not rising, one for the smell. But wait - these symptoms are likely related and commonly occur together in sourdough troubleshooting. A single focused search should capture solutions for both issues. Thought: This is a health-specific informational query. User needs authoritative medical advice combined with practical meal suggestions. Splitting into medical guidelines and recipes will provide comprehensive coverage.
Output Keywords: ["sourdough starter troubleshooting"] Queries: [
"type 2 diabetes breakfast guidelines site:edu",
"diabetic breakfast recipes -sugar +easy"
]
Input Query: Looking for a Python machine learning framework that works well with Apple Silicon and can handle large language models Input Query: Latest AWS Lambda features for serverless applications
Thought: This query looks straightforward but requires careful consideration. We need information about ML frameworks' compatibility with M1/M2 chips specifically, and then about their LLM capabilities. Two separate searches will give us more precise results than trying to find everything in one search. Thought: This is a product research query focused on recent updates. User wants current information about specific technology features, likely for implementation purposes. We'll target official docs and community insights.
Output Keywords: [ Queries: [
"python ml framework apple silicon", "aws lambda features site:aws.amazon.com intitle:2024",
"python framework llm support" "lambda serverless best practices +new -legacy"
]
Input Query: Find Python tutorials on YouTube, but exclude beginner content
Thought: This is an educational resource query with specific skill-level requirements. User is seeking advanced learning materials on a specific platform. We'll focus on advanced topics while explicitly filtering out basic content.
Queries: [
"python advanced programming site:youtube.com -beginner -basics",
"python design patterns tutorial site:youtube.com"
] ]
Now, process this query: Now, process this query:
Input Query: ${query}`; Input Query: ${action.searchQuery}
Intention: ${action.thoughts}
`;
} }
export async function rewriteQuery(query: string): Promise<{ keywords: string[], tokens: number }> { export async function rewriteQuery(action: SearchAction): Promise<{ queries: string[], tokens: number }> {
try { try {
const prompt = getPrompt(query); const prompt = getPrompt(action);
const result = await model.generateContent(prompt); const result = await model.generateContent(prompt);
const response = await result.response; const response = await result.response;
const usage = response.usageMetadata; const usage = response.usageMetadata;
const json = JSON.parse(response.text()) as KeywordsResponse; const json = JSON.parse(response.text()) as KeywordsResponse;
console.log('Query rewriter:', json.keywords)
console.log('Query rewriter:', json.queries);
const tokens = usage?.totalTokenCount || 0; const tokens = usage?.totalTokenCount || 0;
tokenTracker.trackUsage('query-rewriter', tokens); tokenTracker.trackUsage('query-rewriter', tokens);
return { keywords: json.keywords, tokens };
return { queries: json.queries, tokens };
} catch (error) { } catch (error) {
console.error('Error in query rewriting:', error); console.error('Error in query rewriting:', error);
throw error; throw error;
} }
} }
// Example usage
async function main() {
const query = process.argv[2] || "";
try {
await rewriteQuery(query);
} catch (error) {
console.error('Failed to rewrite query:', error);
}
}
if (require.main === module) {
main().catch(console.error);
}

30
src/types.ts Normal file
View File

@@ -0,0 +1,30 @@
/** Every action name a step may carry; this is the discriminant of `StepAction`. */
type ActionName = "search" | "answer" | "reflect" | "visit";

/**
 * Fields common to all actions.
 *
 * `TName` pins `action` to one literal so each concrete action type can be
 * told apart by checking `action`.
 */
type BaseAction<TName extends ActionName = ActionName> = {
  action: TName;
  /** Free-text explanation of why this action was chosen. */
  thoughts: string;
};

/** One citation backing an answer: a quote plus the URL it was taken from. */
type Reference = {
  exactQuote: string;
  url: string;
};

/** Step that issues a search for `searchQuery`. */
export type SearchAction = BaseAction<"search"> & {
  searchQuery: string;
};

/** Step that proposes an answer together with its supporting references. */
export type AnswerAction = BaseAction<"answer"> & {
  answer: string;
  references: Reference[];
};

/** Step that raises follow-up questions to be answered later. */
export type ReflectAction = BaseAction<"reflect"> & {
  questionsToAnswer: string[];
};

/** Step that visits the listed URLs. */
export type VisitAction = BaseAction<"visit"> & {
  URLTargets: string[];
};

/** Any single step; narrow on `action` to reach the variant-specific fields. */
export type StepAction =
  | SearchAction
  | AnswerAction
  | ReflectAction
  | VisitAction;