From f48c84d207e5e62d2eb5d5e13b20f5c9a6d6e1ac Mon Sep 17 00:00:00 2001 From: Han Xiao Date: Wed, 5 Feb 2025 11:41:29 +0800 Subject: [PATCH] fix: use xml prompt --- src/agent.ts | 148 ++++++++++++++++++++++++++++++++------------- src/tools/dedup.ts | 104 ++++++++++++------------------- 2 files changed, 144 insertions(+), 108 deletions(-) diff --git a/src/agent.ts b/src/agent.ts index 3abcced..549d0fa 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -111,7 +111,7 @@ function getPrompt( allowRead: boolean = true, allowSearch: boolean = true, badContext?: { question: string, answer: string, evaluation: string, recap: string; blame: string; improvement: string; }[], - knowledge?: { question: string; answer: string; }[], + knowledge?: { question: string; answer: string; references: any[]}[], allURLs?: Record, beastMode?: boolean ): string { @@ -122,49 +122,78 @@ function getPrompt( You are an advanced AI research analyst specializing in multi-step reasoning. Using your training data and prior lessons learned, answer the following question with absolute certainty: -## Question -${question}`); + +${question} + +`); // Add context section if exists if (context?.length) { - sections.push(`## Context + sections.push(` + You have conducted the following actions: -${context.join('\n')}`); +${context.join('\n')} + + +`); } // Add knowledge section if exists if (knowledge?.length) { const knowledgeItems = knowledge - .map((k, i) => `### Knowledge ${i + 1}: ${k.question}\n${k.answer}`) + .map((k, i) => ` + + +${k.question} + + +${k.answer} + + +${JSON.stringify(k.references)} + + +`) .join('\n\n'); - sections.push(`## Knowledge + sections.push(` + You have successfully gathered some knowledge which might be useful for answering the original question. Here is the knowledge you have gathered so far -${knowledgeItems}`); +${knowledgeItems} + + +`); } // Add bad context section if exists if (badContext?.length) { const attempts = badContext - .map((c, i) => `### Attempt ${i + 1} + .map((c, i) => ` + - Question: ${c.question} - Answer: ${c.answer} - Reject Reason: ${c.evaluation} - Actions Recap: ${c.recap} -- Actions Blame: ${c.blame}`) +- Actions Blame: ${c.blame} + +`) .join('\n\n'); const learnedStrategy = badContext.map(c => c.improvement).join('\n'); - sections.push(`## Unsuccessful Attempts + sections.push(` + Your have tried the following actions but failed to find the answer to the question. ${attempts} -## Learned Strategy + + + ${learnedStrategy} + `); } @@ -176,50 +205,69 @@ ${learnedStrategy} .map(([url, desc]) => ` + "${url}": "${desc}"`) .join('\n'); - actions.push(`**visit**: + actions.push(` + - Visit any URLs from below to gather external knowledge, choose the most relevant URLs that might contain the answer + ${urlList} + - When you have enough search result in the context and want to deep dive into specific URLs -- It allows you to access the full content behind any URLs`); +- It allows you to access the full content behind any URLs + + +`); } if (allowSearch) { - actions.push(`**search**: + actions.push(` + - Query external sources using a public search engine - Focus on solving one specific aspect of the question -- Only give keywords search query, not full sentences`); +- Only give keywords search query, not full sentences + +`); } if (allowAnswer) { - actions.push(`**answer**: + actions.push(` + - Provide final response only when 100% certain -- Responses must be definitive (no ambiguity, uncertainty, or disclaimers)${allowReflect ? '\n- If doubts remain, use "reflect" instead' : ''}`); +- Responses must be definitive (no ambiguity, uncertainty, or disclaimers)${allowReflect ? '\n- If doubts remain, use instead' : ''} + +`); } if (beastMode) { - actions.push(`**answer**: -- You have gathered enough information to answer the question; they may not be perfect, but this is your very last chance to answer the question. -- Try the best of the best reasoning ability, investigate every details in the context and provide the best answer you can think of. -- When uncertain, educated guess is allowed and encouraged, but make sure it is based on the context and knowledge you have gathered. -- Responses must be definitive (no ambiguity, uncertainty, or disclaimers`); + actions.push(` + +- Any answer is better than no answer +- Partial answers are allowed, but make sure they are based on the context and knowledge you have gathered +- When uncertain, educated guess based on the context and knowledge is allowed and encouraged. +- Responses must be definitive (no ambiguity, uncertainty, or disclaimers) + +`); } if (allowReflect) { - actions.push(`**reflect**: + actions.push(` + - Perform critical analysis through hypothetical scenarios or systematic breakdowns - Identify knowledge gaps and formulate essential clarifying questions - Questions must be: - Original (not variations of existing questions) - Focused on single concepts - Under 20 words - - Non-compound/non-complex`); + - Non-compound/non-complex + +`); } - sections.push(`## Actions - + sections.push(` + Based on the current context, you must choose one of the following actions: - -${actions.join('\n\n')}`); +${actions.join('\n\n')} + +`); // Add footer sections.push(`Respond exclusively in valid JSON format matching exact JSON schema. @@ -243,6 +291,10 @@ function removeAllLineBreaks(text: string) { return text.replace(/(\r\n|\n|\r)/gm, " "); } +function removeHTMLtags(text: string) { + return text.replace(/<[^>]*>?/gm, ''); +} + export async function getResponse(question: string, tokenBudget: number = 1_000_000, maxBadAttempts: number = 3, existingContext?: Partial): Promise<{ result: StepAction; context: TrackerContext }> { @@ -250,7 +302,7 @@ export async function getResponse(question: string, tokenBudget: number = 1_000_ tokenTracker: existingContext?.tokenTracker || new TokenTracker(tokenBudget), actionTracker: existingContext?.actionTracker || new ActionTracker() }; - context.actionTracker.trackAction({ gaps: [question], totalStep: 0, badAttempts: 0 }); + context.actionTracker.trackAction({gaps: [question], totalStep: 0, badAttempts: 0}); let step = 0; let totalStep = 0; let badAttempts = 0; @@ -275,7 +327,7 @@ export async function getResponse(question: string, tokenBudget: number = 1_000_ await sleep(STEP_SLEEP); step++; totalStep++; - context.actionTracker.trackAction({ totalStep, thisStep, gaps, badAttempts }); + context.actionTracker.trackAction({totalStep, thisStep, gaps, badAttempts}); const budgetPercentage = (context.tokenTracker.getTotalUsage() / tokenBudget * 100).toFixed(2); console.log(`Step ${totalStep} / Budget used ${budgetPercentage}%`); console.log('Gaps:', gaps); @@ -298,7 +350,7 @@ export async function getResponse(question: string, tokenBudget: number = 1_000_ allKnowledge, allURLs, false - ); + ); const model = genAI.getGenerativeModel({ model: modelConfigs.agent.model, @@ -439,15 +491,14 @@ Although you solved a sub-question, you still need to find the answer to the ori allKnowledge.push({ question: currentQuestion, answer: thisStep.answer, + references: thisStep.references, type: 'qa' }); } } else if (thisStep.action === 'reflect' && thisStep.questionsToAnswer) { let newGapQuestions = thisStep.questionsToAnswer const oldQuestions = newGapQuestions; - if (allQuestions.length) { - newGapQuestions = (await dedupQueries(newGapQuestions, allQuestions)).unique_queries; - } + newGapQuestions = (await dedupQueries(newGapQuestions, allQuestions)).unique_queries; if (newGapQuestions.length > 0) { // found new gap questions diaryContext.push(` @@ -479,10 +530,9 @@ But then you realized you have asked them before. You decided to to think out of const oldKeywords = keywordsQueries; // avoid exisitng searched queries - if (allKeywords.length) { - const {unique_queries: dedupedQueries} = await dedupQueries(keywordsQueries, allKeywords); - keywordsQueries = dedupedQueries; - } + const {unique_queries: dedupedQueries} = await dedupQueries(keywordsQueries, allKeywords); + keywordsQueries = dedupedQueries; + if (keywordsQueries.length > 0) { const searchResults = []; for (const query of keywordsQueries) { @@ -508,6 +558,14 @@ But then you realized you have asked them before. You decided to to think out of url: r.url, description: r.description, })); + + allKnowledge.push({ + question: `What do Internet say about ${query}?`, + answer: removeHTMLtags(minResults.map(r => `${r.description}`).join('; ')), + references: minResults.map(r => r.url), + type: 'side-info' + }); + for (const r of minResults) { allURLs[r.url] = r.title; } @@ -559,6 +617,7 @@ You decided to think out of the box or cut from a completely different angle. allKnowledge.push({ question: `What is in ${response.data?.url || 'the URL'}?`, answer: removeAllLineBreaks(response.data?.content || 'No content available'), + references: [response.data?.url], type: 'url' }); visitedURLs.push(url); @@ -602,7 +661,7 @@ You decided to think out of the box or cut from a completely different angle.`); totalStep++; await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep); if (isAnswered) { - return { result: thisStep, context }; + return {result: thisStep, context}; } else { console.log('Enter Beast mode!!!') const prompt = getPrompt( @@ -617,7 +676,7 @@ You decided to think out of the box or cut from a completely different angle.`); allKnowledge, allURLs, true - ); + ); const model = genAI.getGenerativeModel({ model: modelConfigs.agentBeastMode.model, @@ -636,7 +695,7 @@ You decided to think out of the box or cut from a completely different angle.`); await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep); thisStep = JSON.parse(response.text()); console.log(thisStep) - return { result: thisStep, context }; + return {result: thisStep, context}; } } @@ -658,7 +717,10 @@ const genAI = new GoogleGenerativeAI(GEMINI_API_KEY); export async function main() { const question = process.argv[2] || ""; - const { result: finalStep, context: tracker } = await getResponse(question) as { result: AnswerAction; context: TrackerContext }; + const { + result: finalStep, + context: tracker + } = await getResponse(question) as { result: AnswerAction; context: TrackerContext }; console.log('Final Answer:', finalStep.answer); tracker.tokenTracker.printSummary(); diff --git a/src/tools/dedup.ts b/src/tools/dedup.ts index 3d6d644..8fa6cf7 100644 --- a/src/tools/dedup.ts +++ b/src/tools/dedup.ts @@ -33,81 +33,55 @@ const model = genAI.getGenerativeModel({ }); function getPrompt(newQueries: string[], existingQueries: string[]): string { - return `You are an expert in semantic similarity analysis. Given a set of new queries (A) and existing queries (B), identify which queries from set A are semantically unique when compared BOTH to other queries within A AND to queries in set B. + return `You are an expert in semantic similarity analysis. Given a set of queries (setA) and a set of queries (setB) -Core Rules: + +Function FilterSetA(setA, setB, threshold): + filteredA = empty set + + for each candidateQuery in setA: + isValid = true + + // Check similarity with already accepted queries in filteredA + for each acceptedQuery in filteredA: + similarity = calculateSimilarity(candidateQuery, acceptedQuery) + if similarity >= threshold: + isValid = false + break + + // If passed first check, compare with set B + if isValid: + for each queryB in setB: + similarity = calculateSimilarity(candidateQuery, queryB) + if similarity >= threshold: + isValid = false + break + + // If passed all checks, add to filtered set + if isValid: + add candidateQuery to filteredA + + return filteredA + + + 1. Consider semantic meaning and query intent, not just lexical similarity 2. Account for different phrasings of the same information need -3. A query is considered duplicate ONLY if: - - It has identical base keywords AND identical operators to another query in set A - - OR it has identical base keywords AND identical operators to a query in set B -4. Queries with same base keywords but different operators are NOT duplicates -5. Different aspects or perspectives of the same topic are not duplicates -6. Consider query specificity - a more specific query is not a duplicate of a general one -7. Search operators that make queries behave differently: +3. Queries with same base keywords but different operators are NOT duplicates +4. Different aspects or perspectives of the same topic are not duplicates +5. Consider query specificity - a more specific query is not a duplicate of a general one +6. Search operators that make queries behave differently: - Different site: filters (e.g., site:youtube.com vs site:github.com) - Different file types (e.g., filetype:pdf vs filetype:doc) - Different language/location filters (e.g., lang:en vs lang:es) - Different exact match phrases (e.g., "exact phrase" vs no quotes) - Different inclusion/exclusion (+/- operators) - Different title/body filters (intitle: vs inbody:) + -Examples: - -Set A: [ - "python tutorial site:youtube.com", - "python tutorial site:udemy.com", - "python tutorial filetype:pdf", - "best restaurants brooklyn", - "best restaurants brooklyn site:yelp.com", - "python tutorial site:youtube.com -beginner" -] -Set B: [ - "python programming guide", - "brooklyn dining recommendations" -] -Thought: Let's analyze each query in set A considering operators: -1. First query targets YouTube tutorials - unique -2. Second query targets Udemy - different site operator, so unique -3. Third query targets PDF files - different filetype operator, so unique -4. Fourth query is basic restaurant search - unique -5. Fifth query adds Yelp filter - different site operator, so unique -6. Sixth query has same site as first but adds exclusion - different operator combo, so unique -None of the queries in set B have matching operators, so they don't cause duplicates. -Unique Queries: [ - "python tutorial site:youtube.com", - "python tutorial site:udemy.com", - "python tutorial filetype:pdf", - "best restaurants brooklyn", - "best restaurants brooklyn site:yelp.com", - "python tutorial site:youtube.com -beginner" -] - -Set A: [ - "machine learning +tensorflow filetype:pdf", - "machine learning +pytorch filetype:pdf", - "machine learning tutorial lang:en", - "machine learning tutorial lang:es" -] -Set B: [ - "machine learning guide" -] -Thought: Analyzing queries with attention to operators: -1. First query specifies tensorflow PDFs - unique -2. Second query targets pytorch PDFs - different inclusion operator, so unique -3. Third query targets English content - unique due to language filter -4. Fourth query targets Spanish content - different language filter, so unique -The query in set B has no operators and different base terms, so it doesn't affect our decisions. -Unique Queries: [ - "machine learning +tensorflow filetype:pdf", - "machine learning +pytorch filetype:pdf", - "machine learning tutorial lang:en", - "machine learning tutorial lang:es" -] - -Now, analyze these sets: -Set A: ${JSON.stringify(newQueries)} -Set B: ${JSON.stringify(existingQueries)}`; +Now, run FilterSetA on the following: +SetA: ${JSON.stringify(newQueries)} +SetB: ${JSON.stringify(existingQueries)}`; } export async function dedupQueries(newQueries: string[], existingQueries: string[], tracker?: TokenTracker): Promise<{ unique_queries: string[], tokens: number }> {