mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
fix: use xml prompt
This commit is contained in:
parent
77e1656744
commit
f48c84d207
148
src/agent.ts
148
src/agent.ts
@ -111,7 +111,7 @@ function getPrompt(
|
|||||||
allowRead: boolean = true,
|
allowRead: boolean = true,
|
||||||
allowSearch: boolean = true,
|
allowSearch: boolean = true,
|
||||||
badContext?: { question: string, answer: string, evaluation: string, recap: string; blame: string; improvement: string; }[],
|
badContext?: { question: string, answer: string, evaluation: string, recap: string; blame: string; improvement: string; }[],
|
||||||
knowledge?: { question: string; answer: string; }[],
|
knowledge?: { question: string; answer: string; references: any[]}[],
|
||||||
allURLs?: Record<string, string>,
|
allURLs?: Record<string, string>,
|
||||||
beastMode?: boolean
|
beastMode?: boolean
|
||||||
): string {
|
): string {
|
||||||
@ -122,49 +122,78 @@ function getPrompt(
|
|||||||
|
|
||||||
You are an advanced AI research analyst specializing in multi-step reasoning. Using your training data and prior lessons learned, answer the following question with absolute certainty:
|
You are an advanced AI research analyst specializing in multi-step reasoning. Using your training data and prior lessons learned, answer the following question with absolute certainty:
|
||||||
|
|
||||||
## Question
|
<question>
|
||||||
${question}`);
|
${question}
|
||||||
|
</question>
|
||||||
|
`);
|
||||||
|
|
||||||
// Add context section if exists
|
// Add context section if exists
|
||||||
if (context?.length) {
|
if (context?.length) {
|
||||||
sections.push(`## Context
|
sections.push(`
|
||||||
|
<context>
|
||||||
You have conducted the following actions:
|
You have conducted the following actions:
|
||||||
|
|
||||||
${context.join('\n')}`);
|
${context.join('\n')}
|
||||||
|
|
||||||
|
</context>
|
||||||
|
`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add knowledge section if exists
|
// Add knowledge section if exists
|
||||||
if (knowledge?.length) {
|
if (knowledge?.length) {
|
||||||
const knowledgeItems = knowledge
|
const knowledgeItems = knowledge
|
||||||
.map((k, i) => `### Knowledge ${i + 1}: ${k.question}\n${k.answer}`)
|
.map((k, i) => `
|
||||||
|
<knowledge-${i + 1}>
|
||||||
|
<question>
|
||||||
|
${k.question}
|
||||||
|
</question>
|
||||||
|
<answer>
|
||||||
|
${k.answer}
|
||||||
|
</answer>
|
||||||
|
<references>
|
||||||
|
${JSON.stringify(k.references)}
|
||||||
|
</references>
|
||||||
|
</knowledge-${i + 1}>
|
||||||
|
`)
|
||||||
.join('\n\n');
|
.join('\n\n');
|
||||||
|
|
||||||
sections.push(`## Knowledge
|
sections.push(`
|
||||||
|
<knowledge>
|
||||||
You have successfully gathered some knowledge which might be useful for answering the original question. Here is the knowledge you have gathered so far
|
You have successfully gathered some knowledge which might be useful for answering the original question. Here is the knowledge you have gathered so far
|
||||||
|
|
||||||
${knowledgeItems}`);
|
${knowledgeItems}
|
||||||
|
|
||||||
|
</knowledge>
|
||||||
|
`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add bad context section if exists
|
// Add bad context section if exists
|
||||||
if (badContext?.length) {
|
if (badContext?.length) {
|
||||||
const attempts = badContext
|
const attempts = badContext
|
||||||
.map((c, i) => `### Attempt ${i + 1}
|
.map((c, i) => `
|
||||||
|
<attempt-${i + 1}>
|
||||||
- Question: ${c.question}
|
- Question: ${c.question}
|
||||||
- Answer: ${c.answer}
|
- Answer: ${c.answer}
|
||||||
- Reject Reason: ${c.evaluation}
|
- Reject Reason: ${c.evaluation}
|
||||||
- Actions Recap: ${c.recap}
|
- Actions Recap: ${c.recap}
|
||||||
- Actions Blame: ${c.blame}`)
|
- Actions Blame: ${c.blame}
|
||||||
|
</attempt-${i + 1}>
|
||||||
|
`)
|
||||||
.join('\n\n');
|
.join('\n\n');
|
||||||
|
|
||||||
const learnedStrategy = badContext.map(c => c.improvement).join('\n');
|
const learnedStrategy = badContext.map(c => c.improvement).join('\n');
|
||||||
|
|
||||||
sections.push(`## Unsuccessful Attempts
|
sections.push(`
|
||||||
|
<bad-attempts>
|
||||||
Your have tried the following actions but failed to find the answer to the question.
|
Your have tried the following actions but failed to find the answer to the question.
|
||||||
|
|
||||||
${attempts}
|
${attempts}
|
||||||
|
|
||||||
## Learned Strategy
|
</bad-attempts>
|
||||||
|
|
||||||
|
<learned-strategy>
|
||||||
${learnedStrategy}
|
${learnedStrategy}
|
||||||
|
</learned-strategy>
|
||||||
`);
|
`);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -176,50 +205,69 @@ ${learnedStrategy}
|
|||||||
.map(([url, desc]) => ` + "${url}": "${desc}"`)
|
.map(([url, desc]) => ` + "${url}": "${desc}"`)
|
||||||
.join('\n');
|
.join('\n');
|
||||||
|
|
||||||
actions.push(`**visit**:
|
actions.push(`
|
||||||
|
<action-visit>
|
||||||
- Visit any URLs from below to gather external knowledge, choose the most relevant URLs that might contain the answer
|
- Visit any URLs from below to gather external knowledge, choose the most relevant URLs that might contain the answer
|
||||||
|
<url-list>
|
||||||
${urlList}
|
${urlList}
|
||||||
|
</url-list>
|
||||||
- When you have enough search result in the context and want to deep dive into specific URLs
|
- When you have enough search result in the context and want to deep dive into specific URLs
|
||||||
- It allows you to access the full content behind any URLs`);
|
- It allows you to access the full content behind any URLs
|
||||||
|
|
||||||
|
</action-visit>
|
||||||
|
`);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (allowSearch) {
|
if (allowSearch) {
|
||||||
actions.push(`**search**:
|
actions.push(`
|
||||||
|
<action-search>
|
||||||
- Query external sources using a public search engine
|
- Query external sources using a public search engine
|
||||||
- Focus on solving one specific aspect of the question
|
- Focus on solving one specific aspect of the question
|
||||||
- Only give keywords search query, not full sentences`);
|
- Only give keywords search query, not full sentences
|
||||||
|
</action-search>
|
||||||
|
`);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (allowAnswer) {
|
if (allowAnswer) {
|
||||||
actions.push(`**answer**:
|
actions.push(`
|
||||||
|
<action-answer>
|
||||||
- Provide final response only when 100% certain
|
- Provide final response only when 100% certain
|
||||||
- Responses must be definitive (no ambiguity, uncertainty, or disclaimers)${allowReflect ? '\n- If doubts remain, use "reflect" instead' : ''}`);
|
- Responses must be definitive (no ambiguity, uncertainty, or disclaimers)${allowReflect ? '\n- If doubts remain, use <action-reflect> instead' : ''}
|
||||||
|
</action-answer>
|
||||||
|
`);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (beastMode) {
|
if (beastMode) {
|
||||||
actions.push(`**answer**:
|
actions.push(`
|
||||||
- You have gathered enough information to answer the question; they may not be perfect, but this is your very last chance to answer the question.
|
<action-answer>
|
||||||
- Try the best of the best reasoning ability, investigate every details in the context and provide the best answer you can think of.
|
- Any answer is better than no answer
|
||||||
- When uncertain, educated guess is allowed and encouraged, but make sure it is based on the context and knowledge you have gathered.
|
- Partial answers are allowed, but make sure they are based on the context and knowledge you have gathered
|
||||||
- Responses must be definitive (no ambiguity, uncertainty, or disclaimers`);
|
- When uncertain, educated guess based on the context and knowledge is allowed and encouraged.
|
||||||
|
- Responses must be definitive (no ambiguity, uncertainty, or disclaimers)
|
||||||
|
</action-answer>
|
||||||
|
`);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (allowReflect) {
|
if (allowReflect) {
|
||||||
actions.push(`**reflect**:
|
actions.push(`
|
||||||
|
<action-reflect>
|
||||||
- Perform critical analysis through hypothetical scenarios or systematic breakdowns
|
- Perform critical analysis through hypothetical scenarios or systematic breakdowns
|
||||||
- Identify knowledge gaps and formulate essential clarifying questions
|
- Identify knowledge gaps and formulate essential clarifying questions
|
||||||
- Questions must be:
|
- Questions must be:
|
||||||
- Original (not variations of existing questions)
|
- Original (not variations of existing questions)
|
||||||
- Focused on single concepts
|
- Focused on single concepts
|
||||||
- Under 20 words
|
- Under 20 words
|
||||||
- Non-compound/non-complex`);
|
- Non-compound/non-complex
|
||||||
|
</action-reflect>
|
||||||
|
`);
|
||||||
}
|
}
|
||||||
|
|
||||||
sections.push(`## Actions
|
sections.push(`
|
||||||
|
<actions>
|
||||||
Based on the current context, you must choose one of the following actions:
|
Based on the current context, you must choose one of the following actions:
|
||||||
|
${actions.join('\n\n')}
|
||||||
${actions.join('\n\n')}`);
|
</actions>
|
||||||
|
`);
|
||||||
|
|
||||||
// Add footer
|
// Add footer
|
||||||
sections.push(`Respond exclusively in valid JSON format matching exact JSON schema.
|
sections.push(`Respond exclusively in valid JSON format matching exact JSON schema.
|
||||||
@ -243,6 +291,10 @@ function removeAllLineBreaks(text: string) {
|
|||||||
return text.replace(/(\r\n|\n|\r)/gm, " ");
|
return text.replace(/(\r\n|\n|\r)/gm, " ");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Strips HTML/XML markup from `text` and returns the remaining characters.
 *
 * A `<` only starts a tag when it is immediately followed by an ASCII letter,
 * `/`, `!` or `?` (element, closing tag, comment/doctype, processing
 * instruction). A bare less-than sign such as in `"5 < 10"` is therefore kept
 * instead of swallowing the rest of the string.
 *
 * The closing `>` stays optional on purpose: a tag that was cut off at the end
 * of the input (e.g. `<a href="x`) is still removed.
 *
 * @param text - Raw text that may contain markup.
 * @returns `text` with every recognised tag removed; text between tags is kept.
 */
function removeHTMLtags(text: string): string {
  return text.replace(/<[a-zA-Z\/!?][^>]*>?/g, '');
}
|
||||||
|
|
||||||
export async function getResponse(question: string, tokenBudget: number = 1_000_000,
|
export async function getResponse(question: string, tokenBudget: number = 1_000_000,
|
||||||
maxBadAttempts: number = 3,
|
maxBadAttempts: number = 3,
|
||||||
existingContext?: Partial<TrackerContext>): Promise<{ result: StepAction; context: TrackerContext }> {
|
existingContext?: Partial<TrackerContext>): Promise<{ result: StepAction; context: TrackerContext }> {
|
||||||
@ -250,7 +302,7 @@ export async function getResponse(question: string, tokenBudget: number = 1_000_
|
|||||||
tokenTracker: existingContext?.tokenTracker || new TokenTracker(tokenBudget),
|
tokenTracker: existingContext?.tokenTracker || new TokenTracker(tokenBudget),
|
||||||
actionTracker: existingContext?.actionTracker || new ActionTracker()
|
actionTracker: existingContext?.actionTracker || new ActionTracker()
|
||||||
};
|
};
|
||||||
context.actionTracker.trackAction({ gaps: [question], totalStep: 0, badAttempts: 0 });
|
context.actionTracker.trackAction({gaps: [question], totalStep: 0, badAttempts: 0});
|
||||||
let step = 0;
|
let step = 0;
|
||||||
let totalStep = 0;
|
let totalStep = 0;
|
||||||
let badAttempts = 0;
|
let badAttempts = 0;
|
||||||
@ -275,7 +327,7 @@ export async function getResponse(question: string, tokenBudget: number = 1_000_
|
|||||||
await sleep(STEP_SLEEP);
|
await sleep(STEP_SLEEP);
|
||||||
step++;
|
step++;
|
||||||
totalStep++;
|
totalStep++;
|
||||||
context.actionTracker.trackAction({ totalStep, thisStep, gaps, badAttempts });
|
context.actionTracker.trackAction({totalStep, thisStep, gaps, badAttempts});
|
||||||
const budgetPercentage = (context.tokenTracker.getTotalUsage() / tokenBudget * 100).toFixed(2);
|
const budgetPercentage = (context.tokenTracker.getTotalUsage() / tokenBudget * 100).toFixed(2);
|
||||||
console.log(`Step ${totalStep} / Budget used ${budgetPercentage}%`);
|
console.log(`Step ${totalStep} / Budget used ${budgetPercentage}%`);
|
||||||
console.log('Gaps:', gaps);
|
console.log('Gaps:', gaps);
|
||||||
@ -298,7 +350,7 @@ export async function getResponse(question: string, tokenBudget: number = 1_000_
|
|||||||
allKnowledge,
|
allKnowledge,
|
||||||
allURLs,
|
allURLs,
|
||||||
false
|
false
|
||||||
);
|
);
|
||||||
|
|
||||||
const model = genAI.getGenerativeModel({
|
const model = genAI.getGenerativeModel({
|
||||||
model: modelConfigs.agent.model,
|
model: modelConfigs.agent.model,
|
||||||
@ -439,15 +491,14 @@ Although you solved a sub-question, you still need to find the answer to the ori
|
|||||||
allKnowledge.push({
|
allKnowledge.push({
|
||||||
question: currentQuestion,
|
question: currentQuestion,
|
||||||
answer: thisStep.answer,
|
answer: thisStep.answer,
|
||||||
|
references: thisStep.references,
|
||||||
type: 'qa'
|
type: 'qa'
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} else if (thisStep.action === 'reflect' && thisStep.questionsToAnswer) {
|
} else if (thisStep.action === 'reflect' && thisStep.questionsToAnswer) {
|
||||||
let newGapQuestions = thisStep.questionsToAnswer
|
let newGapQuestions = thisStep.questionsToAnswer
|
||||||
const oldQuestions = newGapQuestions;
|
const oldQuestions = newGapQuestions;
|
||||||
if (allQuestions.length) {
|
newGapQuestions = (await dedupQueries(newGapQuestions, allQuestions)).unique_queries;
|
||||||
newGapQuestions = (await dedupQueries(newGapQuestions, allQuestions)).unique_queries;
|
|
||||||
}
|
|
||||||
if (newGapQuestions.length > 0) {
|
if (newGapQuestions.length > 0) {
|
||||||
// found new gap questions
|
// found new gap questions
|
||||||
diaryContext.push(`
|
diaryContext.push(`
|
||||||
@ -479,10 +530,9 @@ But then you realized you have asked them before. You decided to to think out of
|
|||||||
|
|
||||||
const oldKeywords = keywordsQueries;
|
const oldKeywords = keywordsQueries;
|
||||||
// avoid existing searched queries
|
// avoid existing searched queries
|
||||||
if (allKeywords.length) {
|
const {unique_queries: dedupedQueries} = await dedupQueries(keywordsQueries, allKeywords);
|
||||||
const {unique_queries: dedupedQueries} = await dedupQueries(keywordsQueries, allKeywords);
|
keywordsQueries = dedupedQueries;
|
||||||
keywordsQueries = dedupedQueries;
|
|
||||||
}
|
|
||||||
if (keywordsQueries.length > 0) {
|
if (keywordsQueries.length > 0) {
|
||||||
const searchResults = [];
|
const searchResults = [];
|
||||||
for (const query of keywordsQueries) {
|
for (const query of keywordsQueries) {
|
||||||
@ -508,6 +558,14 @@ But then you realized you have asked them before. You decided to to think out of
|
|||||||
url: r.url,
|
url: r.url,
|
||||||
description: r.description,
|
description: r.description,
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
allKnowledge.push({
|
||||||
|
question: `What do Internet say about ${query}?`,
|
||||||
|
answer: removeHTMLtags(minResults.map(r => `${r.description}`).join('; ')),
|
||||||
|
references: minResults.map(r => r.url),
|
||||||
|
type: 'side-info'
|
||||||
|
});
|
||||||
|
|
||||||
for (const r of minResults) {
|
for (const r of minResults) {
|
||||||
allURLs[r.url] = r.title;
|
allURLs[r.url] = r.title;
|
||||||
}
|
}
|
||||||
@ -559,6 +617,7 @@ You decided to think out of the box or cut from a completely different angle.
|
|||||||
allKnowledge.push({
|
allKnowledge.push({
|
||||||
question: `What is in ${response.data?.url || 'the URL'}?`,
|
question: `What is in ${response.data?.url || 'the URL'}?`,
|
||||||
answer: removeAllLineBreaks(response.data?.content || 'No content available'),
|
answer: removeAllLineBreaks(response.data?.content || 'No content available'),
|
||||||
|
references: [response.data?.url],
|
||||||
type: 'url'
|
type: 'url'
|
||||||
});
|
});
|
||||||
visitedURLs.push(url);
|
visitedURLs.push(url);
|
||||||
@ -602,7 +661,7 @@ You decided to think out of the box or cut from a completely different angle.`);
|
|||||||
totalStep++;
|
totalStep++;
|
||||||
await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
|
await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
|
||||||
if (isAnswered) {
|
if (isAnswered) {
|
||||||
return { result: thisStep, context };
|
return {result: thisStep, context};
|
||||||
} else {
|
} else {
|
||||||
console.log('Enter Beast mode!!!')
|
console.log('Enter Beast mode!!!')
|
||||||
const prompt = getPrompt(
|
const prompt = getPrompt(
|
||||||
@ -617,7 +676,7 @@ You decided to think out of the box or cut from a completely different angle.`);
|
|||||||
allKnowledge,
|
allKnowledge,
|
||||||
allURLs,
|
allURLs,
|
||||||
true
|
true
|
||||||
);
|
);
|
||||||
|
|
||||||
const model = genAI.getGenerativeModel({
|
const model = genAI.getGenerativeModel({
|
||||||
model: modelConfigs.agentBeastMode.model,
|
model: modelConfigs.agentBeastMode.model,
|
||||||
@ -636,7 +695,7 @@ You decided to think out of the box or cut from a completely different angle.`);
|
|||||||
await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
|
await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
|
||||||
thisStep = JSON.parse(response.text());
|
thisStep = JSON.parse(response.text());
|
||||||
console.log(thisStep)
|
console.log(thisStep)
|
||||||
return { result: thisStep, context };
|
return {result: thisStep, context};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -658,7 +717,10 @@ const genAI = new GoogleGenerativeAI(GEMINI_API_KEY);
|
|||||||
|
|
||||||
export async function main() {
|
export async function main() {
|
||||||
const question = process.argv[2] || "";
|
const question = process.argv[2] || "";
|
||||||
const { result: finalStep, context: tracker } = await getResponse(question) as { result: AnswerAction; context: TrackerContext };
|
const {
|
||||||
|
result: finalStep,
|
||||||
|
context: tracker
|
||||||
|
} = await getResponse(question) as { result: AnswerAction; context: TrackerContext };
|
||||||
console.log('Final Answer:', finalStep.answer);
|
console.log('Final Answer:', finalStep.answer);
|
||||||
|
|
||||||
tracker.tokenTracker.printSummary();
|
tracker.tokenTracker.printSummary();
|
||||||
|
|||||||
@ -33,81 +33,55 @@ const model = genAI.getGenerativeModel({
|
|||||||
});
|
});
|
||||||
|
|
||||||
function getPrompt(newQueries: string[], existingQueries: string[]): string {
|
function getPrompt(newQueries: string[], existingQueries: string[]): string {
|
||||||
return `You are an expert in semantic similarity analysis. Given a set of new queries (A) and existing queries (B), identify which queries from set A are semantically unique when compared BOTH to other queries within A AND to queries in set B.
|
return `You are an expert in semantic similarity analysis. Given a set of queries (setA) and a set of queries (setB)
|
||||||
|
|
||||||
Core Rules:
|
<rules>
|
||||||
|
Function FilterSetA(setA, setB, threshold):
|
||||||
|
filteredA = empty set
|
||||||
|
|
||||||
|
for each candidateQuery in setA:
|
||||||
|
isValid = true
|
||||||
|
|
||||||
|
// Check similarity with already accepted queries in filteredA
|
||||||
|
for each acceptedQuery in filteredA:
|
||||||
|
similarity = calculateSimilarity(candidateQuery, acceptedQuery)
|
||||||
|
if similarity >= threshold:
|
||||||
|
isValid = false
|
||||||
|
break
|
||||||
|
|
||||||
|
// If passed first check, compare with set B
|
||||||
|
if isValid:
|
||||||
|
for each queryB in setB:
|
||||||
|
similarity = calculateSimilarity(candidateQuery, queryB)
|
||||||
|
if similarity >= threshold:
|
||||||
|
isValid = false
|
||||||
|
break
|
||||||
|
|
||||||
|
// If passed all checks, add to filtered set
|
||||||
|
if isValid:
|
||||||
|
add candidateQuery to filteredA
|
||||||
|
|
||||||
|
return filteredA
|
||||||
|
</rules>
|
||||||
|
|
||||||
|
<similarity-definition>
|
||||||
1. Consider semantic meaning and query intent, not just lexical similarity
|
1. Consider semantic meaning and query intent, not just lexical similarity
|
||||||
2. Account for different phrasings of the same information need
|
2. Account for different phrasings of the same information need
|
||||||
3. A query is considered duplicate ONLY if:
|
3. Queries with same base keywords but different operators are NOT duplicates
|
||||||
- It has identical base keywords AND identical operators to another query in set A
|
4. Different aspects or perspectives of the same topic are not duplicates
|
||||||
- OR it has identical base keywords AND identical operators to a query in set B
|
5. Consider query specificity - a more specific query is not a duplicate of a general one
|
||||||
4. Queries with same base keywords but different operators are NOT duplicates
|
6. Search operators that make queries behave differently:
|
||||||
5. Different aspects or perspectives of the same topic are not duplicates
|
|
||||||
6. Consider query specificity - a more specific query is not a duplicate of a general one
|
|
||||||
7. Search operators that make queries behave differently:
|
|
||||||
- Different site: filters (e.g., site:youtube.com vs site:github.com)
|
- Different site: filters (e.g., site:youtube.com vs site:github.com)
|
||||||
- Different file types (e.g., filetype:pdf vs filetype:doc)
|
- Different file types (e.g., filetype:pdf vs filetype:doc)
|
||||||
- Different language/location filters (e.g., lang:en vs lang:es)
|
- Different language/location filters (e.g., lang:en vs lang:es)
|
||||||
- Different exact match phrases (e.g., "exact phrase" vs no quotes)
|
- Different exact match phrases (e.g., "exact phrase" vs no quotes)
|
||||||
- Different inclusion/exclusion (+/- operators)
|
- Different inclusion/exclusion (+/- operators)
|
||||||
- Different title/body filters (intitle: vs inbody:)
|
- Different title/body filters (intitle: vs inbody:)
|
||||||
|
</similarity-definition>
|
||||||
|
|
||||||
Examples:
|
Now, run FilterSetA on the following:
|
||||||
|
SetA: ${JSON.stringify(newQueries)}
|
||||||
Set A: [
|
SetB: ${JSON.stringify(existingQueries)}`;
|
||||||
"python tutorial site:youtube.com",
|
|
||||||
"python tutorial site:udemy.com",
|
|
||||||
"python tutorial filetype:pdf",
|
|
||||||
"best restaurants brooklyn",
|
|
||||||
"best restaurants brooklyn site:yelp.com",
|
|
||||||
"python tutorial site:youtube.com -beginner"
|
|
||||||
]
|
|
||||||
Set B: [
|
|
||||||
"python programming guide",
|
|
||||||
"brooklyn dining recommendations"
|
|
||||||
]
|
|
||||||
Thought: Let's analyze each query in set A considering operators:
|
|
||||||
1. First query targets YouTube tutorials - unique
|
|
||||||
2. Second query targets Udemy - different site operator, so unique
|
|
||||||
3. Third query targets PDF files - different filetype operator, so unique
|
|
||||||
4. Fourth query is basic restaurant search - unique
|
|
||||||
5. Fifth query adds Yelp filter - different site operator, so unique
|
|
||||||
6. Sixth query has same site as first but adds exclusion - different operator combo, so unique
|
|
||||||
None of the queries in set B have matching operators, so they don't cause duplicates.
|
|
||||||
Unique Queries: [
|
|
||||||
"python tutorial site:youtube.com",
|
|
||||||
"python tutorial site:udemy.com",
|
|
||||||
"python tutorial filetype:pdf",
|
|
||||||
"best restaurants brooklyn",
|
|
||||||
"best restaurants brooklyn site:yelp.com",
|
|
||||||
"python tutorial site:youtube.com -beginner"
|
|
||||||
]
|
|
||||||
|
|
||||||
Set A: [
|
|
||||||
"machine learning +tensorflow filetype:pdf",
|
|
||||||
"machine learning +pytorch filetype:pdf",
|
|
||||||
"machine learning tutorial lang:en",
|
|
||||||
"machine learning tutorial lang:es"
|
|
||||||
]
|
|
||||||
Set B: [
|
|
||||||
"machine learning guide"
|
|
||||||
]
|
|
||||||
Thought: Analyzing queries with attention to operators:
|
|
||||||
1. First query specifies tensorflow PDFs - unique
|
|
||||||
2. Second query targets pytorch PDFs - different inclusion operator, so unique
|
|
||||||
3. Third query targets English content - unique due to language filter
|
|
||||||
4. Fourth query targets Spanish content - different language filter, so unique
|
|
||||||
The query in set B has no operators and different base terms, so it doesn't affect our decisions.
|
|
||||||
Unique Queries: [
|
|
||||||
"machine learning +tensorflow filetype:pdf",
|
|
||||||
"machine learning +pytorch filetype:pdf",
|
|
||||||
"machine learning tutorial lang:en",
|
|
||||||
"machine learning tutorial lang:es"
|
|
||||||
]
|
|
||||||
|
|
||||||
Now, analyze these sets:
|
|
||||||
Set A: ${JSON.stringify(newQueries)}
|
|
||||||
Set B: ${JSON.stringify(existingQueries)}`;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function dedupQueries(newQueries: string[], existingQueries: string[], tracker?: TokenTracker): Promise<{ unique_queries: string[], tokens: number }> {
|
export async function dedupQueries(newQueries: string[], existingQueries: string[], tracker?: TokenTracker): Promise<{ unique_queries: string[], tokens: number }> {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user