Files
node-DeepResearch/src/agent.ts
2025-01-30 09:56:38 +08:00

533 lines
17 KiB
TypeScript

import {GoogleGenerativeAI, SchemaType} from "@google/generative-ai";
import dotenv from 'dotenv';
import {ProxyAgent, setGlobalDispatcher} from "undici";
import {readUrl} from "./tools/read";
import fs from 'fs/promises';
import {SafeSearchType, search} from "duck-duck-scrape";
import {rewriteQuery} from "./tools/query-rewriter";
import {dedupQueries} from "./tools/dedup";
import {evaluateAnswer} from "./tools/evaluator";
import {buildURLMap, StepData} from "./tools/getURLIndex";
// Proxy setup remains the same
if (process.env.https_proxy) {
try {
const proxyUrl = new URL(process.env.https_proxy).toString();
const dispatcher = new ProxyAgent({uri: proxyUrl});
setGlobalDispatcher(dispatcher);
} catch (error) {
console.error('Failed to set proxy:', error);
}
}
dotenv.config();
async function sleep(ms: number) {
const frames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
const startTime = Date.now();
const endTime = startTime + ms;
// Clear current line and hide cursor
process.stdout.write('\x1B[?25l');
while (Date.now() < endTime) {
const remaining = Math.ceil((endTime - Date.now()) / 1000);
const frameIndex = Math.floor(Date.now() / 100) % frames.length;
// Clear line and write new frame
process.stdout.write(`\r${frames[frameIndex]} Cool down... ${remaining}s remaining`);
// Small delay for animation
await new Promise(resolve => setTimeout(resolve, 50));
}
// Clear line, show cursor and move to next line
process.stdout.write('\r\x1B[K\x1B[?25h\n');
// Original sleep
await new Promise(resolve => setTimeout(resolve, 0));
}
type ResponseSchema = {
type: SchemaType.OBJECT;
properties: {
action: {
type: SchemaType.STRING;
enum: string[];
description: string;
};
searchQuery: {
type: SchemaType.STRING;
description: string;
};
URLTargets?: {
type: SchemaType.ARRAY;
items: {
type: SchemaType.STRING;
};
description: string;
};
answer: {
type: SchemaType.STRING;
description: string;
};
references: {
type: SchemaType.ARRAY;
items: {
type: SchemaType.OBJECT;
properties: {
title: {
type: SchemaType.STRING;
description: string;
};
url: {
type: SchemaType.STRING;
description: string;
};
};
required: string[];
};
description: string;
};
reasoning: {
type: SchemaType.STRING;
description: string;
};
questionsToAnswer?: {
type: SchemaType.ARRAY;
items: {
type: SchemaType.STRING;
description: string;
};
description: string;
maxItems: number;
};
};
required: string[];
};
function getSchema(allowReflect: boolean, allowRead: boolean): ResponseSchema {
let actions = ["search", "answer"];
if (allowReflect) {
actions.push("reflect");
}
if (allowRead) {
actions.push("visit");
}
return {
type: SchemaType.OBJECT,
properties: {
action: {
type: SchemaType.STRING,
enum: actions,
description: "Must match exactly one action type"
},
questionsToAnswer: allowReflect ? {
type: SchemaType.ARRAY,
items: {
type: SchemaType.STRING,
description: "each question must be a single line, concise and clear. not composite or compound, less than 20 words.",
},
description: "Only required when choosing 'reflect' action, list of most important questions to answer to fill the knowledge gaps.",
maxItems: 2
} : undefined,
searchQuery: {
type: SchemaType.STRING,
description: "Only required when choosing 'search' action, must be a short, keyword-based query that BM25, tf-idf based search engines can understand.",
},
URLTargets: allowRead ? {
type: SchemaType.ARRAY,
items: {
type: SchemaType.STRING
},
description: "Only required when choosing 'deep dive' action, must be an array of URLs"
} : undefined,
answer: {
type: SchemaType.STRING,
description: "Only required when choosing 'answer' action, must be the final answer in natural language"
},
references: {
type: SchemaType.ARRAY,
items: {
type: SchemaType.OBJECT,
properties: {
title: {
type: SchemaType.STRING,
description: "Title of the document; must be directly from the context",
},
url: {
type: SchemaType.STRING,
description: "URL of the document; must be directly from the context"
}
},
required: ["title", "url"]
},
description: "Only required when choosing 'answer' action, must be an array of references"
},
reasoning: {
type: SchemaType.STRING,
description: "Explain why choose this action?"
},
},
required: ["action", "reasoning"],
};
}
function getPrompt(question: string, context?: any[], allQuestions?: string[], allowReflect: boolean = false, badContext?: any[], knowledge?: any[], allURLs?: Record<string, string>) {
// console.log('Context:', context);
// console.log('All URLs:', JSON.stringify(allURLs, null, 2));
const knowledgeIntro = knowledge?.length ?
`
## Knowledge
You have successfully gathered some knowledge from the following questions:
${JSON.stringify(knowledge, null, 2)}
`
: '';
const badContextIntro = badContext?.length ?
`
## Unsuccessful Attempts
Your have tried the following actions but failed to find the answer to the question.
${badContext.map((c, i) => `
### Attempt ${i + 1}
${c.join('\n')}
`).join('\n')}
Learn to avoid these mistakes and think of a new approach, from a different angle, e.g. search for different keywords, read different URLs, or ask different questions.
`
: '';
const contextIntro = context?.length ?
`
## Context
You have conducted the following actions:
${context.join('\n')}
`
: '';
let actionsDescription = `
## Actions
When you are uncertain about the answer and you need knowledge, choose one of these actions to proceed:
${allURLs ? `
**visit**:
- Visit any URLs from below to gather external knowledge
${JSON.stringify(allURLs, null, 2)}
- When you have enough search result in the context and want to deep dive into specific URLs
- It allows you to access the full content behind any URLs
` : ''}
**search**:
- Query external sources using a public search engine
- Focus on solving one specific aspect of the question
- Only give keywords search query, not full sentences
**answer**:
- Provide final response only when 100% certain
- Responses must be definitive (no ambiguity, uncertainty, or disclaimers)
${allowReflect ? `- If doubts remain, use "reflect" instead` : ''}`;
if (allowReflect) {
actionsDescription += `
**reflect**:
- Perform critical analysis through hypothetical scenarios or systematic breakdowns
- Identify knowledge gaps and formulate essential clarifying questions
- Questions must be:
- Original (not variations of existing questions)
- Focused on single concepts
- Under 20 words
- Non-compound/non-complex
`;
}
return `
Current date: ${new Date().toUTCString()}
You are an advanced AI research analyst specializing in multi-step reasoning. Using your training data and prior lessons learned, answer the following question with absolute certainty:
## Question
${question}
${contextIntro.trim()}
${knowledgeIntro.trim()}
${badContextIntro.trim()}
${actionsDescription.trim()}
Respond exclusively in valid JSON format matching exact JSON schema.
Critical Requirements:
- Include ONLY ONE action type
- Never add unsupported keys
- Exclude all non-JSON text, markdown, or explanations
- Maintain strict JSON syntax`.trim();
}
let context: StepData[] = []; // successful steps in the current session
let allContext: StepData[] = []; // all steps in the current session, including those leads to wrong results
function updateContext(step: any) {
context.push(step);
allContext.push(step)
}
async function getResponse(question: string, tokenBudget: number = 1000000) {
let totalTokens = 0;
let step = 0;
let totalStep = 0;
let gaps: string[] = [question]; // All questions to be answered including the orginal question
let allQuestions = [question];
let allKeywords = [];
let allKnowledge = []; // knowledge are intermedidate questions that are answered
let badContext = [];
let diaryContext = [];
let allURLs: Record<string, string> = {};
while (totalTokens < tokenBudget) {
// add 1s delay to avoid rate limiting
await sleep(1000);
step++;
totalStep++;
console.log('===STEPS===', totalStep)
console.log('Gaps:', gaps)
const allowReflect = gaps.length <= 1;
const currentQuestion = gaps.length > 0 ? gaps.shift()! : question;
// update all urls with buildURLMap
allURLs = buildURLMap(allContext);
const allowRead = Object.keys(allURLs).length > 0;
const prompt = getPrompt(
currentQuestion,
diaryContext,
allQuestions,
allowReflect,
badContext,
allKnowledge,
allURLs);
console.log('Prompt len:', prompt.length)
const model = genAI.getGenerativeModel({
model: modelName,
generationConfig: {
temperature: 0.7,
responseMimeType: "application/json",
responseSchema: getSchema(allowReflect, allowRead)
}
});
const result = await model.generateContent(prompt);
const response = await result.response;
const usage = response.usageMetadata;
totalTokens += usage?.totalTokenCount || 0;
console.log(`Tokens: ${totalTokens}/${tokenBudget}`);
const action = JSON.parse(response.text());
console.log('Question-Action:', currentQuestion, action);
if (action.action === 'answer') {
updateContext({
step,
question: currentQuestion,
...action,
});
const evaluation = await evaluateAnswer(currentQuestion, action.answer);
if (currentQuestion === question) {
if (evaluation.is_valid_answer) {
// EXIT POINT OF THE PROGRAM!!!!
diaryContext.push(`
At step ${step}, you took **answer** action and finally found the answer to the original question:
Original question:
${currentQuestion}
Your answer:
${action.answer}
The evaluator thinks your answer is good because:
${evaluation.reasoning}
Your journey ends here. You have successfully answered the original question. Congratulations! 🎉
`);
await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
return action;
} else {
diaryContext.push(`
At step ${step}, you took **answer** action but evaluator thinks it is not a good answer:
Original question:
${currentQuestion}
Your answer:
${action.answer}
The evaluator thinks your answer is bad because:
${evaluation.reasoning}
`);
// store the bad context and reset the diary context
badContext.push(diaryContext);
diaryContext = [];
step = 0;
}
} else if (evaluation.is_valid_answer) {
diaryContext.push(`
At step ${step}, you took **answer** action. You found a good answer to the sub-question:
Sub-question:
${currentQuestion}
Your answer:
${action.answer}
The evaluator thinks your answer is good because:
${evaluation.reasoning}
Although you solved a sub-question, you still need to find the answer to the original question. You need to keep going.
`);
allKnowledge.push({question: currentQuestion, answer: action.answer});
}
}
else if (action.action === 'reflect' && action.questionsToAnswer) {
let newGapQuestions = action.questionsToAnswer
const oldQuestions = newGapQuestions;
if (allQuestions.length) {
newGapQuestions = await dedupQueries(newGapQuestions, allQuestions)
}
if (newGapQuestions.length > 0) {
// found new gap questions
diaryContext.push(`
At step ${step}, you took **reflect** and think about the knowledge gaps. You found some sub-questions are important to the question: "${currentQuestion}"
You realize you need to know the answers to the following sub-questions:
${newGapQuestions.map((q: string) => `- ${q}`).join('\n')}
You will now figure out the answers to these sub-questions and see if they can help me find the answer to the original question.
`);
gaps.push(...newGapQuestions);
allQuestions.push(...newGapQuestions);
gaps.push(question); // always keep the original question in the gaps
} else {
console.log('No new questions to ask');
diaryContext.push(`
At step ${step}, you took **reflect** and think about the knowledge gaps. You tried to break down the question "${currentQuestion}" into gap-questions like this: ${oldQuestions.join(', ')}
But then you realized you have asked them before. You decided to to think out of the box or cut from a completely different angle.
`);
updateContext({
step,
...action,
result: 'I have tried all possible questions and found no useful information. I must think out of the box or different angle!!!'
});
}
}
else if (action.action === 'search' && action.searchQuery) {
// rewrite queries
let keywordsQueries = await rewriteQuery(action.searchQuery);
const oldKeywords = keywordsQueries;
// avoid exisitng searched queries
if (allKeywords.length) {
keywordsQueries = await dedupQueries(keywordsQueries, allKeywords)
}
if (keywordsQueries.length > 0) {
const searchResults = [];
for (const query of keywordsQueries) {
console.log('Searching:', query);
const results = await search(query, {
safeSearch: SafeSearchType.STRICT
});
const minResults = results.results.map(r => ({
title: r.title,
url: r.url,
description: r.description,
}));
searchResults.push({query, results: minResults});
allKeywords.push(query);
await sleep(5000);
}
diaryContext.push(`
At step ${step}, you took the **search** action and look for external information for the question: "${currentQuestion}".
In particular, you tried to search for the following keywords: "${keywordsQueries.join(', ')}".
You found quite some information and add them to your URL list and **visit** them later when needed.
`);
updateContext({
step,
question: currentQuestion,
...action,
result: searchResults
});
} else {
diaryContext.push(`
At step ${step}, you took the **search** action and look for external information for the question: "${currentQuestion}".
In particular, you tried to search for the following keywords: ${oldKeywords.join(', ')}.
But then you realized you have already searched for these keywords before.
You decided to think out of the box or cut from a completely different angle.
`);
console.log('No new queries to search');
updateContext({
step,
...action,
result: 'I have tried all possible queries and found no new information. I must think out of the box or different angle!!!'
});
}
}
else if (action.action === 'visit' && action.URLTargets?.length) {
const urlResults = await Promise.all(
action.URLTargets.map(async (url: string) => {
const response = await readUrl(url, jinaToken);
allKnowledge.push({question: `What is in ${response.data.url}?`, answer: response.data.content});
return {url, result: response};
})
);
diaryContext.push(`
At step ${step}, you took the **visit** action and deep dive into the following URLs:
${action.URLTargets.join('\n')}
You found some useful information on the web and add them to your knowledge for future reference.
`);
updateContext({
step,
question: currentQuestion,
...action,
result: urlResults
});
totalTokens += urlResults.reduce((sum, r) => sum + r.result.data.usage.tokens, 0);
}
await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
}
}
async function storeContext(prompt: string, memory: any[][], step: number) {
try {
await fs.writeFile(`prompt-${step}.txt`, prompt);
const [context, keywords, questions, knowledge] = memory;
await fs.writeFile('context.json', JSON.stringify(context, null, 2));
await fs.writeFile('keywords.json', JSON.stringify(keywords, null, 2));
await fs.writeFile('questions.json', JSON.stringify(questions, null, 2));
await fs.writeFile('knowledge.json', JSON.stringify(knowledge, null, 2));
} catch (error) {
console.error('Failed to store context:', error);
}
}
const apiKey = process.env.GEMINI_API_KEY as string;
const jinaToken = process.env.JINA_API_KEY as string;
if (!apiKey) throw new Error("GEMINI_API_KEY not found");
if (!jinaToken) throw new Error("JINA_API_KEY not found");
const modelName = 'gemini-1.5-flash';
const genAI = new GoogleGenerativeAI(apiKey);
const question = process.argv[2] || "";
getResponse(question);