mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2026-03-22 07:29:35 +08:00
chore: first commit
This commit is contained in:
142
src/agent.ts
142
src/agent.ts
@@ -7,6 +7,7 @@ import {SafeSearchType, search} from "duck-duck-scrape";
|
|||||||
import {rewriteQuery} from "./tools/query-rewriter";
|
import {rewriteQuery} from "./tools/query-rewriter";
|
||||||
import {dedupQueries} from "./tools/dedup";
|
import {dedupQueries} from "./tools/dedup";
|
||||||
import {evaluateAnswer} from "./tools/evaluator";
|
import {evaluateAnswer} from "./tools/evaluator";
|
||||||
|
import {buildURLMap} from "./tools/getURLIndex";
|
||||||
|
|
||||||
// Proxy setup remains the same
|
// Proxy setup remains the same
|
||||||
if (process.env.https_proxy) {
|
if (process.env.https_proxy) {
|
||||||
@@ -164,32 +165,55 @@ function getSchema(allowReflect: boolean): ResponseSchema {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function getPrompt(question: string, context?: any[], allQuestions?: string[], allowReflect: boolean = false, badContext?: any[]) {
|
function getPrompt(question: string, context?: any[], allQuestions?: string[], allowReflect: boolean = false, badContext?: any[], knowledge?: any[], allURLs?: Record<string, string>) {
|
||||||
|
// console.log('Context:', context);
|
||||||
|
// console.log('All URLs:', JSON.stringify(allURLs, null, 2));
|
||||||
|
|
||||||
|
const knowledgeIntro = knowledge?.length ?
|
||||||
|
`
|
||||||
|
## Knowledge
|
||||||
|
You have successfully gathered some knowledge from the following questions:
|
||||||
|
|
||||||
|
${JSON.stringify(knowledge, null, 2)}
|
||||||
|
|
||||||
|
`
|
||||||
|
: '';
|
||||||
|
|
||||||
const badContextIntro = badContext?.length ?
|
const badContextIntro = badContext?.length ?
|
||||||
`Your last unsuccessful answer contains these previous actions and knowledge:
|
`
|
||||||
${JSON.stringify(badContext, null, 2)}
|
## Unsuccessful Answer Analysis
|
||||||
|
Your last unsuccessful answer are:
|
||||||
|
|
||||||
|
${JSON.stringify(badContext, null, 2)}
|
||||||
|
|
||||||
Learn to avoid these mistakes and think of a new approach, from a different angle, e.g. search for different keywords, read different URLs, or ask different questions.
|
Learn to avoid these mistakes and think of a new approach, from a different angle, e.g. search for different keywords, read different URLs, or ask different questions.
|
||||||
`
|
`
|
||||||
: '';
|
: '';
|
||||||
|
|
||||||
const contextIntro = context?.length ?
|
const contextIntro = context?.length ?
|
||||||
`Your current context contains these previous actions and knowledge:
|
|
||||||
${JSON.stringify(context, null, 2)}
|
|
||||||
`
|
`
|
||||||
|
## Context
|
||||||
|
You have conducted the following actions:
|
||||||
|
|
||||||
|
${JSON.stringify(context, null, 2)}
|
||||||
|
|
||||||
|
`
|
||||||
: '';
|
: '';
|
||||||
|
|
||||||
let actionsDescription = `
|
let actionsDescription = `
|
||||||
Using your training data and prior lessons learned, answer the following question with absolute certainty:
|
## Actions
|
||||||
|
|
||||||
${question}
|
|
||||||
|
|
||||||
When uncertain or needing additional information, select one of these actions:
|
When uncertain or needing additional information, select one of these actions:
|
||||||
|
|
||||||
|
${allURLs ? `
|
||||||
**read**:
|
**read**:
|
||||||
- Access external URLs to gather more information
|
- Access any URLs from below to gather external knowledge
|
||||||
|
|
||||||
|
${JSON.stringify(allURLs, null, 2)}
|
||||||
|
|
||||||
- When you have enough search result in the context and want to deep dive into specific URLs
|
- When you have enough search result in the context and want to deep dive into specific URLs
|
||||||
- It allows you access the full content behind specific URLs
|
- It allows you access the full content behind any URLs
|
||||||
|
` : ''}
|
||||||
|
|
||||||
**search**:
|
**search**:
|
||||||
- Query external sources using a public search engine
|
- Query external sources using a public search engine
|
||||||
@@ -202,7 +226,8 @@ When uncertain or needing additional information, select one of these actions:
|
|||||||
${allowReflect ? `- If doubts remain, use "reflect" instead` : ''}`;
|
${allowReflect ? `- If doubts remain, use "reflect" instead` : ''}`;
|
||||||
|
|
||||||
if (allowReflect) {
|
if (allowReflect) {
|
||||||
actionsDescription += `\n\n**reflect**:
|
actionsDescription += `
|
||||||
|
**reflect**:
|
||||||
- Perform critical analysis through hypothetical scenarios or systematic breakdowns
|
- Perform critical analysis through hypothetical scenarios or systematic breakdowns
|
||||||
- Identify knowledge gaps and formulate essential clarifying questions
|
- Identify knowledge gaps and formulate essential clarifying questions
|
||||||
- Questions must be:
|
- Questions must be:
|
||||||
@@ -210,14 +235,19 @@ ${allowReflect ? `- If doubts remain, use "reflect" instead` : ''}`;
|
|||||||
- Focused on single concepts
|
- Focused on single concepts
|
||||||
- Under 20 words
|
- Under 20 words
|
||||||
- Non-compound/non-complex
|
- Non-compound/non-complex
|
||||||
`;
|
`.trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
return `
|
return `
|
||||||
You are an advanced AI research analyst specializing in multi-step reasoning.
|
You are an advanced AI research analyst specializing in multi-step reasoning. Using your training data and prior lessons learned, answer the following question with absolute certainty:
|
||||||
|
|
||||||
|
## Question
|
||||||
|
${question}
|
||||||
|
|
||||||
${contextIntro.trim()}
|
${contextIntro.trim()}
|
||||||
|
|
||||||
|
${knowledgeIntro.trim()}
|
||||||
|
|
||||||
${badContextIntro.trim()}
|
${badContextIntro.trim()}
|
||||||
|
|
||||||
${actionsDescription.trim()}
|
${actionsDescription.trim()}
|
||||||
@@ -240,7 +270,7 @@ async function getResponse(question: string, tokenBudget: number = 30000000) {
|
|||||||
let allKeywords = [];
|
let allKeywords = [];
|
||||||
let allKnowledge = []; // knowledge are intermedidate questions that are answered
|
let allKnowledge = []; // knowledge are intermedidate questions that are answered
|
||||||
let badContext = [];
|
let badContext = [];
|
||||||
|
let allURLs: Record<string, string> = {};
|
||||||
while (totalTokens < tokenBudget) {
|
while (totalTokens < tokenBudget) {
|
||||||
// add 1s delay to avoid rate limiting
|
// add 1s delay to avoid rate limiting
|
||||||
await sleep(1000);
|
await sleep(1000);
|
||||||
@@ -249,7 +279,16 @@ async function getResponse(question: string, tokenBudget: number = 30000000) {
|
|||||||
console.log('Gaps:', gaps)
|
console.log('Gaps:', gaps)
|
||||||
const allowReflect = gaps.length <= 1;
|
const allowReflect = gaps.length <= 1;
|
||||||
const currentQuestion = gaps.length > 0 ? gaps.shift()! : question;
|
const currentQuestion = gaps.length > 0 ? gaps.shift()! : question;
|
||||||
const prompt = getPrompt(currentQuestion, context, allQuestions, allowReflect, badContext);
|
// update all urls with buildURLMap
|
||||||
|
allURLs = {...allURLs, ...buildURLMap(context)};
|
||||||
|
const prompt = getPrompt(
|
||||||
|
currentQuestion,
|
||||||
|
context,
|
||||||
|
allQuestions,
|
||||||
|
allowReflect,
|
||||||
|
badContext,
|
||||||
|
allKnowledge,
|
||||||
|
allURLs);
|
||||||
console.log('Prompt len:', prompt.length)
|
console.log('Prompt len:', prompt.length)
|
||||||
|
|
||||||
const model = genAI.getGenerativeModel({
|
const model = genAI.getGenerativeModel({
|
||||||
@@ -283,7 +322,11 @@ async function getResponse(question: string, tokenBudget: number = 30000000) {
|
|||||||
if (evaluation.is_valid_answer) {
|
if (evaluation.is_valid_answer) {
|
||||||
return action;
|
return action;
|
||||||
} else {
|
} else {
|
||||||
badContext.push({...context, "Why this is a bad answer?": evaluation.reasoning});
|
badContext.push({
|
||||||
|
question: currentQuestion,
|
||||||
|
answer: action.answer,
|
||||||
|
"Why this is a bad answer?": evaluation.reasoning
|
||||||
|
});
|
||||||
context = [];
|
context = [];
|
||||||
}
|
}
|
||||||
} else if (evaluation.is_valid_answer) {
|
} else if (evaluation.is_valid_answer) {
|
||||||
@@ -296,9 +339,18 @@ async function getResponse(question: string, tokenBudget: number = 30000000) {
|
|||||||
if (allQuestions.length) {
|
if (allQuestions.length) {
|
||||||
newGapQuestions = await dedupQueries(newGapQuestions, allQuestions)
|
newGapQuestions = await dedupQueries(newGapQuestions, allQuestions)
|
||||||
}
|
}
|
||||||
gaps.push(...newGapQuestions);
|
if (newGapQuestions.length > 0) {
|
||||||
allQuestions.push(...newGapQuestions);
|
gaps.push(...newGapQuestions);
|
||||||
gaps.push(question); // always keep the original question in the gaps
|
allQuestions.push(...newGapQuestions);
|
||||||
|
gaps.push(question); // always keep the original question in the gaps
|
||||||
|
} else {
|
||||||
|
console.log('No new questions to ask');
|
||||||
|
context.push({
|
||||||
|
step,
|
||||||
|
...action,
|
||||||
|
result: 'I have tried all possible questions and found no useful information. I must think out of the box or different angle!!!'
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rest of the action handling remains the same
|
// Rest of the action handling remains the same
|
||||||
@@ -310,27 +362,37 @@ async function getResponse(question: string, tokenBudget: number = 30000000) {
|
|||||||
if (allKeywords.length) {
|
if (allKeywords.length) {
|
||||||
keywordsQueries = await dedupQueries(keywordsQueries, allKeywords)
|
keywordsQueries = await dedupQueries(keywordsQueries, allKeywords)
|
||||||
}
|
}
|
||||||
const searchResults = [];
|
if (keywordsQueries.length > 0) {
|
||||||
for (const query of keywordsQueries) {
|
const searchResults = [];
|
||||||
const results = await search(query, {
|
for (const query of keywordsQueries) {
|
||||||
safeSearch: SafeSearchType.STRICT
|
console.log('Searching:', query);
|
||||||
});
|
const results = await search(query, {
|
||||||
const minResults = results.results.map(r => ({
|
safeSearch: SafeSearchType.STRICT
|
||||||
title: r.title,
|
});
|
||||||
url: r.url,
|
const minResults = results.results.map(r => ({
|
||||||
description: r.description,
|
title: r.title,
|
||||||
}));
|
url: r.url,
|
||||||
searchResults.push({query, results: minResults});
|
description: r.description,
|
||||||
allKeywords.push(query);
|
}));
|
||||||
await sleep(5000);
|
searchResults.push({query, results: minResults});
|
||||||
}
|
allKeywords.push(query);
|
||||||
|
await sleep(5000);
|
||||||
|
}
|
||||||
|
|
||||||
context.push({
|
context.push({
|
||||||
step,
|
step,
|
||||||
question: currentQuestion,
|
question: currentQuestion,
|
||||||
...action,
|
...action,
|
||||||
result: searchResults
|
result: searchResults
|
||||||
});
|
});
|
||||||
|
} else {
|
||||||
|
console.log('No new queries to search');
|
||||||
|
context.push({
|
||||||
|
step,
|
||||||
|
...action,
|
||||||
|
result: 'I have tried all possible queries and found no new information. I must think out of the box or different angle!!!'
|
||||||
|
});
|
||||||
|
}
|
||||||
} else if (action.action === 'read' && action.URLTargets?.length) {
|
} else if (action.action === 'read' && action.URLTargets?.length) {
|
||||||
const urlResults = await Promise.all(
|
const urlResults = await Promise.all(
|
||||||
action.URLTargets.map(async (url: string) => {
|
action.URLTargets.map(async (url: string) => {
|
||||||
|
|||||||
@@ -101,7 +101,7 @@ export async function dedupQueries(newQueries: string[], existingQueries: string
|
|||||||
const result = await model.generateContent(prompt);
|
const result = await model.generateContent(prompt);
|
||||||
const response = await result.response;
|
const response = await result.response;
|
||||||
const json = JSON.parse(response.text()) as DedupResponse;
|
const json = JSON.parse(response.text()) as DedupResponse;
|
||||||
console.log('Analysis:', json);
|
console.log('Dedup:', json);
|
||||||
return json.unique_queries;
|
return json.unique_queries;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error in deduplication analysis:', error);
|
console.error('Error in deduplication analysis:', error);
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ const genAI = new GoogleGenerativeAI(apiKey);
|
|||||||
const model = genAI.getGenerativeModel({
|
const model = genAI.getGenerativeModel({
|
||||||
model: modelName,
|
model: modelName,
|
||||||
generationConfig: {
|
generationConfig: {
|
||||||
temperature: 0.1,
|
temperature: 0,
|
||||||
responseMimeType: "application/json",
|
responseMimeType: "application/json",
|
||||||
responseSchema: responseSchema
|
responseSchema: responseSchema
|
||||||
}
|
}
|
||||||
@@ -59,7 +59,7 @@ Core Evaluation Criteria:
|
|||||||
2. Clarity: Answer should be clear and unambiguous
|
2. Clarity: Answer should be clear and unambiguous
|
||||||
3. Informativeness: Answer must provide substantial, useful information
|
3. Informativeness: Answer must provide substantial, useful information
|
||||||
4. Specificity: Generic or vague responses are not acceptable
|
4. Specificity: Generic or vague responses are not acceptable
|
||||||
5. Definitiveness: "I don't know" or highly uncertain responses are not valid
|
5. Definitiveness: "I don't know", "lack of information" or highly uncertain responses are not valid
|
||||||
6. Relevance: Answer must be directly related to the question topic
|
6. Relevance: Answer must be directly related to the question topic
|
||||||
7. Accuracy: Information provided should be factually sound (if verifiable)
|
7. Accuracy: Information provided should be factually sound (if verifiable)
|
||||||
|
|
||||||
@@ -79,6 +79,13 @@ Evaluation: {
|
|||||||
"reasoning": "The answer is comprehensive, specific, and covers all key system requirements across different operating systems. It provides concrete numbers and necessary additional components."
|
"reasoning": "The answer is comprehensive, specific, and covers all key system requirements across different operating systems. It provides concrete numbers and necessary additional components."
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Question: "what is the twitter account of jina ai's founder?"
|
||||||
|
Answer: "The provided text does not contain the Twitter account of Jina AI's founder."
|
||||||
|
Evaluation: {
|
||||||
|
"is_valid_answer": false,
|
||||||
|
"reasoning": "The answer is not definitive and fails to provide the requested information. Don't know, can't derive, lack of information is unacceptable,"
|
||||||
|
}
|
||||||
|
|
||||||
Now evaluate this pair:
|
Now evaluate this pair:
|
||||||
Question: ${JSON.stringify(question)}
|
Question: ${JSON.stringify(question)}
|
||||||
Answer: ${JSON.stringify(answer)}`;
|
Answer: ${JSON.stringify(answer)}`;
|
||||||
|
|||||||
39
src/tools/getURLIndex.ts
Normal file
39
src/tools/getURLIndex.ts
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
/** A single hit as stored in a search step's result list. */
interface SearchResult {
  title: string;
  url: string;
  description: string;
}

/** All hits collected for one search query. */
interface QueryResult {
  query: string;
  results: SearchResult[];
}

/**
 * One recorded agent step.
 *
 * NOTE(review): callers appear to pass loosely-typed records here, so
 * `result` may in practice be something other than `QueryResult[]`
 * (for example a plain string). `buildURLMap` therefore validates shapes
 * at runtime instead of trusting this declaration.
 */
interface StepData {
  step: number;
  question: string;
  action: string;
  reasoning: string;
  searchQuery?: string;
  result?: QueryResult[];
}

/**
 * Collects every URL found in the search results of the given steps and maps
 * it to the title it was listed under.
 *
 * Only steps whose `result` is an array are inspected, and within it only
 * entries that carry a `results` array; everything else is skipped. When the
 * same URL appears more than once, the first non-empty title wins.
 *
 * Entries without a usable (non-empty string) `url` are ignored so that the
 * map never gains a bogus `"undefined"` key, and a missing title is stored as
 * an empty string rather than the text `"undefined"`.
 *
 * @param data - Recorded steps to scan.
 * @returns A plain object keyed by URL with the page title as value.
 */
export function buildURLMap(data: StepData[]): Record<string, string> {
  const urlMap: Record<string, string> = {};

  for (const step of data) {
    const queryResults = step?.result;
    if (!Array.isArray(queryResults)) {
      continue;
    }

    for (const queryResult of queryResults) {
      const hits = queryResult?.results;
      if (!Array.isArray(hits)) {
        continue;
      }

      for (const hit of hits) {
        const url = hit?.url;
        if (typeof url !== 'string' || url === '') {
          continue;
        }
        // Truthiness check (not key presence) keeps the original behavior of
        // letting a later non-empty title replace an earlier empty one.
        if (!urlMap[url]) {
          urlMap[url] = typeof hit.title === 'string' ? hit.title : '';
        }
      }
    }
  }

  return urlMap;
}
|
||||||
@@ -111,6 +111,7 @@ export async function rewriteQuery(query: string): Promise<string[]> {
|
|||||||
const result = await model.generateContent(prompt);
|
const result = await model.generateContent(prompt);
|
||||||
const response = await result.response;
|
const response = await result.response;
|
||||||
const json = JSON.parse(response.text()) as KeywordsResponse;
|
const json = JSON.parse(response.text()) as KeywordsResponse;
|
||||||
|
console.log('Rewriter:', json)
|
||||||
return json.keywords;
|
return json.keywords;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error in query rewriting:', error);
|
console.error('Error in query rewriting:', error);
|
||||||
|
|||||||
Reference in New Issue
Block a user