mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
chore: first commit
This commit is contained in:
parent
91f1cbcb97
commit
5eb75fcb61
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,5 +1,6 @@
|
|||||||
# Logs
|
# Logs
|
||||||
logs
|
logs
|
||||||
|
.idea
|
||||||
*.log
|
*.log
|
||||||
npm-debug.log*
|
npm-debug.log*
|
||||||
yarn-debug.log*
|
yarn-debug.log*
|
||||||
|
|||||||
48
package-lock.json
generated
Normal file
48
package-lock.json
generated
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
{
|
||||||
|
"name": "agentic-search",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"lockfileVersion": 3,
|
||||||
|
"requires": true,
|
||||||
|
"packages": {
|
||||||
|
"": {
|
||||||
|
"name": "agentic-search",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"license": "ISC",
|
||||||
|
"dependencies": {
|
||||||
|
"@google/generative-ai": "^0.21.0",
|
||||||
|
"dotenv": "^16.4.7",
|
||||||
|
"undici": "^7.3.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@google/generative-ai": {
|
||||||
|
"version": "0.21.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.21.0.tgz",
|
||||||
|
"integrity": "sha512-7XhUbtnlkSEZK15kN3t+tzIMxsbKm/dSkKBFalj+20NvPKe1kBY7mR2P7vuijEn+f06z5+A8bVGKO0v39cr6Wg==",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/dotenv": {
|
||||||
|
"version": "16.4.7",
|
||||||
|
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.7.tgz",
|
||||||
|
"integrity": "sha512-47qPchRCykZC03FhkYAhrvwU4xDBFIj1QPqaarj6mdM/hgUzfPHcpkHJOn3mJAufFeeAxAzeGsr5X0M4k6fLZQ==",
|
||||||
|
"license": "BSD-2-Clause",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=12"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://dotenvx.com"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/undici": {
|
||||||
|
"version": "7.3.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/undici/-/undici-7.3.0.tgz",
|
||||||
|
"integrity": "sha512-Qy96NND4Dou5jKoSJ2gm8ax8AJM/Ey9o9mz7KN1bb9GP+G0l20Zw8afxTnY2f4b7hmhn/z8aC2kfArVQlAhFBw==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=20.18.1"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
18
package.json
Normal file
18
package.json
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"name": "agentic-search",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"main": "index.js",
|
||||||
|
"scripts": {
|
||||||
|
"build": "tsc",
|
||||||
|
"as": "npx ts-node src/agent.ts"
|
||||||
|
},
|
||||||
|
"keywords": [],
|
||||||
|
"author": "",
|
||||||
|
"license": "ISC",
|
||||||
|
"description": "",
|
||||||
|
"dependencies": {
|
||||||
|
"@google/generative-ai": "^0.21.0",
|
||||||
|
"dotenv": "^16.4.7",
|
||||||
|
"undici": "^7.3.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
248
src/agent.ts
Normal file
248
src/agent.ts
Normal file
@ -0,0 +1,248 @@
|
|||||||
|
import {GoogleGenerativeAI, SchemaType} from "@google/generative-ai";
|
||||||
|
import dotenv from 'dotenv';
|
||||||
|
import {ProxyAgent, setGlobalDispatcher} from "undici";
|
||||||
|
import {readUrl} from "./tools/read";
|
||||||
|
import {search} from "./tools/search";
|
||||||
|
// Load variables from `.env` before anything reads `process.env`, so that a
// proxy configured there is visible to the check below. By default dotenv does
// not overwrite variables that are already set in the real environment.
dotenv.config();

// Read the proxy URL from `https_proxy` and, when present, install a ProxyAgent
// as undici's global dispatcher.
if (process.env.https_proxy) {
  try {
    // Round-tripping through URL validates the value before it is handed to undici.
    const proxyUrl = new URL(process.env.https_proxy).toString();
    const dispatcher = new ProxyAgent({uri: proxyUrl});
    setGlobalDispatcher(dispatcher);
  } catch (error) {
    // A malformed proxy URL is not fatal: report it and continue without a proxy.
    console.error('Failed to set proxy:', error);
  }
}
|
||||||
|
|
||||||
|
/** Builds a string-typed property entry for the response schema. */
const stringField = (description: string) => ({
  type: SchemaType.STRING,
  description,
});

/** Builds an array-of-strings property entry for the response schema. */
const stringListField = (description: string) => ({
  type: SchemaType.ARRAY,
  items: {
    type: SchemaType.STRING
  },
  description,
});

/**
 * Structured-output schema handed to the model as `responseSchema`.
 * Only `action`, `reasoning` and `confidence` are required; every other
 * property is described as belonging to exactly one action type.
 */
const schema = {
  type: SchemaType.OBJECT,
  properties: {
    action: {
      type: SchemaType.STRING,
      enum: ["search", "readURL", "rewrite", "answer", "reflect"],
      description: "Must match exactly one action type"
    },
    remainedGaps: stringListField("Only required when choosing 'reflect' action, must be an array of gaps in the knowledge"),
    searchKeywords: stringListField("Only required when choosing 'search' action, must be an array of keywords"),
    URLTargets: stringListField("Only required when choosing 'readURL' action, must be an array of URLs"),
    rewriteQuery: stringField("Only required when choosing 'rewrite' action, must be a new query that might lead to better or more relevant information"),
    answer: stringField("Only required when choosing 'answer' action, must be the final answer in natural language"),
    references: {
      type: SchemaType.ARRAY,
      items: {
        type: SchemaType.OBJECT,
        properties: {
          title: stringField("Title of the document; must be directly from the context"),
          url: stringField("URL of the document; must be directly from the context")
        },
        required: ["title", "url"]
      },
      description: "Only required when choosing 'answer' action, must be an array of references"
    },
    reasoning: stringField("Explain why choose this action?"),
    confidence: {
      type: SchemaType.NUMBER,
      minimum: 0.0,
      maximum: 1.0,
      description: "Represents the confidence level of in answering the question BEFORE taking the action. Must be a float between 0.0 and 1.0",
    }
  },
  required: ["action", "reasoning", "confidence"],
};
|
||||||
|
|
||||||
|
/**
 * Returns the value of a mandatory environment variable.
 * Throws `Error("<name> not found")` when it is unset or empty.
 */
function requireEnv(name: string): string {
  const value = process.env[name];
  if (!value) {
    throw new Error(`${name} not found`);
  }
  return value;
}

// Both credentials are checked at module load, Gemini first, so a missing key
// fails fast instead of surfacing in the middle of a run.
const apiKey = requireEnv('GEMINI_API_KEY');
const jinaToken = requireEnv('JINA_API_KEY');

const modelName = 'gemini-1.5-flash';
const genAI = new GoogleGenerativeAI(apiKey);

// The model is configured to reply with JSON constrained by `schema`.
const generationConfig = {
  temperature: 0.7,
  responseMimeType: "application/json",
  responseSchema: schema
};
const model = genAI.getGenerativeModel({
  model: modelName,
  generationConfig
});
|
||||||
|
|
||||||
|
/**
 * Assembles the instruction prompt for one agent step.
 *
 * @param question The question the model must answer in this step.
 * @param context  Previously gathered records; when empty or omitted, the
 *                 "You have the following context" section is left out.
 * @returns The full prompt text, terminated by a newline.
 */
function getPrompt(question: string, context?: string) {
  // The context section carries its own trailing newline, as in the inline form.
  const contextSection = context
    ? ['You have the following context:', context, ''].join('\n')
    : '';

  const lines = [
    'You are an AI research analyst capable of multi-step reasoning.',
    '',
    contextSection,
    '',
    'Based on the context and the knowledge in your training data, you must answer the following question with 100% confidence:',
    '',
    question,
    '',
    'If you are not 100% confident in your answer, you should first take a reflection to identify the gaps in your knowledge:',
    '',
    '**reflect**:',
    '- Challenge existing knowledge with what-if thinking.',
    '- Reflect on the gaps in your knowledge and ask for more questions to fill those gaps.',
    '- You use this action when you feel like you need to first answer those questions before proceeding with the current one.',
    '- This action has higher priority than all other actions.',
    '',
    'If you are still not confident after reflecting, you can take one of the following actions:',
    '',
    '**search**:',
    '- Search external real-world information via a public search engine.',
    '- The search engine works best with short, keyword-based queries.',
    '- You use this action when you need more world knowledge or up to date information that is not covered in your training data or cut-off knowledge base.',
    '',
    '**readURL**:',
    '- Provide a specific URL to fetch and read its content in detail.',
    '- Any URL must come from the current context.',
    '- You use this action when you feel like that particular URL might have the information you need to answer the question.',
    '',
    '**rewrite**:',
    '- Propose a new or modified query (in a different phrasing, more details, or from another angle) that might lead to better or more relevant information.',
    '- This rewritten query can help the search engine find more accurate results, thereby improving your confidence in answering the original question.',
    '- You use this action when you think the current query is too vague, broad, or ambiguous; or the search engine results are not satisfactory.',
    '',
    '**answer**:',
    '- Provide your answer to the user, **only** if you are completely sure.',
    '',
    'When you decide on your action, respond **only** in valid JSON format according to the schema below.',
    '',
    '**Important**:',
    '- Do not include any extra keys.',
    '- Do not include explanatory text, markdown formatting, or reasoning in the final output.',
    '- Output exactly one JSON object in your response.',
    '', // yields the trailing newline
  ];

  return lines.join('\n');
}
|
||||||
|
|
||||||
|
|
||||||
|
/** One decision returned by the model; fields mirror the response schema's properties. */
interface AgentAction {
  action: string;
  remainedGaps?: string[];
  searchKeywords?: string[];
  URLTargets?: string[];
  rewriteQuery?: string;
  answer?: string;
  references?: Array<{ title: string; url: string }>;
  reasoning?: string;
  confidence?: number;
}

/**
 * Parses the model's raw reply.
 * Returns `undefined` when the text is not JSON or is not an object carrying a
 * string `action`, so the caller can retry instead of crashing.
 */
function parseAction(raw: string): AgentAction | undefined {
  try {
    const parsed: unknown = JSON.parse(raw);
    if (
      typeof parsed === 'object' &&
      parsed !== null &&
      typeof (parsed as { action?: unknown }).action === 'string'
    ) {
      return parsed as AgentAction;
    }
  } catch {
    // Not valid JSON: handled by the fall-through below.
  }
  return undefined;
}

/**
 * Runs the agent loop for `question` until the model answers it or the token
 * budget is used up.
 *
 * Each iteration asks the model for one action. Sub-questions produced by a
 * `reflect` action are queued in `gaps` and asked before the original question
 * is asked again. Every executed action is appended to `context` as one JSON
 * line and fed back through the prompt.
 *
 * @param question The user's original question.
 * @returns The `answer` action for the original question, or `undefined` when
 *          the budget ran out first.
 */
async function getResponse(question: string): Promise<AgentAction | undefined> {
  const tokenBudget = 300000;
  let totalTokens = 0;
  let context = '';
  let step = 0;
  const gaps: string[] = [];

  // Appends one JSON record (step, the action's fields, the question asked,
  // and optional extras such as tool results) to the running context.
  const remember = (action: AgentAction, asked: string, extra: Record<string, unknown> = {}): void => {
    const contextRecord = JSON.stringify({step, ...action, question: asked, ...extra});
    context = `${context}\n${contextRecord}`;
  };

  // Runs one search, records its hits and charges their tokens to the budget.
  const searchAndRemember = async (query: string, action: AgentAction, asked: string): Promise<void> => {
    const results = await search(query, jinaToken);
    // An error payload may carry no `data`; treat that as "no hits".
    const hits = results.data ?? [];
    remember(action, asked, {result: hits});
    totalTokens += hits.reduce(
      (sum: number, hit: { usage?: { tokens?: number } }) => sum + (hit.usage?.tokens ?? 0),
      0
    );
  };

  while (totalTokens < tokenBudget) {
    // Queued sub-questions take precedence over the original question.
    const isSubQuestion = gaps.length > 0;
    const currentQuestion = isSubQuestion ? gaps.shift()! : question;

    const prompt = getPrompt(currentQuestion, context);
    console.log('Prompt length:', prompt.length);
    const result = await model.generateContent(prompt);
    const response = await result.response;
    step++;

    totalTokens += response.usageMetadata?.totalTokenCount ?? 0;
    console.log(`Tokens: ${totalTokens}/${tokenBudget}`);

    const action = parseAction(response.text());
    if (!action) {
      console.error('Model returned a malformed action; retrying');
      // Put the sub-question back so it is not silently dropped.
      if (isSubQuestion) {
        gaps.unshift(currentQuestion);
      }
      continue;
    }
    console.log('Action:', action);

    if (action.action === 'answer') {
      if (!isSubQuestion) {
        return action;
      }
      // An answer to a sub-question is only an intermediate finding: keep it
      // as context and carry on until the original question is answered.
      remember(action, currentQuestion);
      continue;
    }

    if (action.action === 'reflect' && Array.isArray(action.remainedGaps)) {
      gaps.push(...action.remainedGaps);
      remember(action, currentQuestion);
      continue;
    }

    try {
      if (action.action === 'search' && action.searchKeywords) {
        await searchAndRemember(action.searchKeywords.join(' '), action, currentQuestion);
      } else if (action.action === 'readURL' && action.URLTargets?.length) {
        const targets = action.URLTargets;
        // allSettled keeps the pages that did load when some URLs fail.
        const settled = await Promise.allSettled(
          targets.map(async (url: string) => {
            const page = await readUrl(url, jinaToken);
            return {url, result: page};
          })
        );
        const urlResults = settled.flatMap((outcome, index) => {
          if (outcome.status === 'fulfilled') {
            return [outcome.value];
          }
          console.error('Error fetching data:', targets[index], outcome.reason);
          return [];
        });

        remember(action, currentQuestion, {result: urlResults});
        totalTokens += urlResults.reduce(
          (sum, r) => sum + (r.result?.data?.usage?.tokens ?? 0),
          0
        );
      } else if (action.action === 'rewrite' && action.rewriteQuery) {
        // Immediately search with the new rewriteQuery
        await searchAndRemember(action.rewriteQuery, action, currentQuestion);
      }
    } catch (error) {
      // A failed tool call should not end the run; the next step can try again.
      console.error('Error fetching data:', error);
    }
  }

  // Budget exhausted without an answer to the original question.
  return undefined;
}
|
||||||
|
|
||||||
|
|
||||||
|
// CLI entry point: the question is the first command-line argument.
const question = process.argv[2] || "";

if (!question.trim()) {
  // Without a question there is nothing to research; avoid spending tokens on an empty prompt.
  console.error('Usage: ts-node src/agent.ts "<question>"');
  process.exitCode = 1;
} else {
  // Handle a rejected run explicitly instead of leaving a floating promise.
  getResponse(question).catch((error: unknown) => {
    console.error(error);
    process.exitCode = 1;
  });
}
|
||||||
43
src/tools/read.ts
Normal file
43
src/tools/read.ts
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
import https from 'https';
|
||||||
|
|
||||||
|
/** JSON envelope expected from the `r.jina.ai` reader endpoint. */
interface ReadResponse {
  code: number;
  status: number;
  data: {
    title: string;
    description: string;
    url: string;
    content: string;
    usage: { tokens: number; };
  };
}

/**
 * POSTs `{url}` as JSON to `https://r.jina.ai/` and resolves with the parsed
 * JSON reply.
 *
 * The request goes through `https.request` directly, so it does not use an
 * undici global dispatcher.
 *
 * @param url   Address of the page to read.
 * @param token Bearer token sent in the `Authorization` header.
 * @returns The parsed response body. The HTTP status is not inspected, so an
 *          error payload that is valid JSON is resolved as-is.
 * @throws Rejects on a request/response stream error or when the body is not valid JSON.
 */
export function readUrl(url: string, token: string): Promise<ReadResponse> {
  return new Promise((resolve, reject) => {
    const body = JSON.stringify({url});

    const options = {
      hostname: 'r.jina.ai',
      port: 443,
      path: '/',
      method: 'POST',
      headers: {
        'Accept': 'application/json',
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json',
        // Content-Length counts bytes, not UTF-16 code units; they differ for non-ASCII URLs.
        'Content-Length': Buffer.byteLength(body),
        'X-Retain-Images': 'none'
      }
    };

    const req = https.request(options, (res) => {
      // Collect raw bytes and decode once at the end, so a multi-byte character
      // split across two chunks is not corrupted.
      const chunks: Buffer[] = [];
      res.on('data', (chunk: Buffer | string) => {
        chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
      });
      res.on('error', reject);
      res.on('end', () => {
        const text = Buffer.concat(chunks).toString('utf8');
        try {
          resolve(JSON.parse(text) as ReadResponse);
        } catch {
          // Throwing inside an event handler would escape the promise; reject instead.
          reject(new Error(`Invalid JSON from r.jina.ai (HTTP ${res.statusCode}): ${text.slice(0, 200)}`));
        }
      });
    });

    req.on('error', reject);
    req.write(body);
    req.end();
  });
}
|
||||||
38
src/tools/search.ts
Normal file
38
src/tools/search.ts
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
import https from 'https';
|
||||||
|
|
||||||
|
/** JSON envelope expected from the `s.jina.ai` search endpoint. */
interface SearchResponse {
  code: number;
  status: number;
  data: Array<{
    title: string;
    description: string;
    url: string;
    content: string;
    usage: { tokens: number; };
  }>;
}

/**
 * Sends `GET https://s.jina.ai/<url-encoded query>` and resolves with the
 * parsed JSON reply.
 *
 * The request goes through `https.request` directly, so it does not use an
 * undici global dispatcher.
 *
 * @param query Search text; it is URL-encoded into the request path.
 * @param token Bearer token sent in the `Authorization` header.
 * @returns The parsed response body. The HTTP status is not inspected, so an
 *          error payload that is valid JSON is resolved as-is.
 * @throws Rejects on a request/response stream error or when the body is not valid JSON.
 */
export function search(query: string, token: string): Promise<SearchResponse> {
  return new Promise((resolve, reject) => {
    const options = {
      hostname: 's.jina.ai',
      port: 443,
      path: `/${encodeURIComponent(query)}`,
      method: 'GET',
      headers: {
        'Accept': 'application/json',
        'Authorization': `Bearer ${token}`,
        'X-Retain-Images': 'none'
      }
    };

    const req = https.request(options, (res) => {
      // Collect raw bytes and decode once at the end, so a multi-byte character
      // split across two chunks is not corrupted.
      const chunks: Buffer[] = [];
      res.on('data', (chunk: Buffer | string) => {
        chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
      });
      res.on('error', reject);
      res.on('end', () => {
        const text = Buffer.concat(chunks).toString('utf8');
        try {
          resolve(JSON.parse(text) as SearchResponse);
        } catch {
          // Throwing inside an event handler would escape the promise; reject instead.
          reject(new Error(`Invalid JSON from s.jina.ai (HTTP ${res.statusCode}): ${text.slice(0, 200)}`));
        }
      });
    });

    req.on('error', reject);
    req.end();
  });
}
|
||||||
Loading…
x
Reference in New Issue
Block a user