mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:27:28 +08:00
chore: first commit
This commit is contained in:
parent
91f1cbcb97
commit
5eb75fcb61
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,5 +1,6 @@
|
||||
# Logs
|
||||
logs
|
||||
.idea
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
|
||||
48
package-lock.json
generated
Normal file
48
package-lock.json
generated
Normal file
@ -0,0 +1,48 @@
|
||||
{
|
||||
"name": "agentic-search",
|
||||
"version": "1.0.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "agentic-search",
|
||||
"version": "1.0.0",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@google/generative-ai": "^0.21.0",
|
||||
"dotenv": "^16.4.7",
|
||||
"undici": "^7.3.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@google/generative-ai": {
|
||||
"version": "0.21.0",
|
||||
"resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.21.0.tgz",
|
||||
"integrity": "sha512-7XhUbtnlkSEZK15kN3t+tzIMxsbKm/dSkKBFalj+20NvPKe1kBY7mR2P7vuijEn+f06z5+A8bVGKO0v39cr6Wg==",
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/dotenv": {
|
||||
"version": "16.4.7",
|
||||
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.7.tgz",
|
||||
"integrity": "sha512-47qPchRCykZC03FhkYAhrvwU4xDBFIj1QPqaarj6mdM/hgUzfPHcpkHJOn3mJAufFeeAxAzeGsr5X0M4k6fLZQ==",
|
||||
"license": "BSD-2-Clause",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://dotenvx.com"
|
||||
}
|
||||
},
|
||||
"node_modules/undici": {
|
||||
"version": "7.3.0",
|
||||
"resolved": "https://registry.npmjs.org/undici/-/undici-7.3.0.tgz",
|
||||
"integrity": "sha512-Qy96NND4Dou5jKoSJ2gm8ax8AJM/Ey9o9mz7KN1bb9GP+G0l20Zw8afxTnY2f4b7hmhn/z8aC2kfArVQlAhFBw==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=20.18.1"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
18
package.json
Normal file
18
package.json
Normal file
@ -0,0 +1,18 @@
|
||||
{
|
||||
"name": "agentic-search",
|
||||
"version": "1.0.0",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"as": "npx ts-node src/agent.ts"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"description": "",
|
||||
"dependencies": {
|
||||
"@google/generative-ai": "^0.21.0",
|
||||
"dotenv": "^16.4.7",
|
||||
"undici": "^7.3.0"
|
||||
}
|
||||
}
|
||||
248
src/agent.ts
Normal file
248
src/agent.ts
Normal file
@ -0,0 +1,248 @@
|
||||
import {GoogleGenerativeAI, SchemaType} from "@google/generative-ai";
|
||||
import dotenv from 'dotenv';
|
||||
import {ProxyAgent, setGlobalDispatcher} from "undici";
|
||||
import {readUrl} from "./tools/read";
|
||||
import {search} from "./tools/search";
|
||||
// Load variables from .env before anything reads process.env, so that an
// https_proxy entry defined there is visible to the proxy check below.
dotenv.config();

// Read the proxy URL and, when present, install a ProxyAgent as undici's global dispatcher.
if (process.env.https_proxy) {
  try {
    // Round-tripping through URL rejects malformed values up front.
    const proxyUrl = new URL(process.env.https_proxy).toString();
    const dispatcher = new ProxyAgent({uri: proxyUrl});
    setGlobalDispatcher(dispatcher);
  } catch (error) {
    // A bad proxy setting is reported but does not stop the program.
    console.error('Failed to set proxy:', error);
  }
}
|
||||
|
||||
/** Builds the schema node for an array of plain strings with the given description. */
const stringArrayField = (description: string) => ({
  type: SchemaType.ARRAY,
  items: {
    type: SchemaType.STRING
  },
  description
});

/** Builds the schema node for a single string with the given description. */
const stringField = (description: string) => ({
  type: SchemaType.STRING,
  description
});

/**
 * Structured-output schema passed to the model as `generationConfig.responseSchema`.
 * Every reply must carry `action`, `reasoning` and `confidence`; the remaining
 * properties are described as required only for the action they belong to.
 */
const schema = {
  type: SchemaType.OBJECT,
  properties: {
    action: {
      type: SchemaType.STRING,
      enum: ["search", "readURL", "rewrite", "answer", "reflect"],
      description: "Must match exactly one action type"
    },
    remainedGaps: stringArrayField(
      "Only required when choosing 'reflect' action, must be an array of gaps in the knowledge"
    ),
    searchKeywords: stringArrayField(
      "Only required when choosing 'search' action, must be an array of keywords"
    ),
    URLTargets: stringArrayField(
      "Only required when choosing 'readURL' action, must be an array of URLs"
    ),
    rewriteQuery: stringField(
      "Only required when choosing 'rewrite' action, must be a new query that might lead to better or more relevant information"
    ),
    answer: stringField(
      "Only required when choosing 'answer' action, must be the final answer in natural language"
    ),
    references: {
      type: SchemaType.ARRAY,
      items: {
        type: SchemaType.OBJECT,
        properties: {
          title: stringField("Title of the document; must be directly from the context"),
          url: stringField("URL of the document; must be directly from the context")
        },
        required: ["title", "url"]
      },
      description: "Only required when choosing 'answer' action, must be an array of references"
    },
    reasoning: stringField("Explain why choose this action?"),
    confidence: {
      type: SchemaType.NUMBER,
      minimum: 0.0,
      maximum: 1.0,
      description: "Represents the confidence level of in answering the question BEFORE taking the action. Must be a float between 0.0 and 1.0"
    }
  },
  required: ["action", "reasoning", "confidence"]
};
|
||||
|
||||
/**
 * Returns the value of a mandatory environment variable.
 *
 * @param name - Name of the variable to look up in `process.env`.
 * @returns The variable's non-empty value.
 * @throws Error with the message `<name> not found` when the variable is unset or empty.
 */
function requireEnv(name: string): string {
  const value = process.env[name];
  if (!value) {
    throw new Error(`${name} not found`);
  }
  return value;
}

// Both credentials are checked at module load so a missing key fails fast,
// before any request is attempted.
const apiKey = requireEnv('GEMINI_API_KEY');
const jinaToken = requireEnv('JINA_API_KEY');

const modelName = 'gemini-1.5-flash';
const genAI = new GoogleGenerativeAI(apiKey);

// The model is configured to reply with JSON constrained by `schema`.
const model = genAI.getGenerativeModel({
  model: modelName,
  generationConfig: {
    temperature: 0.7,
    responseMimeType: "application/json",
    responseSchema: schema
  }
});
|
||||
|
||||
/**
 * Assembles the instruction prompt sent to the model for one agent step.
 *
 * @param question - The question the model is asked to answer in this step.
 * @param context - Optional accumulated context; when non-empty it is introduced
 *   with a "You have the following context:" section ahead of the question.
 * @returns The full prompt text, ending with a trailing newline.
 */
function getPrompt(question: string, context?: string) {
  // An absent or empty context contributes an empty section rather than a header with no body.
  const contextSection = context
    ? ['You have the following context:', context, ''].join('\n')
    : '';

  const promptLines = [
    'You are an AI research analyst capable of multi-step reasoning.',
    '',
    contextSection,
    '',
    'Based on the context and the knowledge in your training data, you must answer the following question with 100% confidence:',
    '',
    question,
    '',
    'If you are not 100% confident in your answer, you should first take a reflection to identify the gaps in your knowledge:',
    '',
    '**reflect**:',
    '- Challenge existing knowledge with what-if thinking.',
    '- Reflect on the gaps in your knowledge and ask for more questions to fill those gaps.',
    '- You use this action when you feel like you need to first answer those questions before proceeding with the current one.',
    '- This action has higher priority than all other actions.',
    '',
    'If you are still not confident after reflecting, you can take one of the following actions:',
    '',
    '**search**:',
    '- Search external real-world information via a public search engine.',
    '- The search engine works best with short, keyword-based queries.',
    '- You use this action when you need more world knowledge or up to date information that is not covered in your training data or cut-off knowledge base.',
    '',
    '**readURL**:',
    '- Provide a specific URL to fetch and read its content in detail.',
    '- Any URL must come from the current context.',
    '- You use this action when you feel like that particular URL might have the information you need to answer the question.',
    '',
    '**rewrite**:',
    '- Propose a new or modified query (in a different phrasing, more details, or from another angle) that might lead to better or more relevant information.',
    '- This rewritten query can help the search engine find more accurate results, thereby improving your confidence in answering the original question.',
    '- You use this action when you think the current query is too vague, broad, or ambiguous; or the search engine results are not satisfactory.',
    '',
    '**answer**:',
    '- Provide your answer to the user, **only** if you are completely sure.',
    '',
    'When you decide on your action, respond **only** in valid JSON format according to the schema below.',
    '',
    '**Important**:',
    '- Do not include any extra keys.',
    '- Do not include explanatory text, markdown formatting, or reasoning in the final output.',
    '- Output exactly one JSON object in your response.',
    // Final empty entry yields the trailing newline.
    ''
  ];

  return promptLines.join('\n');
}
|
||||
|
||||
|
||||
/** One decision returned by the model; the fields mirror the properties of the response schema. */
interface AgentAction {
  action: 'search' | 'readURL' | 'rewrite' | 'answer' | 'reflect';
  reasoning: string;
  confidence: number;
  remainedGaps?: string[];
  searchKeywords?: string[];
  URLTargets?: string[];
  rewriteQuery?: string;
  answer?: string;
  references?: Array<{title: string; url: string}>;
}

/**
 * Runs the agent loop: prompts the model, executes the action it picks
 * (reflect / search / readURL / rewrite) and feeds each step back as context,
 * until the original question is answered or the token budget is spent.
 *
 * Sub-questions produced by a `reflect` action are queued and asked before the
 * original question is retried. An `answer` to a sub-question is recorded in
 * the context and the loop continues; only an `answer` to the original
 * question ends the run.
 *
 * @param question - The question to research.
 * @param tokenBudget - Upper bound on the tokens counted across model calls and
 *   tool results; the loop stops once the running total reaches it.
 * @returns The model's final `answer` action, or `undefined` when the budget
 *   ran out first.
 * @throws Rejects when the model call itself fails; tool failures and
 *   unparseable model replies are logged and the loop carries on.
 */
async function getResponse(question: string, tokenBudget = 300000): Promise<AgentAction | undefined> {
  let totalTokens = 0;
  let context = '';
  let step = 0;
  const gaps: string[] = [];

  while (totalTokens < tokenBudget) {
    // Pending sub-questions take priority over the original question.
    const currentQuestion = gaps.shift() ?? question;
    const prompt = getPrompt(currentQuestion, context);
    console.log('Prompt length:', prompt.length);
    const result = await model.generateContent(prompt);
    const response = await result.response;
    step++;

    totalTokens += response.usageMetadata?.totalTokenCount ?? 0;
    console.log(`Tokens: ${totalTokens}/${tokenBudget}`);

    // A malformed or unreadable reply is skipped instead of aborting the whole run;
    // the tokens it consumed are already counted above.
    let parsed: AgentAction | null = null;
    try {
      parsed = JSON.parse(response.text()) as AgentAction | null;
    } catch (error) {
      console.error('Failed to parse model response:', error);
    }
    if (!parsed) {
      continue;
    }
    const action = parsed;
    console.log('Action:', action);

    // Appends this step (plus any fetched result) to the context used by the next prompt.
    const record = (extra: Record<string, unknown> = {}): void => {
      const contextRecord = JSON.stringify({
        step,
        ...action,
        question: currentQuestion,
        ...extra
      });
      context = `${context}\n${contextRecord}`;
    };

    if (action.action === 'answer') {
      if (currentQuestion === question) {
        return action;
      }
      // Answer to a sub-question: keep it as knowledge and go on to the remaining questions.
      record();
      continue;
    }

    if (action.action === 'reflect' && action.remainedGaps) {
      gaps.push(...action.remainedGaps);
      record();
      continue;
    }

    try {
      if (action.action === 'search' && action.searchKeywords) {
        const results = await search(action.searchKeywords.join(' '), jinaToken);
        record({result: results.data});
        totalTokens += results.data.reduce((sum, r) => sum + r.usage.tokens, 0);
      } else if (action.action === 'readURL' && action.URLTargets?.length) {
        const urlResults = await Promise.all(
          action.URLTargets.map(async (url) => ({url, result: await readUrl(url, jinaToken)}))
        );
        record({result: urlResults});
        totalTokens += urlResults.reduce((sum, r) => sum + r.result.data.usage.tokens, 0);
      } else if (action.action === 'rewrite' && action.rewriteQuery) {
        // Immediately search with the rewritten query.
        const results = await search(action.rewriteQuery, jinaToken);
        record({result: results.data});
        totalTokens += results.data.reduce((sum, r) => sum + r.usage.tokens, 0);
      }
    } catch (error) {
      console.error('Error fetching data:', error);
    }
  }

  console.error(`Token budget of ${tokenBudget} exhausted before reaching an answer`);
  return undefined;
}
|
||||
|
||||
|
||||
// Entry point: the question is the first command-line argument (empty string when omitted).
const question = process.argv[2] || "";

// Report a failed run and signal it through the exit code instead of
// leaving the promise rejection unhandled.
getResponse(question).catch((error: unknown) => {
  console.error('Agent run failed:', error);
  process.exitCode = 1;
});
|
||||
43
src/tools/read.ts
Normal file
43
src/tools/read.ts
Normal file
@ -0,0 +1,43 @@
|
||||
import https from 'https';
|
||||
|
||||
/** JSON envelope parsed from the r.jina.ai reply. */
interface ReadResponse {
  code: number;
  status: number;
  data: {
    title: string;
    description: string;
    url: string;
    content: string;
    /** Token count reported for this page. */
    usage: { tokens: number; };
  };
}

/**
 * Requests the content of a page from r.jina.ai and returns the parsed JSON reply.
 *
 * @param url - Address of the page to read; sent as the `url` field of the JSON request body.
 * @param token - Bearer token placed in the `Authorization` header.
 * @returns Promise resolving to the parsed response body. The HTTP status code is
 *   not inspected here; callers can check the `code` / `status` fields of the result.
 *   The promise rejects on a request or response stream error, and with the
 *   `SyntaxError` from `JSON.parse` when the body is not valid JSON.
 */
export function readUrl(url: string, token: string): Promise<ReadResponse> {
  return new Promise((resolve, reject) => {
    const data = JSON.stringify({url});

    const options = {
      hostname: 'r.jina.ai',
      port: 443,
      path: '/',
      method: 'POST',
      headers: {
        'Accept': 'application/json',
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json',
        // Content-Length counts bytes; string length undercounts any non-ASCII URL
        // and would truncate the request body.
        'Content-Length': Buffer.byteLength(data),
        'X-Retain-Images': 'none'
      }
    };

    const req = https.request(options, (res) => {
      // Collect raw chunks and decode once at the end, so a multi-byte character
      // split across two chunks is not corrupted.
      const chunks: Buffer[] = [];
      res.on('data', (chunk: Buffer | string) => {
        chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
      });
      res.on('error', reject);
      res.on('end', () => {
        try {
          resolve(JSON.parse(Buffer.concat(chunks).toString('utf8')));
        } catch (error) {
          // A non-JSON body (e.g. an HTML error page) must reject the promise rather
          // than throw inside the event handler, where no caller could catch it.
          reject(error);
        }
      });
    });

    req.on('error', reject);
    req.write(data);
    req.end();
  });
}
|
||||
38
src/tools/search.ts
Normal file
38
src/tools/search.ts
Normal file
@ -0,0 +1,38 @@
|
||||
import https from 'https';
|
||||
|
||||
/** JSON envelope parsed from the s.jina.ai reply. */
interface SearchResponse {
  code: number;
  status: number;
  data: Array<{
    title: string;
    description: string;
    url: string;
    content: string;
    /** Token count reported for this result. */
    usage: { tokens: number; };
  }>;
}

/**
 * Sends a query to s.jina.ai and returns the parsed JSON reply.
 *
 * @param query - Search text; URI-encoded and used as the request path.
 * @param token - Bearer token placed in the `Authorization` header.
 * @returns Promise resolving to the parsed response body. The HTTP status code is
 *   not inspected here; callers can check the `code` / `status` fields of the result.
 *   The promise rejects on a request or response stream error, and with the
 *   `SyntaxError` from `JSON.parse` when the body is not valid JSON.
 */
export function search(query: string, token: string): Promise<SearchResponse> {
  return new Promise((resolve, reject) => {
    const options = {
      hostname: 's.jina.ai',
      port: 443,
      path: `/${encodeURIComponent(query)}`,
      method: 'GET',
      headers: {
        'Accept': 'application/json',
        'Authorization': `Bearer ${token}`,
        'X-Retain-Images': 'none'
      }
    };

    const req = https.request(options, (res) => {
      // Collect raw chunks and decode once at the end, so a multi-byte character
      // split across two chunks is not corrupted.
      const chunks: Buffer[] = [];
      res.on('data', (chunk: Buffer | string) => {
        chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
      });
      res.on('error', reject);
      res.on('end', () => {
        try {
          resolve(JSON.parse(Buffer.concat(chunks).toString('utf8')));
        } catch (error) {
          // A non-JSON body must reject the promise rather than throw inside the
          // event handler, where no caller could catch it.
          reject(error);
        }
      });
    });

    req.on('error', reject);
    req.end();
  });
}
|
||||
Loading…
x
Reference in New Issue
Block a user