chore: first commit

This commit is contained in:
Han Xiao 2025-01-26 14:53:55 +08:00
parent 91f1cbcb97
commit 5eb75fcb61
6 changed files with 396 additions and 0 deletions

1
.gitignore vendored
View File

@ -1,5 +1,6 @@
# Logs
logs
.idea
*.log
npm-debug.log*
yarn-debug.log*

48
package-lock.json generated Normal file
View File

@ -0,0 +1,48 @@
{
"name": "agentic-search",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "agentic-search",
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"@google/generative-ai": "^0.21.0",
"dotenv": "^16.4.7",
"undici": "^7.3.0"
}
},
"node_modules/@google/generative-ai": {
"version": "0.21.0",
"resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.21.0.tgz",
"integrity": "sha512-7XhUbtnlkSEZK15kN3t+tzIMxsbKm/dSkKBFalj+20NvPKe1kBY7mR2P7vuijEn+f06z5+A8bVGKO0v39cr6Wg==",
"license": "Apache-2.0",
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/dotenv": {
"version": "16.4.7",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.7.tgz",
"integrity": "sha512-47qPchRCykZC03FhkYAhrvwU4xDBFIj1QPqaarj6mdM/hgUzfPHcpkHJOn3mJAufFeeAxAzeGsr5X0M4k6fLZQ==",
"license": "BSD-2-Clause",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://dotenvx.com"
}
},
"node_modules/undici": {
"version": "7.3.0",
"resolved": "https://registry.npmjs.org/undici/-/undici-7.3.0.tgz",
"integrity": "sha512-Qy96NND4Dou5jKoSJ2gm8ax8AJM/Ey9o9mz7KN1bb9GP+G0l20Zw8afxTnY2f4b7hmhn/z8aC2kfArVQlAhFBw==",
"license": "MIT",
"engines": {
"node": ">=20.18.1"
}
}
}
}

18
package.json Normal file
View File

@ -0,0 +1,18 @@
{
"name": "agentic-search",
"version": "1.0.0",
"main": "index.js",
"scripts": {
"build": "tsc",
"as": "npx ts-node src/agent.ts"
},
"keywords": [],
"author": "",
"license": "ISC",
"description": "",
"dependencies": {
"@google/generative-ai": "^0.21.0",
"dotenv": "^16.4.7",
"undici": "^7.3.0"
}
}

248
src/agent.ts Normal file
View File

@ -0,0 +1,248 @@
import {GoogleGenerativeAI, SchemaType} from "@google/generative-ai";
import dotenv from 'dotenv';
import {ProxyAgent, setGlobalDispatcher} from "undici";
import {readUrl} from "./tools/read";
import {search} from "./tools/search";
// Load variables from .env before anything reads process.env, so that a proxy
// (or API key) defined only in the .env file is visible to the code below.
// By default dotenv leaves variables that are already set in the environment untouched.
dotenv.config();

// Read the proxy URL from the environment and install it as undici's global dispatcher.
if (process.env.https_proxy) {
  try {
    // Round-trip through URL so a malformed value fails here and is reported below.
    const proxyUrl = new URL(process.env.https_proxy).toString();
    const dispatcher = new ProxyAgent({uri: proxyUrl});
    setGlobalDispatcher(dispatcher);
  } catch (error) {
    // Best effort: a bad proxy setting is logged and the program continues without a proxy.
    console.error('Failed to set proxy:', error);
  }
}
/** Schema fragment for a plain string field carrying the given description. */
const describedString = (description: string) => ({
  type: SchemaType.STRING,
  description
});

/** Schema fragment for an array-of-strings field carrying the given description. */
const describedStringArray = (description: string) => ({
  type: SchemaType.ARRAY,
  items: {
    type: SchemaType.STRING
  },
  description
});

/**
 * Response schema handed to the model: one `action` plus the fields that
 * belong to that action. Only `action`, `reasoning` and `confidence` are
 * always required; the per-action fields are described as conditionally
 * required in their descriptions.
 */
const schema = {
  type: SchemaType.OBJECT,
  properties: {
    action: {
      type: SchemaType.STRING,
      enum: ["search", "readURL", "rewrite", "answer", "reflect"],
      description: "Must match exactly one action type"
    },
    remainedGaps: describedStringArray(
      "Only required when choosing 'reflect' action, must be an array of gaps in the knowledge"
    ),
    searchKeywords: describedStringArray(
      "Only required when choosing 'search' action, must be an array of keywords"
    ),
    URLTargets: describedStringArray(
      "Only required when choosing 'readURL' action, must be an array of URLs"
    ),
    rewriteQuery: describedString(
      "Only required when choosing 'rewrite' action, must be a new query that might lead to better or more relevant information"
    ),
    answer: describedString(
      "Only required when choosing 'answer' action, must be the final answer in natural language"
    ),
    references: {
      type: SchemaType.ARRAY,
      items: {
        type: SchemaType.OBJECT,
        properties: {
          title: describedString("Title of the document; must be directly from the context"),
          url: describedString("URL of the document; must be directly from the context")
        },
        required: ["title", "url"]
      },
      description: "Only required when choosing 'answer' action, must be an array of references"
    },
    reasoning: describedString("Explain why choose this action?"),
    confidence: {
      type: SchemaType.NUMBER,
      minimum: 0.0,
      maximum: 1.0,
      description: "Represents the confidence level of in answering the question BEFORE taking the action. Must be a float between 0.0 and 1.0",
    }
  },
  required: ["action", "reasoning", "confidence"],
};
/**
 * Returns the value of a mandatory environment variable.
 * Throws `Error("<name> not found")` when the variable is unset or empty.
 */
function requireEnv(name: string): string {
  const value = process.env[name];
  if (!value) {
    throw new Error(`${name} not found`);
  }
  return value;
}

// Both credentials are checked at module load, Gemini first, so a missing key fails fast.
const apiKey = requireEnv('GEMINI_API_KEY');
const jinaToken = requireEnv('JINA_API_KEY');

const modelName = 'gemini-1.5-flash';
const genAI = new GoogleGenerativeAI(apiKey);

// The model is configured to reply with JSON constrained by `schema`.
const model = genAI.getGenerativeModel({
  model: modelName,
  generationConfig: {
    temperature: 0.7,
    responseMimeType: "application/json",
    responseSchema: schema
  }
});
/**
 * Assembles the instruction prompt sent to the model for one step.
 *
 * @param question - The question the model must answer in this step.
 * @param context - Accumulated context from earlier steps; when empty or
 *   omitted the "You have the following context" section is left out.
 * @returns The prompt text, one instruction per line, ending with a newline.
 */
function getPrompt(question: string, context?: string) {
  // The context section carries its own trailing newline; it collapses to an empty line when absent.
  const contextSection = context
    ? ['You have the following context:', context, ''].join('\n')
    : '';

  const promptLines = [
    'You are an AI research analyst capable of multi-step reasoning.',
    contextSection,
    'Based on the context and the knowledge in your training data, you must answer the following question with 100% confidence:',
    question,
    'If you are not 100% confident in your answer, you should first take a reflection to identify the gaps in your knowledge:',
    '**reflect**:',
    '- Challenge existing knowledge with what-if thinking.',
    '- Reflect on the gaps in your knowledge and ask for more questions to fill those gaps.',
    '- You use this action when you feel like you need to first answer those questions before proceeding with the current one.',
    '- This action has higher priority than all other actions.',
    'If you are still not confident after reflecting, you can take one of the following actions:',
    '**search**:',
    '- Search external real-world information via a public search engine.',
    '- The search engine works best with short, keyword-based queries.',
    '- You use this action when you need more world knowledge or up to date information that is not covered in your training data or cut-off knowledge base.',
    '**readURL**:',
    '- Provide a specific URL to fetch and read its content in detail.',
    '- Any URL must come from the current context.',
    '- You use this action when you feel like that particular URL might have the information you need to answer the question.',
    '**rewrite**:',
    '- Propose a new or modified query (in a different phrasing, more details, or from another angle) that might lead to better or more relevant information.',
    '- This rewritten query can help the search engine find more accurate results, thereby improving your confidence in answering the original question.',
    '- You use this action when you think the current query is too vague, broad, or ambiguous; or the search engine results are not satisfactory.',
    '**answer**:',
    '- Provide your answer to the user, **only** if you are completely sure.',
    'When you decide on your action, respond **only** in valid JSON format according to the schema below.',
    '**Important**:',
    '- Do not include any extra keys.',
    '- Do not include explanatory text, markdown formatting, or reasoning in the final output.',
    '- Output exactly one JSON object in your response.',
    // Trailing empty entry yields the final newline after the last instruction.
    '',
  ];

  return promptLines.join('\n');
}
/** Shape of the JSON object the model is asked to emit; mirrors the fields declared in `schema`. */
interface AgentAction {
  action: 'search' | 'readURL' | 'rewrite' | 'answer' | 'reflect';
  remainedGaps?: string[];
  searchKeywords?: string[];
  URLTargets?: string[];
  rewriteQuery?: string;
  answer?: string;
  references?: Array<{title: string; url: string}>;
  reasoning: string;
  confidence: number;
}

/**
 * Runs the agent loop: repeatedly prompts the model, executes the action it
 * chooses (reflect / search / readURL / rewrite) and appends the outcome to
 * the context, until the model answers or the token budget is used up.
 *
 * @param question - The user's question.
 * @returns The model's `answer` action, or `undefined` when the token budget
 *   ran out before an answer was produced.
 * @throws Propagates errors from the model call and a `SyntaxError` when the
 *   model output is not valid JSON. Errors from the search/read tools are
 *   logged and the loop continues.
 */
async function getResponse(question: string): Promise<AgentAction | undefined> {
  const tokenBudget = 300000;
  // Open sub-questions produced by 'reflect' actions; consumed first-in, first-out.
  const gaps: string[] = [];
  let totalTokens = 0;
  let context = '';
  let step = 0;

  // Appends one JSON line describing the current step to the running context.
  // `result` is omitted from the JSON when undefined.
  const remember = (action: AgentAction, askedQuestion: string, result?: unknown): void => {
    const contextRecord = JSON.stringify({step, ...action, question: askedQuestion, result});
    context = `${context}\n${contextRecord}`;
  };

  // Runs a search, records its results in the context, then charges its token usage to the budget.
  const searchAndRemember = async (query: string, action: AgentAction, askedQuestion: string): Promise<void> => {
    const results = await search(query, jinaToken);
    remember(action, askedQuestion, results.data);
    totalTokens += results.data.reduce((sum, r) => sum + r.usage.tokens, 0);
  };

  while (totalTokens < tokenBudget) {
    // Pending gap questions take precedence over the original question.
    const currentQuestion = gaps.shift() ?? question;
    const prompt = getPrompt(currentQuestion, context);
    console.log('Prompt length:', prompt.length);

    const result = await model.generateContent(prompt);
    const response = await result.response;
    step++;
    totalTokens += response.usageMetadata?.totalTokenCount ?? 0;
    console.log(`Tokens: ${totalTokens}/${tokenBudget}`);

    const action: AgentAction = JSON.parse(response.text());
    console.log('Action:', action);

    if (action.action === 'answer') {
      // NOTE(review): this also ends the run when `currentQuestion` was a gap
      // sub-question rather than the original question — confirm that is intended.
      return action;
    }

    if (action.action === 'reflect' && action.remainedGaps) {
      gaps.push(...action.remainedGaps);
      remember(action, currentQuestion);
      continue;
    }

    try {
      if (action.action === 'search' && action.searchKeywords) {
        await searchAndRemember(action.searchKeywords.join(' '), action, currentQuestion);
      } else if (action.action === 'readURL' && action.URLTargets?.length) {
        // All target URLs are fetched concurrently; one failure rejects the whole batch.
        const urlResults = await Promise.all(
          action.URLTargets.map(async (url) => ({url, result: await readUrl(url, jinaToken)}))
        );
        remember(action, currentQuestion, urlResults);
        totalTokens += urlResults.reduce((sum, r) => sum + r.result.data.usage.tokens, 0);
      } else if (action.action === 'rewrite' && action.rewriteQuery) {
        // Immediately search with the rewritten query.
        await searchAndRemember(action.rewriteQuery, action, currentQuestion);
      }
    } catch (error) {
      // Tool failures are best-effort: log and let the model pick another action.
      console.error('Error fetching data:', error);
    }
  }

  return undefined;
}
// CLI entry point: the question is the first command-line argument (empty string when omitted).
const question = process.argv[2] ?? "";

// Attach a rejection handler so a failed run is reported and reflected in the exit code
// instead of surfacing as an unhandled promise rejection.
getResponse(question).catch((error: unknown) => {
  console.error('Agent run failed:', error);
  process.exitCode = 1;
});

43
src/tools/read.ts Normal file
View File

@ -0,0 +1,43 @@
import https from 'https';
/** Document payload carried in the `data` field of a read response. */
interface ReadPayload {
  title: string;
  description: string;
  url: string;
  content: string;
  /** Token usage reported for this document. */
  usage: {
    tokens: number;
  };
}

/** JSON body that `readUrl` resolves with. */
interface ReadResponse {
  code: number;
  status: number;
  data: ReadPayload;
}
/**
 * Requests the content of a page from the `r.jina.ai` endpoint and resolves
 * with the parsed JSON body.
 *
 * @param url - Address of the page to read; sent as `{"url": ...}` in the POST body.
 * @param token - API token sent as a `Bearer` credential.
 * @returns The parsed response body. It is not validated against `ReadResponse`.
 * @throws Rejects on request or response stream errors, or when the body is not valid JSON.
 */
export function readUrl(url: string, token: string): Promise<ReadResponse> {
  return new Promise((resolve, reject) => {
    const data = JSON.stringify({url});
    const options = {
      hostname: 'r.jina.ai',
      port: 443,
      path: '/',
      method: 'POST',
      headers: {
        'Accept': 'application/json',
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json',
        // Content-Length counts bytes, not UTF-16 code units; the two differ for non-ASCII URLs.
        'Content-Length': Buffer.byteLength(data),
        'X-Retain-Images': 'none'
      }
    };
    const req = https.request(options, (res) => {
      // Decode at the stream level so multi-byte characters split across chunks stay intact.
      res.setEncoding('utf8');
      let responseData = '';
      res.on('data', (chunk: string) => {
        responseData += chunk;
      });
      res.on('error', reject);
      res.on('end', () => {
        // A throw inside this callback would be an uncaught exception, so turn it into a rejection.
        try {
          resolve(JSON.parse(responseData));
        } catch (error) {
          reject(error);
        }
      });
    });
    req.on('error', reject);
    req.write(data);
    req.end();
  });
}

38
src/tools/search.ts Normal file
View File

@ -0,0 +1,38 @@
import https from 'https';
/** One entry in the `data` array of a search response. */
interface SearchHit {
  title: string;
  description: string;
  url: string;
  content: string;
  /** Token usage reported for this entry. */
  usage: {
    tokens: number;
  };
}

/** JSON body that `search` resolves with. */
interface SearchResponse {
  code: number;
  status: number;
  data: SearchHit[];
}
/**
 * Sends a query to the `s.jina.ai` endpoint and resolves with the parsed JSON body.
 *
 * @param query - Search text; URI-encoded and used as the request path.
 * @param token - API token sent as a `Bearer` credential.
 * @returns The parsed response body. It is not validated against `SearchResponse`.
 * @throws Rejects on request or response stream errors, or when the body is not valid JSON.
 */
export function search(query: string, token: string): Promise<SearchResponse> {
  return new Promise((resolve, reject) => {
    const options = {
      hostname: 's.jina.ai',
      port: 443,
      path: `/${encodeURIComponent(query)}`,
      method: 'GET',
      headers: {
        'Accept': 'application/json',
        'Authorization': `Bearer ${token}`,
        'X-Retain-Images': 'none'
      }
    };
    const req = https.request(options, (res) => {
      // Decode at the stream level so multi-byte characters split across chunks stay intact.
      res.setEncoding('utf8');
      let responseData = '';
      res.on('data', (chunk: string) => {
        responseData += chunk;
      });
      res.on('error', reject);
      res.on('end', () => {
        // A throw inside this callback would be an uncaught exception, so turn it into a rejection.
        try {
          resolve(JSON.parse(responseData));
        } catch (error) {
          reject(error);
        }
      });
    });
    req.on('error', reject);
    req.end();
  });
}