From 5eb75fcb61bfc86622c3a9dd9c50121c2f5b08fd Mon Sep 17 00:00:00 2001
From: Han Xiao
Date: Sun, 26 Jan 2025 14:53:55 +0800
Subject: [PATCH] chore: first commit

---
Note: the TypeScript sources in this revision of the patch were edited after
the commit was exported, so the blob ids on the "index" lines below no longer
match the file contents.

 .gitignore          |   1 +
 package-lock.json   |  48 +++++++
 package.json        |  18 +++
 src/agent.ts        | 284 +++++++++++++++++++++++++++++++++++++++++++
 src/tools/read.ts   |  61 +++++++++
 src/tools/search.ts |  55 ++++++++
 6 files changed, 467 insertions(+)
 create mode 100644 package-lock.json
 create mode 100644 package.json
 create mode 100644 src/agent.ts
 create mode 100644 src/tools/read.ts
 create mode 100644 src/tools/search.ts

diff --git a/.gitignore b/.gitignore
index c6bba59..cc67a33 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 # Logs
 logs
+.idea
 *.log
 npm-debug.log*
 yarn-debug.log*
diff --git a/package-lock.json b/package-lock.json
new file mode 100644
index 0000000..4a2dbb6
--- /dev/null
+++ b/package-lock.json
@@ -0,0 +1,48 @@
+{
+  "name": "agentic-search",
+  "version": "1.0.0",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "agentic-search",
+      "version": "1.0.0",
+      "license": "ISC",
+      "dependencies": {
+        "@google/generative-ai": "^0.21.0",
+        "dotenv": "^16.4.7",
+        "undici": "^7.3.0"
+      }
+    },
+    "node_modules/@google/generative-ai": {
+      "version": "0.21.0",
+      "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.21.0.tgz",
+      "integrity": "sha512-7XhUbtnlkSEZK15kN3t+tzIMxsbKm/dSkKBFalj+20NvPKe1kBY7mR2P7vuijEn+f06z5+A8bVGKO0v39cr6Wg==",
+      "license": "Apache-2.0",
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/dotenv": {
+      "version": "16.4.7",
+      "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.7.tgz",
+      "integrity": "sha512-47qPchRCykZC03FhkYAhrvwU4xDBFIj1QPqaarj6mdM/hgUzfPHcpkHJOn3mJAufFeeAxAzeGsr5X0M4k6fLZQ==",
+      "license": "BSD-2-Clause",
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://dotenvx.com"
+      }
+    },
+    "node_modules/undici": {
+      "version": "7.3.0",
+      "resolved": "https://registry.npmjs.org/undici/-/undici-7.3.0.tgz",
+      "integrity": "sha512-Qy96NND4Dou5jKoSJ2gm8ax8AJM/Ey9o9mz7KN1bb9GP+G0l20Zw8afxTnY2f4b7hmhn/z8aC2kfArVQlAhFBw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=20.18.1"
+      }
+    }
+  }
+}
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..8668c41
--- /dev/null
+++ b/package.json
@@ -0,0 +1,18 @@
+{
+  "name": "agentic-search",
+  "version": "1.0.0",
+  "main": "index.js",
+  "scripts": {
+    "build": "tsc",
+    "as": "npx ts-node src/agent.ts"
+  },
+  "keywords": [],
+  "author": "",
+  "license": "ISC",
+  "description": "",
+  "dependencies": {
+    "@google/generative-ai": "^0.21.0",
+    "dotenv": "^16.4.7",
+    "undici": "^7.3.0"
+  }
+}
diff --git a/src/agent.ts b/src/agent.ts
new file mode 100644
index 0000000..69a6d04
--- /dev/null
+++ b/src/agent.ts
@@ -0,0 +1,284 @@
+import {GoogleGenerativeAI, SchemaType} from "@google/generative-ai";
+import dotenv from 'dotenv';
+import {ProxyAgent, setGlobalDispatcher} from "undici";
+import {readUrl} from "./tools/read";
+import {search} from "./tools/search";
+
+// Load .env before anything reads process.env, so that https_proxy and the
+// API keys may be defined there.
+dotenv.config();
+
+// Read the proxy URL and install it as undici's global dispatcher.
+if (process.env.https_proxy) {
+  try {
+    const proxyUrl = new URL(process.env.https_proxy).toString();
+    const dispatcher = new ProxyAgent({uri: proxyUrl});
+    setGlobalDispatcher(dispatcher);
+  } catch (error) {
+    console.error('Failed to set proxy:', error);
+  }
+}
+
+// Response schema handed to the model: every turn must be a single JSON object
+// naming one action plus the fields that action needs.
+const schema = {
+  type: SchemaType.OBJECT,
+  properties: {
+    action: {
+      type: SchemaType.STRING,
+      enum: ["search", "readURL", "rewrite", "answer", "reflect"],
+      description: "Must match exactly one action type"
+    },
+    remainedGaps: {
+      type: SchemaType.ARRAY,
+      items: {
+        type: SchemaType.STRING
+      },
+      description: "Only required when choosing 'reflect' action, must be an array of gaps in the knowledge",
+    },
+    searchKeywords: {
+      type: SchemaType.ARRAY,
+      items: {
+        type: SchemaType.STRING
+      },
+      description: "Only required when choosing 'search' action, must be an array of keywords"
+    },
+    URLTargets: {
+      type: SchemaType.ARRAY,
+      items: {
+        type: SchemaType.STRING
+      },
+      description: "Only required when choosing 'readURL' action, must be an array of URLs"
+    },
+    rewriteQuery: {
+      type: SchemaType.STRING,
+      description: "Only required when choosing 'rewrite' action, must be a new query that might lead to better or more relevant information",
+    },
+    answer: {
+      type: SchemaType.STRING,
+      description: "Only required when choosing 'answer' action, must be the final answer in natural language"
+    },
+    references: {
+      type: SchemaType.ARRAY,
+      items: {
+        type: SchemaType.OBJECT,
+        properties: {
+          title: {
+            type: SchemaType.STRING,
+            description: "Title of the document; must be directly from the context"
+          },
+          url: {
+            type: SchemaType.STRING,
+            description: "URL of the document; must be directly from the context"
+          }
+        },
+        required: ["title", "url"]
+      },
+      description: "Only required when choosing 'answer' action, must be an array of references"
+    },
+    reasoning: {
+      type: SchemaType.STRING,
+      description: "Explain why choose this action?"
+    },
+    confidence: {
+      type: SchemaType.NUMBER,
+      minimum: 0.0,
+      maximum: 1.0,
+      description: "Represents the confidence level of in answering the question BEFORE taking the action. Must be a float between 0.0 and 1.0",
+    }
+  },
+  required: ["action", "reasoning", "confidence"],
+};
+
+const apiKey = process.env.GEMINI_API_KEY as string;
+const jinaToken = process.env.JINA_API_KEY as string;
+if (!apiKey) {
+  throw new Error("GEMINI_API_KEY not found");
+}
+if (!jinaToken) {
+  throw new Error("JINA_API_KEY not found");
+}
+
+const modelName = 'gemini-1.5-flash';
+const genAI = new GoogleGenerativeAI(apiKey);
+const model = genAI.getGenerativeModel({
+  model: modelName,
+  generationConfig: {
+    temperature: 0.7,
+    responseMimeType: "application/json",
+    responseSchema: schema
+  }
+});
+
+/**
+ * Builds the prompt for one agent step.
+ *
+ * @param question - The question the model should work on in this step.
+ * @param context - Optional log of earlier steps, included verbatim.
+ * @returns The full prompt text.
+ */
+function getPrompt(question: string, context?: string) {
+  let contextIntro = ``;
+  if (!!context) {
+    contextIntro = `You have the following context:
+    ${context}
+    `;
+  }
+
+  return `You are an AI research analyst capable of multi-step reasoning.
+
+${contextIntro}
+
+Based on the context and the knowledge in your training data, you must answer the following question with 100% confidence:
+
+${question}
+
+If you are not 100% confident in your answer, you should first take a reflection to identify the gaps in your knowledge:
+
+**reflect**:
+- Challenge existing knowledge with what-if thinking.
+- Reflect on the gaps in your knowledge and ask for more questions to fill those gaps.
+- You use this action when you feel like you need to first answer those questions before proceeding with the current one.
+- This action has higher priority than all other actions.
+
+If you are still not confident after reflecting, you can take one of the following actions:
+
+**search**:
+- Search external real-world information via a public search engine.
+- The search engine works best with short, keyword-based queries.
+- You use this action when you need more world knowledge or up to date information that is not covered in your training data or cut-off knowledge base.
+
+**readURL**:
+- Provide a specific URL to fetch and read its content in detail.
+- Any URL must come from the current context.
+- You use this action when you feel like that particular URL might have the information you need to answer the question.
+
+**rewrite**:
+- Propose a new or modified query (in a different phrasing, more details, or from another angle) that might lead to better or more relevant information.
+- This rewritten query can help the search engine find more accurate results, thereby improving your confidence in answering the original question.
+- You use this action when you think the current query is too vague, broad, or ambiguous; or the search engine results are not satisfactory.
+
+**answer**:
+- Provide your answer to the user, **only** if you are completely sure.
+
+When you decide on your action, respond **only** in valid JSON format according to the schema below.
+
+**Important**:
+- Do not include any extra keys.
+- Do not include explanatory text, markdown formatting, or reasoning in the final output.
+- Output exactly one JSON object in your response.
+  `;
+
+}
+
+
+/**
+ * Runs the agent loop for one question until the model answers or the
+ * token budget is spent.
+ *
+ * Each step asks the model for one action and appends that action, plus any
+ * search or read results, to the context used for the next step.
+ *
+ * @param question - The user's question.
+ */
+async function getResponse(question: string) {
+  const tokenBudget = 300000;
+  let totalTokens = 0;
+  let context = '';
+  let step = 0;
+  const gaps: string[] = [];
+  let hasAnswer = false;
+
+  while (totalTokens < tokenBudget && !hasAnswer) {
+    // Sub-questions raised by an earlier 'reflect' are worked on first.
+    const currentQuestion = gaps.length > 0 ? gaps.shift()! : question;
+    const prompt = getPrompt(currentQuestion, context);
+    console.log('Prompt length:', prompt.length);
+    const result = await model.generateContent(prompt);
+    const response = await result.response;
+    const usage = response.usageMetadata;
+    step++;
+
+    totalTokens += usage?.totalTokenCount ?? 0;
+    console.log(`Tokens: ${totalTokens}/${tokenBudget}`);
+
+    const action = JSON.parse(response.text());
+    console.log('Action:', action);
+
+    // NOTE(review): an 'answer' to a sub-question also ends the loop; confirm
+    // that this is intended.
+    if (action.action === 'answer') {
+      hasAnswer = true;
+      continue;
+    }
+
+    if (action.action === 'reflect' && action.remainedGaps) {
+      gaps.push(...action.remainedGaps);
+      const contextRecord = JSON.stringify({
+        step,
+        ...action,
+        question: currentQuestion
+      });
+      context = `${context}\n${contextRecord}`;
+      continue;
+    }
+
+    try {
+      if (action.action === 'search' && action.searchKeywords) {
+        const results = await search(action.searchKeywords.join(' '), jinaToken);
+        const contextRecord = JSON.stringify({
+          step,
+          ...action,
+          question: currentQuestion,
+          result: results.data
+        });
+        context = `${context}\n${contextRecord}`;
+        totalTokens += results.data.reduce((sum, r) => sum + r.usage.tokens, 0);
+      } else if (action.action === 'readURL' && action.URLTargets?.length) {
+        const targets: string[] = action.URLTargets;
+        const urlResults = await Promise.all(
+          targets.map(async (url) => {
+            const response = await readUrl(url, jinaToken);
+            return {url, result: response};
+          })
+        );
+
+        const contextRecord = JSON.stringify({
+          step,
+          ...action,
+          question: currentQuestion,
+          result: urlResults
+        });
+        context = `${context}\n${contextRecord}`;
+        totalTokens += urlResults.reduce((sum, r) => sum + r.result.data.usage.tokens, 0);
+      } else if (action.action === 'rewrite' && action.rewriteQuery) {
+        // Immediately search with the new rewriteQuery
+        const results = await search(action.rewriteQuery, jinaToken);
+        const contextRecord = JSON.stringify({
+          step,
+          ...action,
+          question: currentQuestion,
+          result: results.data
+        });
+        context = `${context}\n${contextRecord}`;
+        totalTokens += results.data.reduce((sum, r) => sum + r.usage.tokens, 0);
+      }
+    } catch (error) {
+      console.error('Error fetching data:', error);
+    }
+  }
+}
+
+
+const question = process.argv[2] || "";
+if (!question.trim()) {
+  // Nothing to research: show usage instead of spending tokens on an empty prompt.
+  console.error('Usage: npm run as -- "<question>"');
+  process.exitCode = 1;
+} else {
+  // Surface failures instead of leaving an unhandled promise rejection.
+  getResponse(question).catch((error) => {
+    console.error('Agent failed:', error);
+    process.exitCode = 1;
+  });
+}
diff --git a/src/tools/read.ts b/src/tools/read.ts
new file mode 100644
index 0000000..8968d86
--- /dev/null
+++ b/src/tools/read.ts
@@ -0,0 +1,61 @@
+import https from 'https';
+
+interface ReadResponse {
+  code: number;
+  status: number;
+  data: {
+    title: string;
+    description: string;
+    url: string;
+    content: string;
+    usage: { tokens: number; };
+  };
+}
+
+/**
+ * Fetches a page by POSTing its address to r.jina.ai.
+ *
+ * @param url - Address of the page to read.
+ * @param token - API key, sent as a Bearer token.
+ * @returns The JSON body of the response, parsed.
+ */
+export function readUrl(url: string, token: string): Promise<ReadResponse> {
+  return new Promise((resolve, reject) => {
+    const data = JSON.stringify({url});
+
+    const options = {
+      hostname: 'r.jina.ai',
+      port: 443,
+      path: '/',
+      method: 'POST',
+      headers: {
+        'Accept': 'application/json',
+        'Authorization': `Bearer ${token}`,
+        'Content-Type': 'application/json',
+        // Content-Length counts bytes; data.length counts UTF-16 code units.
+        'Content-Length': Buffer.byteLength(data),
+        'X-Retain-Images': 'none'
+      }
+    };
+
+    const req = https.request(options, (res) => {
+      let responseData = '';
+      // Decode as UTF-8 so multi-byte characters split across chunks survive.
+      res.setEncoding('utf8');
+      res.on('data', (chunk) => responseData += chunk);
+      res.on('error', reject);
+      res.on('end', () => {
+        // A throw in this callback would bypass the promise, so reject instead.
+        try {
+          resolve(JSON.parse(responseData));
+        } catch (error) {
+          reject(error);
+        }
+      });
+    });
+
+    req.on('error', reject);
+    req.write(data);
+    req.end();
+  });
+}
diff --git a/src/tools/search.ts b/src/tools/search.ts
new file mode 100644
index 0000000..32a76cf
--- /dev/null
+++ b/src/tools/search.ts
@@ -0,0 +1,55 @@
+import https from 'https';
+
+interface SearchResponse {
+  code: number;
+  status: number;
+  data: Array<{
+    title: string;
+    description: string;
+    url: string;
+    content: string;
+    usage: { tokens: number; };
+  }>;
+}
+
+/**
+ * Runs a query against s.jina.ai.
+ *
+ * @param query - Search text; URL-encoded into the request path.
+ * @param token - API key, sent as a Bearer token.
+ * @returns The JSON body of the response, parsed.
+ */
+export function search(query: string, token: string): Promise<SearchResponse> {
+  return new Promise((resolve, reject) => {
+    const options = {
+      hostname: 's.jina.ai',
+      port: 443,
+      path: `/${encodeURIComponent(query)}`,
+      method: 'GET',
+      headers: {
+        'Accept': 'application/json',
+        'Authorization': `Bearer ${token}`,
+        'X-Retain-Images': 'none'
+      }
+    };
+
+    const req = https.request(options, (res) => {
+      let responseData = '';
+      // Decode as UTF-8 so multi-byte characters split across chunks survive.
+      res.setEncoding('utf8');
+      res.on('data', (chunk) => responseData += chunk);
+      res.on('error', reject);
+      res.on('end', () => {
+        // A throw in this callback would bypass the promise, so reject instead.
+        try {
+          resolve(JSON.parse(responseData));
+        } catch (error) {
+          reject(error);
+        }
+      });
+    });
+
+    req.on('error', reject);
+    req.end();
+  });
+}