From 5eb75fcb61bfc86622c3a9dd9c50121c2f5b08fd Mon Sep 17 00:00:00 2001
From: Han Xiao <han.xiao@jina.ai>
Date: Sun, 26 Jan 2025 14:53:55 +0800
Subject: [PATCH] chore: first commit

---
 .gitignore          |   1 +
 package-lock.json   |  48 +++++++++
 package.json        |  18 ++++
 src/agent.ts        | 248 ++++++++++++++++++++++++++++++++++++++++++++
 src/tools/read.ts   |  43 ++++++++
 src/tools/search.ts |  38 +++++++
 6 files changed, 396 insertions(+)
 create mode 100644 package-lock.json
 create mode 100644 package.json
 create mode 100644 src/agent.ts
 create mode 100644 src/tools/read.ts
 create mode 100644 src/tools/search.ts

diff --git a/.gitignore b/.gitignore
index c6bba59..cc67a33 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 # Logs
 logs
+.idea
 *.log
 npm-debug.log*
 yarn-debug.log*
diff --git a/package-lock.json b/package-lock.json
new file mode 100644
index 0000000..4a2dbb6
--- /dev/null
+++ b/package-lock.json
@@ -0,0 +1,48 @@
+{
+  "name": "agentic-search",
+  "version": "1.0.0",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "agentic-search",
+      "version": "1.0.0",
+      "license": "ISC",
+      "dependencies": {
+        "@google/generative-ai": "^0.21.0",
+        "dotenv": "^16.4.7",
+        "undici": "^7.3.0"
+      }
+    },
+    "node_modules/@google/generative-ai": {
+      "version": "0.21.0",
+      "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.21.0.tgz",
+      "integrity": "sha512-7XhUbtnlkSEZK15kN3t+tzIMxsbKm/dSkKBFalj+20NvPKe1kBY7mR2P7vuijEn+f06z5+A8bVGKO0v39cr6Wg==",
+      "license": "Apache-2.0",
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/dotenv": {
+      "version": "16.4.7",
+      "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.7.tgz",
+      "integrity": "sha512-47qPchRCykZC03FhkYAhrvwU4xDBFIj1QPqaarj6mdM/hgUzfPHcpkHJOn3mJAufFeeAxAzeGsr5X0M4k6fLZQ==",
+      "license": "BSD-2-Clause",
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://dotenvx.com"
+      }
+    },
+    "node_modules/undici": {
+      "version": "7.3.0",
+      "resolved": "https://registry.npmjs.org/undici/-/undici-7.3.0.tgz",
+      "integrity": "sha512-Qy96NND4Dou5jKoSJ2gm8ax8AJM/Ey9o9mz7KN1bb9GP+G0l20Zw8afxTnY2f4b7hmhn/z8aC2kfArVQlAhFBw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=20.18.1"
+      }
+    }
+  }
+}
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..8668c41
--- /dev/null
+++ b/package.json
@@ -0,0 +1,18 @@
+{
+  "name": "agentic-search",
+  "version": "1.0.0",
+  "main": "index.js",
+  "scripts": {
+    "build": "tsc",
+    "as": "npx ts-node src/agent.ts"
+  },
+  "keywords": [],
+  "author": "",
+  "license": "ISC",
+  "description": "",
+  "dependencies": {
+    "@google/generative-ai": "^0.21.0",
+    "dotenv": "^16.4.7",
+    "undici": "^7.3.0"
+  }
+}
diff --git a/src/agent.ts b/src/agent.ts
new file mode 100644
index 0000000..69a6d04
--- /dev/null
+++ b/src/agent.ts
@@ -0,0 +1,248 @@
+import {GoogleGenerativeAI, SchemaType} from "@google/generative-ai";
+import dotenv from 'dotenv';
+import {ProxyAgent, setGlobalDispatcher} from "undici";
+import {readUrl} from "./tools/read";
+import {search} from "./tools/search";
+// Load .env first so an https_proxy defined there is visible to the proxy setup below.
+dotenv.config();
+// Install a global undici dispatcher that proxies through https_proxy, if set; an invalid URL is logged and ignored.
+if (process.env.https_proxy) {
+  try {
+    const proxyUrl = new URL(process.env.https_proxy).toString();
+    setGlobalDispatcher(new ProxyAgent({uri: proxyUrl}));
+  } catch (error) {
+    console.error('Failed to set proxy:', error);
+  }
+}
+
+const schema = { // structured-output schema handed to the model as generationConfig.responseSchema
+  type: SchemaType.OBJECT,
+  properties: {
+    action: {
+      type: SchemaType.STRING,
+      enum: ["search", "readURL", "rewrite", "answer", "reflect"],
+      description: "Must match exactly one action type"
+    },
+    remainedGaps: {
+      type: SchemaType.ARRAY,
+      items: {
+        type: SchemaType.STRING
+      },
+      description: "Only required when choosing 'reflect' action, must be an array of gaps in the knowledge",
+    },
+    searchKeywords: {
+      type: SchemaType.ARRAY,
+      items: {
+        type: SchemaType.STRING
+      },
+      description: "Only required when choosing 'search' action, must be an array of keywords"
+    },
+    URLTargets: {
+      type: SchemaType.ARRAY,
+      items: {
+        type: SchemaType.STRING
+      },
+      description: "Only required when choosing 'readURL' action, must be an array of URLs"
+    },
+    rewriteQuery: {
+      type: SchemaType.STRING,
+      description: "Only required when choosing 'rewrite' action, must be a new query that might lead to better or more relevant information",
+    },
+    answer: {
+      type: SchemaType.STRING,
+      description: "Only required when choosing 'answer' action, must be the final answer in natural language"
+    },
+    references: {
+      type: SchemaType.ARRAY,
+      items: {
+        type: SchemaType.OBJECT,
+        properties: {
+          title: {
+            type: SchemaType.STRING,
+            description: "Title of the document; must be directly from the context"
+          },
+          url: {
+            type: SchemaType.STRING,
+            description: "URL of the document; must be directly from the context"
+          }
+        },
+        required: ["title", "url"]
+      },
+      description: "Only required when choosing 'answer' action, must be an array of references"
+    },
+    reasoning: {
+      type: SchemaType.STRING,
+      description: "Explain why choose this action?"
+    },
+    confidence: {
+      type: SchemaType.NUMBER,
+      minimum: 0.0,
+      maximum: 1.0,
+      description: "Represents the confidence level of in answering the question BEFORE taking the action. Must be a float between 0.0 and 1.0",
+    }
+  },
+  required: ["action", "reasoning", "confidence"], // action-specific fields are optional at the schema level
+};
+
+const apiKey = process.env.GEMINI_API_KEY as string; // the cast only satisfies the type checker; the value is checked below
+const jinaToken = process.env.JINA_API_KEY as string; // the cast only satisfies the type checker; the value is checked below
+if (!apiKey) {
+  throw new Error("GEMINI_API_KEY  not found");
+}
+if (!jinaToken) {
+  throw new Error("JINA_API_KEY not found");
+}
+
+const modelName = 'gemini-1.5-flash';
+const genAI = new GoogleGenerativeAI(apiKey);
+const model = genAI.getGenerativeModel({ // module-level model handle, configured to reply with JSON following `schema`
+  model: modelName,
+  generationConfig: {
+    temperature: 0.7,
+    responseMimeType: "application/json",
+    responseSchema: schema
+  }
+});
+
+function getPrompt(question: string, context?: string) { // builds the full instruction prompt sent to the model for one step
+  let contextIntro = ``;
+  if (!!context) { // the context section is left out when context is undefined or an empty string
+    contextIntro = `You have the following context:
+    ${context}
+     `;
+  }
+
+  return `You are an AI research analyst capable of multi-step reasoning.
+
+${contextIntro}
+
+Based on the context and the knowledge in your training data, you must answer the following question with 100% confidence:
+
+${question}
+
+If you are not 100% confident in your answer, you should first take a reflection to identify the gaps in your knowledge:
+
+**reflect**:
+- Challenge existing knowledge with what-if thinking.
+- Reflect on the gaps in your knowledge and ask for more questions to fill those gaps.
+- You use this action when you feel like you need to first answer those questions before proceeding with the current one.
+- This action has higher priority than all other actions.
+
+If you are still not confident after reflecting, you can take one of the following actions:
+
+**search**:
+- Search external real-world information via a public search engine.
+- The search engine works best with short, keyword-based queries.
+- You use this action when you need more world knowledge or up to date information that is not covered in your training data or cut-off knowledge base.
+
+**readURL**:
+- Provide a specific URL to fetch and read its content in detail.
+- Any URL must come from the current context.
+- You use this action when you feel like that particular URL might have the information you need to answer the question.
+
+**rewrite**:
+- Propose a new or modified query (in a different phrasing, more details, or from another angle) that might lead to better or more relevant information.
+- This rewritten query can help the search engine find more accurate results, thereby improving your confidence in answering the original question.
+- You use this action when you think the current query is too vague, broad, or ambiguous; or the search engine results are not satisfactory.
+
+**answer**:
+- Provide your answer to the user, **only** if you are completely sure.
+
+When you decide on your action, respond **only** in valid JSON format according to the schema below.
+
+**Important**:
+- Do not include any extra keys.
+- Do not include explanatory text, markdown formatting, or reasoning in the final output.
+- Output exactly one JSON object in your response.
+   `;
+
+}
+
+
+/**
+ * Drives the agent loop for `question`. Each step asks the model to choose an action
+ * (answer, reflect, search, readURL or rewrite), executes it, and appends the outcome
+ * to the running context that is fed into the next prompt. The loop stops once the
+ * original question is answered or the token budget is used up. Progress is logged to
+ * the console; nothing is returned.
+ *
+ * @param question The question to research.
+ * @returns A promise that settles when the loop ends; it carries no value.
+ */
+async function getResponse(question: string) {
+  // Upper bound on model plus tool tokens spent before the loop gives up.
+  const tokenBudget = 300000;
+  let totalTokens = 0;
+  let context = '';
+  let step = 0;
+  const gaps: string[] = [];
+  let hasAnswer = false;
+
+  // Appends one step record, serialized as a single JSON line, to the running context.
+  const appendContext = (record: object) => {
+    context = `${context}\n${JSON.stringify(record)}`;
+  };
+
+  while (totalTokens < tokenBudget && !hasAnswer) {
+    // Sub-questions queued by earlier 'reflect' steps are handled before the original question.
+    const currentQuestion = gaps.length > 0 ? gaps.shift()! : question;
+    const prompt = getPrompt(currentQuestion, context);
+    console.log('Prompt length:', prompt.length);
+    const result = await model.generateContent(prompt);
+    const response = await result.response;
+    const usage = response.usageMetadata;
+    step++;
+
+    // usageMetadata may be absent, in which case the call counts as zero tokens.
+    totalTokens += usage?.totalTokenCount || 0;
+    console.log(`Tokens: ${totalTokens}/${tokenBudget}`);
+
+    const action = JSON.parse(response.text());
+    console.log('Action:', action);
+
+    if (action.action === 'answer') {
+      // Only an answer to the original question ends the run. An answer to a queued
+      // sub-question is recorded as context so the original question can build on it.
+      hasAnswer = currentQuestion === question;
+      if (!hasAnswer) {
+        appendContext({step, ...action, question: currentQuestion});
+      }
+      continue;
+    }
+
+    if (action.action === 'reflect' && action.remainedGaps) {
+      // Queue the newly identified knowledge gaps as sub-questions.
+      gaps.push(...action.remainedGaps);
+      appendContext({step, ...action, question: currentQuestion});
+      continue;
+    }
+
+    try {
+      const isSearch = action.action === 'search' && action.searchKeywords;
+      if (isSearch || (action.action === 'rewrite' && action.rewriteQuery)) {
+        // A rewrite is executed right away as a search with the rewritten query.
+        const query = isSearch ? action.searchKeywords.join(' ') : action.rewriteQuery;
+        const results = await search(query, jinaToken);
+        appendContext({step, ...action, question: currentQuestion, result: results.data});
+        // Tokens reported by the search results count against the same budget.
+        totalTokens += results.data.reduce((sum, r) => sum + r.usage.tokens, 0);
+      } else if (action.action === 'readURL' && action.URLTargets?.length) {
+        const urlResults = await Promise.all(
+          action.URLTargets.map(async (url: string) => {
+            const response = await readUrl(url, jinaToken);
+            return {url, result: response};
+          })
+        );
+        appendContext({step, ...action, question: currentQuestion, result: urlResults});
+        totalTokens += urlResults.reduce((sum, r) => sum + r.result.data.usage.tokens, 0);
+      }
+    } catch (error) {
+      // Tool failures are logged and the loop moves on to the next model step.
+      console.error('Error fetching data:', error);
+    }
+  }
+}
+
+
+const question = process.argv[2] || ""; // first CLI argument; falls back to an empty question when none is given
+getResponse(question); // started without await or catch, so a rejection surfaces as an unhandled rejection
diff --git a/src/tools/read.ts b/src/tools/read.ts
new file mode 100644
index 0000000..8968d86
--- /dev/null
+++ b/src/tools/read.ts
@@ -0,0 +1,43 @@
+import https from 'https';
+
+interface ReadResponse { // shape readUrl declares for the parsed r.jina.ai JSON body; not validated at runtime
+  code: number;
+  status: number;
+  data: {
+    title: string;
+    description: string;
+    url: string;
+    content: string;
+    usage: { tokens: number; };
+  };
+}
+
+/**
+ * POSTs `url` to the Jina Reader endpoint (r.jina.ai) and resolves with the parsed JSON body; rejects on request errors and on bodies that are not valid JSON.
+ */
+export function readUrl(url: string, token: string): Promise<ReadResponse> {
+  return new Promise((resolve, reject) => {
+    const data = JSON.stringify({url});
+    const options = {
+      hostname: 'r.jina.ai',
+      port: 443,
+      path: '/',
+      method: 'POST',
+      headers: {
+        'Accept': 'application/json',
+        'Authorization': `Bearer ${token}`,
+        'Content-Type': 'application/json',
+        'Content-Length': Buffer.byteLength(data), // size in bytes; data.length undercounts non-ASCII URLs
+        'X-Retain-Images': 'none'
+      }
+    };
+    https.request(options, (res) => {
+      let responseData = '';
+      res.setEncoding('utf8'); // decode as a stream so multi-byte characters split across chunks stay intact
+      res.on('data', (chunk) => responseData += chunk);
+      res.on('end', () => { // a throw in this listener would be an uncaught exception, so reject instead
+        try { resolve(JSON.parse(responseData)); } catch (error) { reject(error); }
+      });
+    }).on('error', reject).end(data); // end(data) writes the body and finishes the request
+  });
+}
diff --git a/src/tools/search.ts b/src/tools/search.ts
new file mode 100644
index 0000000..32a76cf
--- /dev/null
+++ b/src/tools/search.ts
@@ -0,0 +1,38 @@
+import https from 'https';
+
+interface SearchResponse { // shape search declares for the parsed s.jina.ai JSON body; not validated at runtime
+  code: number;
+  status: number;
+  data: Array<{
+    title: string;
+    description: string;
+    url: string;
+    content: string;
+    usage: { tokens: number; };
+  }>;
+}
+
+/** Sends `query` to the Jina search endpoint (s.jina.ai) and resolves with the parsed JSON body; rejects on request errors and on non-JSON bodies. */
+export function search(query: string, token: string): Promise<SearchResponse> {
+  return new Promise((resolve, reject) => {
+    const options = {
+      hostname: 's.jina.ai',
+      port: 443,
+      path: `/${encodeURIComponent(query)}`,
+      method: 'GET',
+      headers: {
+        'Accept': 'application/json',
+        'Authorization': `Bearer ${token}`,
+        'X-Retain-Images': 'none'
+      }
+    };
+    https.request(options, (res) => {
+      let responseData = '';
+      res.setEncoding('utf8'); // decode as a stream so multi-byte characters split across chunks stay intact
+      res.on('data', (chunk) => responseData += chunk);
+      res.on('end', () => { // a throw in this listener would be an uncaught exception, so reject instead
+        try { resolve(JSON.parse(responseData)); } catch (error) { reject(error); }
+      });
+    }).on('error', reject).end();
+  });
+}