chore: first commit

2025-12-26 06:28:56 +08:00 · 2025-01-26 14:53:55 +08:00 · 2025-01-26 14:53:55 +08:00 · 5eb75fcb61
commit 5eb75fcb61
parent 91f1cbcb97
6 changed files with 396 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,5 +1,6 @@
 # Logs
 logs
 .idea
 *.log
 npm-debug.log*
 yarn-debug.log*
--- a/package-lock.json
+++ b/package-lock.json
@ -0,0 +1,48 @@
 {
  "name": "agentic-search",
  "version": "1.0.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "agentic-search",
      "version": "1.0.0",
      "license": "ISC",
      "dependencies": {
        "@google/generative-ai": "^0.21.0",
        "dotenv": "^16.4.7",
        "undici": "^7.3.0"
      }
    },
    "node_modules/@google/generative-ai": {
      "version": "0.21.0",
      "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.21.0.tgz",
      "integrity": "sha512-7XhUbtnlkSEZK15kN3t+tzIMxsbKm/dSkKBFalj+20NvPKe1kBY7mR2P7vuijEn+f06z5+A8bVGKO0v39cr6Wg==",
      "license": "Apache-2.0",
      "engines": {
        "node": ">=18.0.0"
      }
    },
    "node_modules/dotenv": {
      "version": "16.4.7",
      "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.7.tgz",
      "integrity": "sha512-47qPchRCykZC03FhkYAhrvwU4xDBFIj1QPqaarj6mdM/hgUzfPHcpkHJOn3mJAufFeeAxAzeGsr5X0M4k6fLZQ==",
      "license": "BSD-2-Clause",
      "engines": {
        "node": ">=12"
      },
      "funding": {
        "url": "https://dotenvx.com"
      }
    },
    "node_modules/undici": {
      "version": "7.3.0",
      "resolved": "https://registry.npmjs.org/undici/-/undici-7.3.0.tgz",
      "integrity": "sha512-Qy96NND4Dou5jKoSJ2gm8ax8AJM/Ey9o9mz7KN1bb9GP+G0l20Zw8afxTnY2f4b7hmhn/z8aC2kfArVQlAhFBw==",
      "license": "MIT",
      "engines": {
        "node": ">=20.18.1"
      }
    }
  }
 }
--- a/package.json
+++ b/package.json
@ -0,0 +1,18 @@
 {
  "name": "agentic-search",
  "version": "1.0.0",
  "main": "index.js",
  "scripts": {
    "build": "tsc",
    "as": "npx ts-node src/agent.ts"
  },
  "keywords": [],
  "author": "",
  "license": "ISC",
  "description": "",
  "dependencies": {
    "@google/generative-ai": "^0.21.0",
    "dotenv": "^16.4.7",
    "undici": "^7.3.0"
  }
 }
--- a/src/agent.ts
+++ b/src/agent.ts
@ -0,0 +1,248 @@
 import {GoogleGenerativeAI, SchemaType} from "@google/generative-ai";
 import dotenv from 'dotenv';
 import {ProxyAgent, setGlobalDispatcher} from "undici";
 import {readUrl} from "./tools/read";
 import {search} from "./tools/search";
 // Load variables from .env first, so that an https_proxy entry defined there
 // is already present in process.env when the proxy check below runs.
 // (dotenv does not overwrite variables that are already set in the environment.)
 dotenv.config();
 // Read the proxy URL from https_proxy and install it as undici's global dispatcher.
 // NOTE(review): the Jina tools in ./tools use node's `https` module, which presumably
 // does not go through undici's dispatcher — confirm whether they need proxying too.
 if (process.env.https_proxy) {
  try {
    // Parsing through URL validates the value; an invalid URL throws and is logged below.
    const proxyUrl = new URL(process.env.https_proxy).toString();
    const dispatcher = new ProxyAgent({uri: proxyUrl});
    setGlobalDispatcher(dispatcher);
  } catch (error) {
    // Best effort: a broken proxy setting is reported but does not abort startup.
    console.error('Failed to set proxy:', error);
  }
 }
 // Structured-output schema for the model's reply. It is passed as `responseSchema`
 // when the generative model is created, and the agent loop branches on the
 // `action` field of the parsed reply. Only `action`, `reasoning` and `confidence`
 // are required; the other fields are each tied to one action (see descriptions).
 const schema = {
  type: SchemaType.OBJECT,
  properties: {
    // Which step the agent takes next; the agent loop compares this against the enum values.
    action: {
      type: SchemaType.STRING,
      enum: ["search", "readURL", "rewrite", "answer", "reflect"],
      description: "Must match exactly one action type"
    },
    // 'reflect' payload: open sub-questions; the agent loop queues them and asks them next.
    remainedGaps: {
      type: SchemaType.ARRAY,
      items: {
        type: SchemaType.STRING
      },
      description: "Only required when choosing 'reflect' action, must be an array of gaps in the knowledge",
    },
    // 'search' payload: keywords; the agent loop joins them with spaces into one query.
    searchKeywords: {
      type: SchemaType.ARRAY,
      items: {
        type: SchemaType.STRING
      },
      description: "Only required when choosing 'search' action, must be an array of keywords"
    },
    // 'readURL' payload: URLs that the agent loop fetches in parallel.
    URLTargets: {
      type: SchemaType.ARRAY,
      items: {
        type: SchemaType.STRING
      },
      description: "Only required when choosing 'readURL' action, must be an array of URLs"
    },
    // 'rewrite' payload: a rephrased query that the agent loop searches with immediately.
    rewriteQuery: {
      type: SchemaType.STRING,
      description: "Only required when choosing 'rewrite' action, must be a new query that might lead to better or more relevant information",
    },
    // 'answer' payload: the final answer text.
    answer: {
      type: SchemaType.STRING,
      description: "Only required when choosing 'answer' action, must be the final answer in natural language"
    },
    // 'answer' payload: supporting documents, each with a title and a URL.
    references: {
      type: SchemaType.ARRAY,
      items: {
        type: SchemaType.OBJECT,
        properties: {
          title: {
            type: SchemaType.STRING,
            description: "Title of the document; must be directly from the context"
          },
          url: {
            type: SchemaType.STRING,
            description: "URL of the document; must be directly from the context"
          }
        },
        required: ["title", "url"]
      },
      description: "Only required when choosing 'answer' action, must be an array of references"
    },
    // Always required: the model's justification for the chosen action.
    reasoning: {
      type: SchemaType.STRING,
      description: "Explain why choose this action?"
    },
    // Always required: self-reported confidence in [0.0, 1.0].
    confidence: {
      type: SchemaType.NUMBER,
      minimum: 0.0,
      maximum: 1.0,
      description: "Represents the confidence level of in answering the question BEFORE taking the action. Must be a float between 0.0 and 1.0",
    }
  },
  required: ["action", "reasoning", "confidence"],
 };
 // Gemini credential: mandatory, so startup aborts when it is absent or empty.
 const apiKey = process.env['GEMINI_API_KEY'] as string;
 if (!apiKey) {
  throw new Error('GEMINI_API_KEY  not found');
 }
 // Jina credential: forwarded as the bearer token to the search/read tools.
 const jinaToken = process.env['JINA_API_KEY'] as string;
 if (!jinaToken) {
  throw new Error('JINA_API_KEY not found');
 }
 // Model handle shared by the agent loop; replies are constrained to JSON matching `schema`.
 const modelName = 'gemini-1.5-flash';
 const genAI = new GoogleGenerativeAI(apiKey);
 const model = genAI.getGenerativeModel({
  model: modelName,
  generationConfig: {temperature: 0.7, responseMimeType: 'application/json', responseSchema: schema}
 });
 /**
  * Builds the instruction prompt sent to the model for one agent step.
  *
  * @param question The question the model is asked to answer in this step.
  * @param context Optional accumulated context; when it is missing or empty,
  *                the "You have the following context" section is left out.
  * @returns The complete prompt text.
  */
 function getPrompt(question: string, context?: string) {
  // The context section is only rendered for a non-empty context string.
  const preamble = context
    ? `You have the following context:
    ${context}
     `
    : ``;
  return `You are an AI research analyst capable of multi-step reasoning.
 ${preamble}
 Based on the context and the knowledge in your training data, you must answer the following question with 100% confidence:
 ${question}
 If you are not 100% confident in your answer, you should first take a reflection to identify the gaps in your knowledge:
 **reflect**:
 - Challenge existing knowledge with what-if thinking.
 - Reflect on the gaps in your knowledge and ask for more questions to fill those gaps.
 - You use this action when you feel like you need to first answer those questions before proceeding with the current one.
 - This action has higher priority than all other actions.
 If you are still not confident after reflecting, you can take one of the following actions:
 **search**:
 - Search external real-world information via a public search engine.
 - The search engine works best with short, keyword-based queries.
 - You use this action when you need more world knowledge or up to date information that is not covered in your training data or cut-off knowledge base.
 **readURL**:
 - Provide a specific URL to fetch and read its content in detail.
 - Any URL must come from the current context.
 - You use this action when you feel like that particular URL might have the information you need to answer the question.
 **rewrite**:
 - Propose a new or modified query (in a different phrasing, more details, or from another angle) that might lead to better or more relevant information.
 - This rewritten query can help the search engine find more accurate results, thereby improving your confidence in answering the original question.
 - You use this action when you think the current query is too vague, broad, or ambiguous; or the search engine results are not satisfactory.
 **answer**:
 - Provide your answer to the user, **only** if you are completely sure.
 When you decide on your action, respond **only** in valid JSON format according to the schema below.
 **Important**:
 - Do not include any extra keys.
 - Do not include explanatory text, markdown formatting, or reasoning in the final output.
 - Output exactly one JSON object in your response.
   `;
 }
 /**
  * Runs the agent loop for `question`: prompts the model, carries out the action it
  * chooses (reflect / search / readURL / rewrite), appends the outcome to the running
  * context, and repeats until the original question is answered or the token budget
  * is spent.
  *
  * Sub-questions returned by a `reflect` action are queued and asked before the loop
  * returns to the original question. An `answer` to such a sub-question is stored in
  * the context rather than ending the run; only an answer to `question` itself stops it.
  *
  * @param question The user's original question.
  * @returns The model's final `answer` action object, or `undefined` when the token
  *          budget ran out before the original question was answered.
  */
 async function getResponse(question: string) {
  const tokenBudget = 300000;
  let totalTokens = 0;
  let context = '';
  let step = 0;
  const gaps: string[] = [];
  // Appends one JSON-serialised step record to the running context.
  const remember = (record: object) => {
    context = `${context}\n${JSON.stringify(record)}`;
  };
  while (totalTokens < tokenBudget) {
    // Pending sub-questions take precedence over the original question.
    const currentQuestion = gaps.shift() ?? question;
    const prompt = getPrompt(currentQuestion, context);
    console.log('Prompt length:', prompt.length);
    const result = await model.generateContent(prompt);
    const response = await result.response;
    const usage = response.usageMetadata;
    step++;
    totalTokens += usage?.totalTokenCount || 0;
    console.log(`Tokens: ${totalTokens}/${tokenBudget}`);
    let action;
    try {
      action = JSON.parse(response.text());
    } catch (error) {
      // Unparseable model output: the tokens it cost were already added above,
      // so log it and ask again instead of rejecting the whole run.
      console.error('Failed to parse model response:', error);
      continue;
    }
    console.log('Action:', action);
    if (action.action === 'answer') {
      if (currentQuestion === question) {
        return action;
      }
      // Answer to a sub-question: keep it as context for the original question.
      remember({step, ...action, question: currentQuestion});
      continue;
    }
    if (action.action === 'reflect' && action.remainedGaps) {
      gaps.push(...action.remainedGaps);
      remember({step, ...action, question: currentQuestion});
      continue;
    }
    try {
      if (action.action === 'search' && action.searchKeywords) {
        const results = await search(action.searchKeywords.join(' '), jinaToken);
        remember({step, ...action, question: currentQuestion, result: results.data});
        // Tokens reported by the tool count against the same budget.
        totalTokens += results.data.reduce((sum, r) => sum + r.usage.tokens, 0);
      } else if (action.action === 'readURL' && action.URLTargets?.length) {
        const urlResults = await Promise.all(
          action.URLTargets.map(async (url: string) => {
            const response = await readUrl(url, jinaToken);
            return {url, result: response};
          })
        );
        remember({step, ...action, question: currentQuestion, result: urlResults});
        totalTokens += urlResults.reduce((sum, r) => sum + r.result.data.usage.tokens, 0);
      } else if (action.action === 'rewrite' && action.rewriteQuery) {
        // Immediately search with the new rewriteQuery
        const results = await search(action.rewriteQuery, jinaToken);
        remember({step, ...action, question: currentQuestion, result: results.data});
        totalTokens += results.data.reduce((sum, r) => sum + r.usage.tokens, 0);
      }
    } catch (error) {
      // Tool failures are logged and the loop carries on with the context gathered so far.
      console.error('Error fetching data:', error);
    }
  }
  // Token budget exhausted without an answer to the original question.
  return undefined;
 }
 // Entry point: the question is the first CLI argument (empty string when omitted).
 const question = process.argv[2] || "";
 // Handle a rejected run explicitly instead of leaving an unhandled promise rejection.
 getResponse(question).catch((error: unknown) => {
  console.error('Agent run failed:', error);
  process.exitCode = 1;
 });
--- a/src/tools/read.ts
+++ b/src/tools/read.ts
@ -0,0 +1,43 @@
 import https from 'https';
 /**
  * Declared shape of the JSON body that `readUrl` resolves with (the reply of `r.jina.ai`).
  * The body is not validated at runtime; `readUrl` resolves with whatever `JSON.parse` returns.
  * NOTE(review): field meanings are inferred from their names — confirm against the Jina Reader API docs.
  */
 interface ReadResponse {
  code: number;
  status: number;
  data: {
    title: string;
    description: string;
    url: string;
    content: string;
    // Token count for this document; the agent loop adds it to its running token total.
    usage: { tokens: number; };
  };
 }
 /**
  * Sends `url` to the Jina Reader endpoint (`POST https://r.jina.ai/`) and resolves
  * with the parsed JSON reply.
  *
  * @param url The page to read; sent as `{"url": ...}` in the request body.
  * @param token Jina API key, sent as a bearer token.
  * @returns The parsed response body. The body is not validated against `ReadResponse`.
  * @throws Rejects on a request/response stream error or when the body is not valid JSON.
  */
 export function readUrl(url: string, token: string): Promise<ReadResponse> {
  return new Promise((resolve, reject) => {
    const data = JSON.stringify({url});
    const options = {
      hostname: 'r.jina.ai',
      port: 443,
      path: '/',
      method: 'POST',
      headers: {
        'Accept': 'application/json',
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json',
        // Content-Length is a byte count; string length is wrong for non-ASCII URLs.
        'Content-Length': Buffer.byteLength(data),
        'X-Retain-Images': 'none'
      }
    };
    const req = https.request(options, (res) => {
      // Decode as UTF-8 on the stream so multi-byte characters split across chunks stay intact.
      res.setEncoding('utf8');
      let responseData = '';
      res.on('data', (chunk) => {
        responseData += chunk;
      });
      res.on('error', reject);
      res.on('end', () => {
        // A throw inside this handler would escape the promise; reject instead.
        try {
          resolve(JSON.parse(responseData));
        } catch (error) {
          reject(error);
        }
      });
    });
    req.on('error', reject);
    req.write(data);
    req.end();
  });
 }
--- a/src/tools/search.ts
+++ b/src/tools/search.ts
@ -0,0 +1,38 @@
 import https from 'https';
 /**
  * Declared shape of the JSON body that `search` resolves with (the reply of `s.jina.ai`).
  * The body is not validated at runtime; `search` resolves with whatever `JSON.parse` returns.
  * NOTE(review): field meanings are inferred from their names — confirm against the Jina Search API docs.
  */
 interface SearchResponse {
  code: number;
  status: number;
  // One entry per search hit.
  data: Array<{
    title: string;
    description: string;
    url: string;
    content: string;
    // Token count for this hit; the agent loop adds it to its running token total.
    usage: { tokens: number; };
  }>;
 }
 /**
  * Runs a web search through the Jina Search endpoint (`GET https://s.jina.ai/<query>`)
  * and resolves with the parsed JSON reply.
  *
  * @param query Search query; URI-encoded into the request path.
  * @param token Jina API key, sent as a bearer token.
  * @returns The parsed response body. The body is not validated against `SearchResponse`.
  * @throws Rejects on a request/response stream error or when the body is not valid JSON.
  */
 export function search(query: string, token: string): Promise<SearchResponse> {
  return new Promise((resolve, reject) => {
    const options = {
      hostname: 's.jina.ai',
      port: 443,
      path: `/${encodeURIComponent(query)}`,
      method: 'GET',
      headers: {
        'Accept': 'application/json',
        'Authorization': `Bearer ${token}`,
        'X-Retain-Images': 'none'
      }
    };
    const req = https.request(options, (res) => {
      // Decode as UTF-8 on the stream so multi-byte characters split across chunks stay intact.
      res.setEncoding('utf8');
      let responseData = '';
      res.on('data', (chunk) => {
        responseData += chunk;
      });
      res.on('error', reject);
      res.on('end', () => {
        // A throw inside this handler would escape the promise; reject instead.
        try {
          resolve(JSON.parse(responseData));
        } catch (error) {
          reject(error);
        }
      });
    });
    req.on('error', reject);
    req.end();
  });
 }