chore: first commit

2025-12-26 06:28:56 +08:00 · 2025-01-26 17:59:35 +08:00 · 2025-01-26 17:59:35 +08:00 · 33179010b7
commit 33179010b7
parent 48230560d3
1 changed file with 189 additions and 153 deletions
--- a/src/agent.ts
+++ b/src/agent.ts
@ -3,7 +3,8 @@ import dotenv from 'dotenv';
 import {ProxyAgent, setGlobalDispatcher} from "undici";
 import {readUrl} from "./tools/read";
 import {search} from "./tools/search";
-// 获取代理URL并设置代理
+
+// Configure the outbound proxy from the https_proxy environment variable, if set
 if (process.env.https_proxy) {
  try {
    const proxyUrl = new URL(process.env.https_proxy).toString();
@ -15,169 +16,210 @@ if (process.env.https_proxy) {
 }
 dotenv.config();

-const schema = {
-  type: SchemaType.OBJECT,
+type ResponseSchema = {
+  type: SchemaType.OBJECT;
  properties: {
    action: {
-      type: SchemaType.STRING,
-      enum: ["search", "readURL", "answer", "reflect"],
-      description: "Must match exactly one action type"
-    },
-    questionsToAnswer: {
-      type: SchemaType.ARRAY,
-      items: {
-        type: SchemaType.STRING,
-        description: "each question must be a single line, concise and clear. not composite or compound, less than 20 words.",
-      },
-      description: "Only required when choosing 'reflect' action, list of most important questions to answer to fill the knowledge gaps.",
-      maxItems: 2
-    },
-    searchKeywords: {
-      type: SchemaType.ARRAY,
-      items: {
-        type: SchemaType.STRING
-      },
-      description: "Only required when choosing 'search' action, must be an array of keywords",
-      maxItems: 3
-    },
+      type: SchemaType.STRING;
+      enum: string[];
+      description: string;
+    };
+    searchQuery: {
+      type: SchemaType.STRING;
+      description: string;
+    };
    URLTargets: {
-      type: SchemaType.ARRAY,
+      type: SchemaType.ARRAY;
      items: {
-        type: SchemaType.STRING
-      },
-      description: "Only required when choosing 'readURL' action, must be an array of URLs"
-    },
+        type: SchemaType.STRING;
+      };
+      description: string;
+    };
    answer: {
-      type: SchemaType.STRING,
-      description: "Only required when choosing 'answer' action, must be the final answer in natural language"
-    },
+      type: SchemaType.STRING;
+      description: string;
+    };
    references: {
-      type: SchemaType.ARRAY,
+      type: SchemaType.ARRAY;
      items: {
-        type: SchemaType.OBJECT,
+        type: SchemaType.OBJECT;
        properties: {
          title: {
-            type: SchemaType.STRING,
-            description: "Title of the document; must be directly from the context"
-          },
+            type: SchemaType.STRING;
+            description: string;
+          };
          url: {
-            type: SchemaType.STRING,
-            description: "URL of the document; must be directly from the context"
-          }
-        },
-        required: ["title", "url"]
-      },
-      description: "Only required when choosing 'answer' action, must be an array of references"
-    },
+            type: SchemaType.STRING;
+            description: string;
+          };
+        };
+        required: string[];
+      };
+      minItems: number;
+      description: string;
+    };
    reasoning: {
-      type: SchemaType.STRING,
-      description: "Explain why choose this action?"
-    },
+      type: SchemaType.STRING;
+      description: string;
+    };
    confidence: {
-      type: SchemaType.NUMBER,
-      minimum: 0.0,
-      maximum: 1.0,
-      description: "Represents the confidence level of in answering the question BEFORE taking the action. Must be a float between 0.0 and 1.0",
-    }
-  },
-  required: ["action", "reasoning", "confidence"],
+      type: SchemaType.NUMBER;
+      minimum: number;
+      maximum: number;
+      description: string;
+    };
+    questionsToAnswer?: {
+      type: SchemaType.ARRAY;
+      items: {
+        type: SchemaType.STRING;
+        description: string;
+      };
+      description: string;
+      maxItems: number;
+    };
+  };
+  required: string[];
 };

-const apiKey = process.env.GEMINI_API_KEY as string;
-const jinaToken = process.env.JINA_API_KEY as string;
-if (!apiKey) {
-  throw new Error("GEMINI_API_KEY  not found");
-}
-if (!jinaToken) {
-  throw new Error("JINA_API_KEY not found");
+function getSchema(allowReflect: boolean): ResponseSchema {
+  return {
+    type: SchemaType.OBJECT,
+    properties: {
+      action: {
+        type: SchemaType.STRING,
+        enum: allowReflect ? ["search", "readURL", "answer", "reflect"] : ["search", "readURL", "answer"],
+        description: "Must match exactly one action type"
+      },
+      questionsToAnswer: allowReflect ? {
+        type: SchemaType.ARRAY,
+        items: {
+          type: SchemaType.STRING,
+          description: "each question must be a single line, concise and clear. not composite or compound, less than 20 words.",
+        },
+        description: "Only required when choosing 'reflect' action, list of most important questions to answer to fill the knowledge gaps.",
+        maxItems: 2
+      } : undefined,
+      searchQuery: {
+        type: SchemaType.STRING,
+        description: "Only required when choosing 'search' action, must be a short, keyword-based query that BM25, tf-idf based search engines can understand.",
+      },
+      URLTargets: {
+        type: SchemaType.ARRAY,
+        items: {
+          type: SchemaType.STRING
+        },
+        description: "Only required when choosing 'readURL' action, must be an array of URLs"
+      },
+      answer: {
+        type: SchemaType.STRING,
+        description: "Only required when choosing 'answer' action, must be the final answer in natural language"
+      },
+      references: {
+        type: SchemaType.ARRAY,
+        items: {
+          type: SchemaType.OBJECT,
+          properties: {
+            title: {
+              type: SchemaType.STRING,
+              description: "Title of the document; must be directly from the context",
+            },
+            url: {
+              type: SchemaType.STRING,
+              description: "URL of the document; must be directly from the context"
+            }
+          },
+          required: ["title", "url"]
+        },
+        minItems: 1,
+        description: "Only required when choosing 'answer' action, must be an array of references"
+      },
+      reasoning: {
+        type: SchemaType.STRING,
+        description: "Explain why choose this action?"
+      },
+      confidence: {
+        type: SchemaType.NUMBER,
+        minimum: 0.0,
+        maximum: 1.0,
+        description: "Represents the confidence level of in answering the question BEFORE taking the action. Must be a float between 0.0 and 1.0",
+      }
+    },
+    required: ["action", "reasoning", "confidence"],
+  };
 }

-const modelName = 'gemini-1.5-flash';
-const genAI = new GoogleGenerativeAI(apiKey);
-const model = genAI.getGenerativeModel({
-  model: modelName,
-  generationConfig: {
-    temperature: 0.7,
-    responseMimeType: "application/json",
-    responseSchema: schema
-  }
-});
+function getPrompt(question: string, context?: string, allowReflect: boolean = false) {
+  const contextIntro = context ?
+    `\nYour current context contains these previous actions:\n\n    ${context}\n`
+    : '';

-function getPrompt(question: string, context?: string, allowReflect:boolean = false) {
-  let contextIntro = ``;
-  if (!!context) {
-    contextIntro = `
-You have the following actions records in your context:
-
-    ${context}
-     `;
-  }
-
-  let reflectAction = '';
-  if (allowReflect) {
-    reflectAction = `
-If you are not 100% confident in your answer, then identify the gaps in your knowledge with "reflect" action:
-
-**reflect**:
- Challenge existing knowledge with what-if or divide-and-conquer thinking.
- Reflect on the gaps in your knowledge and ask for most important questions to fill those gaps.
- You use this action when you feel like you need to first answer those questions before proceeding with the current one.
- Should not similar to the original question or existing questionsToAnswer in the context.
- Each question must be concise and clear less than 20 words and not composite or compound.
-    
-    `
-  }
-
-
-
-  return `You are an AI research analyst capable of multi-step reasoning.
-
-${contextIntro}
-
-Based on the previous actions and the knowledge in your training data, you must answer the following question with 100% confidence:
+  let actionsDescription = `
+Using your training data and prior context, answer the following question with absolute certainty:

 ${question}

-${reflectAction}
-Or you can take one of the following actions:
+When uncertain or needing additional information, select one of these actions:

 **search**:
- Search external real-world information via a public search engine.
- The search engine works best with short, keyword-based queries.
- You use this action when you need more world knowledge or up to date information that is not covered in your training data or cut-off knowledge base.
+- Query external sources using a public search engine
+- Optimize for concise, keyword-based searches
+- Use for recent information (post-training data) or missing domain knowledge

 **readURL**:
- Provide a specific URL to fetch and read its content in detail.
- Any URL must come from the current context.
- You use this action when you feel like that particular URL might have the information you need to answer the question.
+- Access content from specific URLs found in current context
+- Requires existing URLs from previous actions
+- Use when confident a contextual URL contains needed information

 **answer**:
- Provide your answer to the user, **only** if you are completely sure.
+- Provide final response only when 100% certain
+- Responses must be definitive (no ambiguity, uncertainty, or disclaimers)
+${allowReflect ? `- If doubts remain, use "reflect" instead` : ''}`;

-When you decide on your action, respond **only** in valid JSON format according to the schema below.
+  if (allowReflect) {
+    actionsDescription += `\n\n**reflect**:
+- Perform critical analysis through hypothetical scenarios or systematic breakdowns
+- Identify knowledge gaps and formulate essential clarifying questions
+- Questions must be:
+  - Original (not variations of existing questions)
+  - Focused on single concepts
+  - Under 20 words
+  - Non-compound/non-complex`;
+  }

-**Important**:
- Do not include any extra keys.
- Do not include explanatory text, markdown formatting, or reasoning in the final output.
- Output exactly one JSON object in your response.
-   `;
+  return `You are an advanced AI research analyst specializing in multi-step reasoning.${contextIntro}${actionsDescription}

+Respond exclusively in valid JSON format matching exact JSON schema.
+
+Critical Requirements:
+- Include ONLY ONE action type
+- Never add unsupported keys
+- Exclude all non-JSON text, markdown, or explanations
+- Maintain strict JSON syntax`;
 }

-
 async function getResponse(question: string) {
  let tokenBudget = 30000000;
  let totalTokens = 0;
-  let context = '';  // global context to store all the actions records
+  let context = '';
  let step = 0;
-  let gaps: string[] = [];
+  let gaps: string[] = [question];  // All questions to be answered, including the original question
+
  while (totalTokens < tokenBudget) {
-    const allowReflect = gaps.length === 0;
+    console.log('Gaps:', gaps)
+    const allowReflect = gaps.length <= 1;
    const currentQuestion = gaps.length > 0 ? gaps.shift()! : question;
    const prompt = getPrompt(currentQuestion, context, allowReflect);
-    console.log('Prompt length:', prompt.length);
-    console.log('Context:', context.length);
-    console.log('Gaps:', gaps.length);
+    console.log('Prompt:', prompt.length)
+
+    const model = genAI.getGenerativeModel({
+      model: modelName,
+      generationConfig: {
+        temperature: 0.7,
+        responseMimeType: "application/json",
+        responseSchema: getSchema(allowReflect)
+      }
+    });
+
    const result = await model.generateContent(prompt);
    const response = await result.response;
    const usage = response.usageMetadata;
@ -191,37 +233,36 @@ async function getResponse(question: string) {

    if (action.action === 'answer') {
      if (currentQuestion === question) {
-        return action;  // Exit only for original question's answer not the gap question
+        return action;
      } else {
-        const contextRecord = JSON.stringify({
+        context = `${context}\n${JSON.stringify({
          step,
          ...action,
          question: currentQuestion
-        });
-        context = `${context}\n${contextRecord}`;
+        })}`;
      }
    }

    if (action.action === 'reflect' && action.questionsToAnswer) {
      gaps.push(...action.questionsToAnswer);
-      const contextRecord = JSON.stringify({
+      gaps.push(question);  // always keep the original question in the gaps
+      context = `${context}\n${JSON.stringify({
        step,
        ...action,
        question: currentQuestion
-      });
-      context = `${context}\n${contextRecord}`;
+      })}`;
    }

+    // Run external actions (search / readURL), append their results to the context, and count their tokens
    try {
-      if (action.action === 'search' && action.searchKeywords) {
-        const results = await search(action.searchKeywords.join(' '), jinaToken);
-        const contextRecord = JSON.stringify({
+      if (action.action === 'search' && action.searchQuery) {
+        const results = await search(action.searchQuery, jinaToken);
+        context = `${context}\n${JSON.stringify({
          step,
          ...action,
          question: currentQuestion,
          result: results.data
-        });
-        context = `${context}\n${contextRecord}`;
+        })}`;
        totalTokens += results.data.reduce((sum, r) => sum + r.usage.tokens, 0);
      } else if (action.action === 'readURL' && action.URLTargets?.length) {
        const urlResults = await Promise.all(
@ -231,25 +272,13 @@ async function getResponse(question: string) {
          })
        );

-        const contextRecord = JSON.stringify({
+        context = `${context}\n${JSON.stringify({
          step,
          ...action,
          question: currentQuestion,
          result: urlResults
-        });
-        context = `${context}\n${contextRecord}`;
+        })}`;
        totalTokens += urlResults.reduce((sum, r) => sum + r.result.data.usage.tokens, 0);
-      } else if (action.action === 'rewrite' && action.rewriteQuery) {
-        // Immediately search with the new rewriteQuery
-        const results = await search(action.rewriteQuery, jinaToken);
-        const contextRecord = JSON.stringify({
-          step,
-          ...action,
-          question: currentQuestion,
-          result: results.data
-        });
-        context = `${context}\n${contextRecord}`;
-        totalTokens += results.data.reduce((sum, r) => sum + r.usage.tokens, 0);
      }
    } catch (error) {
      console.error('Error fetching data:', error);
@ -257,6 +286,13 @@ async function getResponse(question: string) {
  }
 }

+const apiKey = process.env.GEMINI_API_KEY as string;
+const jinaToken = process.env.JINA_API_KEY as string;
+if (!apiKey) throw new Error("GEMINI_API_KEY not found");
+if (!jinaToken) throw new Error("JINA_API_KEY not found");
+
+const modelName = 'gemini-1.5-flash';
+const genAI = new GoogleGenerativeAI(apiKey);

 const question = process.argv[2] || "";
-getResponse(question);
+getResponse(question);