chore: first commit

This commit is contained in:
Han Xiao 2025-01-27 15:21:40 +08:00
parent 2415ec3ebd
commit 21af8a6c82
3 changed files with 213 additions and 22 deletions

View File

@ -16,3 +16,68 @@ npm run dev "what is the twitter account of jina ai's founder"
npm run dev "who will be president of US in 2028?"
npm run dev "what should be jina ai strategy for 2025?"
```
```mermaid
flowchart TD
subgraph Inputs[System Inputs]
OrigQuestion[Original Question]
TokenBudget[Token Budget]
end
subgraph States[Global States]
direction TB
GapQueue[Question Queue]
ContextStore[Action History]
BadStore[Failed Attempts]
QuestionStore[Question History]
KeywordStore[Keyword History]
end
subgraph Outputs[System Outputs]
FinalAnswer[Answer]
end
TokenBudget -->|check| End[System End]
OrigQuestion -->|initialize| GapQueue
GapQueue -->|pop| NextQ[Question]
NextQ -->|generate| AIResponse[Response]
AIResponse -->|analyze| ActionType{Action Type}
ActionType -->|is search| SearchOp[Search Results]
SearchOp -->|store| ContextStore
SearchOp -->|add| KeywordStore
SearchOp -->|continue| TokenBudget
ActionType -->|is read| URLData[URL Content]
URLData -->|store| ContextStore
URLData -->|continue| TokenBudget
ActionType -->|is reflect| NewQuestions[Questions]
NewQuestions -->|check against| QuestionStore
NewQuestions -->|filter| UniqueQuestions[Unique Questions]
UniqueQuestions -->|push to| GapQueue
UniqueQuestions -->|add to| QuestionStore
UniqueQuestions -->|continue| TokenBudget
ActionType -->|is answer| AnswerCheck{Original Question}
AnswerCheck -->|compare with| OrigQuestion
AnswerCheck -->|is not| ContextStore
ContextStore -->|continue| TokenBudget
AnswerCheck -->|is| Evaluation[Answer Quality]
Evaluation -->|check| ValidCheck{Quality}
ValidCheck -->|passes| FinalAnswer
FinalAnswer -->|return| End
ValidCheck -->|fails| BadStore
ValidCheck -->|fails and clear| ContextStore
classDef state fill:#e1f5fe,stroke:#01579b
classDef input fill:#e8f5e9,stroke:#2e7d32
classDef output fill:#fce4ec,stroke:#c2185b
class GapQueue,ContextStore,BadStore,QuestionStore,KeywordStore state
class OrigQuestion,TokenBudget input
class FinalAnswer output
```

View File

@ -6,6 +6,7 @@ import fs from 'fs/promises';
import {SafeSearchType, search} from "duck-duck-scrape";
import {rewriteQuery} from "./tools/query-rewriter";
import {dedupQueries} from "./tools/dedup";
import {evaluateAnswer} from "./tools/evaluator";
// Proxy setup remains the same
if (process.env.https_proxy) {
@ -90,12 +91,6 @@ type ResponseSchema = {
type: SchemaType.STRING;
description: string;
};
confidence: {
type: SchemaType.NUMBER;
minimum: number;
maximum: number;
description: string;
};
questionsToAnswer?: {
type: SchemaType.ARRAY;
items: {
@ -164,18 +159,20 @@ function getSchema(allowReflect: boolean): ResponseSchema {
type: SchemaType.STRING,
description: "Explain why choose this action?"
},
confidence: {
type: SchemaType.NUMBER,
minimum: 0.0,
maximum: 1.0,
description: "Represents the confidence level of in answering the question BEFORE taking the action.",
}
},
required: ["action", "reasoning", "confidence"],
required: ["action", "reasoning"],
};
}
function getPrompt(question: string, context?: any[], allQuestions?: string[], allowReflect: boolean = false) {
function getPrompt(question: string, context?: any[], allQuestions?: string[], allowReflect: boolean = false, badContext?: any[] ) {
const badContextIntro = badContext?.length ?
`Your last unsuccessful answer contains these previous actions and knowledge:
${JSON.stringify(badContext, null, 2)}
Learn to avoid these mistakes and think of a new approach, from a different angle, e.g. search for different keywords, read different URLs, or ask different questions.
`
: '';
const contextIntro = context?.length ?
`Your current context contains these previous actions and knowledge:
${JSON.stringify(context, null, 2)}
@ -183,7 +180,7 @@ function getPrompt(question: string, context?: any[], allQuestions?: string[], a
: '';
let actionsDescription = `
Using your training data and prior context, answer the following question with absolute certainty:
Using your training data and prior lessons learned, answer the following question with absolute certainty:
${question}
@ -216,7 +213,7 @@ ${allQuestions?.length ? `Existing questions you have asked, make sure to not re
`;
}
return `You are an advanced AI research analyst specializing in multi-step reasoning.${contextIntro}${actionsDescription}
return `You are an advanced AI research analyst specializing in multi-step reasoning.${contextIntro}${badContextIntro}${actionsDescription}
Respond exclusively in valid JSON format matching exact JSON schema.
@ -227,14 +224,14 @@ Critical Requirements:
- Maintain strict JSON syntax`;
}
async function getResponse(question: string) {
let tokenBudget = 30000000;
async function getResponse(question: string, tokenBudget: number=30000000) {
let totalTokens = 0;
let context = [];
let step = 0;
let gaps: string[] = [question]; // All questions to be answered including the original question
let allQuestions = [question];
let allKeywords = [];
let badContext = [];
while (totalTokens < tokenBudget) {
// add 1s delay to avoid rate limiting
@ -267,14 +264,19 @@ async function getResponse(question: string) {
console.log('Question-Action:', currentQuestion, action);
if (action.action === 'answer') {
if (currentQuestion === question) {
return action;
} else {
context.push({
context.push({
step,
question: currentQuestion,
...action,
});
if (currentQuestion === question) {
const evaluation = await evaluateAnswer(currentQuestion, action.answer);
if (evaluation) {
return action;
} else {
badContext.push(...context);
context = [];
}
}
}

124
src/tools/evaluator.ts Normal file
View File

@ -0,0 +1,124 @@
import { GoogleGenerativeAI, SchemaType } from "@google/generative-ai";
import dotenv from 'dotenv';
import { ProxyAgent, setGlobalDispatcher } from "undici";
// Proxy setup: route all undici-based HTTP(S) traffic (including the Gemini
// SDK's fetch calls) through the proxy named in the `https_proxy` env var.
if (process.env.https_proxy) {
  try {
    // new URL(...) validates the proxy address before it is installed.
    const proxyUrl = new URL(process.env.https_proxy).toString();
    const dispatcher = new ProxyAgent({ uri: proxyUrl });
    // Process-wide: affects every undici consumer, not just this module.
    setGlobalDispatcher(dispatcher);
  } catch (error) {
    // Best-effort: a malformed proxy URL is logged and ignored so the
    // script can still attempt a direct connection.
    console.error('Failed to set proxy:', error);
  }
}

// Load .env before reading GEMINI_API_KEY below.
dotenv.config();

const apiKey = process.env.GEMINI_API_KEY;
if (!apiKey) {
  // Fail fast at import time: nothing in this module works without a key.
  throw new Error("GEMINI_API_KEY not found in environment variables");
}
// Shape of the JSON object the model is forced to return
// (mirrors `responseSchema` below).
type EvaluationResponse = {
  // Verdict: does the answer properly address the question?
  is_valid_answer: boolean;
  // Model-written explanation of the verdict.
  reasoning: string;
};

// Structured-output schema handed to the Gemini SDK via
// `generationConfig.responseSchema`, constraining the model's reply
// to exactly this JSON object.
const responseSchema = {
  type: SchemaType.OBJECT,
  properties: {
    is_valid_answer: {
      type: SchemaType.BOOLEAN,
      description: "Whether the answer properly addresses the question"
    },
    reasoning: {
      type: SchemaType.STRING,
      description: "Detailed explanation of the evaluation"
    }
  },
  required: ["is_valid_answer", "reasoning"]
};
// Model configured for evaluation calls.
const modelName = 'gemini-1.5-flash';
const genAI = new GoogleGenerativeAI(apiKey);
const model = genAI.getGenerativeModel({
  model: modelName,
  generationConfig: {
    // Low temperature: evaluations should be close to deterministic.
    temperature: 0.1,
    // Request strict JSON output conforming to `responseSchema` above.
    responseMimeType: "application/json",
    responseSchema: responseSchema
  }
});
/**
 * Builds the evaluation prompt for one question/answer pair.
 *
 * Both inputs are JSON-stringified before interpolation so embedded quotes
 * or newlines cannot break the prompt structure.
 *
 * @param question - The question being evaluated.
 * @param answer - The candidate answer to judge.
 * @returns The full prompt text sent to the model.
 */
function getPrompt(question: string, answer: string): string {
  const questionJson = JSON.stringify(question);
  const answerJson = JSON.stringify(answer);
  return `You are an expert evaluator of question-answer pairs. Analyze if the given answer properly addresses the question and provides meaningful information.
Core Evaluation Criteria:
1. Completeness: Answer must directly address the main point of the question
2. Clarity: Answer should be clear and unambiguous
3. Informativeness: Answer must provide substantial, useful information
4. Specificity: Generic or vague responses are not acceptable
5. Definitiveness: "I don't know" or highly uncertain responses are not valid
6. Relevance: Answer must be directly related to the question topic
7. Accuracy: Information provided should be factually sound (if verifiable)
Examples:
Question: "What are the system requirements for running Python 3.9?"
Answer: "I'm not entirely sure, but I think you need a computer with some RAM."
Evaluation: {
"is_valid_answer": false,
"reasoning": "The answer is vague, uncertain, and lacks specific information about actual system requirements. It fails the specificity and informativeness criteria."
}
Question: "What are the system requirements for running Python 3.9?"
Answer: "Python 3.9 requires: Windows 7 or later, macOS 10.11 or later, or Linux. Minimum 4GB RAM recommended, 2GB disk space, and x86-64 processor. For Windows, you'll need Microsoft Visual C++ 2015 or later."
Evaluation: {
"is_valid_answer": true,
"reasoning": "The answer is comprehensive, specific, and covers all key system requirements across different operating systems. It provides concrete numbers and necessary additional components."
}
Now evaluate this pair:
Question: ${questionJson}
Answer: ${answerJson}`;
}
/**
 * Asks the model whether `answer` adequately answers `question`.
 *
 * Builds the evaluation prompt, calls Gemini, parses the structured JSON
 * reply, and returns its `is_valid_answer` verdict.
 *
 * @param question - The question being evaluated.
 * @param answer - The candidate answer to judge.
 * @returns true when the model judges the answer valid, false otherwise.
 * @throws Re-throws any API or JSON-parse error after logging it, leaving
 *         failure handling to the caller.
 */
export async function evaluateAnswer(question: string, answer: string): Promise<boolean> {
  try {
    const prompt = getPrompt(question, answer);
    const result = await model.generateContent(prompt);
    const response = await result.response;
    // response.text() is the raw JSON string; the schema in generationConfig
    // should keep it well-formed, but a malformed reply still lands in catch.
    const json = JSON.parse(response.text()) as EvaluationResponse;
    console.log('Evaluation:', json);
    return json.is_valid_answer;
  } catch (error) {
    console.error('Error in answer evaluation:', error);
    throw error;
  }
}
// Example usage
/**
 * CLI entry point: `node evaluator "<question>" "<answer>"`.
 * Prints the pair and the boolean evaluation result; exits with code 1
 * when either argument is missing.
 */
async function main() {
  const [question = '', answer = ''] = process.argv.slice(2);
  if (!question || !answer) {
    console.error('Please provide both question and answer as command line arguments');
    process.exit(1);
  }

  console.log('\nQuestion:', question);
  console.log('Answer:', answer);

  try {
    const verdict = await evaluateAnswer(question, answer);
    console.log('\nEvaluation Result:', verdict);
  } catch (error) {
    console.error('Failed to evaluate answer:', error);
  }
}

// Run only when executed directly, not when imported as a module.
if (require.main === module) {
  main().catch(console.error);
}