chore: first commit

2025-12-26 06:28:56 +08:00 · 2025-01-27 15:21:40 +08:00 · 2025-01-27 15:21:40 +08:00 · 21af8a6c82
commit 21af8a6c82
parent 2415ec3ebd
3 changed files with 213 additions and 22 deletions
--- a/README.md
+++ b/README.md
@ -16,3 +16,68 @@ npm run dev "what is the twitter account of jina ai's founder"
 npm run dev "who will be president of US in 2028?"
 npm run dev "what should be jina ai strategy for 2025?"
 ```
 ```mermaid
 flowchart TD
    subgraph Inputs[System Inputs]
        OrigQuestion[Original Question]
        TokenBudget[Token Budget]
    end
    subgraph States[Global States]
        direction TB
        GapQueue[Question Queue]
        ContextStore[Action History]
        BadStore[Failed Attempts]
        QuestionStore[Question History]
        KeywordStore[Keyword History]
    end
    subgraph Outputs[System Outputs]
        FinalAnswer[Answer]
    end
    TokenBudget -->|check| End[System End]
    OrigQuestion -->|initialize| GapQueue
    GapQueue -->|pop| NextQ[Question]
    NextQ -->|generate| AIResponse[Response]
    AIResponse -->|analyze| ActionType{Action Type}
    ActionType -->|is search| SearchOp[Search Results]
    SearchOp -->|store| ContextStore
    SearchOp -->|add| KeywordStore
    SearchOp -->|continue| TokenBudget
    ActionType -->|is read| URLData[URL Content]
    URLData -->|store| ContextStore
    URLData -->|continue| TokenBudget
    ActionType -->|is reflect| NewQuestions[Questions]
    NewQuestions -->|check against| QuestionStore
    NewQuestions -->|filter| UniqueQuestions[Unique Questions]
    UniqueQuestions -->|push to| GapQueue
    UniqueQuestions -->|add to| QuestionStore
    UniqueQuestions -->|continue| TokenBudget
    ActionType -->|is answer| AnswerCheck{Original Question}
    AnswerCheck -->|compare with| OrigQuestion
    AnswerCheck -->|is not| ContextStore
    ContextStore -->|continue| TokenBudget
    AnswerCheck -->|is| Evaluation[Answer Quality]
    Evaluation -->|check| ValidCheck{Quality}
    ValidCheck -->|passes| FinalAnswer
    FinalAnswer -->|return| End
    ValidCheck -->|fails| BadStore
    ValidCheck -->|fails and clear| ContextStore
    classDef state fill:#e1f5fe,stroke:#01579b
    classDef input fill:#e8f5e9,stroke:#2e7d32
    classDef output fill:#fce4ec,stroke:#c2185b
    class GapQueue,ContextStore,BadStore,QuestionStore,KeywordStore state
    class OrigQuestion,TokenBudget input
    class FinalAnswer output
 ```
--- a/src/agent.ts
+++ b/src/agent.ts
@ -6,6 +6,7 @@ import fs from 'fs/promises';
 import {SafeSearchType, search} from "duck-duck-scrape";
 import {rewriteQuery} from "./tools/query-rewriter";
 import {dedupQueries} from "./tools/dedup";
 import {evaluateAnswer} from "./tools/evaluator";
 // Proxy setup remains the same
 if (process.env.https_proxy) {
@ -90,12 +91,6 @@ type ResponseSchema = {
      type: SchemaType.STRING;
      description: string;
    };
    confidence: {
      type: SchemaType.NUMBER;
      minimum: number;
      maximum: number;
      description: string;
    };
    questionsToAnswer?: {
      type: SchemaType.ARRAY;
      items: {
@ -164,18 +159,20 @@ function getSchema(allowReflect: boolean): ResponseSchema {
        type: SchemaType.STRING,
        description: "Explain why choose this action?"
      },
      confidence: {
        type: SchemaType.NUMBER,
        minimum: 0.0,
        maximum: 1.0,
        description: "Represents the confidence level of in answering the question BEFORE taking the action.",
      }
    },
-    required: ["action", "reasoning", "confidence"],
+    required: ["action", "reasoning"],
  };
 }
-function getPrompt(question: string, context?: any[], allQuestions?: string[], allowReflect: boolean = false) {
+function getPrompt(question: string, context?: any[], allQuestions?: string[], allowReflect: boolean = false, badContext?: any[] ) {
  const badContextIntro = badContext?.length ?
    `Your last unsuccessful answer contains these previous actions and knowledge:
    ${JSON.stringify(badContext, null, 2)}
    Learn to avoid these mistakes and think of a new approach, from a different angle, e.g. search for different keywords, read different URLs, or ask different questions.
    `
    : '';
  const contextIntro = context?.length ?
    `Your current context contains these previous actions and knowledge:
    ${JSON.stringify(context, null, 2)}
@ -183,7 +180,7 @@ function getPrompt(question: string, context?: any[], allQuestions?: string[], a
    : '';
  let actionsDescription = `
-Using your training data and prior context, answer the following question with absolute certainty:
+Using your training data and prior lessons learned, answer the following question with absolute certainty:
 ${question}
@ -216,7 +213,7 @@ ${allQuestions?.length ? `Existing questions you have asked, make sure to not re
  `;
  }
-  return `You are an advanced AI research analyst specializing in multi-step reasoning.${contextIntro}${actionsDescription}
+  return `You are an advanced AI research analyst specializing in multi-step reasoning.${contextIntro}${badContextIntro}${actionsDescription}
 Respond exclusively in valid JSON format matching exact JSON schema.
@ -227,14 +224,14 @@ Critical Requirements:
 - Maintain strict JSON syntax`;
 }
-async function getResponse(question: string) {
+async function getResponse(question: string, tokenBudget: number=30000000) {
  let tokenBudget = 30000000;
  let totalTokens = 0;
  let context = [];
  let step = 0;
  let gaps: string[] = [question];  // All questions to be answered including the original question
  let allQuestions = [question];
  let allKeywords = [];
  let badContext = [];
  while (totalTokens < tokenBudget) {
    // add 1s delay to avoid rate limiting
@ -267,14 +264,19 @@ async function getResponse(question: string) {
    console.log('Question-Action:', currentQuestion, action);
    if (action.action === 'answer') {
-      if (currentQuestion === question) {
+      context.push({
        return action;
      } else {
        context.push({
          step,
          question: currentQuestion,
          ...action,
        });
      if (currentQuestion === question) {
        const evaluation = await evaluateAnswer(currentQuestion, action.answer);
        if (evaluation) {
          return action;
        } else {
          badContext.push(...context);
          context = [];
        }
      }
    }
--- a/src/tools/evaluator.ts
+++ b/src/tools/evaluator.ts
@ -0,0 +1,124 @@
 import { GoogleGenerativeAI, SchemaType } from "@google/generative-ai";
 import dotenv from 'dotenv';
 import { ProxyAgent, setGlobalDispatcher } from "undici";
 // Environment bootstrap.
 // Load .env FIRST so that variables defined only in the .env file (including
 // https_proxy and GEMINI_API_KEY) are visible to the process.env reads below.
 // Previously the proxy check ran before dotenv.config(), so a proxy configured
 // only in .env was silently ignored.
 dotenv.config();
 // Proxy setup: when https_proxy is set, install an undici ProxyAgent as the
 // global dispatcher.
 if (process.env.https_proxy) {
  try {
    // Round-trip through URL so a malformed proxy value fails here (and is
    // logged below) instead of being handed to ProxyAgent as-is.
    const proxyUrl = new URL(process.env.https_proxy).toString();
    const dispatcher = new ProxyAgent({ uri: proxyUrl });
    setGlobalDispatcher(dispatcher);
  } catch (error) {
    // Best-effort: a bad proxy setting is reported but does not abort startup.
    console.error('Failed to set proxy:', error);
  }
 }
 // The Gemini API key is mandatory; fail fast at module load when it is absent.
 const apiKey = process.env.GEMINI_API_KEY;
 if (!apiKey) {
  throw new Error("GEMINI_API_KEY not found in environment variables");
 }
 // Shape that evaluateAnswer() casts the parsed model reply to.
 type EvaluationResponse = {
  is_valid_answer: boolean;
  reasoning: string;
 };
 // Response schema handed to the model via generationConfig.responseSchema.
 // Both fields are listed as required.
 const responseSchema = {
  type: SchemaType.OBJECT,
  properties: {
    is_valid_answer: {
      type: SchemaType.BOOLEAN,
      description: "Whether the answer properly addresses the question"
    },
    reasoning: {
      type: SchemaType.STRING,
      description: "Detailed explanation of the evaluation"
    }
  },
  required: ["is_valid_answer", "reasoning"]
 };
 // Model identifier used for answer evaluation.
 const modelName = 'gemini-1.5-flash';
 // Client is built with the API key validated above.
 const genAI = new GoogleGenerativeAI(apiKey);
 // Shared model handle used by evaluateAnswer(). It requests JSON output
 // constrained by responseSchema; the reply is parsed with JSON.parse.
 const model = genAI.getGenerativeModel({
  model: modelName,
  generationConfig: {
    // NOTE(review): low temperature, presumably to keep verdicts stable — confirm.
    temperature: 0.1,
    responseMimeType: "application/json",
    responseSchema: responseSchema
  }
 });
 /**
  * Builds the evaluator prompt for one question/answer pair.
  *
  * The prompt lists seven evaluation criteria, shows one rejected and one
  * accepted example, and ends with the pair to be judged.
  *
  * @param question - The question that was asked.
  * @param answer - The candidate answer to evaluate.
  * @returns The complete prompt text.
  */
 function getPrompt(question: string, answer: string): string {
  // Both values are JSON-encoded so they appear quoted and escaped in the prompt.
  const quotedQuestion = JSON.stringify(question);
  const quotedAnswer = JSON.stringify(answer);
  const evaluationPrompt = `You are an expert evaluator of question-answer pairs. Analyze if the given answer properly addresses the question and provides meaningful information.
 Core Evaluation Criteria:
 1. Completeness: Answer must directly address the main point of the question
 2. Clarity: Answer should be clear and unambiguous
 3. Informativeness: Answer must provide substantial, useful information
 4. Specificity: Generic or vague responses are not acceptable
 5. Definitiveness: "I don't know" or highly uncertain responses are not valid
 6. Relevance: Answer must be directly related to the question topic
 7. Accuracy: Information provided should be factually sound (if verifiable)
 Examples:
 Question: "What are the system requirements for running Python 3.9?"
 Answer: "I'm not entirely sure, but I think you need a computer with some RAM."
 Evaluation: {
  "is_valid_answer": false,
  "reasoning": "The answer is vague, uncertain, and lacks specific information about actual system requirements. It fails the specificity and informativeness criteria."
 }
 Question: "What are the system requirements for running Python 3.9?"
 Answer: "Python 3.9 requires: Windows 7 or later, macOS 10.11 or later, or Linux. Minimum 4GB RAM recommended, 2GB disk space, and x86-64 processor. For Windows, you'll need Microsoft Visual C++ 2015 or later."
 Evaluation: {
  "is_valid_answer": true,
  "reasoning": "The answer is comprehensive, specific, and covers all key system requirements across different operating systems. It provides concrete numbers and necessary additional components."
 }
 Now evaluate this pair:
 Question: ${quotedQuestion}
 Answer: ${quotedAnswer}`;
  return evaluationPrompt;
 }
 /**
  * Asks the evaluation model whether `answer` properly addresses `question`.
  *
  * The prompt comes from getPrompt(); the model's text reply is parsed as JSON
  * and logged, and its `is_valid_answer` field is returned.
  *
  * @param question - The question that was asked.
  * @param answer - The candidate answer to judge.
  * @returns The `is_valid_answer` field of the parsed reply.
  * @throws Rethrows any error from the model call or from JSON parsing, after logging it.
  */
 export async function evaluateAnswer(question: string, answer: string): Promise<boolean> {
  try {
    const generation = await model.generateContent(getPrompt(question, answer));
    const reply = await generation.response;
    // The reply text is trusted to match EvaluationResponse; it is not validated here.
    const verdict = JSON.parse(reply.text()) as EvaluationResponse;
    console.log('Evaluation:', verdict);
    return verdict.is_valid_answer;
  } catch (failure) {
    // Log for visibility, then let the caller decide how to handle the failure.
    console.error('Error in answer evaluation:', failure);
    throw failure;
  }
 }
 // Example usage
 /**
  * Command-line entry point: evaluates the question/answer pair given as the
  * first two CLI arguments and prints the result.
  *
  * Exits with status 1 when either argument is missing or empty. A failed
  * evaluation is logged rather than rethrown.
  */
 async function main() {
  // argv[0] and argv[1] are skipped; absent arguments fall back to ''.
  const [, , question = '', answer = ''] = process.argv;
  if (!(question && answer)) {
    console.error('Please provide both question and answer as command line arguments');
    process.exit(1);
  }
  console.log('\nQuestion:', question);
  console.log('Answer:', answer);
  try {
    const isValid = await evaluateAnswer(question, answer);
    console.log('\nEvaluation Result:', isValid);
  } catch (err) {
    console.error('Failed to evaluate answer:', err);
  }
 }
 // Run main() only when this file is executed directly, not when it is imported.
 if (require.main === module) {
  main().catch((err) => console.error(err));
 }