chore: first commit

2025-12-26 06:28:56 +08:00 · 2025-01-30 18:07:57 +08:00 · 2025-01-30 18:07:57 +08:00 · 712d01215c
commit 712d01215c
parent 8dc5e0fe8b
2 changed files with 160 additions and 7 deletions
--- a/src/agent.ts
+++ b/src/agent.ts
@ -7,7 +7,8 @@ import {SafeSearchType, search} from "duck-duck-scrape";
 import {rewriteQuery} from "./tools/query-rewriter";
 import {dedupQueries} from "./tools/dedup";
 import {evaluateAnswer} from "./tools/evaluator";
-import {buildURLMap, StepData} from "./tools/getURLIndex";
+import {StepData} from "./tools/getURLIndex";
+import {analyzeSteps} from "./tools/error-analyzer";

 // Proxy setup remains the same
 if (process.env.https_proxy) {
@ -198,11 +199,10 @@ Your have tried the following actions but failed to find the answer to the quest

 ${badContext.map((c, i) => `
 ### Attempt ${i + 1}
-${c.join('\n')}
-`).join('\n')}
-    
-Learn to avoid these mistakes and think of a new approach, from a different angle, e.g. search for different keywords, read different URLs, or ask different questions.
-    `
+- Recap: ${c.recap}
+- Blame: ${c.blame}
+- Improvement: ${c.improvement}
+`).join('\n')}`
    : '';

  const contextIntro = context?.length ?
@ -400,7 +400,9 @@ The evaluator thinks your answer is bad because:
 ${evaluation.reasoning}
 `);
          // store the bad context and reset the diary context
-          badContext.push(diaryContext);
+          const errorAnalysis = await analyzeSteps(diaryContext);
+          console.log('Error Analysis:', errorAnalysis);
+          badContext.push(errorAnalysis);
          diaryContext = [];
          step = 0;
        }
--- a/src/tools/error-analyzer.ts
+++ b/src/tools/error-analyzer.ts
@ -0,0 +1,151 @@
+import {GoogleGenerativeAI, SchemaType} from "@google/generative-ai";
+import dotenv from 'dotenv';
+import {ProxyAgent, setGlobalDispatcher} from "undici";
+
+// Load variables from a local .env file before anything below reads
+// process.env, so that both `https_proxy` and `GEMINI_API_KEY` may be supplied
+// through it. (Previously this ran after the proxy check, which meant a proxy
+// defined only in .env was ignored.)
+dotenv.config();
+
+// Proxy setup: when `https_proxy` is set, install a ProxyAgent as undici's
+// global dispatcher.
+if (process.env.https_proxy) {
+  try {
+    // Round-tripping through URL makes a malformed value throw here, where it
+    // is caught and logged, rather than being handed to ProxyAgent as-is.
+    const proxyUrl = new URL(process.env.https_proxy).toString();
+    const dispatcher = new ProxyAgent({uri: proxyUrl});
+    setGlobalDispatcher(dispatcher);
+  } catch (error) {
+    // Best effort: a bad proxy setting is reported but does not stop the
+    // module from loading.
+    console.error('Failed to set proxy:', error);
+  }
+}
+
+// Fail at import time when the API key is missing; the client constructed
+// further down in this module needs it.
+const apiKey = process.env.GEMINI_API_KEY;
+if (!apiKey) {
+  throw new Error("GEMINI_API_KEY not found in environment variables");
+}
+
+/**
+ * Structured result returned by `analyzeSteps`. The three fields match the
+ * required properties declared in `responseSchema`.
+ */
+type EvaluationResponse = {
+    // Recap of the actions taken and the steps conducted.
+    recap: string;
+    // Which step or action was the root cause of the answer rejection.
+    blame: string;
+    // Suggested improvements for the next iteration.
+    improvement: string;
+};
+
+// Schema handed to the model through `generationConfig.responseSchema`:
+// an object with three string properties, all of them listed as required.
+// The property names correspond to the fields of `EvaluationResponse`.
+const responseSchema = {
+  type: SchemaType.OBJECT,
+  properties: {
+    recap: {
+      type: SchemaType.STRING,
+      description: "Recap of the actions taken and the steps conducted"
+    },
+    blame: {
+      type: SchemaType.STRING,
+      description: "Which step or action was the root cause of the answer rejection"
+    },
+    improvement: {
+      type: SchemaType.STRING,
+      description: "Suggested improvements for the next iteration"
+    }
+  },
+  required: ["recap", "blame", "improvement"]
+};
+
+// Identifier of the model used for the analysis.
+const modelName = 'gemini-1.5-flash';
+
+// Client and model handle are created once, when this module is loaded, and
+// reused by every `analyzeSteps` call.
+const genAI = new GoogleGenerativeAI(apiKey);
+const model = genAI.getGenerativeModel({
+  model: modelName,
+  generationConfig: {
+    // NOTE(review): temperature 0 is presumably chosen to keep the analysis
+    // as repeatable as possible — confirm against the SDK documentation.
+    temperature: 0,
+    // `analyzeSteps` feeds the response text to JSON.parse, so the reply is
+    // requested as JSON shaped by `responseSchema`.
+    responseMimeType: "application/json",
+    responseSchema: responseSchema
+  }
+});
+
+/**
+ * Builds the prompt used to analyze a rejected attempt.
+ *
+ * The prompt is a fixed text (instructions, one worked example of steps, and
+ * one example JSON analysis) followed by the given diary entries.
+ *
+ * @param diaryContext - Diary entries to analyze; they are appended verbatim
+ *   at the end of the prompt, joined with newlines.
+ * @returns The complete prompt text.
+ */
+function getPrompt(diaryContext: string[]): string {
+  return `You are an expert at analyzing search and reasoning processes. Your task is to analyze the given sequence of steps and identify what went wrong in the search process.
+
+Focus on:
+1. The sequence of actions taken
+2. The effectiveness of each step
+3. The logic between consecutive steps
+4. Alternative approaches that could have been taken
+5. Signs of getting stuck in repetitive patterns
+6. Whether the final answer matches the accumulated information
+
+Based on the steps provided, generate a JSON response following this schema.
+
+Example steps to analyze:
+
+At step 1, you took the **search** action and look for external information for the question: "how old is jina ai ceo?".
+In particular, you tried to search for the following keywords: "jina ai ceo age".
+You found quite some information and add them to your URL list and **visit** them later when needed. 
+
+
+At step 2, you took the **visit** action and deep dive into the following URLs:
+https://www.linkedin.com/in/hxiao87
+https://www.crunchbase.com/person/han-xiao
+You found some useful information on the web and add them to your knowledge for future reference.
+
+
+At step 3, you took the **search** action and look for external information for the question: "how old is jina ai ceo?".
+In particular, you tried to search for the following keywords: "Han Xiao birthdate, Jina AI founder birthdate".
+You found quite some information and add them to your URL list and **visit** them later when needed. 
+
+
+At step 4, you took the **search** action and look for external information for the question: "how old is jina ai ceo?".
+In particular, you tried to search for the following keywords: han xiao birthday. 
+But then you realized you have already searched for these keywords before.
+You decided to think out of the box or cut from a completely different angle.
+
+
+At step 5, you took the **search** action and look for external information for the question: "how old is jina ai ceo?".
+In particular, you tried to search for the following keywords: han xiao birthday. 
+But then you realized you have already searched for these keywords before.
+You decided to think out of the box or cut from a completely different angle.
+
+
+At step 6, you took the **visit** action and deep dive into the following URLs:
+https://kpopwall.com/han-xiao/
+https://www.idolbirthdays.net/han-xiao
+You found some useful information on the web and add them to your knowledge for future reference.
+
+
+At step 7, you took **answer** action but evaluator thinks it is not a good answer:
+
+Original question: 
+how old is jina ai ceo?
+
+Your answer: 
+The age of the Jina AI CEO cannot be definitively determined from the provided information.
+
+The evaluator thinks your answer is bad because: 
+The answer is not definitive and fails to provide the requested information.  Lack of information is unacceptable, more search and deep reasoning is needed.
+
+Analyze the steps and provide detailed feedback following these guidelines:
+- In the recap: Summarize key actions chronologically, highlight patterns, and identify where the process started to go wrong
+- In the blame: Point to specific steps or patterns that led to the inadequate answer
+- In the improvement: Provide actionable suggestions that could have led to a better outcome
+
+Example analysis output:
+{
+  "recap": "The search process consisted of 7 steps with multiple search and visit actions. The initial searches focused on basic biographical information through LinkedIn and Crunchbase (steps 1-2). When this didn't yield the specific age information, additional searches were conducted for birthdate information (steps 3-5). The process showed signs of repetition in steps 4-5 with identical searches. Final visits to entertainment websites (step 6) suggested a loss of focus on reliable business sources.",
+  
+  "blame": "The root cause of failure was getting stuck in a repetitive search pattern without adapting the strategy. Steps 4-5 repeated the same search, and step 6 deviated to less reliable entertainment sources instead of exploring business journals, news articles, or professional databases. Additionally, the process didn't attempt to triangulate age through indirect information like education history or career milestones.",
+  
+  "improvement": "1. Avoid repeating identical searches and implement a strategy to track previously searched terms. 2. When direct age/birthdate searches fail, try indirect approaches like: searching for earliest career mentions, finding university graduation years, or identifying first company founding dates. 3. Focus on high-quality business sources and avoid entertainment websites for professional information. 4. Consider using industry event appearances or conference presentations where age-related context might be mentioned. 5. If exact age cannot be determined, provide an estimated range based on career timeline and professional achievements."
+}
+
+Review the steps below carefully and generate your analysis following this format.
+
+${diaryContext.join('\n')}
+`;
+}
+
+/**
+ * Type guard: true when `value` is a non-null object whose `recap`, `blame`
+ * and `improvement` properties are all strings.
+ */
+function isEvaluationResponse(value: unknown): value is EvaluationResponse {
+  if (typeof value !== 'object' || value === null) {
+    return false;
+  }
+  const candidate = value as Record<string, unknown>;
+  return ['recap', 'blame', 'improvement'].every(
+    (key) => typeof candidate[key] === 'string'
+  );
+}
+
+/**
+ * Asks the model to analyze the steps of a rejected attempt.
+ *
+ * @param diaryContext - Diary entries describing the steps that were taken;
+ *   they are embedded in the prompt built by `getPrompt`.
+ * @returns The parsed analysis with `recap`, `blame` and `improvement`.
+ * @throws Rethrows any error from the model call or from JSON parsing, and
+ *   throws an `Error` when the parsed reply lacks one of the three string
+ *   fields. Every failure is logged before being rethrown.
+ */
+export async function analyzeSteps(diaryContext: string[]): Promise<EvaluationResponse> {
+  try {
+    const prompt = getPrompt(diaryContext);
+    const result = await model.generateContent(prompt);
+    const response = await result.response;
+    // JSON.parse yields untyped data. Check the shape at runtime instead of
+    // casting, so a malformed reply fails here rather than surfacing later as
+    // "undefined" inside the caller's prompt.
+    const parsed: unknown = JSON.parse(response.text());
+    if (!isEvaluationResponse(parsed)) {
+      throw new Error('Step analysis response is missing recap, blame or improvement');
+    }
+    console.log('Rejection analysis:', parsed);
+    return parsed;
+  } catch (error) {
+    // The label names this operation; the previous text ("answer evaluation")
+    // referred to a different tool.
+    console.error('Error in step analysis:', error);
+    throw error;
+  }
+}