chore: update readme

2026-03-22 07:29:35 +08:00 · 2025-02-11 11:04:31 +08:00
parent 30d529082e
commit 188f1bb640
5 changed files with 217 additions and 53 deletions
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -1,7 +1,7 @@
 import {z, ZodObject} from 'zod';
 import {generateObject} from 'ai';
 import {getModel, getMaxTokens, SEARCH_PROVIDER, STEP_SLEEP} from "./config";
-import {readUrl} from "./tools/read";
+import {readUrl, removeAllLineBreaks} from "./tools/read";
 import {handleGenerateObjectError} from './utils/error-handling';
 import fs from 'fs/promises';
 import {SafeSearchType, search as duckSearch} from "duck-duck-scrape";
@@ -273,9 +273,7 @@ function updateContext(step: any) {
  allContext.push(step)
 }
-function removeAllLineBreaks(text: string) {
+
  return text.replace(/(\r\n|\n|\r)/gm, " ");
 }
 function removeHTMLtags(text: string) {
  return text.replace(/<[^>]*>?/gm, '');
@@ -390,7 +388,7 @@ export async function getResponse(question: string, tokenBudget: number = 1_000_
        ...thisStep,
      });
-      const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep.answer,
+      const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep,
        evaluationMetrics[currentQuestion], context.tokenTracker);
      if (currentQuestion === question) {
--- a/src/tools/brave-search.ts
+++ b/src/tools/brave-search.ts
@@ -7,7 +7,7 @@ export async function braveSearch(query: string): Promise<{ response: BraveSearc
  const response = await axios.get<BraveSearchResponse>('https://api.search.brave.com/res/v1/web/search', {
    params: {
      q: query,
-      count: 5,
+      count: 10,
      safesearch: 'off'
    },
    headers: {
--- a/src/tools/evaluator.ts
+++ b/src/tools/evaluator.ts
@@ -1,13 +1,14 @@
-import { z } from 'zod';
+import {z} from 'zod';
-import { generateObject } from 'ai';
+import {generateObject} from 'ai';
-import { getModel, getMaxTokens } from "../config";
+import {getModel, getMaxTokens} from "../config";
-import { TokenTracker } from "../utils/token-tracker";
+import {TokenTracker} from "../utils/token-tracker";
-import { EvaluationResponse } from '../types';
+import {AnswerAction, EvaluationResponse} from '../types';
-import { handleGenerateObjectError } from '../utils/error-handling';
+import {handleGenerateObjectError} from '../utils/error-handling';
 import {readUrl, removeAllLineBreaks} from "./read";
 const model = getModel('evaluator');
-type EvaluationType = 'definitive' | 'freshness' | 'plurality';
+type EvaluationType = 'definitive' | 'freshness' | 'plurality' | 'attribution';
 const baseSchema = {
  pass: z.boolean().describe('Whether the answer passes the evaluation criteria defined by the evaluator'),
@@ -41,6 +42,73 @@ const pluralitySchema = z.object({
  })
 });
 // Three boolean findings the evaluator model reports about how well the
 // answer is backed by its sources; nested under `attribution_analysis` below.
 const attributionAnalysisSchema = z.object({
  sources_provided: z.boolean().describe('Whether the answer provides source references'),
  sources_verified: z.boolean().describe('Whether the provided sources contain the claimed information'),
  quotes_accurate: z.boolean().describe('Whether the quotes accurately represent the source content')
 });
 // Structured output for the 'attribution' evaluation: the shared base fields,
 // a literal `type` tag of 'attribution', and the nested analysis object.
 const attributionSchema = z.object({
  ...baseSchema,
  type: z.literal('attribution'),
  attribution_analysis: attributionAnalysisSchema
 });
 /**
  * Builds the prompt for the attribution evaluation.
  *
  * The prompt consists of a fixed rule list (source verification, attribution
  * analysis, accuracy requirements), two worked examples (one failing, one
  * passing), and finally the question/answer/source triple to evaluate. Each of
  * the three inputs is embedded via `JSON.stringify`, so it appears in the
  * prompt as a quoted, escaped JSON string.
  *
  * NOTE(review): the example "Evaluation" objects in the prompt contain `pass`,
  * `think` and `attribution_analysis` but no `type` field, whereas
  * `attributionSchema` declares `type: z.literal('attribution')` — confirm the
  * examples are intentionally abbreviated.
  *
  * @param question - The question the answer is responding to.
  * @param answer - The answer text whose claims should be checked.
  * @param sourceContent - Text of the sources the answer is checked against.
  * @returns The complete prompt string.
  */
 function getAttributionPrompt(question: string, answer: string, sourceContent: string): string {
  return `You are an evaluator that verifies if answer content is properly attributed to and supported by the provided sources.
 <rules>
 1. Source Verification:
   - Check if answer claims are supported by the provided source content
   - Verify that quotes are accurate and in proper context
   - Ensure numerical data and statistics match the source
   - Flag any claims that go beyond what the sources support
 2. Attribution Analysis:
   - Check if answer properly references its sources
   - Verify that important claims have clear source attribution
   - Ensure quotes are properly marked and cited
   - Check for any unsupported generalizations
 3. Accuracy Requirements:
   - Direct quotes must match source exactly
   - Paraphrasing must maintain original meaning
   - Statistics and numbers must be precise
   - Context must be preserved
 </rules>
 <examples>
 Question: "What are Jina AI's main products?"
 Answer: "According to Jina AI's website, their main products are DocArray and Jina Framework."
 Source Content: "Jina AI's flagship products include DocArray, Jina Framework, and JCloud, offering a complete ecosystem for neural search applications."
 Evaluation: {
  "pass": false,
  "think": "The answer omits JCloud which is mentioned as a main product in the source. The information provided is incomplete and potentially misleading as it fails to mention a significant product from the company's ecosystem.",
  "attribution_analysis": {
    "sources_provided": true,
    "sources_verified": false,
    "quotes_accurate": false
  }
 }
 Question: "When was Python first released?"
 Answer: "Python was first released in 1991 by Guido van Rossum."
 Source Content: "Python was first released in 1991 by Guido van Rossum while working at CWI."
 Evaluation: {
  "pass": true,
  "think": "The answer accurately reflects the core information from the source about Python's release date and creator, though it omits the additional context about CWI which isn't essential to the question.",
  "attribution_analysis": {
    "sources_provided": true,
    "sources_verified": true,
    "quotes_accurate": true
  }
 }
 </examples>
 Now evaluate this pair:
 Question: ${JSON.stringify(question)}
 Answer: ${JSON.stringify(answer)}
 Source Content: ${JSON.stringify(sourceContent)}`;
 }
 function getDefinitivePrompt(question: string, answer: string): string {
  return `You are an evaluator of answer definitiveness. Analyze if the given answer provides a definitive response or not.
@@ -332,67 +400,161 @@ export async function evaluateQuestion(
  }
 }
 /**
  * Runs one evaluation pass: calls `generateObject`, records token usage, logs
  * the structured verdict, and hands the raw result back to the caller.
  *
  * There is no error handling here; anything thrown by `generateObject`
  * propagates to the caller.
  *
  * @param evaluationType - Which evaluation is running; used only as the label
  *   in the log line.
  * @param params - Model, schema, prompt and token limit, forwarded to
  *   `generateObject` as-is.
  * @param tracker - Optional tracker that receives the usage. When omitted, the
  *   usage is recorded on a newly created `TokenTracker` that is not returned.
  * @returns The result produced by `generateObject`.
  */
 async function performEvaluation(
  evaluationType: EvaluationType,
  params: {
    model: any;
    schema: z.ZodType<any>;
    prompt: string;
    maxTokens: number;
  },
  tracker?: TokenTracker
 ): Promise<GenerateObjectResult> {
  const {model: evaluatorModel, schema, prompt, maxTokens} = params;
  const evaluation = await generateObject({
    model: evaluatorModel,
    schema,
    prompt,
    maxTokens
  });
  // A missing usage report is tracked as zero tokens.
  const usageSink = tracker || new TokenTracker();
  const tokensUsed = evaluation.usage?.totalTokens || 0;
  usageSink.trackUsage('evaluator', tokensUsed);
  console.log(`${evaluationType} Evaluation:`, evaluation.object);
  return evaluation;
 }
 /**
  * The subset of a `generateObject` result that the evaluator code relies on;
  * this is the declared return type of `performEvaluation`.
  */
 interface GenerateObjectResult {
  /** The structured evaluation verdict produced for the given schema. */
  object: EvaluationResponse;
  /** Token accounting for the call; `performEvaluation` tracks 0 when it is absent. */
  usage?: {
    totalTokens: number;
  };
 }
 // Main evaluation function
 export async function evaluateAnswer(
  question: string,
-  answer: string,
+  action: AnswerAction,
  evaluationOrder: EvaluationType[] = ['definitive', 'freshness', 'plurality'],
  tracker?: TokenTracker
 ): Promise<{ response: EvaluationResponse }> {
-  let result;
+  let result: GenerateObjectResult;
  // Only add attribution if we have valid references
  if (action.references && action.references.length > 0) {
    evaluationOrder = ['attribution', ...evaluationOrder];
  }
  for (const evaluationType of evaluationOrder) {
    try {
      switch (evaluationType) {
-        case 'definitive':
+        case 'attribution': {
-          result = await generateObject({
+          // Safely handle references and ensure we have content
-            model,
+          const urls = action.references?.map(ref => ref.url) ?? [];
-            schema: definitiveSchema,
+          const uniqueURLs = [...new Set(urls)];
-            prompt: getDefinitivePrompt(question, answer),
+          const allKnowledge = await fetchSourceContent(uniqueURLs, tracker);
-            maxTokens: getMaxTokens('evaluator')
+
-          });
+          if (!allKnowledge.trim()) {
-          (tracker || new TokenTracker()).trackUsage('evaluator', result.usage?.totalTokens || 0);
+            return {
-          console.log('Evaluation:', result.object);
+              response: {
-          if (!result.object.pass) {
+                pass: false,
-            return { response: result.object };
+                think: "The answer does not provide any valid attribution references that could be verified. No accessible source content was found to validate the claims made in the answer.",
                type: 'attribution',
              }
            };
          }
          result = await performEvaluation(
            'attribution',
            {
              model,
              schema: attributionSchema,
              prompt: getAttributionPrompt(question, action.answer, allKnowledge),
              maxTokens: getMaxTokens('evaluator')
            },
            tracker
          );
          break;
        }
        case 'definitive':
          result = await performEvaluation(
            'definitive',
            {
              model,
              schema: definitiveSchema,
              prompt: getDefinitivePrompt(question, action.answer),
              maxTokens: getMaxTokens('evaluator')
            },
            tracker
          );
          break;
        case 'freshness':
-          result = await generateObject({
+          result = await performEvaluation(
-            model,
+            'freshness',
-            schema: freshnessSchema,
+            {
-            prompt: getFreshnessPrompt(question, answer, new Date().toISOString()),
+              model,
-            maxTokens: getMaxTokens('evaluator')
+              schema: freshnessSchema,
-          });
+              prompt: getFreshnessPrompt(question, action.answer, new Date().toISOString()),
-          (tracker || new TokenTracker()).trackUsage('evaluator', result.usage?.totalTokens || 0);
+              maxTokens: getMaxTokens('evaluator')
-          console.log('Evaluation:', result.object);
+            },
-          if (!result.object.pass) {
+            tracker
-            return { response: result.object };
+          );
          }
          break;
        case 'plurality':
-          result = await generateObject({
+          result = await performEvaluation(
-            model,
+            'plurality',
-            schema: pluralitySchema,
+            {
-            prompt: getPluralityPrompt(question, answer),
+              model,
-            maxTokens: getMaxTokens('evaluator')
+              schema: pluralitySchema,
-          });
+              prompt: getPluralityPrompt(question, action.answer),
-          (tracker || new TokenTracker()).trackUsage('evaluator', result.usage?.totalTokens || 0);
+              maxTokens: getMaxTokens('evaluator')
-          console.log('Evaluation:', result.object);
+            },
-          if (!result.object.pass) {
+            tracker
-            return { response: result.object };
+          );
          }
          break;
      }
      if (!result?.object.pass) {
        return {response: result.object};
      }
    } catch (error) {
      const errorResult = await handleGenerateObjectError<EvaluationResponse>(error);
      (tracker || new TokenTracker()).trackUsage('evaluator', errorResult.totalTokens || 0);
-      // Always return from catch block to prevent undefined result
+      return {response: errorResult.object};
      return { response: errorResult.object };
    }
  }
-  // Only reach this point if all evaluations pass
+  return {response: result!.object};
-  return { response: result!.object };
+}
 /**
  * Reads every URL concurrently and merges the readable content into one string.
  *
  * Each page is fetched with `readUrl`, flattened with `removeAllLineBreaks`,
  * and pages that are empty or whitespace-only are dropped. The remaining pages
  * are joined with a blank line (`\n\n`) between them. A URL that fails to load
  * is logged and contributes nothing; it does not fail the whole batch.
  *
  * @param urls - URLs to read; an empty list short-circuits to `''`.
  * @param tracker - Optional tracker passed through to `readUrl`.
  * @returns The combined content, or `''` when nothing could be read.
  */
 async function fetchSourceContent(urls: string[], tracker?: TokenTracker): Promise<string> {
  if (urls.length === 0) {
    return '';
  }
  // Reads a single URL; any failure is logged and mapped to an empty string.
  const readFlattened = async (url: string) => {
    try {
      const {response} = await readUrl(url, tracker);
      const rawContent = response?.data?.content || '';
      return removeAllLineBreaks(rawContent);
    } catch (error) {
      console.error('Error reading URL:', error);
      return '';
    }
  };
  try {
    const pages = await Promise.all(urls.map((url) => readFlattened(url)));
    // Keep only pages with non-whitespace content, in the original URL order.
    const usablePages = [];
    for (const page of pages) {
      if (page.trim()) {
        usablePages.push(page);
      }
    }
    return usablePages.join('\n\n');
  } catch (error) {
    console.error('Error fetching source content:', error);
    return '';
  }
 }
--- a/src/tools/read.ts
+++ b/src/tools/read.ts
@@ -91,4 +91,8 @@ export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response
    req.write(data);
    req.end();
  });
 }
 /**
  * Flattens text onto a single line by replacing every line break with a space.
  *
  * Recognizes `\r\n`, `\n` and `\r`. A `\r\n` pair counts as one break and so
  * produces a single space; consecutive breaks each produce their own space.
  *
  * @param text - The text to flatten.
  * @returns The text with each line break replaced by one space.
  */
 export function removeAllLineBreaks(text: string) {
  // `\r\n` is listed first so a Windows line ending is consumed as one unit.
  const lineBreak = /\r\n|\n|\r/;
  const segments = text.split(lineBreak);
  return segments.join(" ");
 }
--- a/src/types.ts
+++ b/src/types.ts
@@ -100,7 +100,7 @@ export interface ReadResponse {
 export type EvaluationResponse = {
  pass: boolean;
  think: string;
-  type?: 'definitive' | 'freshness' | 'plurality';
+  type?: 'definitive' | 'freshness' | 'plurality' | 'attribution';
  freshness_analysis?: {
    likely_outdated: boolean;
    dates_mentioned: string[];