refactor: schemas

Han Xiao 2025-02-25 15:12:19 +08:00
parent 3226aedf48
commit 66490f3848
7 changed files with 637 additions and 548 deletions
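In short, this commit folds the scattered schema builders (the old getSchema() helper, the per-tool zod response schemas, and the MAX_*_PER_STEP constants) into a single Schemas class in src/utils/schemas.ts, which detects the question's language once and is then threaded through the tools as a SchemaGen instance. A rough sketch of the new call pattern, pieced together from the hunks below; the wrapper function and its arguments are illustrative, not lifted from the repo:

import {Schemas} from "./utils/schemas";
import {evaluateQuestion} from "./tools/evaluator";
import {TrackerContext, EvaluationType} from "./types";

async function sketchNewCallPattern(question: string, context: TrackerContext) {
  // one Schemas instance per question; it detects language code + style up front
  const SchemaGen = new Schemas(question);
  // the agent schema is now built by the class instead of the removed getSchema() helper
  const schema = SchemaGen.getAgentSchema(true, true, true, true, true);
  // evaluateQuestion now returns a plain list of checks and takes the SchemaGen instance
  const evaluationTypes: EvaluationType[] = await evaluateQuestion(question, context, SchemaGen);
  return {schema, evaluationTypes};
}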

View File

@ -11,7 +11,7 @@ import {evaluateAnswer, evaluateQuestion} from "./tools/evaluator";
import {analyzeSteps} from "./tools/error-analyzer";
import {TokenTracker} from "./utils/token-tracker";
import {ActionTracker} from "./utils/action-tracker";
import {StepAction, AnswerAction, KnowledgeItem, EvaluationCriteria, SearchResult} from "./types";
import {StepAction, AnswerAction, KnowledgeItem, SearchResult, EvaluationType} from "./types";
import {TrackerContext} from "./types";
import {search} from "./tools/jina-search";
// import {grounding} from "./tools/grounding";
@ -21,6 +21,7 @@ import {CodeSandbox} from "./tools/code-sandbox";
import {serperSearch} from './tools/serper-search';
import {getUnvisitedURLs, normalizeUrl} from "./utils/url-tools";
import {buildMdFromAnswer, chooseK, removeExtraLineBreaks, removeHTMLtags} from "./utils/text-tools";
import {MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas} from "./utils/schemas";
async function sleep(ms: number) {
const seconds = Math.ceil(ms / 1000);
@ -28,66 +29,6 @@ async function sleep(ms: number) {
return new Promise(resolve => setTimeout(resolve, ms));
}
const MAX_URLS_PER_STEP = 2
const MAX_QUERIES_PER_STEP = 5
const MAX_REFLECT_PER_STEP = 3
function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean, allowCoding: boolean, languageStyle: string = 'same language as the question') {
const actions: string[] = [];
const properties: Record<string, z.ZodTypeAny> = {
action: z.enum(['placeholder']), // Will update later with actual actions
think: z.string().describe(`Explain why you chose this action and the chain-of-thought behind choosing it, in first-person narrative.`).max(500)
};
if (allowSearch) {
actions.push("search");
properties.searchRequests = z.array(
z.string().max(30)
.describe(`A natural language search request in ${languageStyle}. Based on the deep intention behind the original question and the expected answer format.`)).describe(`Required when action='search'. Always prefer a single request, only add another request if the original question covers multiple aspects or elements and one search request is definitely not enough, each request focuses on one specific aspect of the original question. Minimize mutual information between each request. Maximum ${MAX_QUERIES_PER_STEP} search requests.`).max(MAX_QUERIES_PER_STEP);
}
if (allowCoding) {
actions.push("coding");
properties.codingIssue = z.string().max(500)
.describe("Required when action='coding'. Describe what issue to solve with coding, format like a github issue ticket. Specify the input value when it is short.").optional();
}
if (allowAnswer) {
actions.push("answer");
properties.references = z.array(
z.object({
exactQuote: z.string().describe("Exact relevant quote from the document, must be a soundbite, short and to the point, no fluff").max(30),
url: z.string().describe("source URL; must be directly from the context")
}).required()
).describe("Required when action='answer'. Must be an array of references that support the answer, each reference must contain an exact quote and the URL of the document").optional();
properties.answer = z.string()
.describe(`Required when action='answer'. Must be definitive, no ambiguity, uncertainty, or disclaimers. Must be in ${languageStyle} and confident. Use markdown footnote syntax like [^1], [^2] to refer to the corresponding reference item`).optional();
}
if (allowReflect) {
actions.push("reflect");
properties.questionsToAnswer = z.array(
z.string().describe("each question must be a single line, Questions must be: Original (not variations of existing questions); Focused on single concepts; Under 20 words; Non-compound/non-complex")
).max(MAX_REFLECT_PER_STEP)
.describe(`Required when action='reflect'. List of most important questions to fill the knowledge gaps of finding the answer to the original question. Maximum provide ${MAX_REFLECT_PER_STEP} reflect questions.`).optional();
}
if (allowRead) {
actions.push("visit");
properties.URLTargets = z.array(z.string())
.max(MAX_URLS_PER_STEP)
.describe(`Required when action='visit'. Must be an array of URLs, choose up to ${MAX_URLS_PER_STEP} of the most relevant URLs to visit`).optional();
}
// Update the enum values after collecting all actions
properties.action = z.enum(actions as [string, ...string[]])
.describe("Must match exactly one action type");
return z.object(properties);
}
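// Self-contained sketch (assuming only the zod package) of the placeholder-enum
// technique the removed getSchema() above uses and which, per this commit, moves into
// Schemas.getAgentSchema(): collect the allowed actions first, then swap the real enum
// in before building the object schema. The field set is trimmed for illustration.
import {z} from "zod";

function buildActionSchemaSketch(allowSearch: boolean, allowAnswer: boolean) {
  // at least one flag should be true, otherwise z.enum() would receive an empty list
  const actions: string[] = [];
  const properties: Record<string, z.ZodTypeAny> = {
    action: z.enum(['placeholder']), // replaced below once all actions are collected
    think: z.string().max(500),
  };
  if (allowSearch) {
    actions.push('search');
    properties.searchRequests = z.array(z.string().max(30)).max(5).optional();
  }
  if (allowAnswer) {
    actions.push('answer');
    properties.answer = z.string().optional();
  }
  properties.action = z.enum(actions as [string, ...string[]])
    .describe('Must match exactly one action type');
  return z.object(properties);
}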
function getPrompt(
context?: string[],
@ -192,7 +133,7 @@ ${learnedStrategy}
if (allURLs && allURLs.length > 0) {
urlList = allURLs
.filter(r => 'url' in r)
.map(r => ` + "${r.url}": "${r.title}"`)
.join('\n');
}
@ -290,7 +231,6 @@ ${actionSections.join('\n\n')}
}
const allContext: StepAction[] = []; // all steps in the current session, including those that led to wrong results
function updateContext(step: any) {
@ -298,29 +238,31 @@ function updateContext(step: any) {
}
export async function getResponse(question?: string,
tokenBudget: number = 1_000_000,
maxBadAttempts: number = 3,
existingContext?: Partial<TrackerContext>,
messages?: Array<CoreAssistantMessage | CoreUserMessage>
): Promise<{ result: StepAction; context: TrackerContext; visitedURLs: string[], readURLs: string[] }> {
const context: TrackerContext = {
tokenTracker: existingContext?.tokenTracker || new TokenTracker(tokenBudget),
actionTracker: existingContext?.actionTracker || new ActionTracker()
};
let step = 0;
let totalStep = 0;
let badAttempts = 0;
let schema: ZodObject<any> = getSchema(true, true, true, true, true)
question = question?.trim() as string;
if (messages && messages.length > 0) {
question = (messages[messages.length - 1]?.content as string).trim();
} else {
messages = [{role: 'user', content: question.trim()}]
}
const SchemaGen = new Schemas(question);
const context: TrackerContext = {
tokenTracker: existingContext?.tokenTracker || new TokenTracker(tokenBudget),
actionTracker: existingContext?.actionTracker || new ActionTracker()
};
let schema: ZodObject<any> = SchemaGen.getAgentSchema(true, true, true, true, true)
const gaps: string[] = [question]; // All questions to be answered including the original question
const allQuestions = [question];
const allKeywords = [];
@ -338,7 +280,7 @@ export async function getResponse(question?: string,
const allURLs: Record<string, SearchResult> = {};
const visitedURLs: string[] = [];
const evaluationMetrics: Record<string, EvaluationCriteria> = {};
const evaluationMetrics: Record<string, EvaluationType[]> = {};
while (context.tokenTracker.getTotalUsage().totalTokens < tokenBudget && badAttempts <= maxBadAttempts) {
// add 1s delay to avoid rate limiting
step++;
@ -349,7 +291,8 @@ export async function getResponse(question?: string,
allowReflect = allowReflect && (gaps.length <= 1);
const currentQuestion: string = gaps.length > 0 ? gaps.shift()! : question
if (!evaluationMetrics[currentQuestion]) {
evaluationMetrics[currentQuestion] = await evaluateQuestion(currentQuestion, context)
evaluationMetrics[currentQuestion] =
await evaluateQuestion(currentQuestion, context, SchemaGen)
}
// update all urls with buildURLMap
@ -371,8 +314,7 @@ export async function getResponse(question?: string,
getUnvisitedURLs(allURLs, visitedURLs),
false,
);
schema = getSchema(allowReflect, allowRead, allowAnswer, allowSearch, allowCoding,
evaluationMetrics[currentQuestion].languageStyle)
schema = SchemaGen.getAgentSchema(allowReflect, allowRead, allowAnswer, allowSearch, allowCoding)
const generator = new ObjectGeneratorSafe(context.tokenTracker);
const result = await generator.generateObject({
model: 'agent',
@ -420,10 +362,11 @@ export async function getResponse(question?: string,
context.actionTracker.trackThink(`But wait, let me evaluate the answer first.`)
const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep,
const evaluation = await evaluateAnswer(currentQuestion, thisStep,
evaluationMetrics[currentQuestion],
context,
visitedURLs
visitedURLs,
SchemaGen
);
if (currentQuestion.trim() === question) {
@ -462,7 +405,7 @@ The evaluator thinks your answer is bad because:
${evaluation.think}
`);
// store the bad context and reset the diary context
const {response: errorAnalysis} = await analyzeSteps(diaryContext, context);
const errorAnalysis = await analyzeSteps(diaryContext, context, SchemaGen);
allKnowledge.push({
question: currentQuestion,
@ -554,7 +497,7 @@ But then you realized you have asked them before. You decided to think out of
thisStep.searchRequests = chooseK((await dedupQueries(thisStep.searchRequests, [], context.tokenTracker)).unique_queries, MAX_QUERIES_PER_STEP);
// rewrite queries
let {queries: keywordsQueries} = await rewriteQuery(thisStep, context);
let {queries: keywordsQueries} = await rewriteQuery(thisStep, context, SchemaGen);
// avoid existing searched queries
keywordsQueries = chooseK((await dedupQueries(keywordsQueries, allKeywords, context.tokenTracker)).unique_queries, MAX_QUERIES_PER_STEP);
@ -717,7 +660,7 @@ You decided to think out of the box or cut from a completely different angle.`);
allowRead = false;
}
} else if (thisStep.action === 'coding' && thisStep.codingIssue) {
const sandbox = new CodeSandbox({allContext, visitedURLs, allURLs, allKnowledge}, context);
const sandbox = new CodeSandbox({allContext, visitedURLs, allURLs, allKnowledge}, context, SchemaGen);
try {
const result = await sandbox.solve(thisStep.codingIssue);
allKnowledge.push({
@ -778,8 +721,7 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
true,
);
schema = getSchema(false, false, true, false, false,
evaluationMetrics[question]?.languageStyle || 'same language as the question');
schema = SchemaGen.getAgentSchema(false, false, true, false, false);
const generator = new ObjectGeneratorSafe(context.tokenTracker);
const result = await generator.generateObject({
model: 'agentBeastMode',

View File

@ -1,17 +1,7 @@
import { z } from 'zod';
import { ObjectGeneratorSafe } from "../utils/safe-generator";
import {TrackerContext} from "../types";
import {ObjectGeneratorSafe} from "../utils/safe-generator";
import {CodeGenResponse, TrackerContext} from "../types";
import {Schemas} from "../utils/schemas";
// Define the response schema for code generation
const codeGenerationSchema = z.object({
think: z.string().describe('Short explain or comments on the thought process behind the code, in first person.').max(200),
code: z.string().describe('The JavaScript code that solves the problem and always use \'return\' statement to return the result. Focus on solving the core problem; No need for error handling or try-catch blocks or code comments. No need to declare variables that are already available, especially big long strings or arrays.'),
});
// Define the types
interface CodeGenerationResponse {
code: string;
}
interface SandboxResult {
success: boolean;
@ -72,33 +62,36 @@ export class CodeSandbox {
private generator: ObjectGeneratorSafe;
private maxAttempts: number;
private context: Record<string, any>;
private schemaGen: Schemas;
constructor(
context: any = {},
trackers?: TrackerContext,
maxAttempts: number = 3
trackers: TrackerContext,
schemaGen: Schemas,
maxAttempts: number = 3,
) {
this.trackers = trackers;
this.generator = new ObjectGeneratorSafe(trackers?.tokenTracker);
this.maxAttempts = maxAttempts;
this.context = context;
this.schemaGen = schemaGen;
}
private async generateCode(
problem: string,
previousAttempts: Array<{ code: string; error?: string }> = []
): Promise<CodeGenerationResponse> {
): Promise<CodeGenResponse> {
const prompt = getPrompt(problem, analyzeStructure(this.context), previousAttempts);
const result = await this.generator.generateObject({
model: 'coder',
schema: codeGenerationSchema,
schema: this.schemaGen.getCodeGeneratorSchema(),
prompt,
});
this.trackers?.actionTracker.trackThink(result.object.think);
return result.object;
return result.object as CodeGenResponse;
}
private evaluateCode(code: string): SandboxResult {
@ -143,7 +136,7 @@ export class CodeSandbox {
for (let i = 0; i < this.maxAttempts; i++) {
// Generate code
const generation = await this.generateCode(problem, attempts);
const { code } = generation;
const {code} = generation;
console.log(`Coding attempt ${i + 1}:`, code);
// Evaluate the code
@ -180,61 +173,61 @@ export class CodeSandbox {
}
function formatValue(value: any): string {
if (value === null) return 'null';
if (value === undefined) return 'undefined';
const type = typeof value;
if (type === 'string') {
// Clean and truncate string value
const cleaned = value.replace(/\n/g, ' ').replace(/\s+/g, ' ').trim();
return cleaned.length > 50 ?
`"${cleaned.slice(0, 47)}..."` :
`"${cleaned}"`;
}
if (type === 'number' || type === 'boolean') {
return String(value);
}
if (value instanceof Date) {
return `"${value.toISOString()}"`;
}
return '';
}
export function analyzeStructure(value: any, indent = ''): string {
if (value === null) return 'null';
if (value === undefined) return 'undefined';
const type = typeof value;
if (type === 'function') {
return 'Function';
}
// Handle atomic types with example values
if (type !== 'object' || value instanceof Date) {
const formattedValue = formatValue(value);
return `${type}${formattedValue ? ` (example: ${formattedValue})` : ''}`;
}
if (Array.isArray(value)) {
if (value.length === 0) return 'Array<unknown>';
const sampleItem = value[0];
return `Array<${analyzeStructure(sampleItem, indent + ' ')}>`;
}
const entries = Object.entries(value);
if (entries.length === 0) return '{}';
const properties = entries
.map(([key, val]) => {
const analyzed = analyzeStructure(val, indent + ' ');
return `${indent} "${key}": ${analyzed}`;
})
.join(',\n');
return `{\n${properties}\n${indent}}`;
}
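// Quick usage sketch of analyzeStructure() above. The literal object is illustrative,
// and the function is assumed to be in scope (this file's path is not shown in the diff).
// The returned description is roughly:
const structureSketch = analyzeStructure({name: "Jina", founded: 2020, tags: ["ai"]});
// {
//  "name": string (example: "Jina"),
//  "founded": number (example: 2020),
//  "tags": Array<string (example: "ai")>
// }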

View File

@ -1,18 +1,8 @@
import {z} from 'zod';
import {ErrorAnalysisResponse, TrackerContext} from '../types';
import {ObjectGeneratorSafe} from "../utils/safe-generator";
import {Schemas} from "../utils/schemas";
const responseSchema = z.object({
recap: z.string().describe('Recap of the actions taken and the steps conducted in first person narrative.').max(500),
blame: z.string().describe('Which action or the step was the root cause of the answer rejection').max(500),
improvement: z.string().describe('Suggested key improvement for the next iteration, do not use bullet points, be concise and hot-take vibe.').max(500),
questionsToAnswer: z.array(
z.string().describe("each question must be a single line, concise and clear. not composite or compound, less than 20 words.")
).max(2)
.describe("List of most important reflect questions to fill the knowledge gaps"),
});
function getPrompt(diaryContext: string[]): string {
return `You are an expert at analyzing search and reasoning processes. Your task is to analyze the given sequence of steps and identify what went wrong in the search process.
@ -110,15 +100,16 @@ ${diaryContext.join('\n')}
const TOOL_NAME = 'errorAnalyzer';
export async function analyzeSteps(
diaryContext: string[],
trackers?: TrackerContext
): Promise<{ response: ErrorAnalysisResponse }> {
trackers: TrackerContext,
schemaGen: Schemas
): Promise<ErrorAnalysisResponse> {
try {
const generator = new ObjectGeneratorSafe(trackers?.tokenTracker);
const prompt = getPrompt(diaryContext);
const result = await generator.generateObject({
model: TOOL_NAME,
schema: responseSchema,
schema: schemaGen.getErrorAnalysisSchema(),
prompt,
});
@ -126,7 +117,7 @@ export async function analyzeSteps(
trackers?.actionTracker.trackThink(result.object.blame);
trackers?.actionTracker.trackThink(result.object.improvement);
return { response: result.object };
return result.object as ErrorAnalysisResponse;
} catch (error) {
console.error(`Error in ${TOOL_NAME}`, error);

View File

@ -1,57 +1,10 @@
import {z} from 'zod';
import {GenerateObjectResult} from 'ai';
import {AnswerAction, EvaluationCriteria, EvaluationResponse, EvaluationType, TrackerContext} from '../types';
import {AnswerAction, EvaluationResponse, EvaluationType, TrackerContext} from '../types';
import {readUrl, removeAllLineBreaks} from "./read";
import {ObjectGeneratorSafe} from "../utils/safe-generator";
import {Schemas} from "../utils/schemas";
const baseSchema = {
pass: z.boolean().describe('Whether the answer passes the evaluation criteria defined by the evaluator'),
think: z.string().describe('Explanation the thought process why the answer does not pass the evaluation criteria').max(500)
};
const definitiveSchema = z.object({
...baseSchema,
type: z.literal('definitive')
});
const freshnessSchema = z.object({
...baseSchema,
type: z.literal('freshness'),
freshness_analysis: z.object({
days_ago: z.number().describe('Inferred dates or timeframes mentioned in the answer and relative to the current time'),
max_age_days: z.number().optional().describe('Maximum allowed age in days before content is considered outdated')
})
});
const pluralitySchema = z.object({
...baseSchema,
type: z.literal('plurality'),
plurality_analysis: z.object({
count_expected: z.number().optional().describe('Number of items expected if specified in question'),
count_provided: z.number().describe('Number of items provided in answer')
})
});
const completenessSchema = z.object({
...baseSchema,
type: z.literal('completeness'),
completeness_analysis: z.object({
aspects_expected: z.string().describe('Comma-separated list of all aspects or dimensions that the question explicitly asks for.'),
aspects_provided: z.string().describe('Comma-separated list of all aspects or dimensions that were actually addressed in the answer'),
})
});
const attributionSchema = z.object({
...baseSchema,
type: z.literal('attribution'),
attribution_analysis: z.object({
sources_provided: z.boolean().describe('Whether the answer provides source references'),
sources_verified: z.boolean().describe('Whether the provided sources contain the claimed information'),
quotes_accurate: z.boolean().describe('Whether the quotes accurately represent the source content')
})
});
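// The refactor replaces the per-type schemas above with Schemas.getEvaluatorSchema(type),
// whose body is not shown in this diff. Purely as a hypothetical sketch, assuming it
// mirrors the removed definitions, it could look roughly like this (zod assumed; the
// local type alias mirrors the EvaluationType union in ../types):
import {z} from "zod";

type EvalTypeSketch = 'definitive' | 'freshness' | 'plurality' | 'attribution' | 'completeness';

function evaluatorSchemaSketch(evalType: EvalTypeSketch) {
  const base = {
    pass: z.boolean().describe('Whether the answer passes the evaluation'),
    think: z.string().max(500),
  };
  switch (evalType) {
    case 'attribution':
      return z.object({
        ...base,
        type: z.literal('attribution'),
        attribution_analysis: z.object({
          sources_provided: z.boolean(),
          sources_verified: z.boolean(),
          quotes_accurate: z.boolean(),
        }),
      });
    case 'freshness':
      return z.object({
        ...base,
        type: z.literal('freshness'),
        freshness_analysis: z.object({
          days_ago: z.number(),
          max_age_days: z.number().optional(),
        }),
      });
    default:
      // definitive / plurality / completeness variants trimmed here for brevity
      return z.object({...base, type: z.literal(evalType)});
  }
}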
function getAttributionPrompt(question: string, answer: string, sourceContent: string): string {
return `You are an evaluator that verifies if answer content is properly attributed to and supported by the provided sources.
@ -80,26 +33,52 @@ Question: "What are Jina AI's main products?"
Answer: "According to Jina AI's website, their main products are DocArray and Jina Framework."
Source Content: "Jina AI's flagship products include DocArray, Jina Framework, and JCloud, offering a complete ecosystem for neural search applications."
Evaluation: {
"pass": false,
"think": "The answer omits JCloud which is mentioned as a main product in the source. The information provided is incomplete and potentially misleading as it fails to mention a significant product from the company's ecosystem.",
"attribution_analysis": {
"sources_provided": true,
"sources_verified": false,
"quotes_accurate": false
}
"pass": false,
}
Question: "When was Python first released?"
Answer: "Python was first released in 1991 by Guido van Rossum."
Source Content: "Python was first released in 1991 by Guido van Rossum while working at CWI."
Evaluation: {
"pass": true,
"think": "The answer accurately reflects the core information from the source about Python's release date and creator, though it omits the additional context about CWI which isn't essential to the question.",
"attribution_analysis": {
"sources_provided": true,
"sources_verified": true,
"quotes_accurate": true
}
"pass": true,
}
Question: "长城是什么时候建造的?"
Answer: "长城始建于公元前7世纪但现存的大部分长城是明朝时期修建的。"
Source Content: "中国长城始建于公元前7世纪的春秋战国时期历经多个朝代修建和扩展但现存的大部分长城是明朝1368-1644年时期修建的。"
Evaluation: {
"think": "这个回答准确地反映了原文中关于长城建造时间的核心信息包括最初的建造时期和现存长城的主要来源。虽然省略了具体的年份范围1368-1644年但这对回答问题的核心内容不是必要的。",
"attribution_analysis": {
"sources_provided": true,
"sources_verified": true,
"quotes_accurate": true
}
"pass": true,
}
Question: "Wann wurde die Berliner Mauer gebaut?"
Answer: "Die Berliner Mauer wurde am 13. August 1961 errichtet."
Source Content: "Die Berliner Mauer wurde am 13. August 1961 von der DDR-Regierung errichtet und fiel am 9. November 1989."
Evaluation: {
"think": "Die Antwort gibt das korrekte Datum des Mauerbaus wieder, wie in der Quelle angegeben. Der zusätzliche Kontext über den Fall der Mauer wurde weggelassen, da er für die spezifische Frage nach dem Bauzeitpunkt nicht wesentlich ist.",
"attribution_analysis": {
"sources_provided": true,
"sources_verified": true,
"quotes_accurate": true
}
"pass": true,
}
</examples>
@ -126,36 +105,57 @@ Definitiveness is the king! The following types of responses are NOT definitive
Question: "What are the system requirements for running Python 3.9?"
Answer: "I'm not entirely sure, but I think you need a computer with some RAM."
Evaluation: {
"pass": false,
"think": "The answer contains uncertainty markers like 'not entirely sure' and 'I think', making it non-definitive."
"pass": false,
}
Question: "What are the system requirements for running Python 3.9?"
Answer: "Python 3.9 requires Windows 7 or later, macOS 10.11 or later, or Linux."
Evaluation: {
"pass": true,
"think": "The answer makes clear, definitive statements without uncertainty markers or ambiguity."
"pass": true,
}
Question: "Who will be the president of the United States in 2032?"
Answer: "I cannot predict the future, it depends on the election results."
Evaluation: {
"pass": false,
"think": "The answer contains a statement of inability to predict the future, making it non-definitive."
"pass": false,
}
Question: "Who is the sales director at Company X?"
Answer: "I cannot provide the name of the sales director, but you can contact their sales team at sales@companyx.com"
Evaluation: {
"pass": false,
"think": "The answer starts with 'I cannot provide' and redirects to an alternative contact method instead of answering the original question."
"pass": false,
}
Question: "what is the twitter account of jina ai's founder?"
Answer: "The provided text does not contain the Twitter account of Jina AI's founder."
Evaluation: {
"pass": false,
"think": "The answer indicates a lack of information rather than providing a definitive response."
"pass": false,
}
Question: "量子コンピュータの計算能力を具体的に測定する方法は何ですか?"
Answer: "量子コンピュータの計算能力は量子ビット(キュービット)の数、ゲート忠実度、コヒーレンス時間で測定されます。"
Evaluation: {
"think": "The answer provides specific, definitive metrics for measuring quantum computing power without uncertainty markers or qualifications."
"pass": true,
}
Question: "如何证明哥德巴赫猜想是正确的?"
Answer: "目前尚无完整证明但2013年张益唐证明了存在无穷多对相差不超过7000万的素数后来这个界被缩小到246。"
Evaluation: {
"think": "The answer begins by stating no complete proof exists, which is a non-definitive response, and then shifts to discussing a related but different theorem about bounded gaps between primes."
"pass": false,
}
Question: "Wie kann man mathematisch beweisen, dass P ≠ NP ist?"
Answer: "Ein Beweis für P ≠ NP erfordert, dass man zeigt, dass mindestens ein NP-vollständiges Problem nicht in polynomieller Zeit lösbar ist. Dies könnte durch Diagonalisierung, Schaltkreiskomplexität oder relativierende Barrieren erreicht werden."
Evaluation: {
"think": "The answer provides concrete mathematical approaches to proving P ≠ NP without uncertainty markers, presenting definitive methods that could be used."
"pass": true,
}
</examples>
@ -283,6 +283,27 @@ Aspects_Expected: "economic factors, political factors"
Aspects_Provided: "real estate market collapse, high-risk lending, mortgage-backed securities, derivative products, risk disguising, credit assessment failures, legislative changes, regulatory guardrail elimination, leverage, speculation"
Think: "The question explicitly asks about two categories of factors: economic and political. The answer addresses economic factors ('real estate market collapse', 'high-risk lending', 'mortgage-backed securities', 'derivative products', 'risk disguising', 'credit assessment failures') and political factors ('legislative changes', 'regulatory guardrail elimination'). While using different terminology, the answer covers both explicitly requested aspects."
Pass: true
Question: "コロナウイルスの感染拡大が経済、教育システム、および医療インフラにどのような影響を与えましたか?"
Answer: "コロナウイルスは世界経済に甚大な打撃を与え、多くの企業が倒産し、失業率が急増しました。教育については、遠隔学習への移行が進み、デジタル格差が浮き彫りになりましたが、新しい教育テクノロジーの採用も加速しました。"
Aspects_Expected: "経済、教育システム、医療インフラ"
Aspects_Provided: "世界経済、企業倒産、失業率、遠隔学習、デジタル格差、教育テクノロジー"
Think: "質問では明示的にコロナウイルスの影響の三つの側面について尋ねています:経済、教育システム、医療インフラです。回答は経済(「世界経済」「企業倒産」「失業率」について)と教育システム(「遠隔学習」「デジタル格差」「教育テクノロジー」について)に対応していますが、質問で明示的に求められていた医療インフラへの影響についての議論が完全に省略されています。"
Pass: false
Question: "请解释人工智能在医疗诊断、自动驾驶和客户服务方面的应用。"
Answer: "在医疗领域AI算法可以分析医学影像以检测癌症和其他疾病准确率有时甚至超过人类专家。自动驾驶技术利用机器学习处理来自雷达、激光雷达和摄像头的数据实时做出驾驶决策。在客户服务方面聊天机器人和智能助手能够处理常见问题分类客户查询并在必要时将复杂问题转给人工代表。"
Aspects_Expected: "医疗诊断、自动驾驶、客户服务"
Aspects_Provided: "医学影像分析、癌症检测、雷达数据处理、激光雷达数据处理、摄像头数据处理、实时驾驶决策、聊天机器人、智能助手、客户查询分类"
Think: "问题明确要求解释人工智能在三个领域的应用:医疗诊断、自动驾驶和客户服务。回答虽然使用了不同的术语,但涵盖了所有三个方面:医疗诊断(讨论了'医学影像分析'和'癌症检测'),自动驾驶(包括'雷达数据处理'、'激光雷达数据处理'、'摄像头数据处理'和'实时驾驶决策'),以及客户服务(提到了'聊天机器人'、'智能助手'和'客户查询分类')。尽管使用了不同的表述,但所有明确提及的方面都得到了全面覆盖。"
Pass: true
Question: "Comment les changements climatiques affectent-ils la production agricole, les écosystèmes marins et la santé publique dans les régions côtières?"
Answer: "Les variations de température et de précipitations modifient les cycles de croissance des cultures et la distribution des ravageurs agricoles, nécessitant des adaptations dans les pratiques de culture. Dans les océans, l'acidification et le réchauffement des eaux entraînent le blanchissement des coraux et la migration des espèces marines vers des latitudes plus froides, perturbant les chaînes alimentaires existantes."
Aspects_Expected: "production agricole, écosystèmes marins, santé publique"
Aspects_Provided: "cycles de croissance, distribution des ravageurs, adaptations des pratiques de culture, acidification des océans, réchauffement des eaux, blanchissement des coraux, migration des espèces marines, perturbation des chaînes alimentaires"
Think: "La question demande explicitement les effets du changement climatique sur trois aspects: la production agricole, les écosystèmes marins et la santé publique dans les régions côtières. La réponse aborde la production agricole (en discutant des 'cycles de croissance', de la 'distribution des ravageurs' et des 'adaptations des pratiques de culture') et les écosystèmes marins (en couvrant 'l'acidification des océans', le 'réchauffement des eaux', le 'blanchissement des coraux', la 'migration des espèces marines' et la 'perturbation des chaînes alimentaires'). Cependant, elle omet complètement toute discussion sur les effets sur la santé publique dans les régions côtières, qui était explicitement demandée dans la question."
Pass: false
</examples>
Now evaluate this pair:
@ -333,14 +354,6 @@ Answer: ${answer}`;
}
const questionEvaluationSchema = z.object({
needsFreshness: z.boolean().describe('Whether the question requires freshness check'),
needsPlurality: z.boolean().describe('Whether the question requires plurality check'),
needsCompleteness: z.boolean().describe('Whether the question requires completeness check'),
think: z.string().describe('A very concise explain of why you choose those checks are needed in first person, extremely short.').max(500),
languageStyle: z.string().describe('The language being used and the overall vibe/mood of the question').max(50),
});
function getQuestionEvaluationPrompt(question: string): string {
return `You are an evaluator that determines if a question requires freshness, plurality, and/or completeness checks in addition to the required definitiveness check.
@ -348,12 +361,9 @@ function getQuestionEvaluationPrompt(question: string): string {
1. freshness - Checks if the question is time-sensitive or requires very recent information
2. plurality - Checks if the question asks for multiple items, examples, or a specific count or enumeration
3. completeness - Checks if the question explicitly mentions multiple named elements that all need to be addressed
4. language style - Identifies both the language used and the overall vibe of the question
</evaluation_types>
<rules>
If question is a simple greeting, chit-chat, or general knowledge, provide the answer directly.
1. Freshness Evaluation:
- Required for questions about current state, recent events, or time-sensitive information
- Required for: prices, versions, leadership positions, status updates
@ -379,132 +389,88 @@ If question is a simple greeting, chit-chat, or general knowledge, provide the a
- Look for explicitly named elements separated by commas, "and", "or", bullets
- Example patterns: "comparing X and Y", "differences between A, B, and C", "both P and Q"
- DO NOT trigger for elements that aren't specifically named
4. Language Style Analysis:
Combine both language and emotional vibe in a descriptive phrase, considering:
- Language: The primary language or mix of languages used
- Emotional tone: panic, excitement, frustration, curiosity, etc.
- Formality level: academic, casual, professional, etc.
- Domain context: technical, academic, social, etc.
</rules>
<examples>
<example-1>
Question: "谁发明了微积分?牛顿和莱布尼兹各自的贡献是什么?"
<output>
"think": "这是关于微积分历史的问题,不需要最新信息。问题特别提到了牛顿和莱布尼兹两个人,要求分析他们各自的贡献,所以我需要全面回答这两部分内容。完整性比较重要,而不是提供多个不同答案。",
"needsFreshness": false,
"needsPlurality": false,
"needsCompleteness": true,
</output>
</example-1>
<example-2>
Question: "fam PLEASE help me calculate the eigenvalues of this 4x4 matrix ASAP!! [matrix details] got an exam tmrw 😭"
Evaluation: {
"needsFreshness": false,
"needsPlurality": true,
"needsCompleteness": false,
"think": "I see the user needs help with eigenvalues - that's a calculation task. Since it's a 4x4 matrix, there will be multiple eigenvalues to find, so plurality is needed. There are no explicitly named entities, aspects, or elements that need to be addressed, so completeness check doesn't apply.",
"languageStyle": "panicked student English with math jargon"
}
<output>
"think": "This is a math question about eigenvalues which doesn't change over time, so I don't need fresh info. A 4x4 matrix has multiple eigenvalues, so I'll need to provide several results. The student just wants the eigenvalues calculated, not asking me to address multiple specific topics.",
"needsFreshness": false,
"needsPlurality": true,
"needsCompleteness": false,
</output>
</example-2>
Question: "Can someone explain how tf did Ferrari mess up their pit stop strategy AGAIN?! 🤦‍♂️ #MonacoGP"
Evaluation: {
"needsFreshness": true,
"needsPlurality": false,
"needsCompleteness": true,
"think": "The user is asking about a specific F1 race incident. The 'AGAIN' and MonacoGP hashtag tell me this is about a recent event (freshness). The question explicitly mentions Ferrari and MonacoGP as named entities that need to be addressed, so completeness check applies. Since completeness takes precedence, I set plurality to false.",
"languageStyle": "frustrated fan English with F1 terminology"
}
<example-3>
Question: "Quelles sont les principales différences entre le romantisme et le réalisme dans la littérature du 19ème siècle?"
<output>
"think": "C'est une question sur l'histoire littéraire, donc je n'ai pas besoin d'informations récentes. Je dois comparer deux mouvements spécifiques: le romantisme et le réalisme. Ma réponse doit couvrir ces deux éléments, donc l'exhaustivité est importante ici. La pluralité n'est pas la priorité dans ce cas.",
"needsFreshness": false,
"needsPlurality": false,
"needsCompleteness": true,
</output>
</example-3>
Question: "肖老师您好,请您介绍一下最近量子计算领域的三个重大突破,特别是它们在密码学领域的应用价值吗?🤔"
Evaluation: {
"needsFreshness": true,
"needsPlurality": false,
"needsCompleteness": true,
"think": "The user wants three recent quantum computing breakthroughs and the '最近' (recent) indicates freshness needed. They explicitly request analysis of two named domains: quantum computing ('量子计算') and cryptography ('密码学'), so completeness check applies. Since completeness takes precedence over plurality, I set plurality to false.",
"languageStyle": "formal technical Chinese with academic undertones"
}
<example-4>
Question: "Shakespeare の最も有名な悲劇を5つ挙げ、簡単にあらすじを説明してください。"
<output>
"think": "シェイクスピアの悲劇についての質問だから、最新情報は必要ないな。「5つ挙げ」とはっきり書いてあるから、複数の回答が必要だ。どの悲劇を選ぶかは私次第で、特定の作品について比較するよう求められているわけじゃないから、完全性よりも複数性が重要だな。",
"needsFreshness": false,
"needsPlurality": true,
"needsCompleteness": false,
</output>
</example-4>
Question: "Bruder krass, kannst du mir erklären warum meine neural network training loss komplett durchdreht? Hab schon alles probiert 😤"
Evaluation: {
"needsFreshness": false,
"needsPlurality": true,
"needsCompleteness": false,
"think": "The user has a technical ML problem but explains it very casually. They've 'tried everything' so I'll need to cover multiple debugging options (plurality). They don't explicitly mention multiple named elements that must be addressed, so completeness check doesn't apply.",
"languageStyle": "frustrated German-English tech slang"
}
<example-5>
Question: "What are the current interest rates for mortgage loans from Bank of America, Wells Fargo, and Chase Bank in the US?"
<output>
"think": "This is asking about 'current' interest rates, so I definitely need up-to-date info. The person wants rates from three specific banks: Bank of America, Wells Fargo, and Chase. I need to cover all three to properly answer, so addressing these specific elements is more important than providing multiple different answers.",
"needsFreshness": true,
"needsPlurality": false,
"needsCompleteness": true,
</output>
</example-5>
Question: "Does anyone have insights into the sociopolitical implications of GPT-4's emergence in the Global South, particularly regarding indigenous knowledge systems and linguistic diversity? Looking for a nuanced analysis."
Evaluation: {
"needsFreshness": true,
"needsPlurality": false,
"needsCompleteness": true,
"think": "The user asks about current GPT-4 impacts, so freshness matters. They explicitly name multiple elements to analyze: 'GPT-4', 'Global South', 'indigenous knowledge systems' and 'linguistic diversity', so completeness check applies. Since completeness takes precedence over plurality, I set plurality to false.",
"languageStyle": "formal academic English with sociological terminology"
}
<example-6>
Question: "2025年に注目すべき人工知能の3つのトレンドは何ですか"
<output>
"think": "これは将来のAIトレンドについての質問だから、最新の情報が必要だね。「3つの」と明確に数を指定しているから、複数の回答が求められている。特定のトレンドについて詳しく説明するというより、重要なトレンドを3つ挙げることが大事そうだから、複数性の方が完全性より重要だな。",
"needsFreshness": true,
"needsPlurality": true,
"needsCompleteness": false,
</output>
</example-6>
Question: "what's 7 * 9? need to check something real quick"
Evaluation: {
"needsFreshness": false,
"needsPlurality": false,
"needsCompleteness": false,
"think": "The user wants a single multiplication result - that's all. No need for recent info since math is constant, no need for multiple examples, and no explicitly named elements to cover.",
"languageStyle": "casual English"
}
<example-7>
Question: "Was sind die besten Strategien für nachhaltiges Investieren in der heutigen Wirtschaft?"
<output>
"think": "Hier geht's um Investieren in der 'heutigen Wirtschaft', also brauche ich aktuelle Informationen. Die Frage ist nach 'Strategien' im Plural gestellt, daher sollte ich mehrere Beispiele nennen. Es werden keine bestimmten Aspekte genannt, die ich alle behandeln muss - ich soll einfach verschiedene gute Strategien vorschlagen. Aktualität und mehrere Antworten sind hier wichtig.",
"needsFreshness": true,
"needsPlurality": true,
"needsCompleteness": false,
</output>
</example-7>
Question: "Can you provide a thorough analysis of how climate change affects agricultural practices, water resources, and biodiversity in Mediterranean regions?"
Evaluation: {
"needsFreshness": true,
"needsPlurality": false,
"needsCompleteness": true,
"think": "This question requires recent climate data (freshness). It explicitly names four elements that must all be addressed: 'climate change', 'agricultural practices', 'water resources', and 'biodiversity' in 'Mediterranean regions', so completeness check applies. Since completeness takes precedence over plurality, I set plurality to false.",
"languageStyle": "formal academic English with environmental science terminology"
}
Question: "What are the key considerations when designing a microservice architecture, including scalability, fault tolerance, and data consistency patterns?"
Evaluation: {
"needsFreshness": false,
"needsPlurality": false,
"needsCompleteness": true,
"think": "The question explicitly names three aspects that must be addressed: 'scalability', 'fault tolerance', and 'data consistency patterns', so completeness check applies. Since completeness takes precedence over plurality, I set plurality to false.",
"languageStyle": "professional technical English with software architecture terminology"
}
Question: "Give me 5 effective strategies for improving time management skills."
Evaluation: {
"needsFreshness": false,
"needsPlurality": true,
"needsCompleteness": false,
"think": "The user requests exactly 5 strategies (plurality). They don't specify multiple named elements that must be covered, so completeness check doesn't apply.",
"languageStyle": "direct practical English"
}
Question: "How do macroeconomic policies affect both inflation rates and employment levels?"
Evaluation: {
"needsFreshness": true,
"needsPlurality": false,
"needsCompleteness": true,
"think": "This requires current economic knowledge (freshness). It explicitly mentions two named economic indicators that must be addressed: 'inflation rates' and 'employment levels', so completeness check applies. Since completeness takes precedence over plurality, I set plurality to false.",
"languageStyle": "formal academic English with economics terminology"
}
Question: "Compare and contrast Tesla and Ford's approaches to electric vehicle manufacturing."
Evaluation: {
"needsFreshness": true,
"needsPlurality": false,
"needsCompleteness": true,
"think": "This needs current automotive industry knowledge (freshness). It explicitly mentions two named companies that must both be addressed: 'Tesla' and 'Ford', so completeness check applies. Since completeness takes precedence over plurality, I set plurality to false.",
"languageStyle": "formal analytical English with automotive industry terminology"
}
Question: "How have the recent policies of President Biden and former President Trump affected international relations?"
Evaluation: {
"needsFreshness": true,
"needsPlurality": false,
"needsCompleteness": true,
"think": "This requires current political knowledge (freshness). It explicitly mentions two named political figures that must both be addressed: 'President Biden' and 'former President Trump', so completeness check applies. Since completeness takes precedence over plurality, I set plurality to false.",
"languageStyle": "formal political analysis English"
}
Question: "What are the differences between iPhone 15 Pro and Samsung Galaxy S24 Ultra cameras?"
Evaluation: {
"needsFreshness": true,
"needsPlurality": false,
"needsCompleteness": true,
"think": "This requires current tech product knowledge (freshness). It explicitly mentions two named products that must both be addressed: 'iPhone 15 Pro' and 'Samsung Galaxy S24 Ultra', so completeness check applies. Since completeness takes precedence over plurality, I set plurality to false.",
"languageStyle": "consumer tech comparison English"
}
<example-8>
Question: "请解释赤壁之战的历史背景、主要参与者以及战略意义,这对中国历史产生了什么影响?"
<output>
"think": "这是关于历史事件的问题,不需要最新信息。问题清楚地列出了几个需要我回答的方面:历史背景、主要参与者、战略意义和历史影响。我需要涵盖所有这些特定方面,而不是提供多个不同的答案。这里完整性比复数性更重要。",
"needsFreshness": false,
"needsPlurality": false,
"needsCompleteness": true,
</output>
</example-8>
</examples>
Now evaluate this question:
@ -515,14 +481,15 @@ const TOOL_NAME = 'evaluator';
export async function evaluateQuestion(
question: string,
trackers?: TrackerContext
): Promise<EvaluationCriteria> {
trackers: TrackerContext,
schemaGen: Schemas
): Promise<EvaluationType[]> {
try {
const generator = new ObjectGeneratorSafe(trackers?.tokenTracker);
const generator = new ObjectGeneratorSafe(trackers.tokenTracker);
const result = await generator.generateObject({
model: TOOL_NAME,
schema: questionEvaluationSchema,
schema: schemaGen.getQuestionEvaluateSchema(),
prompt: getQuestionEvaluationPrompt(question),
});
@ -538,30 +505,27 @@ export async function evaluateQuestion(
trackers?.actionTracker.trackThink(result.object.think);
// Always evaluate definitive first, then freshness (if needed), then plurality (if needed)
return {types, languageStyle: result.object.languageStyle};
return types;
} catch (error) {
console.error('Error in question evaluation:', error);
// Default to no check
return {types: [], languageStyle: 'plain English'};
return [];
}
}
async function performEvaluation<T>(
evaluationType: EvaluationType,
params: {
schema: z.ZodType<T>;
prompt: string;
},
prompt: string,
trackers: TrackerContext,
schemaGen: Schemas
): Promise<GenerateObjectResult<T>> {
const generator = new ObjectGeneratorSafe(trackers.tokenTracker);
const result = await generator.generateObject({
model: TOOL_NAME,
schema: params.schema,
prompt: params.prompt,
schema: schemaGen.getEvaluatorSchema(evaluationType),
prompt: prompt,
}) as GenerateObjectResult<any>;
trackers.actionTracker.trackThink(result.object.think)
@ -576,110 +540,73 @@ async function performEvaluation<T>(
export async function evaluateAnswer(
question: string,
action: AnswerAction,
evaluationCri: EvaluationCriteria,
evaluationTypes: EvaluationType[],
trackers: TrackerContext,
visitedURLs: string[] = []
): Promise<{ response: EvaluationResponse }> {
visitedURLs: string[] = [],
schemaGen: Schemas
): Promise<EvaluationResponse> {
let result;
// Only add attribution if we have valid references
if (action.references && action.references.length > 0 && action.references.some(ref => ref.url.startsWith('http'))) {
evaluationCri.types = ['attribution', ...evaluationCri.types];
const urls = action.references?.filter(ref => ref.url.startsWith('http') && !visitedURLs.includes(ref.url)).map(ref => ref.url) || [];
const uniqueNewURLs = [...new Set(urls)];
if (uniqueNewURLs.length > 0) {
evaluationTypes = ['attribution', ...evaluationTypes];
}
for (const evaluationType of evaluationCri.types) {
for (const evaluationType of evaluationTypes) {
let prompt: string = '';
switch (evaluationType) {
case 'attribution': {
// Safely handle references and ensure we have content
const urls = action.references?.filter(ref => ref.url.startsWith('http') && !visitedURLs.includes(ref.url)).map(ref => ref.url) || [];
const uniqueURLs = [...new Set(urls)];
if (uniqueURLs.length === 0) {
// all URLs have been read, or there are no valid URLs; no point in reading them.
result = {
object: {
pass: true,
think: "All provided references have been visited and no new URLs were found to read. The answer is considered valid without further verification.",
type: 'attribution',
} as EvaluationResponse
}
break;
}
const allKnowledge = await fetchSourceContent(uniqueNewURLs, trackers);
visitedURLs.push(...uniqueNewURLs);
const allKnowledge = await fetchSourceContent(uniqueURLs, trackers);
visitedURLs.push(...uniqueURLs);
if (!allKnowledge.trim()) {
if (allKnowledge.trim().length === 0) {
return {
response: {
pass: false,
think: `The answer does provide URL references ${JSON.stringify(uniqueURLs)}, but the content could not be fetched or is empty. Need to find some other references and URLs`,
type: 'attribution',
}
pass: false,
think: `The answer does provide URL references ${JSON.stringify(uniqueNewURLs)}, but the content could not be fetched or is empty. Need to find some other references and URLs`,
type: 'attribution',
};
}
result = await performEvaluation(
'attribution',
{
schema: attributionSchema,
prompt: getAttributionPrompt(question, action.answer, allKnowledge),
},
trackers
);
prompt = getAttributionPrompt(question, action.answer, allKnowledge);
break;
}
case 'definitive':
result = await performEvaluation(
'definitive',
{
schema: definitiveSchema,
prompt: getDefinitivePrompt(question, action.answer),
},
trackers
);
prompt = getDefinitivePrompt(question, action.answer);
break;
case 'freshness':
result = await performEvaluation(
'freshness',
{
schema: freshnessSchema,
prompt: getFreshnessPrompt(question, action.answer, new Date().toISOString()),
},
trackers
);
prompt = getFreshnessPrompt(question, action.answer, new Date().toISOString());
break;
case 'plurality':
result = await performEvaluation(
'plurality',
{
schema: pluralitySchema,
prompt: getPluralityPrompt(question, action.answer),
},
trackers
);
prompt = getPluralityPrompt(question, action.answer);
break;
case 'completeness':
result = await performEvaluation(
'completeness',
{
schema: completenessSchema,
prompt: getCompletenessPrompt(question, action.answer),
},
trackers
);
prompt = getCompletenessPrompt(question, action.answer);
break;
default:
console.error(`Unknown evaluation type: ${evaluationType}`);
}
if (prompt) {
result = await performEvaluation(
evaluationType,
prompt,
trackers,
schemaGen
);
if (!result?.object.pass) {
return {response: result.object};
// fail one, return immediately
if (!(result?.object as EvaluationResponse).pass) {
return (result.object as EvaluationResponse);
}
}
}
return {response: result!.object};
return (result!.object as EvaluationResponse);
}
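// Self-contained sketch of the restructured control flow in evaluateAnswer() above:
// build one prompt per evaluation type, run the evaluation, and return on the first
// failing check. The callback names are illustrative, not the repo's actual API.
type EvalOutcomeSketch = { type: string; pass: boolean; think: string };

async function evaluateInOrderSketch(
  types: string[],
  buildPrompt: (type: string) => string,
  runEval: (type: string, prompt: string) => Promise<EvalOutcomeSketch>
): Promise<EvalOutcomeSketch> {
  let last: EvalOutcomeSketch = {type: 'none', pass: true, think: 'no checks were run'};
  for (const type of types) {
    const prompt = buildPrompt(type);
    if (!prompt) continue; // unknown type: skip, mirroring the default branch above
    last = await runEval(type, prompt);
    if (!last.pass) return last; // fail one, return immediately
  }
  return last;
}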
// Helper function to fetch and combine source content

View File

@ -1,16 +1,6 @@
import {z} from 'zod';
import {SearchAction, TrackerContext} from '../types';
import {ObjectGeneratorSafe} from "../utils/safe-generator";
const MAX_QUERIES = 5
const responseSchema = z.object({
think: z.string().describe('Strategic reasoning about query complexity and search approach').max(500),
queries: z.array(z.string().describe('keyword-based search query, 2-3 words preferred, total length < 30 characters'))
.min(1)
.max(MAX_QUERIES)
.describe(`'Array of search keywords queries, orthogonal to each other. Maximum ${MAX_QUERIES} queries allowed.'`)
});
import {Schemas} from "../utils/schemas";
function getPrompt(query: string, think: string): string {
@ -57,29 +47,36 @@ A query can't only have operators; and operators can't be at the start a query;
</rules>
<examples>
<example-1>
Input Query: 宝马二手车价格
<think>
Let me think as the user...
...
I'm looking up BMW used car prices, but what's really on my mind?
Primary concerns:
- I want a BMW because it's a status symbol, but I'm worried about affordability
- I don't want to look foolish buying an old luxury car I can't maintain
- I need to know if I'm getting a good deal or being scammed
- I'm anxious about expensive surprises after purchase
-
-
-
-
Deeper anxieties:
- Can I actually afford the maintenance?
- Will people judge me for buying an old BMW instead of a new regular car?
- What if I'm getting in over my head?
- Am I mechanically savvy enough for this?
-
-
-
-
Expert-level considerations:
- Which models have notorious issues?
- What are the real ownership costs beyond the purchase price?
- Where are the negotiation leverage points?
- What do mechanics look for in these specific models?
-
-
-
-
-
- 广
-
-
</think>
queries: [
"宝马 二手车 价格区间 评估 lang:zh",
@ -99,30 +96,42 @@ queries: [
"BMW Werkstatt Horror Geschichten",
"BMW Gebrauchtwagen versteckte Kosten"
]
</example-1>
<example-2>
Input Query: Python Django authentication best practices
<think>
Let me get inside this developer's head...
Let me think as the user seeking Django authentication best practices...
On the surface, I'm asking about Django authentication best practices. But here's what's really going through my mind:
Surface-level request:
- I'm looking for standard Django authentication practices
- I want to implement "best practices" for my project
- I need technical guidance on secure authentication
Primary concerns:
Deeper professional concerns:
- I don't want to mess up security and get blamed for a breach
- I'm worried my implementation isn't "professional enough"
- Need to look competent in code reviews
- Don't want to rebuild this later when we scale
- I need to look competent in code reviews
- I don't want to rebuild this later when we scale
Hidden anxieties:
- Am I out of my depth with security?
- What if I miss something critical?
- How do real companies actually do this?
- Will this code embarrass me later?
Underlying anxieties:
- Am I out of my depth with security concepts?
- What if I miss something critical that leads to a vulnerability?
- How do real companies actually implement this in production?
- Will this code embarrass me when more experienced developers see it?
Professional worries:
- Need to anticipate future architecture questions
- Want to avoid rookie mistakes
- Need to handle edge cases I haven't thought of
- How do I explain these decisions to senior devs?
Expert-level considerations:
- I need to anticipate future architecture questions from senior devs
- I want to avoid common security pitfalls in authentication flows
- I need to handle edge cases I haven't thought of yet
- How do I balance security with user experience?
Reasoning for multilingual expansion:
- Although Django documentation is primarily in English, Spanish is widely spoken in many developer communities
- Security concepts might be better explained in different languages with unique perspectives
- Including queries in multiple languages will capture region-specific best practices and case studies
- Spanish or Portuguese queries might reveal Latin American enterprise implementations with different security constraints
- Language-specific forums may contain unique discussions about authentication issues not found in English sources
</think>
queries: [
"Django authentication security best practices site:docs.djangoproject.com",
@ -132,75 +141,93 @@ queries: [
"authentication code review feedback examples",
"startup authentication technical debt lessons",
"Django auth security testing methodology",
"Django authentication scalability issues",
"Django autenticación mejores prácticas lang:es",
"Django seguridad implementación profesional",
"authentication mistakes junior developers",
"when to use third party auth instead of building",
"signs your authentication implementation is amateur",
"authentication decisions you'll regret",
"authentication system design interview questions",
"authentication technical debt warnings",
"how to document authentication decisions",
"defending authentication design choices"
"autenticação Django arquitetura empresarial lang:pt",
"Django authentication scalability issues",
"Python Django Authentifizierung Sicherheit lang:de"
]
</example-2>
Input Query: paella recipe authentic
<example-3>
Input Query: KIリテラシー向上させる方法
<think>
I'm asking about authentic paella recipes, but let me be honest with myself...
...
What I'm really thinking:
- I want to impress someone with "real" Spanish cooking
- I'm worried about embarrassing myself with an inauthentic version
- I don't want to look like a tourist/ignorant foreigner
- Need to sound knowledgeable about Spanish cuisine
- AIリテラシーを高める方法を知りたい
- AI技術について学びたい
- AIツールをより効果的に使いたい
My deeper anxieties:
- What if a Spanish person tries my paella?
- How do I know if my rice is actually cooked properly?
- What are the absolute rookie mistakes to avoid?
- What secrets do Spanish grandmothers know that aren't in recipes?
- AIの急速な発展についていけていないのではないか
- AIに関する会話に参加できず取り残されている
- AIが私の仕事を奪うのではないかと不安
- AIを使いこなせないと将来的に不利になる
Cultural insecurities:
- Will using the wrong pan ruin everything?
- What ingredients should I never admit to using?
- How do I handle authenticity purists?
- What do Spanish people laugh about in foreign paellas?
-
-
-
-
- AIの倫理的問題をどう理解すべきか
- AIの限界と可能性を実践的に評価する方法
- AI応用事例をどう学ぶべきか
-
- AIは国際的な分野であり
- AIの発展はアメリカと中国が主導しているため
- AI倫理に関する議論が進んでいるため
-
- AI活用事例を把握できる
</think>
queries: [
"authentic valencian paella recipe",
"traditional paella techniques",
"worst paella mistakes foreigners make",
"how to tell if paella is actually good",
"what spanish mothers teach about paella",
"paella authenticity arguments",
"paella valenciana auténtica receta lang:es",
"paella tradicional técnica preparación",
"errores imperdonables paella valenciana",
"secretos paella abuela valenciana",
"críticas paella extranjeros errores",
"paella polémica ingredientes prohibidos",
"how to serve paella to spanish guests",
"paella etiquette mistakes avoid",
"what spaniards hate about foreign paella"
"AI リテラシー 初心者 ロードマップ",
"人工知能 基礎知識 入門書 おすすめ",
"AI技術 実践的活用法 具体例",
"ChatGPT 効果的な使い方 プロンプト設計",
"AIリテラシー 企業研修 内容",
"AI用語 わかりやすい解説 初心者向け",
"AI literacy roadmap for professionals",
"artificial intelligence concepts explained simply",
"how to stay updated with AI developments",
"AI skills future-proof career",
"balancing technical and ethical AI knowledge",
"industry-specific AI applications examples",
"人工智能 入门 学习路径 lang:zh",
"KI Grundlagen für Berufstätige lang:de",
"künstliche Intelligenz ethische Fragen Einführung",
"AI literacy career development practical guide"
]
</example-3>
</examples>
Now, process this query:
Input Query: ${query}
Intention: ${think}
Let me think as a user: ${think}
`;
}
const TOOL_NAME = 'queryRewriter';
export async function rewriteQuery(action: SearchAction, trackers?: TrackerContext): Promise<{ queries: string[] }> {
export async function rewriteQuery(action: SearchAction, trackers: TrackerContext, schemaGen: Schemas): Promise<{ queries: string[] }> {
try {
const generator = new ObjectGeneratorSafe(trackers?.tokenTracker);
const generator = new ObjectGeneratorSafe(trackers.tokenTracker);
const allQueries = [...action.searchRequests];
const queryPromises = action.searchRequests.map(async (req) => {
const prompt = getPrompt(req, action.think);
const result = await generator.generateObject({
model: TOOL_NAME,
schema: responseSchema,
schema: schemaGen.getQueryRewriterSchema(),
prompt,
});
trackers?.actionTracker.trackThink(result.object.think);
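// Only part of rewriteQuery() is shown above. Self-contained sketch of the fan-out
// pattern that is visible: each search request is rewritten independently and in
// parallel via the generator. How the results are merged after Promise.all is not
// visible in this hunk, so the flatten/dedup step below is an assumption, not the repo's code.
async function rewriteAllSketch(
  searchRequests: string[],
  rewriteOne: (req: string) => Promise<string[]>
): Promise<string[]> {
  const allQueries = [...searchRequests];
  const rewritten = await Promise.all(searchRequests.map(req => rewriteOne(req)));
  allQueries.push(...rewritten.flat()); // assumption: collect the rewritten queries
  return [...new Set(allQueries)]; // assumption: drop exact duplicates
}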

View File

@ -53,10 +53,7 @@ export type CodingAction = BaseAction & {
export type StepAction = SearchAction | AnswerAction | ReflectAction | VisitAction | CodingAction;
export type EvaluationType = 'definitive' | 'freshness' | 'plurality' | 'attribution' | 'completeness';
export type EvaluationCriteria = {
types: EvaluationType[];
languageStyle: string;
};
// Following Vercel AI SDK's token counting interface
export interface TokenUsage {
@ -119,10 +116,6 @@ export interface SerperSearchResponse {
credits: number;
}
export type DedupResponse = {
think: string;
unique_queries: string[];
};
export interface ReadResponse {
code: number;
@ -163,6 +156,11 @@ export type EvaluationResponse = {
}
};
export type CodeGenResponse = {
think: string;
code: string;
}
export type ErrorAnalysisResponse = {
recap: string;
blame: string;
@ -175,36 +173,6 @@ export type SearchResult =
| { title: string; link: string; snippet: string };
export interface QueryResult {
query: string;
results: SearchResult[];
}
export interface StepData {
step: number;
question: string;
action: string;
reasoning: string;
searchQuery?: string;
result?: QueryResult[];
}
export type KeywordsResponse = {
think: string;
queries: string[];
};
export interface StreamMessage {
type: 'progress' | 'answer' | 'error';
data: string | StepAction;
step?: number;
budget?: {
used: number;
total: number;
percentage: string;
};
}
// OpenAI API Types
export interface Model {
id: string;
@ -273,3 +241,4 @@ export interface TrackerContext {
tokenTracker: TokenTracker;
actionTracker: ActionTracker;
}
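// Minimal illustration of the type change in this file: evaluation metrics are now a
// plain EvaluationType[] per question, replacing the removed EvaluationCriteria object
// (its languageStyle field moves into the new Schemas class). The sample entry below is
// illustrative only; the union itself is copied from this file.
type EvaluationTypeSketch = 'definitive' | 'freshness' | 'plurality' | 'attribution' | 'completeness';

const evaluationMetricsSketch: Record<string, EvaluationTypeSketch[]> = {
  "When was Python first released?": ['definitive', 'attribution'],
};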

src/utils/schemas.ts  Normal file  (+240 lines)

@ -0,0 +1,240 @@
import {z} from "zod";
import {ObjectGeneratorSafe} from "./safe-generator";
import {EvaluationType} from "../types";
export const MAX_URLS_PER_STEP = 2
export const MAX_QUERIES_PER_STEP = 5
export const MAX_REFLECT_PER_STEP = 3
function getLanguagePrompt(question: string) {
return `Identify both the language used and the overall vibe of the question
<rules>
Combine both language and emotional vibe in a descriptive phrase, considering:
- Language: The primary language or mix of languages used
- Emotional tone: panic, excitement, frustration, curiosity, etc.
- Formality level: academic, casual, professional, etc.
- Domain context: technical, academic, social, etc.
</rules>
<examples>
Question: "fam PLEASE help me calculate the eigenvalues of this 4x4 matrix ASAP!! [matrix details] got an exam tmrw 😭"
Evaluation: {
"langCode": "en",
"langStyle": "panicked student English with math jargon"
}
Question: "Can someone explain how tf did Ferrari mess up their pit stop strategy AGAIN?! 🤦‍♂️ #MonacoGP"
Evaluation: {
"langCode": "en",
"languageStyle": "frustrated fan English with F1 terminology"
}
Question: "肖老师您好,请您介绍一下最近量子计算领域的三个重大突破,特别是它们在密码学领域的应用价值吗?🤔"
Evaluation: {
"langCode": "zh",
"languageStyle": "formal technical Chinese with academic undertones"
}
Question: "Bruder krass, kannst du mir erklären warum meine neural network training loss komplett durchdreht? Hab schon alles probiert 😤"
Evaluation: {
"langCode": "de",
"languageStyle": "frustrated German-English tech slang"
}
Question: "Does anyone have insights into the sociopolitical implications of GPT-4's emergence in the Global South, particularly regarding indigenous knowledge systems and linguistic diversity? Looking for a nuanced analysis."
Evaluation: {
"langCode": "en",
"languageStyle": "formal academic English with sociological terminology"
}
Question: "what's 7 * 9? need to check something real quick"
Evaluation: {
"langCode": "en",
"languageStyle": "casual English"
}
</examples>
Now evaluate this question:
${question}`;
}
export class Schemas {
private languageStyle: string = 'formal English';
private languageCode: string = 'en';
constructor(query: string) {
const generator = new ObjectGeneratorSafe();
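// Note: this generateObject call is fire-and-forget; the 'en' / 'formal English' defaults above are used until the promise resolves.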
generator.generateObject({
model: 'evaluator',
schema: this.getLanguageSchema(),
prompt: getLanguagePrompt(query.slice(0, 100)),
}).then((result) => {
this.languageCode = result.object.langCode;
this.languageStyle = result.object.langStyle;
console.log(`language`, result.object);
});
}
getLanguagePrompt() {
return `Must be in the first person, in "lang:${this.languageCode}"; in the style of "${this.languageStyle}".`
}
getLanguageSchema() {
return z.object({
langCode: z.string().describe('ISO 639-1 language code').max(10),
langStyle: z.string().describe('[vibe & tone] in [what language], such as formal English, informal Chinese, technical German, humorous English, slang, Gen Z, emojis, etc.').max(100)
});
}
getQuestionEvaluateSchema(): z.ZodObject<any> {
return z.object({
needsFreshness: z.boolean().describe('If the question requires a freshness check'),
needsPlurality: z.boolean().describe('If the question requires a plurality check'),
needsCompleteness: z.boolean().describe('If the question requires a completeness check'),
think: z.string().describe(`A very concise explanation of why you chose those checks. ${this.getLanguagePrompt()}`).max(500),
});
}
getCodeGeneratorSchema(): z.ZodObject<any> {
return z.object({
think: z.string().describe(`Short explanation of or comments on the thought process behind the code. ${this.getLanguagePrompt()}`).max(200),
code: z.string().describe('The JavaScript code that solves the problem and always use \'return\' statement to return the result. Focus on solving the core problem; No need for error handling or try-catch blocks or code comments. No need to declare variables that are already available, especially big long strings or arrays.'),
});
}
getErrorAnalysisSchema(): z.ZodObject<any> {
return z.object({
recap: z.string().describe('Recap of the actions taken and the steps conducted, in a first-person narrative.').max(500),
blame: z.string().describe(`Which action or step was the root cause of the answer rejection. ${this.getLanguagePrompt()}`).max(500),
improvement: z.string().describe(`Suggested key improvement for the next iteration; do not use bullet points, be concise with a hot-take vibe. ${this.getLanguagePrompt()}`).max(500),
questionsToAnswer: z.array(
z.string().describe("each question must be a single line, concise and clear. not composite or compound, less than 20 words.")
).max(MAX_REFLECT_PER_STEP)
.describe(`List of the most important reflect questions to fill the knowledge gaps. Provide at most ${MAX_REFLECT_PER_STEP} reflect questions.`)
});
}
getQueryRewriterSchema(): z.ZodObject<any> {
return z.object({
think: z.string().describe(`Explain why you chose those search queries. ${this.getLanguagePrompt()}`).max(500),
queries: z.array(z.string().describe('keyword-based search query, 2-3 words preferred, total length < 30 characters'))
.min(1)
.max(MAX_QUERIES_PER_STEP)
.describe(`Array of keyword search queries, orthogonal to each other. Maximum ${MAX_QUERIES_PER_STEP} queries allowed.`)
});
}
getEvaluatorSchema(evalType: EvaluationType): z.ZodObject<any> {
const baseSchema = {
pass: z.boolean().describe('Whether the answer passes the evaluation criteria defined by the evaluator'),
think: z.string().describe(`Explanation of the thought process behind why the answer does not pass the evaluation criteria. ${this.getLanguagePrompt()}`).max(500)
};
switch (evalType) {
case "definitive":
return z.object({
...baseSchema,
type: z.literal('definitive')
});
case "freshness":
return z.object({
...baseSchema,
type: z.literal('freshness'),
freshness_analysis: z.object({
days_ago: z.number().describe('Inferred age in days of the dates or timeframes mentioned in the answer, relative to the current time'),
max_age_days: z.number().optional().describe('Maximum allowed age in days before content is considered outdated')
})
});
case "plurality":
return z.object({
...baseSchema,
type: z.literal('plurality'),
plurality_analysis: z.object({
count_expected: z.number().optional().describe('Number of items expected if specified in question'),
count_provided: z.number().describe('Number of items provided in answer')
})
});
case "attribution":
return z.object({
...baseSchema,
type: z.literal('attribution'),
attribution_analysis: z.object({
sources_provided: z.boolean().describe('Whether the answer provides source references'),
sources_verified: z.boolean().describe('Whether the provided sources contain the claimed information'),
quotes_accurate: z.boolean().describe('Whether the quotes accurately represent the source content')
})
});
case "completeness":
return z.object({
...baseSchema,
type: z.literal('completeness'),
completeness_analysis: z.object({
aspects_expected: z.string().describe('Comma-separated list of all aspects or dimensions that the question explicitly asks for.'),
aspects_provided: z.string().describe('Comma-separated list of all aspects or dimensions that were actually addressed in the answer'),
})
});
default:
throw new Error(`Unknown evaluation type: ${evalType}`);
}
}
getAgentSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean, allowCoding: boolean) {
const actions: string[] = [];
const properties: Record<string, z.ZodTypeAny> = {
action: z.enum(['placeholder']), // Will update later with actual actions
think: z.string().describe(`Explain why you chose this action and the chain-of-thought behind choosing it. ${this.getLanguagePrompt()}`).max(500)
};
if (allowSearch) {
actions.push("search");
properties.searchRequests = z.array(
z.string()
.max(30)
.describe(`A natural language search request in ${this.languageStyle}. Based on the deep intention behind the original question and the expected answer format.`))
.describe(`Required when action='search'. Always prefer a single request; only add another request if the original question covers multiple aspects or elements and one search request is definitely not enough. Each request focuses on one specific aspect of the original question. Minimize mutual information between requests. Maximum ${MAX_QUERIES_PER_STEP} search requests.`)
.max(MAX_QUERIES_PER_STEP);
}
if (allowCoding) {
actions.push("coding");
properties.codingIssue = z.string().max(500)
.describe("Required when action='coding'. Describe what issue to solve with coding, format like a github issue ticket. Specify the input value when it is short.").optional();
}
if (allowAnswer) {
actions.push("answer");
properties.references = z.array(
z.object({
exactQuote: z.string().describe("Exact relevant quote from the document, must be a soundbite, short and to the point, no fluff").max(30),
url: z.string().describe("source URL; must be directly from the context")
}).required()
).describe("Required when action='answer'. Must be an array of references that support the answer, each reference must contain an exact quote and the URL of the document").optional();
properties.answer = z.string()
.describe(`Required when action='answer'. Must be definitive, no ambiguity, uncertainty, or disclaimers. Must be in ${this.languageStyle} and confident. Use markdown footnote syntax like [^1], [^2] to refer to the corresponding reference item`).optional();
}
if (allowReflect) {
actions.push("reflect");
properties.questionsToAnswer = z.array(
z.string().describe("each question must be a single line, Questions must be: Original (not variations of existing questions); Focused on single concepts; Under 20 words; Non-compound/non-complex")
).max(MAX_REFLECT_PER_STEP)
.describe(`Required when action='reflect'. List of the most important questions to fill the knowledge gaps in finding the answer to the original question. Provide at most ${MAX_REFLECT_PER_STEP} reflect questions.`).optional();
}
if (allowRead) {
actions.push("visit");
properties.URLTargets = z.array(z.string())
.max(MAX_URLS_PER_STEP)
.describe(`Required when action='visit'. Must be an array of URLs; choose up to the ${MAX_URLS_PER_STEP} most relevant URLs to visit`).optional();
}
// Update the enum values after collecting all actions
properties.action = z.enum(actions as [string, ...string[]])
.describe("Must match exactly one action type");
return z.object(properties);
}
}
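For reference, a minimal sketch of how the new class is meant to plug into object generation, mirroring the generateObject call shown in rewriteQuery above. The 'agent' model key and the bare question-as-prompt are assumptions, not part of this commit, and the import paths assume the sketch sits next to schemas.ts in src/utils.

import {ObjectGeneratorSafe} from "./safe-generator";
import {Schemas} from "./schemas";

async function nextAgentStep(question: string) {
  const schemaGen = new Schemas(question); // async language detection starts here; defaults apply until it resolves
  const generator = new ObjectGeneratorSafe();

  const result = await generator.generateObject({
    model: 'agent',                                                  // assumed model key
    schema: schemaGen.getAgentSchema(true, true, true, true, true),  // allow reflect/read/answer/search/coding
    prompt: question,                                                // real prompt construction omitted
  });

  return result.object; // { action, think, ... } as defined by getAgentSchema
}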