diff --git a/src/agent.ts b/src/agent.ts index 819a882..dc66d75 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -11,7 +11,7 @@ import {evaluateAnswer, evaluateQuestion} from "./tools/evaluator"; import {analyzeSteps} from "./tools/error-analyzer"; import {TokenTracker} from "./utils/token-tracker"; import {ActionTracker} from "./utils/action-tracker"; -import {StepAction, AnswerAction, KnowledgeItem} from "./types"; +import {StepAction, AnswerAction, KnowledgeItem, EvaluationCriteria} from "./types"; import {TrackerContext} from "./types"; import {search} from "./tools/jina-search"; // import {grounding} from "./tools/grounding"; @@ -24,7 +24,7 @@ async function sleep(ms: number) { return new Promise(resolve => setTimeout(resolve, ms)); } -function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean) { +function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean, languageStyle: string = 'same language as the question') { const actions: string[] = []; const properties: Record = { action: z.enum(['placeholder']), // Will update later with actual actions @@ -40,7 +40,7 @@ function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boole if (allowAnswer) { actions.push("answer"); properties.answer = z.string() - .describe("Required when action='answer'. Must be the final answer in natural language").optional(); + .describe(`Required when action='answer'. 
Must in ${languageStyle}`).optional(); properties.references = z.array( z.object({ exactQuote: z.string().describe("Exact relevant quote from the document"), @@ -85,7 +85,8 @@ function getPrompt( badContext?: { question: string, answer: string, evaluation: string, recap: string; blame: string; improvement: string; }[], knowledge?: KnowledgeItem[], allURLs?: Record, - beastMode?: boolean + beastMode?: boolean, + languageStyle?: string ): string { const sections: string[] = []; const actionSections: string[] = []; @@ -216,11 +217,11 @@ ${allKeywords.join('\n')} if (allowAnswer) { actionSections.push(` -- If is a simple greeting, chit-chat, or general knowledge, provide the answer directly. -- Must provide "references" and each must specify "exactQuote" and "url" -- In the answer, use markdown footnote syntax like [^1], [^2] to refer to the references -- Responses must be definitive (no ambiguity, uncertainty, or disclaimers) -- Provide final response only when 100% certain${allowReflect ? '\n- If doubts remain, use instead' : ''} +- If is a simple greeting, chit-chat, or general knowledge, provide the answer directly; +- Must provide "references" and each must specify "exactQuote" and "url"; +- In the answer, use markdown footnote syntax like [^1], [^2] to refer to the references; +- Responses must be definitive (no ambiguity, uncertainty, or disclaimers) and in the style of ${languageStyle}; +- Provide final response only when 100% certain;${allowReflect ? 
'\n- If doubts remain, use instead' : ''} `); } @@ -299,8 +300,9 @@ export async function getResponse(question: string, let totalStep = 0; let badAttempts = 0; let schema: ZodObject = getSchema(true, true, true, true) - const gaps: string[] = [question.trim()]; // All questions to be answered including the orginal question - const allQuestions = [question.trim()]; + question = question.trim() + const gaps: string[] = [question]; // All questions to be answered including the original question + const allQuestions = [question]; const allKeywords = []; const allKnowledge: KnowledgeItem[] = []; // knowledge are intermedidate questions that are answered // iterate over historyMessages @@ -329,7 +331,7 @@ export async function getResponse(question: string, const allURLs: Record = {}; const visitedURLs: string[] = []; - const evaluationMetrics: Record = {}; + const evaluationMetrics: Record = {}; while (context.tokenTracker.getTotalUsage().totalTokens < tokenBudget && badAttempts <= maxBadAttempts) { // add 1s delay to avoid rate limiting await sleep(STEP_SLEEP); @@ -339,7 +341,7 @@ export async function getResponse(question: string, console.log(`Step ${totalStep} / Budget used ${budgetPercentage}%`); console.log('Gaps:', gaps); allowReflect = allowReflect && (gaps.length <= 1); - const currentQuestion = gaps.length > 0 ? gaps.shift()! : question.trim(); + const currentQuestion = gaps.length > 0 ? gaps.shift()! 
: question if (!evaluationMetrics[currentQuestion]) { evaluationMetrics[currentQuestion] = await evaluateQuestion(currentQuestion, context.tokenTracker) } @@ -361,9 +363,11 @@ export async function getResponse(question: string, badContext, allKnowledge, allURLs, - false + false, + evaluationMetrics[currentQuestion].languageStyle ); - schema = getSchema(allowReflect, allowRead, allowAnswer, allowSearch) + schema = getSchema(allowReflect, allowRead, allowAnswer, allowSearch, + evaluationMetrics[currentQuestion].languageStyle) const generator = new ObjectGeneratorSafe(context.tokenTracker); const result = await generator.generateObject({ model: 'agent', @@ -401,7 +405,7 @@ export async function getResponse(question: string, const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep, evaluationMetrics[currentQuestion], context.tokenTracker); - if (currentQuestion.trim() === question.trim()) { + if (currentQuestion.trim() === question) { if (evaluation.pass) { diaryContext.push(` At step ${step}, you took **answer** action and finally found the answer to the original question: @@ -458,7 +462,7 @@ ${evaluation.think} // reranker? maybe gaps.push(...errorAnalysis.questionsToAnswer.slice(0, 2)); allQuestions.push(...errorAnalysis.questionsToAnswer.slice(0, 2)); - gaps.push(question.trim()); // always keep the original question in the gaps + gaps.push(question); // always keep the original question in the gaps } badAttempts++; @@ -505,7 +509,7 @@ You will now figure out the answers to these sub-questions and see if they can h `); gaps.push(...newGapQuestions.slice(0, 2)); allQuestions.push(...newGapQuestions.slice(0, 2)); - gaps.push(question.trim()); // always keep the original question in the gaps + gaps.push(question); // always keep the original question in the gaps } else { diaryContext.push(` At step ${step}, you took **reflect** and think about the knowledge gaps. 
You tried to break down the question "${currentQuestion}" into gap-questions like this: ${oldQuestions.join(', ')} @@ -697,10 +701,12 @@ You decided to think out of the box or cut from a completely different angle.`); badContext, allKnowledge, allURLs, - true + true, + evaluationMetrics[question]?.languageStyle || 'same language as the question' ); - schema = getSchema(false, false, true, false); + schema = getSchema(false, false, true, false, + evaluationMetrics[question]?.languageStyle || 'same language as the question'); const generator = new ObjectGeneratorSafe(context.tokenTracker); const result = await generator.generateObject({ model: 'agentBeastMode', @@ -721,7 +727,15 @@ You decided to think out of the box or cut from a completely different angle.`); async function storeContext(prompt: string, schema: any, memory: any[][], step: number) { if ((process as any).asyncLocalContext?.available?.()) { const [context, keywords, questions, knowledge] = memory; - (process as any).asyncLocalContext.ctx.promptContext = { prompt, schema, context, keywords, questions, knowledge, step }; + (process as any).asyncLocalContext.ctx.promptContext = { + prompt, + schema, + context, + keywords, + questions, + knowledge, + step + }; return; } diff --git a/src/tools/evaluator.ts b/src/tools/evaluator.ts index 33d5d81..fdc786a 100644 --- a/src/tools/evaluator.ts +++ b/src/tools/evaluator.ts @@ -1,12 +1,11 @@ import {z} from 'zod'; import {GenerateObjectResult} from 'ai'; import {TokenTracker} from "../utils/token-tracker"; -import {AnswerAction, EvaluationResponse} from '../types'; +import {AnswerAction, EvaluationCriteria, EvaluationResponse, EvaluationType} from '../types'; import {readUrl, removeAllLineBreaks} from "./read"; import {ObjectGeneratorSafe} from "../utils/safe-generator"; -type EvaluationType = 'definitive' | 'freshness' | 'plurality' | 'attribution'; const baseSchema = { pass: z.boolean().describe('Whether the answer passes the evaluation criteria defined by the 
evaluator'), @@ -301,7 +300,8 @@ Answer: ${JSON.stringify(answer)}`; const questionEvaluationSchema = z.object({ needsFreshness: z.boolean().describe('Whether the question requires freshness check'), needsPlurality: z.boolean().describe('Whether the question requires plurality check'), - reasoning: z.string().describe('Explanation of why these checks are needed or not needed') + reasoning: z.string().describe('Explanation of why these checks are needed or not needed'), + languageStyle: z.string().describe('The language being used and the overall vibe/mood of the question'), }); function getQuestionEvaluationPrompt(question: string): string { @@ -310,6 +310,7 @@ function getQuestionEvaluationPrompt(question: string): string { 1. freshness - Checks if the question is time-sensitive or requires very recent information 2. plurality - Checks if the question asks for multiple items or a specific count or enumeration +3. language style - Identifies both the language used and the overall vibe of the question @@ -326,42 +327,54 @@ If question is a simple greeting, chit-chat, or general knowledge, provide the a - Check for: numbers ("5 examples"), plural nouns, list requests - Look for: "all", "list", "enumerate", "examples", plural forms - Required when question implies completeness ("all the reasons", "every factor") + +3. Language Style Analysis: + Combine both language and emotional vibe in a descriptive phrase, considering: + - Language: The primary language or mix of languages used + - Emotional tone: panic, excitement, frustration, curiosity, etc. + - Formality level: academic, casual, professional, etc. + - Domain context: technical, academic, social, etc. -Question: "Hello, how are you?" +Question: "fam PLEASE help me calculate the eigenvalues of this 4x4 matrix ASAP!! [matrix details] got an exam tmrw 😭" Evaluation: { - "needsFreshness": false, - "needsPlurality": false, - "reasoning": "Simple greeting, no additional checks needed." 
+ "needsFreshness": false, + "needsPlurality": true, + "reasoning": "Multiple eigenvalues needed but no time-sensitive information required", + "languageStyle": "panicked student English with math jargon" } -Question: "What is the current CEO of OpenAI?" +Question: "Can someone explain how tf did Ferrari mess up their pit stop strategy AGAIN?! 🤦‍♂️ #MonacoGP" Evaluation: { - "needsFreshness": true, - "needsPlurality": false, - "reasoning": "Question asks about current leadership position which requires freshness check. No plurality check needed as it asks for a single position." + "needsFreshness": true, + "needsPlurality": true, + "reasoning": "Refers to recent race event and requires analysis of multiple strategic decisions", + "languageStyle": "frustrated fan English with F1 terminology" } -Question: "List all the AI companies in Berlin" +Question: "肖老师您好,请您介绍一下最近量子计算领域的三个重大突破,特别是它们在密码学领域的应用价值吗?🤔" Evaluation: { - "needsFreshness": false, - "needsPlurality": true, - "reasoning": "Question asks for a comprehensive list ('all') which requires plurality check. No freshness check needed as it's not time-sensitive." + "needsFreshness": true, + "needsPlurality": true, + "reasoning": "Asks for recent breakthroughs (freshness) and specifically requests three examples (plurality)", + "languageStyle": "formal technical Chinese with academic undertones" } -Question: "What are the top 5 latest AI models released by OpenAI?" +Question: "Bruder krass, kannst du mir erklären warum meine neural network training loss komplett durchdreht? Hab schon alles probiert 😤" Evaluation: { - "needsFreshness": true, - "needsPlurality": true, - "reasoning": "Question requires freshness check for 'latest' releases and plurality check for 'top 5' items." + "needsFreshness": false, + "needsPlurality": true, + "reasoning": "Requires comprehensive debugging analysis of multiple potential issues", + "languageStyle": "frustrated German-English tech slang" } -Question: "Who created Python?" 
+Question: "Does anyone have insights into the sociopolitical implications of GPT-4's emergence in the Global South, particularly regarding indigenous knowledge systems and linguistic diversity? Looking for a nuanced analysis." Evaluation: { - "needsFreshness": false, - "needsPlurality": false, - "reasoning": "Simple factual question requiring only definitiveness check. No time sensitivity or multiple items needed." + "needsFreshness": true, + "needsPlurality": true, + "reasoning": "Requires analysis of current impacts (freshness) across multiple dimensions: sociopolitical, cultural, and linguistic (plurality)", + "languageStyle": "formal academic English with sociological terminology" } @@ -374,7 +387,7 @@ const TOOL_NAME = 'evaluator'; export async function evaluateQuestion( question: string, tracker?: TokenTracker -): Promise { +): Promise { try { const generator = new ObjectGeneratorSafe(tracker); @@ -394,12 +407,12 @@ export async function evaluateQuestion( console.log('Question Metrics:', types); // Always evaluate definitive first, then freshness (if needed), then plurality (if needed) - return types; + return {types, languageStyle: result.object.languageStyle}; } catch (error) { console.error('Error in question evaluation:', error); // Default to all evaluation types in case of error - return ['definitive', 'freshness', 'plurality']; + return {types: ['definitive', 'freshness', 'plurality'], languageStyle: 'plain English'}; } } @@ -430,17 +443,17 @@ async function performEvaluation( export async function evaluateAnswer( question: string, action: AnswerAction, - evaluationOrder: EvaluationType[] = ['definitive', 'freshness', 'plurality'], + evaluationCri: EvaluationCriteria, tracker?: TokenTracker ): Promise<{ response: EvaluationResponse }> { let result; // Only add attribution if we have valid references if (action.references && action.references.length > 0) { - evaluationOrder = ['attribution', ...evaluationOrder]; + evaluationCri.types = ['attribution', 
...evaluationCri.types]; } - for (const evaluationType of evaluationOrder) { + for (const evaluationType of evaluationCri.types) { switch (evaluationType) { case 'attribution': { // Safely handle references and ensure we have content diff --git a/src/types.ts b/src/types.ts index f3f9f0d..12ac169 100644 --- a/src/types.ts +++ b/src/types.ts @@ -45,6 +45,11 @@ export type VisitAction = BaseAction & { export type StepAction = SearchAction | AnswerAction | ReflectAction | VisitAction; +export type EvaluationType = 'definitive' | 'freshness' | 'plurality' | 'attribution'; +export type EvaluationCriteria = { + types: EvaluationType[]; + languageStyle: string; +}; // Following Vercel AI SDK's token counting interface export interface TokenUsage {