diff --git a/README.md b/README.md
index 71bd23f..95107a7 100644
--- a/README.md
+++ b/README.md
@@ -224,7 +224,7 @@ flowchart TD
 
 ## Evaluation
 
-I kept the evaluation simple, LLM-as-a-judge and collect some ego questions (i.e. questions about Jina AI that I know 100% the answer) for evaluation.
+I kept the evaluation simple: an LLM-as-a-judge setup over a small set of [ego questions](./src/evals/ego-questions.json). These are questions about Jina AI whose answers I know with 100% certainty but LLMs do not.
 
 I mainly look at 3 things: total steps, total tokens, and the correctness of the final answer.
 
diff --git a/src/agent.ts b/src/agent.ts
index 8b1608c..854356f 100644
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -667,10 +667,10 @@ You decided to think out of the box or cut from a completely different angle.`);
       object = result.object;
       totalTokens = result.totalTokens;
     }
-    context.tokenTracker.trackUsage('agent', totalTokens);
-
     await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
     thisStep = object as StepAction;
+    context.actionTracker.trackAction({totalStep, thisStep, gaps, badAttempts});
+    context.tokenTracker.trackUsage('agent', totalTokens);
     console.log(thisStep)
     return {result: thisStep, context};
   }
diff --git a/src/evals/batch-evals.ts b/src/evals/batch-evals.ts
index 4680469..db31155 100644
--- a/src/evals/batch-evals.ts
+++ b/src/evals/batch-evals.ts
@@ -25,6 +25,63 @@ interface EvaluationResult {
   actual_answer: string;
 }
 
+interface EvaluationStats { // Aggregate metrics summarizing one batch-evaluation run.
+  model_name: string; // label of the model the run is attributed to
+  pass_rate: number; // passing results as a percentage (0-100) of all results
+  avg_steps: number; // arithmetic mean of total_steps across results
+  max_steps: number; // largest total_steps among results
+  min_steps: number; // smallest total_steps among results
+  median_steps: number; // median of total_steps
+  avg_tokens: number; // arithmetic mean of total_tokens across results
+  median_tokens: number; // median of total_tokens
+  max_tokens: number; // largest total_tokens among results
+  min_tokens: number; // smallest total_tokens among results
+}
+
+function calculateMedian(numbers: number[]): number {
+  // Returns the median of `numbers`. Sorting happens on a copy, so the
+  // caller's array keeps its original order.
+  const ordered = numbers.slice().sort((x, y) => x - y);
+  const half = Math.floor(ordered.length / 2);
+  // Odd count: the central value. Even count: mean of the two central values.
+  const isOdd = ordered.length % 2 === 1;
+  return isOdd ? ordered[half] : (ordered[half - 1] + ordered[half]) / 2;
+}
+
+function calculateStats(results: EvaluationResult[], modelName: string): EvaluationStats {
+  // Aggregates the pass rate and step/token summaries of `results` under `modelName`.
+  const steps = results.map(r => r.total_steps);
+  const tokens = results.map(r => r.total_tokens);
+  // With no results the raw formulas give NaN (0/0) and -Infinity/+Infinity (Math.max/min
+  // of nothing), which JSON.stringify writes as null; every metric is reported as 0 instead.
+  const n = results.length;
+  const mean = (xs: number[]): number => (n === 0 ? 0 : xs.reduce((a, b) => a + b, 0) / n);
+  const pick = (f: (...xs: number[]) => number, xs: number[]): number => (n === 0 ? 0 : f(...xs));
+  return {
+    model_name: modelName,
+    pass_rate: n === 0 ? 0 : (results.filter(r => r.pass).length / n) * 100,
+    avg_steps: mean(steps), max_steps: pick(Math.max, steps), min_steps: pick(Math.min, steps),
+    median_steps: n === 0 ? 0 : calculateMedian(steps),
+    avg_tokens: mean(tokens), median_tokens: n === 0 ? 0 : calculateMedian(tokens),
+    max_tokens: pick(Math.max, tokens), min_tokens: pick(Math.min, tokens)
+  };
+}
+
+function printStats(stats: EvaluationStats): void {
+  // Writes a human-readable summary via console.log, one call per report line.
+  // Pass rate, averages and medians are rounded to whole numbers for display only.
+  const whole = (value: number): string => value.toFixed(0);
+  const report: string[] = [
+    '\n=== Evaluation Statistics ===', `Model: ${stats.model_name}`,
+    `Pass Rate: ${whole(stats.pass_rate)}%`, `Average Steps: ${whole(stats.avg_steps)}`,
+    `Maximum Steps: ${stats.max_steps}`, `Minimum Steps: ${stats.min_steps}`,
+    `Median Steps: ${whole(stats.median_steps)}`, `Average Tokens: ${whole(stats.avg_tokens)}`,
+    `Median Tokens: ${whole(stats.median_tokens)}`, `Maximum Tokens: ${stats.max_tokens}`,
+    `Minimum Tokens: ${stats.min_tokens}`, '===========================\n'
+  ];
+  for (const line of report) console.log(line);
+}
+
 async function getCurrentGitCommit(): Promise<string> {
   try {
     const {stdout} = await execAsync('git rev-parse --short HEAD');
@@ -72,7 +129,9 @@ async function batchEvaluate(inputFile: string): Promise<void> {
   const questions: Question[] = JSON.parse(await fs.readFile(inputFile, 'utf-8'));
   const results: EvaluationResult[] = [];
   const gitCommit = await getCurrentGitCommit();
-  const outputFile = `eval-${gitCommit}.json`;
+  const modelName = process.env.DEFAULT_MODEL_NAME || 'unknown';
+  const outputFile = `eval-${gitCommit}-${modelName}.json`;
+
   // Process each question
   for (let i = 0; i < questions.length; i++) {
     const {question, answer: expectedAnswer} = questions[i];
@@ -83,7 +142,7 @@ async function batchEvaluate(inputFile: string): Promise<void> {
       const {
         result: response,
         context
-      } = await getResponse(question) as { result: AnswerAction; context: TrackerContext };
+      } = await getResponse(question, 0) as { result: AnswerAction; context: TrackerContext };
       const actualAnswer = response.answer;
 
       // Evaluate the response
@@ -114,12 +173,19 @@ async function batchEvaluate(inputFile: string): Promise<void> {
         actual_answer: 'Error occurred'
       });
     }
-    // Save results
-    await fs.writeFile(outputFile, JSON.stringify(results, null, 2));
-    console.log(`\nEvaluation results saved to ${outputFile}`);
   }
 
+  // Calculate and print statistics
+  const stats = calculateStats(results, modelName);
+  printStats(stats);
 
+  // Save results
+  await fs.writeFile(outputFile, JSON.stringify({
+    results,
+    statistics: stats
+  }, null, 2));
+
+  console.log(`\nEvaluation results saved to ${outputFile}`);
 }
 
 // Run batch evaluation if this is the main module
diff --git a/src/evals/ego-questions.json b/src/evals/ego-questions.json
index 8d37a23..f05d677 100644
--- a/src/evals/ego-questions.json
+++ b/src/evals/ego-questions.json
@@ -1,7 +1,7 @@
 [
   {
-    "question": "what is jina ai ceo's twitter account",
-    "answer": "@hxiao"
+    "question": "what did jina ai ceo say about deepseek that went viral and became a meme?",
+    "answer": "a side project"
   },
   {
     "question": "when was jina ai founded?",
@@ -24,28 +24,28 @@
     "answer": "30"
   },
   {
-    "question": "how much rate limit for r.jina.ai api without an api key?",
-    "answer": "20 RPM (requests per minute)"
+    "question": "when was jina reader released?",
+    "answer": "April 2024"
   },
   {
     "question": "How many offices do Jina AI have and where are they?",
     "answer": "four: sunnyvale, berlin, beijing, shenzhen"
   },
   {
-    "question": "Does jina reranker v2 support multilingual?",
-    "answer": "Yes"
+    "question": "what does jina-colbert-v2 improve over jina-colbert-v1?",
+    "answer": "v2 adds multilingual support"
   },
   {
     "question": "who are the authors of jina-clip-v2 paper?",
     "answer": "Andreas Koukounas, Georgios Mastrapas, Bo Wang, Mohammad Kalim Akram, Sedigheh Eslami, Michael Günther, Isabelle Mohr, Saba Sturua, Scott Martens, Nan Wang, Han Xiao"
   },
   {
-    "question": "what can you find in common between fashion-mnist and bert-as-service?",
-    "answer": "Both are made by Han Xiao"
+    "question": "who is the common author of fashion-mnist and node-deepresearch?",
+    "answer": "Han Xiao"
   },
   {
     "question": "Which countries are the investors of Jina AI from?",
-    "answer": "USA and China, but no German investors"
+    "answer": "USA and China only, no German investors"
   },
   {
     "question": "what is the grounding api endpoint of jina ai?",
@@ -56,24 +56,20 @@
     "answer": "jina-embeddings-v2-base-en and jina-clip-v1"
   },
   {
-    "question": "How much is the 2024 yearbook that jina ai published?",
-    "answer": "$35 USD"
+    "question": "Can I purchase the 2024 yearbook that jina ai published today?",
+    "answer": "No it is sold out."
   },
   {
-    "question": "Any meme or crypto coin that announced by jina ai?",
-    "answer": "No."
+    "question": "How many free tokens do you get from a new jina api key?",
+    "answer": "1 million."
   },
   {
     "question": "Who is the legal signatory of Jina AI gmbh?",
     "answer": "Jiao Liu"
   },
   {
-    "question": "does node-deepresearch project support local LLMs?",
-    "answer": "Yes."
-  },
-  {
-    "question": "what is the name of the jina ai's mascot?",
-    "answer": "Jina"
+    "question": "which llm providers does the node-deepresearch project support?",
+    "answer": "Gemini, OpenAI and some local LLMs"
   },
   {
     "question": "what is the name of the jina ai's mascot?",