feat: update eval and ego questions

2025-12-26 06:28:56 +08:00 · 2025-02-07 11:17:42 +08:00 · 2025-02-07 11:17:42 +08:00 · ef34881f59
commit ef34881f59
parent 0a6ba24b27
3 changed files with 51 additions and 6 deletions
--- a/src/agent.ts
+++ b/src/agent.ts
@ -652,7 +652,7 @@ You decided to think out of the box or cut from a completely different angle.`);

    const model = getModel('agentBeastMode');
    let object;
-    let totalTokens = 0;
+    let totalTokens;
    try {
      const result = await generateObject({
        model,
--- a/src/evals/batch-evals.ts
+++ b/src/evals/batch-evals.ts
@ -3,9 +3,10 @@ import {exec} from 'child_process';
 import {promisify} from 'util';
 import {getResponse} from '../agent';
 import {generateObject} from 'ai';
-import {getModel, getMaxTokens} from '../config';
+import {GEMINI_API_KEY} from '../config';
 import {z} from 'zod';
 import {AnswerAction, TrackerContext} from "../types";
+import {createGoogleGenerativeAI} from "@ai-sdk/google";

 const execAsync = promisify(exec);

@ -49,10 +50,10 @@ Minor wording differences are acceptable as long as the core information of the

  try {
    const result = await generateObject({
-      model: getModel('evaluator'),
+      model: createGoogleGenerativeAI({ apiKey: GEMINI_API_KEY })('gemini-2.0-flash'),  // fix to gemini-2.0-flash for evaluation
      schema,
      prompt,
-      maxTokens: getMaxTokens('evaluator'),
+      maxTokens: 1000,
      temperature: 0  // Setting temperature to 0 for deterministic output
    });

--- a/src/evals/ego-questions.json
+++ b/src/evals/ego-questions.json
@ -1,7 +1,7 @@
 [
  {
    "question": "what is jina ai ceo's twitter account",
-    "answer": "hxiao"
+    "answer": "@hxiao"
  },
  {
    "question": "when was jina ai founded?",
@ -12,7 +12,7 @@
    "answer": "ReaderLM-2.0"
  },
  {
-    "question": "what is the lastest blog post that jina ai published?",
+    "question": "what is the latest blog post that jina ai published?",
    "answer": "A Practical Guide to Deploying Search Foundation Models in Production"
  },
  {
@ -38,5 +38,49 @@
  {
    "question": "who are the authors of jina-clip-v2 paper?",
    "answer": "Andreas Koukounas, Georgios Mastrapas, Bo Wang, Mohammad Kalim Akram, Sedigheh Eslami, Michael Günther, Isabelle Mohr, Saba Sturua, Scott Martens, Nan Wang, Han Xiao"
+  },
+  {
+    "question": "what can you find in common between fashion-mnist and bert-as-service?",
+    "answer": "Both are made by Han Xiao"
+  },
+  {
+    "question": "Which countries are the investors of Jina AI from?",
+    "answer": "USA and China, but no German investors"
+  },
+  {
+    "question": "what is the grounding api endpoint of jina ai?",
+    "answer": "g.jina.ai"
+  },
+  {
+    "question": "which of the following models do not support Matryoshka representation? jina-embeddings-v3, jina-embeddings-v2-base-en, jina-clip-v2, jina-clip-v1",
+    "answer": "jina-embeddings-v2-base-en and jina-clip-v1"
+  },
+  {
+    "question": "How much is the 2024 yearbook that jina ai published?",
+    "answer": "$35 USD"
+  },
+  {
+    "question": "Any meme or crypto coin that was announced by jina ai?",
+    "answer": "No."
+  },
+  {
+    "question": "Who is the legal signatory of Jina AI GmbH?",
+    "answer": "Jiao Liu"
+  },
+  {
+    "question": "does node-deepresearch project support local LLMs?",
+    "answer": "Yes."
+  },
+  {
+    "question": "what is the name of the jina ai's mascot?",
+    "answer": "Jina"
+  },
+  {
+    "question": "what is the name of the jina ai's mascot?",
+    "answer": "No, Jina AI does not have a mascot."
+  },
+  {
+    "question": "Does late chunking work with cls pooling?",
+    "answer": "No. Late chunking only works with mean pooling."
  }
 ]