diff --git a/src/agent.ts b/src/agent.ts index 2e8d968..8b1608c 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -652,7 +652,7 @@ You decided to think out of the box or cut from a completely different angle.`); const model = getModel('agentBeastMode'); let object; - let totalTokens = 0; + let totalTokens; try { const result = await generateObject({ model, diff --git a/src/evals/batch-evals.ts b/src/evals/batch-evals.ts index 9f4c219..4680469 100644 --- a/src/evals/batch-evals.ts +++ b/src/evals/batch-evals.ts @@ -3,9 +3,10 @@ import {exec} from 'child_process'; import {promisify} from 'util'; import {getResponse} from '../agent'; import {generateObject} from 'ai'; -import {getModel, getMaxTokens} from '../config'; +import {GEMINI_API_KEY} from '../config'; import {z} from 'zod'; import {AnswerAction, TrackerContext} from "../types"; +import {createGoogleGenerativeAI} from "@ai-sdk/google"; const execAsync = promisify(exec); @@ -49,10 +50,10 @@ Minor wording differences are acceptable as long as the core information of the try { const result = await generateObject({ - model: getModel('evaluator'), + model: createGoogleGenerativeAI({ apiKey: GEMINI_API_KEY })('gemini-2.0-flash'), // fix to gemini-2.0-flash for evaluation schema, prompt, - maxTokens: getMaxTokens('evaluator'), + maxTokens: 1000, temperature: 0 // Setting temperature to 0 for deterministic output }); diff --git a/src/evals/ego-questions.json b/src/evals/ego-questions.json index ed597f4..8d37a23 100644 --- a/src/evals/ego-questions.json +++ b/src/evals/ego-questions.json @@ -1,7 +1,7 @@ [ { "question": "what is jina ai ceo's twitter account", - "answer": "hxiao" + "answer": "@hxiao" }, { "question": "when was jina ai founded?", @@ -12,7 +12,7 @@ "answer": "ReaderLM-2.0" }, { - "question": "what is the lastest blog post that jina ai published?", + "question": "what is the latest blog post that jina ai published?", "answer": "A Practical Guide to Deploying Search Foundation Models in Production" }, { @@ -38,5 +38,49 @@ { "question": "who are the authors of jina-clip-v2 paper?", "answer": "Andreas Koukounas, Georgios Mastrapas, Bo Wang, Mohammad Kalim Akram, Sedigheh Eslami, Michael Günther, Isabelle Mohr, Saba Sturua, Scott Martens, Nan Wang, Han Xiao" + }, + { + "question": "what can you find in common between fashion-mnist and bert-as-service?", + "answer": "Both are made by Han Xiao" + }, + { + "question": "Which countries are the investors of Jina AI from?", + "answer": "USA and China, but no German investors" + }, + { + "question": "what is the grounding api endpoint of jina ai?", + "answer": "g.jina.ai" + }, + { + "question": "which of the following models do not support Matryoshka representation? jina-embeddings-v3, jina-embeddings-v2-base-en, jina-clip-v2, jina-clip-v1", + "answer": "jina-embeddings-v2-base-en and jina-clip-v1" + }, + { + "question": "How much is the 2024 yearbook that jina ai published?", + "answer": "$35 USD" + }, + { + "question": "Any meme or crypto coin that announced by jina ai?", + "answer": "No." + }, + { + "question": "Who is the legal signatory of Jina AI gmbh?", + "answer": "Jiao Liu" + }, + { + "question": "does node-deepresearch project support local LLMs?", + "answer": "Yes." + }, + { + "question": "what is the name of the jina ai's mascot?", + "answer": "Jina" + }, + { + "question": "what is the name of the jina ai's mascot?", + "answer": "No, Jina AI does not have a mascot." + }, + { + "question": "Does late chunking work with cls pooling?", + "answer": "No. late chunking only works with mean pooling." } ] \ No newline at end of file