feat: update eval and ego questions

This commit is contained in:
Han Xiao 2025-02-07 11:17:42 +08:00
parent 0a6ba24b27
commit ef34881f59
3 changed files with 51 additions and 6 deletions

View File

@ -652,7 +652,7 @@ You decided to think out of the box or cut from a completely different angle.`);
const model = getModel('agentBeastMode');
let object;
let totalTokens = 0;
let totalTokens;
try {
const result = await generateObject({
model,

View File

@ -3,9 +3,10 @@ import {exec} from 'child_process';
import {promisify} from 'util';
import {getResponse} from '../agent';
import {generateObject} from 'ai';
import {getModel, getMaxTokens} from '../config';
import {GEMINI_API_KEY} from '../config';
import {z} from 'zod';
import {AnswerAction, TrackerContext} from "../types";
import {createGoogleGenerativeAI} from "@ai-sdk/google";
const execAsync = promisify(exec);
@ -49,10 +50,10 @@ Minor wording differences are acceptable as long as the core information of the
try {
const result = await generateObject({
model: getModel('evaluator'),
model: createGoogleGenerativeAI({ apiKey: GEMINI_API_KEY })('gemini-2.0-flash'), // fix to gemini-2.0-flash for evaluation
schema,
prompt,
maxTokens: getMaxTokens('evaluator'),
maxTokens: 1000,
temperature: 0 // Setting temperature to 0 for deterministic output
});

View File

@ -1,7 +1,7 @@
[
{
"question": "what is jina ai ceo's twitter account",
"answer": "hxiao"
"answer": "@hxiao"
},
{
"question": "when was jina ai founded?",
@ -12,7 +12,7 @@
"answer": "ReaderLM-2.0"
},
{
"question": "what is the lastest blog post that jina ai published?",
"question": "what is the latest blog post that jina ai published?",
"answer": "A Practical Guide to Deploying Search Foundation Models in Production"
},
{
@ -38,5 +38,49 @@
{
"question": "who are the authors of jina-clip-v2 paper?",
"answer": "Andreas Koukounas, Georgios Mastrapas, Bo Wang, Mohammad Kalim Akram, Sedigheh Eslami, Michael Günther, Isabelle Mohr, Saba Sturua, Scott Martens, Nan Wang, Han Xiao"
},
{
"question": "what can you find in common between fashion-mnist and bert-as-service?",
"answer": "Both are made by Han Xiao"
},
{
"question": "Which countries are the investors of Jina AI from?",
"answer": "USA and China, but no German investors"
},
{
"question": "what is the grounding api endpoint of jina ai?",
"answer": "g.jina.ai"
},
{
"question": "which of the following models do not support Matryoshka representation? jina-embeddings-v3, jina-embeddings-v2-base-en, jina-clip-v2, jina-clip-v1",
"answer": "jina-embeddings-v2-base-en and jina-clip-v1"
},
{
"question": "How much is the 2024 yearbook that jina ai published?",
"answer": "$35 USD"
},
{
"question": "Any meme or crypto coin that was announced by jina ai?",
"answer": "No."
},
{
"question": "Who is the legal signatory of Jina AI GmbH?",
"answer": "Jiao Liu"
},
{
"question": "does the node-deepresearch project support local LLMs?",
"answer": "Yes."
},
{
"question": "what is the name of jina ai's mascot?",
"answer": "Jina"
},
{
"question": "what is the name of jina ai's mascot?",
"answer": "No, Jina AI does not have a mascot."
},
{
"question": "Does late chunking work with cls pooling?",
"answer": "No. Late chunking only works with mean pooling."
}
]