From a5e5627823cd91b8e96bee7de0c9a8be3eeb01c1 Mon Sep 17 00:00:00 2001 From: Han Xiao Date: Thu, 6 Feb 2025 20:03:36 +0800 Subject: [PATCH] fix: evaluator --- src/evals/ego-questions.json | 22 ++++++++++++++++++++++ src/tools/evaluator.ts | 35 +++++++++++++++++++++++++++-------- 2 files changed, 49 insertions(+), 8 deletions(-) create mode 100644 src/evals/ego-questions.json diff --git a/src/evals/ego-questions.json b/src/evals/ego-questions.json new file mode 100644 index 0000000..4221c08 --- /dev/null +++ b/src/evals/ego-questions.json @@ -0,0 +1,22 @@ +[ + { + "question": "what is jina ai ceo's twitter account", + "answer": "hxiao" + }, + { + "question": "what is the latest model published by jina ai?", + "answer": "ReaderLM-2.0" + }, + { + "question": "what is the latest blog post that jina ai published?", + "answer": "A Practical Guide to Deploying Search Foundation Models in Production" + }, + { + "question": "what is the context length of readerlm-v2?", + "answer": "512K" + }, + { + "question": "how many employees does jina ai have right now?", + "answer": "30" + } +] \ No newline at end of file diff --git a/src/tools/evaluator.ts b/src/tools/evaluator.ts index 7a79c85..a23b276 100644 --- a/src/tools/evaluator.ts +++ b/src/tools/evaluator.ts @@ -8,20 +8,25 @@ import { handleGenerateObjectError } from '../utils/error-handling'; const model = getModel('evaluator'); const responseSchema = z.object({ - is_definitive: z.boolean().describe('Whether the answer provides a definitive response without uncertainty or \'I don\'t know\' type statements'), + is_definitive: z.boolean().describe('Whether the answer provides a definitive response without uncertainty or negative statements'), reasoning: z.string().describe('Explanation of why the answer is or isn\'t definitive') }); - - function getPrompt(question: string, answer: string): string { return `You are an evaluator of answer definitiveness. Analyze if the given answer provides a definitive response or not. 
-Core Evaluation Criterion: -- Definitiveness: "I don't know", "lack of information", "doesn't exist", "not sure" or highly uncertain/ambiguous responses are **not** definitive, must return false! + +First, if the answer is not a direct response to the question, it must return false. +Definitiveness is the king! The following types of responses are NOT definitive and must return false: + 1. Expressions of uncertainty: "I don't know", "not sure", "might be", "probably" + 2. Lack of information statements: "doesn't exist", "lack of information", "could not find" + 3. Inability statements: "I cannot provide", "I am unable to", "we cannot" + 4. Negative statements that redirect: "However, you can...", "Instead, try..." + 5. Non-answers that suggest alternatives + -Examples: + Question: "What are the system requirements for running Python 3.9?" Answer: "I'm not entirely sure, but I think you need a computer with some RAM." Evaluation: { @@ -36,13 +41,27 @@ Evaluation: { "reasoning": "The answer makes clear, definitive statements without uncertainty markers or ambiguity." } +Question: "Who will be the president of the United States in 2032?" +Answer: "I cannot predict the future, it depends on the election results." +Evaluation: { + "is_definitive": false, + "reasoning": "The answer contains a statement of inability to predict the future, making it non-definitive." +} + +Question: "Who is the sales director at Company X?" +Answer: "I cannot provide the name of the sales director, but you can contact their sales team at sales@companyx.com" +Evaluation: { + "is_definitive": false, + "reasoning": "The answer starts with 'I cannot provide' and redirects to an alternative contact method instead of answering the original question." +} + Question: "what is the twitter account of jina ai's founder?" Answer: "The provided text does not contain the Twitter account of Jina AI's founder." 
Evaluation: { "is_definitive": false, "reasoning": "The answer indicates a lack of information rather than providing a definitive response." } - + Now evaluate this pair: Question: ${JSON.stringify(question)} Answer: ${JSON.stringify(answer)}`; @@ -98,4 +117,4 @@ async function main() { if (require.main === module) { main().catch(console.error); -} +} \ No newline at end of file