fix: evaluator

This commit is contained in:
Han Xiao
2025-02-06 20:03:36 +08:00
parent deff7235b2
commit a5e5627823
2 changed files with 49 additions and 8 deletions

View File

@@ -0,0 +1,22 @@
[
{
"question": "what is jina ai ceo's twitter account",
"answer": "hxiao"
},
{
"question": "what is the latest model published by jina ai?",
"answer": "ReaderLM-v2"
},
{
"question": "what is the latest blog post that jina ai published?",
"answer": "A Practical Guide to Deploying Search Foundation Models in Production"
},
{
"question": "what is the context length of readerlm-v2?",
"answer": "512K"
},
{
"question": "how many employees does jina ai have right now?",
"answer": "30"
}
]

View File

@@ -8,20 +8,25 @@ import { handleGenerateObjectError } from '../utils/error-handling';
// Model used by this evaluator, looked up via the 'evaluator' key.
// (getModel is defined outside this view; its lookup semantics are not shown here.)
const model = getModel('evaluator');
// Schema for the evaluator's structured output: a boolean verdict (`is_definitive`)
// plus a free-text `reasoning` string justifying that verdict.
// NOTE(review): `is_definitive` is listed twice below. These look like the removed and
// added sides of the same diff line. With duplicate keys in an object literal the last
// one wins, so only the second description would take effect — confirm the real file
// keeps just one entry.
const responseSchema = z.object({
is_definitive: z.boolean().describe('Whether the answer provides a definitive response without uncertainty or \'I don\'t know\' type statements'),
is_definitive: z.boolean().describe('Whether the answer provides a definitive response without uncertainty or negative statements'),
reasoning: z.string().describe('Explanation of why the answer is or isn\'t definitive')
});
function getPrompt(question: string, answer: string): string {
return `You are an evaluator of answer definitiveness. Analyze if the given answer provides a definitive response or not.
Core Evaluation Criterion:
- Definitiveness: "I don't know", "lack of information", "doesn't exist", "not sure" or highly uncertain/ambiguous responses are **not** definitive, must return false!
<rules>
First, if the answer is not a direct response to the question, it must return false.
Definitiveness is the king! The following types of responses are NOT definitive and must return false:
1. Expressions of uncertainty: "I don't know", "not sure", "might be", "probably"
2. Lack of information statements: "doesn't exist", "lack of information", "could not find"
3. Inability statements: "I cannot provide", "I am unable to", "we cannot"
4. Negative statements that redirect: "However, you can...", "Instead, try..."
5. Non-answers that suggest alternatives
</rules>
Examples:
<examples>
Question: "What are the system requirements for running Python 3.9?"
Answer: "I'm not entirely sure, but I think you need a computer with some RAM."
Evaluation: {
@@ -36,13 +41,27 @@ Evaluation: {
"reasoning": "The answer makes clear, definitive statements without uncertainty markers or ambiguity."
}
Question: "Who will be the president of the United States in 2032?"
Answer: "I cannot predict the future, it depends on the election results."
Evaluation: {
"is_definitive": false,
"reasoning": "The answer contains a statement of inability to predict the future, making it non-definitive."
}
Question: "Who is the sales director at Company X?"
Answer: "I cannot provide the name of the sales director, but you can contact their sales team at sales@companyx.com"
Evaluation: {
"is_definitive": false,
"reasoning": "The answer starts with 'I cannot provide' and redirects to an alternative contact method instead of answering the original question."
}
Question: "what is the twitter account of jina ai's founder?"
Answer: "The provided text does not contain the Twitter account of Jina AI's founder."
Evaluation: {
"is_definitive": false,
"reasoning": "The answer indicates a lack of information rather than providing a definitive response."
}
</examples>
Now evaluate this pair:
Question: ${JSON.stringify(question)}
Answer: ${JSON.stringify(answer)}`;
@@ -98,4 +117,4 @@ async function main() {
if (require.main === module) {
main().catch(console.error);
}
}