mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2026-03-22 07:29:35 +08:00
fix: evaluator
This commit is contained in:
22
src/evals/ego-questions.json
Normal file
22
src/evals/ego-questions.json
Normal file
@@ -0,0 +1,22 @@
|
||||
[
|
||||
{
|
||||
"question": "what is jina ai ceo's twitter account",
|
||||
"answer": "hxiao"
|
||||
},
|
||||
{
|
||||
"question": "what is the latest model published by jina ai?",
|
||||
"answer": "ReaderLM-2.0"
|
||||
},
|
||||
{
|
||||
"question": "what is the latest blog post that jina ai published?",
|
||||
"answer": "A Practical Guide to Deploying Search Foundation Models in Production"
|
||||
},
|
||||
{
|
||||
"question": "what is the context length of readerlm-v2?",
|
||||
"answer": "512K"
|
||||
},
|
||||
{
|
||||
"question": "how many employees does jina ai have right now?",
|
||||
"answer": "30"
|
||||
}
|
||||
]
|
||||
@@ -8,20 +8,25 @@ import { handleGenerateObjectError } from '../utils/error-handling';
|
||||
const model = getModel('evaluator');
|
||||
|
||||
const responseSchema = z.object({
|
||||
is_definitive: z.boolean().describe('Whether the answer provides a definitive response without uncertainty or \'I don\'t know\' type statements'),
|
||||
is_definitive: z.boolean().describe('Whether the answer provides a definitive response without uncertainty or negative statements'),
|
||||
reasoning: z.string().describe('Explanation of why the answer is or isn\'t definitive')
|
||||
});
|
||||
|
||||
|
||||
|
||||
function getPrompt(question: string, answer: string): string {
|
||||
return `You are an evaluator of answer definitiveness. Analyze if the given answer provides a definitive response or not.
|
||||
|
||||
Core Evaluation Criterion:
|
||||
- Definitiveness: "I don't know", "lack of information", "doesn't exist", "not sure" or highly uncertain/ambiguous responses are **not** definitive, must return false!
|
||||
<rules>
|
||||
First, if the answer is not a direct response to the question, it must return false.
|
||||
Definitiveness is the king! The following types of responses are NOT definitive and must return false:
|
||||
1. Expressions of uncertainty: "I don't know", "not sure", "might be", "probably"
|
||||
2. Lack of information statements: "doesn't exist", "lack of information", "could not find"
|
||||
3. Inability statements: "I cannot provide", "I am unable to", "we cannot"
|
||||
4. Negative statements that redirect: "However, you can...", "Instead, try..."
|
||||
5. Non-answers that suggest alternatives
|
||||
</rules>
|
||||
|
||||
Examples:
|
||||
|
||||
<examples>
|
||||
Question: "What are the system requirements for running Python 3.9?"
|
||||
Answer: "I'm not entirely sure, but I think you need a computer with some RAM."
|
||||
Evaluation: {
|
||||
@@ -36,13 +41,27 @@ Evaluation: {
|
||||
"reasoning": "The answer makes clear, definitive statements without uncertainty markers or ambiguity."
|
||||
}
|
||||
|
||||
Question: "Who will be the president of the United States in 2032?"
|
||||
Answer: "I cannot predict the future, it depends on the election results."
|
||||
Evaluation: {
|
||||
"is_definitive": false,
|
||||
"reasoning": "The answer contains a statement of inability to predict the future, making it non-definitive."
|
||||
}
|
||||
|
||||
Question: "Who is the sales director at Company X?"
|
||||
Answer: "I cannot provide the name of the sales director, but you can contact their sales team at sales@companyx.com"
|
||||
Evaluation: {
|
||||
"is_definitive": false,
|
||||
"reasoning": "The answer starts with 'I cannot provide' and redirects to an alternative contact method instead of answering the original question."
|
||||
}
|
||||
|
||||
Question: "what is the twitter account of jina ai's founder?"
|
||||
Answer: "The provided text does not contain the Twitter account of Jina AI's founder."
|
||||
Evaluation: {
|
||||
"is_definitive": false,
|
||||
"reasoning": "The answer indicates a lack of information rather than providing a definitive response."
|
||||
}
|
||||
|
||||
</examples>
|
||||
Now evaluate this pair:
|
||||
Question: ${JSON.stringify(question)}
|
||||
Answer: ${JSON.stringify(answer)}`;
|
||||
@@ -98,4 +117,4 @@ async function main() {
|
||||
|
||||
if (require.main === module) {
|
||||
main().catch(console.error);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user