node-DeepResearch/src/tools/evaluator.ts
2025-01-27 20:21:08 +08:00

131 lines
4.4 KiB
TypeScript

import { GoogleGenerativeAI, SchemaType } from "@google/generative-ai";
import dotenv from 'dotenv';
import { ProxyAgent, setGlobalDispatcher } from "undici";
// Proxy setup
if (process.env.https_proxy) {
try {
const proxyUrl = new URL(process.env.https_proxy).toString();
const dispatcher = new ProxyAgent({ uri: proxyUrl });
setGlobalDispatcher(dispatcher);
} catch (error) {
console.error('Failed to set proxy:', error);
}
}
dotenv.config();
const apiKey = process.env.GEMINI_API_KEY;
if (!apiKey) {
throw new Error("GEMINI_API_KEY not found in environment variables");
}
type EvaluationResponse = {
is_valid_answer: boolean;
reasoning: string;
};
const responseSchema = {
type: SchemaType.OBJECT,
properties: {
is_valid_answer: {
type: SchemaType.BOOLEAN,
description: "Whether the answer properly addresses the question"
},
reasoning: {
type: SchemaType.STRING,
description: "Detailed explanation of the evaluation"
}
},
required: ["is_valid_answer", "reasoning"]
};
const modelName = 'gemini-1.5-flash';
const genAI = new GoogleGenerativeAI(apiKey);
const model = genAI.getGenerativeModel({
model: modelName,
generationConfig: {
temperature: 0,
responseMimeType: "application/json",
responseSchema: responseSchema
}
});
function getPrompt(question: string, answer: string): string {
return `You are an expert evaluator of question-answer pairs. Analyze if the given answer properly addresses the question and provides meaningful information.
Core Evaluation Criteria:
1. Completeness: Answer must directly address the main point of the question
2. Clarity: Answer should be clear and unambiguous
3. Informativeness: Answer must provide substantial, useful information
4. Specificity: Generic or vague responses are not acceptable
5. Definitiveness: "I don't know", "lack of information" or highly uncertain responses are not valid
6. Relevance: Answer must be directly related to the question topic
7. Accuracy: Information provided should be factually sound (if verifiable)
Examples:
Question: "What are the system requirements for running Python 3.9?"
Answer: "I'm not entirely sure, but I think you need a computer with some RAM."
Evaluation: {
"is_valid_answer": false,
"reasoning": "The answer is vague, uncertain, and lacks specific information about actual system requirements. It fails the specificity and informativeness criteria."
}
Question: "What are the system requirements for running Python 3.9?"
Answer: "Python 3.9 requires: Windows 7 or later, macOS 10.11 or later, or Linux. Minimum 4GB RAM recommended, 2GB disk space, and x86-64 processor. For Windows, you'll need Microsoft Visual C++ 2015 or later."
Evaluation: {
"is_valid_answer": true,
"reasoning": "The answer is comprehensive, specific, and covers all key system requirements across different operating systems. It provides concrete numbers and necessary additional components."
}
Question: "what is the twitter account of jina ai's founder?"
Answer: "The provided text does not contain the Twitter account of Jina AI's founder."
Evaluation: {
"is_valid_answer": false,
"reasoning": "The answer is not definitive and fails to provide the requested information. Don't know, can't derive, lack of information is unacceptable,"
}
Now evaluate this pair:
Question: ${JSON.stringify(question)}
Answer: ${JSON.stringify(answer)}`;
}
export async function evaluateAnswer(question: string, answer: string): Promise<EvaluationResponse> {
try {
const prompt = getPrompt(question, answer);
const result = await model.generateContent(prompt);
const response = await result.response;
const json = JSON.parse(response.text()) as EvaluationResponse;
console.log('Evaluation:', json);
return json;
} catch (error) {
console.error('Error in answer evaluation:', error);
throw error;
}
}
// Example usage
async function main() {
const question = process.argv[2] || '';
const answer = process.argv[3] || '';
if (!question || !answer) {
console.error('Please provide both question and answer as command line arguments');
process.exit(1);
}
console.log('\nQuestion:', question);
console.log('Answer:', answer);
try {
const evaluation = await evaluateAnswer(question, answer);
console.log('\nEvaluation Result:', evaluation);
} catch (error) {
console.error('Failed to evaluate answer:', error);
}
}
if (require.main === module) {
main().catch(console.error);
}