chore: first commit

This commit is contained in:
Han Xiao 2025-02-02 18:47:29 +08:00
parent f1b4d2681e
commit 8c8484593f
2 changed files with 35 additions and 35 deletions

View File

@ -351,12 +351,10 @@ ${evaluation.reasoning}
Your journey ends here.
`);
console.log('Final Answer:', thisStep.answer);
tokenTracker.printSummary();
await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
return thisStep;
}
if (evaluation.is_valid_answer) {
if (evaluation.is_definitive) {
if (thisStep.references.length > 0 || Object.keys(allURLs).length === 0) {
// EXIT POINT OF THE PROGRAM!!!!
diaryContext.push(`
@ -373,8 +371,6 @@ ${evaluation.reasoning}
Your journey ends here. You have successfully answered the original question. Congratulations! 🎉
`);
console.log('Final Answer:', thisStep.answer);
tokenTracker.printSummary();
await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
return thisStep;
} else {
@ -391,6 +387,9 @@ Unfortunately, you did not provide any references to support your answer.
You need to find more URL references to support your answer.`);
}
await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
return thisStep;
} else {
diaryContext.push(`
At step ${step}, you took **answer** action but evaluator thinks it is not a good answer:
@ -418,7 +417,7 @@ ${evaluation.reasoning}
diaryContext = [];
step = 0;
}
} else if (evaluation.is_valid_answer) {
} else if (evaluation.is_definitive) {
diaryContext.push(`
At step ${step}, you took **answer** action. You found a good answer to the sub-question:
@ -436,7 +435,8 @@ Although you solved a sub-question, you still need to find the answer to the ori
allKnowledge.push({
question: currentQuestion,
answer: thisStep.answer,
type: 'qa'});
type: 'qa'
});
}
} else if (thisStep.action === 'reflect' && thisStep.questionsToAnswer) {
let newGapQuestions = thisStep.questionsToAnswer
@ -611,5 +611,14 @@ async function storeContext(prompt: string, memory: any[][], step: number) {
const genAI = new GoogleGenerativeAI(GEMINI_API_KEY);
const question = process.argv[2] || "";
getResponse(question);
/**
 * CLI entry point.
 *
 * Takes the question from the first command-line argument (an empty string
 * when none is supplied), passes it to `getResponse`, and then prints the
 * answer of the returned step followed by the token tracker's summary.
 */
export async function main() {
  const [, , cliQuestion] = process.argv;
  const { answer } = await getResponse(cliQuestion || "");
  console.log('Final Answer:', answer);
  tokenTracker.printSummary();
}
// Run the CLI only when this file is the script Node was started with;
// importing it as a module must not trigger a run.
if (module === require.main) {
  main().catch((err) => console.error(err));
}

View File

@ -3,23 +3,23 @@ import { GEMINI_API_KEY, MODEL_NAME } from "../config";
import { tokenTracker } from "../utils/token-tracker";
type EvaluationResponse = {
is_valid_answer: boolean;
is_definitive: boolean;
reasoning: string;
};
const responseSchema = {
type: SchemaType.OBJECT,
properties: {
is_valid_answer: {
is_definitive: {
type: SchemaType.BOOLEAN,
description: "Whether the answer provides any useful information to the question"
description: "Whether the answer provides a definitive response without uncertainty or 'I don't know' type statements"
},
reasoning: {
type: SchemaType.STRING,
description: "Detailed explanation of the evaluation"
description: "Explanation of why the answer is or isn't definitive"
}
},
required: ["is_valid_answer", "reasoning"]
required: ["is_definitive", "reasoning"]
};
const genAI = new GoogleGenerativeAI(GEMINI_API_KEY);
@ -33,41 +33,32 @@ const model = genAI.getGenerativeModel({
});
function getPrompt(question: string, answer: string): string {
return `You are an expert evaluator of question-answer pairs. Analyze if the given answer based on the following criteria is valid or not.
return `You are an evaluator of answer definitiveness. Analyze if the given answer provides a definitive response or not.
Core Evaluation Criteria:
- Definitiveness: "I don't know", "lack of information", "doesn't exist" or highly uncertain ambiguous responses are **not** valid answers, must return false!
- Informativeness: Answer must provide substantial, useful information
- Completeness: When question mentions multiple aspects or elements, the answer should cover all of them
Core Evaluation Criterion:
- Definitiveness: "I don't know", "lack of information", "doesn't exist", "not sure" or highly uncertain/ambiguous responses are **not** definitive, must return false!
Examples:
Question: "What are the system requirements for running Python 3.9?"
Answer: "I'm not entirely sure, but I think you need a computer with some RAM."
Evaluation: {
"is_valid_answer": false,
"reasoning": "The answer is vague, uncertain, and lacks specific information about actual system requirements. It fails the specificity and informativeness criteria."
"is_definitive": false,
"reasoning": "The answer contains uncertainty markers like 'not entirely sure' and 'I think', making it non-definitive."
}
Question: "What are the system requirements for running Python 3.9?"
Answer: "Python 3.9 requires: Windows 7 or later, macOS 10.11 or later, or Linux. Minimum 4GB RAM recommended, 2GB disk space, and x86-64 processor. For Windows, you'll need Microsoft Visual C++ 2015 or later."
Answer: "Python 3.9 requires Windows 7 or later, macOS 10.11 or later, or Linux."
Evaluation: {
"is_valid_answer": true,
"reasoning": "The answer is comprehensive, specific, and covers all key system requirements across different operating systems. It provides concrete numbers and necessary additional components."
"is_definitive": true,
"reasoning": "The answer makes clear, definitive statements without uncertainty markers or ambiguity."
}
Question: "what is the twitter account of jina ai's founder?"
Answer: "The provided text does not contain the Twitter account of Jina AI's founder."
Evaluation: {
"is_valid_answer": false,
"reasoning": "The answer is not definitive and fails to provide the requested information. Don't know, can't derive, lack of information is unacceptable,"
}
Question: "who owns jina ai?"
Answer: "The ownership structure of Jina AI is not publicly disclosed."
Evaluation: {
"is_valid_answer": false,
"reasoning": "The answer is not definitive and fails to provide the requested information. Lack of information is unacceptable, more search and deep reasoning is needed."
"is_definitive": false,
"reasoning": "The answer indicates a lack of information rather than providing a definitive response."
}
Now evaluate this pair:
@ -83,7 +74,7 @@ export async function evaluateAnswer(question: string, answer: string): Promise<
const usage = response.usageMetadata;
const json = JSON.parse(response.text()) as EvaluationResponse;
console.log('Evaluation:', {
valid: json.is_valid_answer,
definitive: json.is_definitive,
reason: json.reasoning
});
const tokens = usage?.totalTokenCount || 0;
@ -114,4 +105,4 @@ async function main() {
if (require.main === module) {
main().catch(console.error);
}
}