mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
chore: first commit
This commit is contained in:
parent
f1b4d2681e
commit
8c8484593f
27
src/agent.ts
27
src/agent.ts
@ -351,12 +351,10 @@ ${evaluation.reasoning}
|
||||
|
||||
Your journey ends here.
|
||||
`);
|
||||
console.log('Final Answer:', thisStep.answer);
|
||||
tokenTracker.printSummary();
|
||||
await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
|
||||
return thisStep;
|
||||
}
|
||||
if (evaluation.is_valid_answer) {
|
||||
if (evaluation.is_definitive) {
|
||||
if (thisStep.references.length > 0 || Object.keys(allURLs).length === 0) {
|
||||
// EXIT POINT OF THE PROGRAM!!!!
|
||||
diaryContext.push(`
|
||||
@ -373,8 +371,6 @@ ${evaluation.reasoning}
|
||||
|
||||
Your journey ends here. You have successfully answered the original question. Congratulations! 🎉
|
||||
`);
|
||||
console.log('Final Answer:', thisStep.answer);
|
||||
tokenTracker.printSummary();
|
||||
await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
|
||||
return thisStep;
|
||||
} else {
|
||||
@ -391,6 +387,9 @@ Unfortunately, you did not provide any references to support your answer.
|
||||
You need to find more URL references to support your answer.`);
|
||||
}
|
||||
|
||||
await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
|
||||
return thisStep;
|
||||
|
||||
} else {
|
||||
diaryContext.push(`
|
||||
At step ${step}, you took **answer** action but evaluator thinks it is not a good answer:
|
||||
@ -418,7 +417,7 @@ ${evaluation.reasoning}
|
||||
diaryContext = [];
|
||||
step = 0;
|
||||
}
|
||||
} else if (evaluation.is_valid_answer) {
|
||||
} else if (evaluation.is_definitive) {
|
||||
diaryContext.push(`
|
||||
At step ${step}, you took **answer** action. You found a good answer to the sub-question:
|
||||
|
||||
@ -436,7 +435,8 @@ Although you solved a sub-question, you still need to find the answer to the ori
|
||||
allKnowledge.push({
|
||||
question: currentQuestion,
|
||||
answer: thisStep.answer,
|
||||
type: 'qa'});
|
||||
type: 'qa'
|
||||
});
|
||||
}
|
||||
} else if (thisStep.action === 'reflect' && thisStep.questionsToAnswer) {
|
||||
let newGapQuestions = thisStep.questionsToAnswer
|
||||
@ -611,5 +611,14 @@ async function storeContext(prompt: string, memory: any[][], step: number) {
|
||||
|
||||
const genAI = new GoogleGenerativeAI(GEMINI_API_KEY);
|
||||
|
||||
const question = process.argv[2] || "";
|
||||
getResponse(question);
|
||||
|
||||
export async function main() {
|
||||
const question = process.argv[2] || "";
|
||||
const finalStep = await getResponse(question);
|
||||
console.log('Final Answer:', finalStep.answer);
|
||||
tokenTracker.printSummary();
|
||||
}
|
||||
|
||||
if (require.main === module) {
|
||||
main().catch(console.error);
|
||||
}
|
||||
@ -3,23 +3,23 @@ import { GEMINI_API_KEY, MODEL_NAME } from "../config";
|
||||
import { tokenTracker } from "../utils/token-tracker";
|
||||
|
||||
type EvaluationResponse = {
|
||||
is_valid_answer: boolean;
|
||||
is_definitive: boolean;
|
||||
reasoning: string;
|
||||
};
|
||||
|
||||
const responseSchema = {
|
||||
type: SchemaType.OBJECT,
|
||||
properties: {
|
||||
is_valid_answer: {
|
||||
is_definitive: {
|
||||
type: SchemaType.BOOLEAN,
|
||||
description: "Whether the answer provides any useful information to the question"
|
||||
description: "Whether the answer provides a definitive response without uncertainty or 'I don't know' type statements"
|
||||
},
|
||||
reasoning: {
|
||||
type: SchemaType.STRING,
|
||||
description: "Detailed explanation of the evaluation"
|
||||
description: "Explanation of why the answer is or isn't definitive"
|
||||
}
|
||||
},
|
||||
required: ["is_valid_answer", "reasoning"]
|
||||
required: ["is_definitive", "reasoning"]
|
||||
};
|
||||
|
||||
const genAI = new GoogleGenerativeAI(GEMINI_API_KEY);
|
||||
@ -33,41 +33,32 @@ const model = genAI.getGenerativeModel({
|
||||
});
|
||||
|
||||
function getPrompt(question: string, answer: string): string {
|
||||
return `You are an expert evaluator of question-answer pairs. Analyze if the given answer based on the following criteria is valid or not.
|
||||
return `You are an evaluator of answer definitiveness. Analyze if the given answer provides a definitive response or not.
|
||||
|
||||
Core Evaluation Criteria:
|
||||
- Definitiveness: "I don't know", "lack of information", "doesn't exist" or highly uncertain ambiguous responses are **not** valid answers, must return false!
|
||||
- Informativeness: Answer must provide substantial, useful information
|
||||
- Completeness: When question mentions multiple aspects or elements, the answer should cover all of them
|
||||
Core Evaluation Criterion:
|
||||
- Definitiveness: "I don't know", "lack of information", "doesn't exist", "not sure" or highly uncertain/ambiguous responses are **not** definitive, must return false!
|
||||
|
||||
Examples:
|
||||
|
||||
Question: "What are the system requirements for running Python 3.9?"
|
||||
Answer: "I'm not entirely sure, but I think you need a computer with some RAM."
|
||||
Evaluation: {
|
||||
"is_valid_answer": false,
|
||||
"reasoning": "The answer is vague, uncertain, and lacks specific information about actual system requirements. It fails the specificity and informativeness criteria."
|
||||
"is_definitive": false,
|
||||
"reasoning": "The answer contains uncertainty markers like 'not entirely sure' and 'I think', making it non-definitive."
|
||||
}
|
||||
|
||||
Question: "What are the system requirements for running Python 3.9?"
|
||||
Answer: "Python 3.9 requires: Windows 7 or later, macOS 10.11 or later, or Linux. Minimum 4GB RAM recommended, 2GB disk space, and x86-64 processor. For Windows, you'll need Microsoft Visual C++ 2015 or later."
|
||||
Answer: "Python 3.9 requires Windows 7 or later, macOS 10.11 or later, or Linux."
|
||||
Evaluation: {
|
||||
"is_valid_answer": true,
|
||||
"reasoning": "The answer is comprehensive, specific, and covers all key system requirements across different operating systems. It provides concrete numbers and necessary additional components."
|
||||
"is_definitive": true,
|
||||
"reasoning": "The answer makes clear, definitive statements without uncertainty markers or ambiguity."
|
||||
}
|
||||
|
||||
Question: "what is the twitter account of jina ai's founder?"
|
||||
Answer: "The provided text does not contain the Twitter account of Jina AI's founder."
|
||||
Evaluation: {
|
||||
"is_valid_answer": false,
|
||||
"reasoning": "The answer is not definitive and fails to provide the requested information. Don't know, can't derive, lack of information is unacceptable,"
|
||||
}
|
||||
|
||||
Question: "who owns jina ai?"
|
||||
Answer: "The ownership structure of Jina AI is not publicly disclosed."
|
||||
Evaluation: {
|
||||
"is_valid_answer": false,
|
||||
"reasoning": "The answer is not definitive and fails to provide the requested information. Lack of information is unacceptable, more search and deep reasoning is needed."
|
||||
"is_definitive": false,
|
||||
"reasoning": "The answer indicates a lack of information rather than providing a definitive response."
|
||||
}
|
||||
|
||||
Now evaluate this pair:
|
||||
@ -83,7 +74,7 @@ export async function evaluateAnswer(question: string, answer: string): Promise<
|
||||
const usage = response.usageMetadata;
|
||||
const json = JSON.parse(response.text()) as EvaluationResponse;
|
||||
console.log('Evaluation:', {
|
||||
valid: json.is_valid_answer,
|
||||
definitive: json.is_definitive,
|
||||
reason: json.reasoning
|
||||
});
|
||||
const tokens = usage?.totalTokenCount || 0;
|
||||
@ -114,4 +105,4 @@ async function main() {
|
||||
|
||||
if (require.main === module) {
|
||||
main().catch(console.error);
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user