fix: unnecessary eval

This commit is contained in:
Han Xiao
2025-03-19 08:06:29 +08:00
parent 5bb17cb476
commit 71157a7468
3 changed files with 16 additions and 18 deletions

View File

@@ -226,7 +226,7 @@ ${actionSections.join('\n\n')}
`); `);
// Add footer // Add footer
sections.push(`Think step by step, choose the action, and respond in valid JSON format matching exact JSON schema of that action.`); sections.push(`Think step by step, choose the action, then respond by matching the schema of that action.`);
return removeExtraLineBreaks(sections.join('\n\n')); return removeExtraLineBreaks(sections.join('\n\n'));
} }
@@ -408,7 +408,7 @@ export async function getResponse(question?: string,
let allowReflect = true; let allowReflect = true;
let allowCoding = true; let allowCoding = true;
let system = ''; let system = '';
let maxStrictEvals = 2; let maxStrictEvals = Math.max(1, Math.min(3, maxBadAttempts - 1));
let msgWithKnowledge: CoreMessage[] = []; let msgWithKnowledge: CoreMessage[] = [];
let thisStep: StepAction = {action: 'answer', answer: '', references: [], think: '', isFinal: false}; let thisStep: StepAction = {action: 'answer', answer: '', references: [], think: '', isFinal: false};
@@ -417,7 +417,7 @@ export async function getResponse(question?: string,
const badURLs: string[] = []; const badURLs: string[] = [];
const evaluationMetrics: Record<string, EvaluationType[]> = {}; const evaluationMetrics: Record<string, EvaluationType[]> = {};
// reserve the 10% final budget for the beast mode // reserve the 15% final budget for the beast mode
const regularBudget = tokenBudget * 0.9; const regularBudget = tokenBudget * 0.85;
const finalAnswerPIP: string[] = []; const finalAnswerPIP: string[] = [];
let trivialQuestion = false; let trivialQuestion = false;
while (context.tokenTracker.getTotalUsage().totalTokens < regularBudget && badAttempts <= maxBadAttempts) { while (context.tokenTracker.getTotalUsage().totalTokens < regularBudget && badAttempts <= maxBadAttempts) {
@@ -920,8 +920,8 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
); );
} else { } else {
(thisStep as AnswerAction).mdAnswer = fixCodeBlockIndentation( (thisStep as AnswerAction).mdAnswer = fixCodeBlockIndentation(
buildMdFromAnswer((thisStep as AnswerAction)) buildMdFromAnswer((thisStep as AnswerAction))
); );
} }
console.log(thisStep) console.log(thisStep)

View File

@@ -19,8 +19,6 @@ Analyze the steps and provide detailed feedback following these guidelines:
- In the recap: Summarize key actions chronologically, highlight patterns, and identify where the process started to go wrong - In the recap: Summarize key actions chronologically, highlight patterns, and identify where the process started to go wrong
- In the blame: Point to specific steps or patterns that led to the inadequate answer - In the blame: Point to specific steps or patterns that led to the inadequate answer
- In the improvement: Provide actionable suggestions that could have led to a better outcome - In the improvement: Provide actionable suggestions that could have led to a better outcome
Generate a JSON response following JSON schema.
</rules> </rules>
<example> <example>

View File

@@ -646,17 +646,17 @@ export async function evaluateAnswer(
for (const evaluationType of evaluationTypes) { for (const evaluationType of evaluationTypes) {
let prompt: { system: string; user: string } | undefined let prompt: { system: string; user: string } | undefined
switch (evaluationType) { switch (evaluationType) {
case 'attribution': { // case 'attribution': {
if (allKnowledge.length === 0) { // if (allKnowledge.length === 0) {
return { // return {
pass: false, // pass: false,
think: `The knowledge is completely empty and the answer can not be derived from it. Need to found some other references and URLs`, // think: `The knowledge is completely empty and the answer can not be derived from it. Need to found some other references and URLs`,
type: 'attribution', // type: 'attribution',
}; // };
} // }
prompt = getAttributionPrompt(question, action.answer, allKnowledge); // prompt = getAttributionPrompt(question, action.answer, allKnowledge);
break; // break;
} // }
case 'definitive': case 'definitive':
prompt = getDefinitivePrompt(question, action.answer); prompt = getDefinitivePrompt(question, action.answer);