feat: add timestamp to the knowledge (#64)

This commit is contained in:
Han Xiao
2025-02-13 20:05:54 +08:00
committed by GitHub
parent 507bc38546
commit 3b76e0b4d8
3 changed files with 82 additions and 50 deletions

View File

@@ -11,7 +11,7 @@ import {evaluateAnswer, evaluateQuestion} from "./tools/evaluator";
import {analyzeSteps} from "./tools/error-analyzer";
import {TokenTracker} from "./utils/token-tracker";
import {ActionTracker} from "./utils/action-tracker";
import {StepAction, AnswerAction, KnowledgeItem} from "./types";
import {StepAction, AnswerAction, KnowledgeItem, EvaluationCriteria} from "./types";
import {TrackerContext} from "./types";
import {search} from "./tools/jina-search";
// import {grounding} from "./tools/grounding";
@@ -24,7 +24,7 @@ async function sleep(ms: number) {
return new Promise(resolve => setTimeout(resolve, ms));
}
function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean) {
function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean, languageStyle: string = 'same language as the question') {
const actions: string[] = [];
const properties: Record<string, z.ZodTypeAny> = {
action: z.enum(['placeholder']), // Will update later with actual actions
@@ -40,7 +40,7 @@ function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boole
if (allowAnswer) {
actions.push("answer");
properties.answer = z.string()
.describe("Required when action='answer'. Must be the final answer in natural language").optional();
.describe(`Required when action='answer'. Must in ${languageStyle}`).optional();
properties.references = z.array(
z.object({
exactQuote: z.string().describe("Exact relevant quote from the document"),
@@ -85,7 +85,8 @@ function getPrompt(
badContext?: { question: string, answer: string, evaluation: string, recap: string; blame: string; improvement: string; }[],
knowledge?: KnowledgeItem[],
allURLs?: Record<string, string>,
beastMode?: boolean
beastMode?: boolean,
languageStyle?: string
): string {
const sections: string[] = [];
const actionSections: string[] = [];
@@ -216,11 +217,11 @@ ${allKeywords.join('\n')}
if (allowAnswer) {
actionSections.push(`
<action-answer>
- If <question> is a simple greeting, chit-chat, or general knowledge, provide the answer directly.
- Must provide "references" and each must specify "exactQuote" and "url"
- In the answer, use markdown footnote syntax like [^1], [^2] to refer to the references
- Responses must be definitive (no ambiguity, uncertainty, or disclaimers)
- Provide final response only when 100% certain${allowReflect ? '\n- If doubts remain, use <action-reflect> instead' : ''}
- If <question> is a simple greeting, chit-chat, or general knowledge, provide the answer directly;
- Must provide "references" and each must specify "exactQuote" and "url";
- In the answer, use markdown footnote syntax like [^1], [^2] to refer to the references;
- Responses must be definitive (no ambiguity, uncertainty, or disclaimers) and in the style of ${languageStyle};
- Provide final response only when 100% certain;${allowReflect ? '\n- If doubts remain, use <action-reflect> instead' : ''}
</action-answer>
`);
}
@@ -299,8 +300,9 @@ export async function getResponse(question: string,
let totalStep = 0;
let badAttempts = 0;
let schema: ZodObject<any> = getSchema(true, true, true, true)
const gaps: string[] = [question.trim()]; // All questions to be answered including the original question
const allQuestions = [question.trim()];
question = question.trim()
const gaps: string[] = [question]; // All questions to be answered including the original question
const allQuestions = [question];
const allKeywords = [];
const allKnowledge: KnowledgeItem[] = []; // knowledge items are intermediate questions that have been answered
// iterate over historyMessages
@@ -329,7 +331,7 @@ export async function getResponse(question: string,
const allURLs: Record<string, string> = {};
const visitedURLs: string[] = [];
const evaluationMetrics: Record<string, any[]> = {};
const evaluationMetrics: Record<string, EvaluationCriteria> = {};
while (context.tokenTracker.getTotalUsage().totalTokens < tokenBudget && badAttempts <= maxBadAttempts) {
// add 1s delay to avoid rate limiting
await sleep(STEP_SLEEP);
@@ -339,7 +341,7 @@ export async function getResponse(question: string,
console.log(`Step ${totalStep} / Budget used ${budgetPercentage}%`);
console.log('Gaps:', gaps);
allowReflect = allowReflect && (gaps.length <= 1);
const currentQuestion = gaps.length > 0 ? gaps.shift()! : question.trim();
const currentQuestion = gaps.length > 0 ? gaps.shift()! : question
if (!evaluationMetrics[currentQuestion]) {
evaluationMetrics[currentQuestion] = await evaluateQuestion(currentQuestion, context.tokenTracker)
}
@@ -361,9 +363,11 @@ export async function getResponse(question: string,
badContext,
allKnowledge,
allURLs,
false
false,
evaluationMetrics[currentQuestion].languageStyle
);
schema = getSchema(allowReflect, allowRead, allowAnswer, allowSearch)
schema = getSchema(allowReflect, allowRead, allowAnswer, allowSearch,
evaluationMetrics[currentQuestion].languageStyle)
const generator = new ObjectGeneratorSafe(context.tokenTracker);
const result = await generator.generateObject({
model: 'agent',
@@ -401,7 +405,7 @@ export async function getResponse(question: string,
const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep,
evaluationMetrics[currentQuestion], context.tokenTracker);
if (currentQuestion.trim() === question.trim()) {
if (currentQuestion.trim() === question) {
if (evaluation.pass) {
diaryContext.push(`
At step ${step}, you took **answer** action and finally found the answer to the original question:
@@ -458,7 +462,7 @@ ${evaluation.think}
// reranker? maybe
gaps.push(...errorAnalysis.questionsToAnswer.slice(0, 2));
allQuestions.push(...errorAnalysis.questionsToAnswer.slice(0, 2));
gaps.push(question.trim()); // always keep the original question in the gaps
gaps.push(question); // always keep the original question in the gaps
}
badAttempts++;
@@ -505,7 +509,7 @@ You will now figure out the answers to these sub-questions and see if they can h
`);
gaps.push(...newGapQuestions.slice(0, 2));
allQuestions.push(...newGapQuestions.slice(0, 2));
gaps.push(question.trim()); // always keep the original question in the gaps
gaps.push(question); // always keep the original question in the gaps
} else {
diaryContext.push(`
At step ${step}, you took **reflect** and think about the knowledge gaps. You tried to break down the question "${currentQuestion}" into gap-questions like this: ${oldQuestions.join(', ')}
@@ -697,10 +701,12 @@ You decided to think out of the box or cut from a completely different angle.`);
badContext,
allKnowledge,
allURLs,
true
true,
evaluationMetrics[question]?.languageStyle || 'same language as the question'
);
schema = getSchema(false, false, true, false);
schema = getSchema(false, false, true, false,
evaluationMetrics[question]?.languageStyle || 'same language as the question');
const generator = new ObjectGeneratorSafe(context.tokenTracker);
const result = await generator.generateObject({
model: 'agentBeastMode',
@@ -721,7 +727,15 @@ You decided to think out of the box or cut from a completely different angle.`);
async function storeContext(prompt: string, schema: any, memory: any[][], step: number) {
if ((process as any).asyncLocalContext?.available?.()) {
const [context, keywords, questions, knowledge] = memory;
(process as any).asyncLocalContext.ctx.promptContext = { prompt, schema, context, keywords, questions, knowledge, step };
(process as any).asyncLocalContext.ctx.promptContext = {
prompt,
schema,
context,
keywords,
questions,
knowledge,
step
};
return;
}

View File

@@ -1,12 +1,11 @@
import {z} from 'zod';
import {GenerateObjectResult} from 'ai';
import {TokenTracker} from "../utils/token-tracker";
import {AnswerAction, EvaluationResponse} from '../types';
import {AnswerAction, EvaluationCriteria, EvaluationResponse, EvaluationType} from '../types';
import {readUrl, removeAllLineBreaks} from "./read";
import {ObjectGeneratorSafe} from "../utils/safe-generator";
type EvaluationType = 'definitive' | 'freshness' | 'plurality' | 'attribution';
const baseSchema = {
pass: z.boolean().describe('Whether the answer passes the evaluation criteria defined by the evaluator'),
@@ -301,7 +300,8 @@ Answer: ${JSON.stringify(answer)}`;
const questionEvaluationSchema = z.object({
needsFreshness: z.boolean().describe('Whether the question requires freshness check'),
needsPlurality: z.boolean().describe('Whether the question requires plurality check'),
reasoning: z.string().describe('Explanation of why these checks are needed or not needed')
reasoning: z.string().describe('Explanation of why these checks are needed or not needed'),
languageStyle: z.string().describe('The language being used and the overall vibe/mood of the question'),
});
function getQuestionEvaluationPrompt(question: string): string {
@@ -310,6 +310,7 @@ function getQuestionEvaluationPrompt(question: string): string {
<evaluation_types>
1. freshness - Checks if the question is time-sensitive or requires very recent information
2. plurality - Checks if the question asks for multiple items or a specific count or enumeration
3. language style - Identifies both the language used and the overall vibe of the question
</evaluation_types>
<rules>
@@ -326,42 +327,54 @@ If question is a simple greeting, chit-chat, or general knowledge, provide the a
- Check for: numbers ("5 examples"), plural nouns, list requests
- Look for: "all", "list", "enumerate", "examples", plural forms
- Required when question implies completeness ("all the reasons", "every factor")
3. Language Style Analysis:
Combine both language and emotional vibe in a descriptive phrase, considering:
- Language: The primary language or mix of languages used
- Emotional tone: panic, excitement, frustration, curiosity, etc.
- Formality level: academic, casual, professional, etc.
- Domain context: technical, academic, social, etc.
</rules>
<examples>
Question: "Hello, how are you?"
Question: "fam PLEASE help me calculate the eigenvalues of this 4x4 matrix ASAP!! [matrix details] got an exam tmrw 😭"
Evaluation: {
"needsFreshness": false,
"needsPlurality": false,
"reasoning": "Simple greeting, no additional checks needed."
"needsFreshness": false,
"needsPlurality": true,
"reasoning": "Multiple eigenvalues needed but no time-sensitive information required",
"languageStyle": "panicked student English with math jargon"
}
Question: "What is the current CEO of OpenAI?"
Question: "Can someone explain how tf did Ferrari mess up their pit stop strategy AGAIN?! 🤦‍♂️ #MonacoGP"
Evaluation: {
"needsFreshness": true,
"needsPlurality": false,
"reasoning": "Question asks about current leadership position which requires freshness check. No plurality check needed as it asks for a single position."
"needsFreshness": true,
"needsPlurality": true,
"reasoning": "Refers to recent race event and requires analysis of multiple strategic decisions",
"languageStyle": "frustrated fan English with F1 terminology"
}
Question: "List all the AI companies in Berlin"
Question: "肖老师您好,请您介绍一下最近量子计算领域的三个重大突破,特别是它们在密码学领域的应用价值吗?🤔"
Evaluation: {
"needsFreshness": false,
"needsPlurality": true,
"reasoning": "Question asks for a comprehensive list ('all') which requires plurality check. No freshness check needed as it's not time-sensitive."
"needsFreshness": true,
"needsPlurality": true,
"reasoning": "Asks for recent breakthroughs (freshness) and specifically requests three examples (plurality)",
"languageStyle": "formal technical Chinese with academic undertones"
}
Question: "What are the top 5 latest AI models released by OpenAI?"
Question: "Bruder krass, kannst du mir erklären warum meine neural network training loss komplett durchdreht? Hab schon alles probiert 😤"
Evaluation: {
"needsFreshness": true,
"needsPlurality": true,
"reasoning": "Question requires freshness check for 'latest' releases and plurality check for 'top 5' items."
"needsFreshness": false,
"needsPlurality": true,
"reasoning": "Requires comprehensive debugging analysis of multiple potential issues",
"languageStyle": "frustrated German-English tech slang"
}
Question: "Who created Python?"
Question: "Does anyone have insights into the sociopolitical implications of GPT-4's emergence in the Global South, particularly regarding indigenous knowledge systems and linguistic diversity? Looking for a nuanced analysis."
Evaluation: {
"needsFreshness": false,
"needsPlurality": false,
"reasoning": "Simple factual question requiring only definitiveness check. No time sensitivity or multiple items needed."
"needsFreshness": true,
"needsPlurality": true,
"reasoning": "Requires analysis of current impacts (freshness) across multiple dimensions: sociopolitical, cultural, and linguistic (plurality)",
"languageStyle": "formal academic English with sociological terminology"
}
</examples>
@@ -374,7 +387,7 @@ const TOOL_NAME = 'evaluator';
export async function evaluateQuestion(
question: string,
tracker?: TokenTracker
): Promise<EvaluationType[]> {
): Promise<EvaluationCriteria> {
try {
const generator = new ObjectGeneratorSafe(tracker);
@@ -394,12 +407,12 @@ export async function evaluateQuestion(
console.log('Question Metrics:', types);
// Always evaluate definitive first, then freshness (if needed), then plurality (if needed)
return types;
return {types, languageStyle: result.object.languageStyle};
} catch (error) {
console.error('Error in question evaluation:', error);
// Default to all evaluation types in case of error
return ['definitive', 'freshness', 'plurality'];
return {types: ['definitive', 'freshness', 'plurality'], languageStyle: 'plain English'};
}
}
@@ -430,17 +443,17 @@ async function performEvaluation<T>(
export async function evaluateAnswer(
question: string,
action: AnswerAction,
evaluationOrder: EvaluationType[] = ['definitive', 'freshness', 'plurality'],
evaluationCri: EvaluationCriteria,
tracker?: TokenTracker
): Promise<{ response: EvaluationResponse }> {
let result;
// Only add attribution if we have valid references
if (action.references && action.references.length > 0) {
evaluationOrder = ['attribution', ...evaluationOrder];
evaluationCri.types = ['attribution', ...evaluationCri.types];
}
for (const evaluationType of evaluationOrder) {
for (const evaluationType of evaluationCri.types) {
switch (evaluationType) {
case 'attribution': {
// Safely handle references and ensure we have content

View File

@@ -45,6 +45,11 @@ export type VisitAction = BaseAction & {
export type StepAction = SearchAction | AnswerAction | ReflectAction | VisitAction;
export type EvaluationType = 'definitive' | 'freshness' | 'plurality' | 'attribution';
export type EvaluationCriteria = {
types: EvaluationType[];
languageStyle: string;
};
// Following Vercel AI SDK's token counting interface
export interface TokenUsage {