fix: try catch in evaluator

This commit is contained in:
Han Xiao 2025-02-12 21:47:42 +08:00
parent 29fc4d9214
commit ee4213111c
3 changed files with 162 additions and 28 deletions

View File

@ -299,8 +299,8 @@ export async function getResponse(question: string,
let totalStep = 0;
let badAttempts = 0;
let schema: ZodObject<any> = getSchema(true, true, true, true)
const gaps: string[] = [question]; // All questions to be answered including the orginal question
const allQuestions = [question];
const gaps: string[] = [question.trim()]; // All questions to be answered including the original question
const allQuestions = [question.trim()];
const allKeywords = [];
const allKnowledge = []; // knowledge are intermediate questions that are answered
// iterate over historyMessages
@ -339,7 +339,7 @@ export async function getResponse(question: string,
console.log(`Step ${totalStep} / Budget used ${budgetPercentage}%`);
console.log('Gaps:', gaps);
allowReflect = allowReflect && (gaps.length <= 1);
const currentQuestion = gaps.length > 0 ? gaps.shift()! : question;
const currentQuestion = gaps.length > 0 ? gaps.shift()! : question.trim();
if (!evaluationMetrics[currentQuestion]) {
evaluationMetrics[currentQuestion] = await evaluateQuestion(currentQuestion, context.tokenTracker)
}
@ -411,7 +411,7 @@ export async function getResponse(question: string,
const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep,
evaluationMetrics[currentQuestion], context.tokenTracker);
if (currentQuestion === question) {
if (currentQuestion.trim() === question.trim()) {
if (evaluation.pass) {
diaryContext.push(`
At step ${step}, you took **answer** action and finally found the answer to the original question:
@ -466,7 +466,7 @@ ${evaluation.think}
if (errorAnalysis.questionsToAnswer) {
gaps.push(...errorAnalysis.questionsToAnswer.slice(0, 2));
allQuestions.push(...errorAnalysis.questionsToAnswer.slice(0, 2));
gaps.push(question); // always keep the original question in the gaps
gaps.push(question.trim()); // always keep the original question in the gaps
}
badAttempts++;
@ -512,7 +512,7 @@ You will now figure out the answers to these sub-questions and see if they can h
`);
gaps.push(...newGapQuestions);
allQuestions.push(...newGapQuestions);
gaps.push(question); // always keep the original question in the gaps
gaps.push(question.trim()); // always keep the original question in the gaps
} else {
diaryContext.push(`
At step ${step}, you took **reflect** and think about the knowledge gaps. You tried to break down the question "${currentQuestion}" into gap-questions like this: ${oldQuestions.join(', ')}

View File

@ -45,24 +45,149 @@ ${answer.references.map((ref, i) => {
return `${answer.answer.replace(/\(REF_(\d+)\)/g, (_, num) => `[^${num}]`)}\n\n${refStr}`;
}
/**
 * Async generator that emits `text` piece by piece, pausing before each piece.
 *
 * The text is split with `splitTextIntoChunks`. For every chunk a pause length
 * is obtained from `calculateDelay`, awaited via `setTimeout`, and then the
 * chunk is yielded. If `streamingState.currentlyStreaming` is falsy at the top
 * of an iteration, the remaining chunks are joined into one string, yielded,
 * and the generator returns.
 *
 * NOTE(review): this span appears to interleave lines of the previous
 * `streamTextWordByWord` implementation with the new one (diff residue) —
 * confirm against the actual file before relying on brace structure here.
 */
async function* streamTextNaturally(text: string, streamingState: StreamingState) {
// Split text into chunks that preserve CJK characters, URLs, and regular words
const chunks = splitTextIntoChunks(text);
// burstMode is passed to calculateDelay; consecutiveShortItems counts the
// current run of short, non-whitespace chunks that switches burst mode on.
let burstMode = false;
let consecutiveShortItems = 0;
// Modified streamTextWordByWord function
// NOTE(review): presumably the superseded word-by-word implementation — confirm.
async function* streamTextWordByWord(text: string, streamingState: StreamingState) {
// The capturing group keeps the whitespace runs as separate array entries.
const words = text.split(/(\s+)/);
for (const word of words) {
if (streamingState.currentlyStreaming) {
// Random pause of 0-99 ms before each entry.
const delay = Math.floor(Math.random() * 100);
await new Promise(resolve => setTimeout(resolve, delay));
yield word;
} else {
// If streaming was interrupted, yield all remaining words at once
// NOTE(review): indexOf returns the FIRST entry equal to `word`, so a repeated
// entry (e.g. a single space) makes the slice start earlier than the current position.
const remainingWords = words.slice(words.indexOf(word)).join('');
yield remainingWords;
for (const chunk of chunks) {
// Streaming was switched off: flush everything that is left in one piece.
if (!streamingState.currentlyStreaming) {
// NOTE(review): indexOf locates the first chunk equal to `chunk`; when the same
// chunk string occurred earlier, already-emitted text is yielded again.
// Tracking the loop index would avoid this — verify and fix in the real file.
yield chunks.slice(chunks.indexOf(chunk)).join('');
return;
}
// The delay is computed with the burstMode value left by the previous chunk;
// the flag is only updated for this chunk afterwards.
const delay = calculateDelay(chunk, burstMode);
// Handle consecutive short items
// A chunk is "short" when its effective length is at most 3 and it is not
// whitespace-only; the third short chunk in a row turns burst mode on.
if (getEffectiveLength(chunk) <= 3 && chunk.trim().length > 0) {
consecutiveShortItems++;
if (consecutiveShortItems >= 3) {
burstMode = true;
}
} else {
// Any long or whitespace-only chunk ends the run and leaves burst mode.
consecutiveShortItems = 0;
burstMode = false;
}
await new Promise(resolve => setTimeout(resolve, delay));
yield chunk;
}
}
/**
 * Splits `text` into the pieces that are streamed one at a time.
 *
 * Chunking rules, applied per UTF-16 code unit in this order:
 *  - A URL (starting at `http://` or `https://`) is kept as a single chunk and
 *    ends before whitespace, one of `] ) } " '`, or at the end of the text.
 *  - Each CJK / kana / hangul character becomes its own chunk.
 *  - Each whitespace character becomes its own chunk.
 *  - Everything else accumulates into a word, which is closed right before
 *    one of the punctuation marks `. ! ? , ; :`.
 *
 * Every input character lands in exactly one chunk, so
 * `splitTextIntoChunks(text).join('') === text`.
 *
 * @param text The text to split; an empty string yields an empty array.
 * @returns The non-empty chunks in their original order.
 */
function splitTextIntoChunks(text: string): string[] {
  // Built once per call instead of once per character.
  const cjkPattern = /[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]/;
  const whitespacePattern = /\s/;
  const urlTerminatorPattern = /[\s\])}"']/;
  const punctuationPattern = /[.!?,;:]/;

  const chunks: string[] = [];
  let currentChunk = '';
  let inURL = false;

  const pushCurrentChunk = () => {
    if (currentChunk) {
      chunks.push(currentChunk);
      currentChunk = '';
    }
  };

  for (let i = 0; i < text.length; i++) {
    const char = text[i];
    const nextChar = text[i + 1] || '';

    // URL detection. The scheme must start exactly at position i, and a scheme
    // found while already inside a URL (e.g. a proxied
    // "https://host/https://target") must not split that URL in two.
    if (!inURL && (text.startsWith('http://', i) || text.startsWith('https://', i))) {
      pushCurrentChunk();
      inURL = true;
    }

    if (inURL) {
      currentChunk += char;
      // End of URL: next character is whitespace/closing punctuation, or the text ends.
      if (i === text.length - 1 || urlTerminatorPattern.test(nextChar)) {
        pushCurrentChunk();
        inURL = false;
      }
      continue;
    }

    // CJK characters (including kana and hangul) and whitespace are emitted
    // as single-character chunks, closing any word in progress first.
    if (cjkPattern.test(char) || whitespacePattern.test(char)) {
      pushCurrentChunk();
      chunks.push(char);
      continue;
    }

    // Regular word building
    currentChunk += char;

    // Close the word before punctuation so the mark starts its own chunk.
    if (punctuationPattern.test(nextChar)) {
      pushCurrentChunk();
    }
  }

  pushCurrentChunk();
  return chunks;
}
/**
 * Chooses how long (in milliseconds) to pause before emitting `chunk`.
 *
 * Whitespace-only chunks, URLs and single CJK characters each get a fixed
 * random range. Every other chunk starts from a base range (a fixed one in
 * burst mode, otherwise one derived from the chunk's effective length) and
 * then receives extra time for a leading capital letter, for containing any
 * non-letter character, and for ending in punctuation.
 *
 * @param chunk The chunk about to be emitted.
 * @param burstMode When true, the shorter fixed base range is used.
 * @returns A randomised, non-integer delay in milliseconds.
 */
function calculateDelay(chunk: string, burstMode: boolean): number {
  // Random value in [floor, floor + spread).
  const jitter = (spread: number, floor: number): number => Math.random() * spread + floor;

  // Whitespace-only chunk: very short pause.
  if (chunk.trim().length === 0) {
    return jitter(20, 10);
  }

  // URLs are "typed" more slowly.
  if (chunk.match(/^https?:\/\//)) {
    return jitter(50, 100);
  }

  // A lone CJK character gets a longer pause.
  if (/^[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]$/.test(chunk)) {
    return jitter(100, 150);
  }

  // Base pause: fixed range in burst mode, otherwise shrinking with length
  // down to a floor of 10.
  let total: number;
  if (!burstMode) {
    const step = Math.max(10, 40 - getEffectiveLength(chunk) * 2);
    total = jitter(step, step);
  } else {
    total = jitter(30, 20);
  }

  // Extra time for a chunk that starts with a capital letter.
  if (/[A-Z]/.test(chunk[0])) {
    total += jitter(20, 10);
  }

  // Extra time when the chunk contains anything that is not a letter or whitespace.
  if (/[^a-zA-Z\s]/.test(chunk)) {
    total += jitter(30, 15);
  }

  // Sentence-ending marks pause longest; clause separators pause less.
  if (/[.!?]$/.test(chunk)) {
    total += jitter(350, 200);
  } else if (/[,;:]$/.test(chunk)) {
    total += jitter(150, 100);
  }

  return total;
}
/**
 * Measures a chunk in length units where each CJK, kana or hangul character
 * counts as 2 and every other UTF-16 code unit counts as 1.
 *
 * @param chunk The chunk to measure.
 * @returns The weighted length; 0 for an empty string.
 */
function getEffectiveLength(chunk: string): number {
  const wideMatches = chunk.match(/[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]/g);
  const wideTotal = wideMatches === null ? 0 : wideMatches.length;
  // Every code unit counts once; each wide character adds one extra unit.
  return chunk.length + wideTotal;
}
// Helper function to emit remaining content immediately
async function emitRemainingContent(
res: Response,
@ -210,7 +335,7 @@ async function processQueue(streamingState: StreamingState, res: Response, reque
streamingState.isEmitting = true;
try {
for await (const word of streamTextWordByWord(current.content, streamingState)) {
for await (const word of streamTextNaturally(current.content, streamingState)) {
const chunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',

View File

@ -414,17 +414,26 @@ async function performEvaluation(
},
tracker?: TokenTracker
): Promise<GenerateObjectResult<any>> {
const result = await generateObject({
model: params.model,
schema: params.schema,
prompt: params.prompt,
maxTokens: params.maxTokens
});
try {
const result = await generateObject({
model: params.model,
schema: params.schema,
prompt: params.prompt,
maxTokens: params.maxTokens
});
(tracker || new TokenTracker()).trackUsage('evaluator', result.usage);
console.log(`${evaluationType} Evaluation:`, result.object);
(tracker || new TokenTracker()).trackUsage('evaluator', result.usage);
console.log(`${evaluationType} Evaluation:`, result.object);
return result;
return result;
} catch (error) {
const errorResult = await handleGenerateObjectError<any>(error);
(tracker || new TokenTracker()).trackUsage('evaluator', errorResult.usage);
return {
object: errorResult.object,
usage: errorResult.usage
} as GenerateObjectResult<any>;
}
}