fix: try catch in evaluator

2025-12-26 06:28:56 +08:00 · 2025-02-12 21:47:42 +08:00 · 2025-02-12 21:47:42 +08:00 · ee4213111c
commit ee4213111c
parent 29fc4d9214
3 changed files with 162 additions and 28 deletions
--- a/src/agent.ts
+++ b/src/agent.ts
@ -299,8 +299,8 @@ export async function getResponse(question: string,
  let totalStep = 0;
  let badAttempts = 0;
  let schema: ZodObject<any> = getSchema(true, true, true, true)
-  const gaps: string[] = [question];  // All questions to be answered including the orginal question
-  const allQuestions = [question];
+  const gaps: string[] = [question.trim()];  // All questions to be answered including the orginal question
+  const allQuestions = [question.trim()];
  const allKeywords = [];
  const allKnowledge = [];  // knowledge are intermedidate questions that are answered
  // iterate over historyMessages
@ -339,7 +339,7 @@ export async function getResponse(question: string,
    console.log(`Step ${totalStep} / Budget used ${budgetPercentage}%`);
    console.log('Gaps:', gaps);
    allowReflect = allowReflect && (gaps.length <= 1);
-    const currentQuestion = gaps.length > 0 ? gaps.shift()! : question;
+    const currentQuestion = gaps.length > 0 ? gaps.shift()! : question.trim();
    if (!evaluationMetrics[currentQuestion]) {
      evaluationMetrics[currentQuestion] = await evaluateQuestion(currentQuestion, context.tokenTracker)
    }
@ -411,7 +411,7 @@ export async function getResponse(question: string,
      const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep,
        evaluationMetrics[currentQuestion], context.tokenTracker);

-      if (currentQuestion === question) {
+      if (currentQuestion.trim() === question.trim()) {
        if (evaluation.pass) {
          diaryContext.push(`
 At step ${step}, you took **answer** action and finally found the answer to the original question:
@ -466,7 +466,7 @@ ${evaluation.think}
            if (errorAnalysis.questionsToAnswer) {
              gaps.push(...errorAnalysis.questionsToAnswer.slice(0, 2));
              allQuestions.push(...errorAnalysis.questionsToAnswer.slice(0, 2));
-              gaps.push(question);  // always keep the original question in the gaps
+              gaps.push(question.trim());  // always keep the original question in the gaps
            }

            badAttempts++;
@ -512,7 +512,7 @@ You will now figure out the answers to these sub-questions and see if they can h
 `);
        gaps.push(...newGapQuestions);
        allQuestions.push(...newGapQuestions);
-        gaps.push(question);  // always keep the original question in the gaps
+        gaps.push(question.trim());  // always keep the original question in the gaps
      } else {
        diaryContext.push(`
 At step ${step}, you took **reflect** and think about the knowledge gaps. You tried to break down the question "${currentQuestion}" into gap-questions like this: ${oldQuestions.join(', ')} 
--- a/src/app.ts
+++ b/src/app.ts
@ -45,24 +45,149 @@ ${answer.references.map((ref, i) => {
  return `${answer.answer.replace(/\(REF_(\d+)\)/g, (_, num) => `[^${num}]`)}\n\n${refStr}`;
 }

+/**
+ * Streams `text` chunk by chunk, waiting a computed delay before each chunk.
+ *
+ * The text is split with `splitTextIntoChunks`, and the wait for each chunk
+ * comes from `calculateDelay`. After three or more consecutive short,
+ * non-whitespace chunks (effective length <= 3) the generator switches to
+ * burst mode, which is passed on to `calculateDelay` for the following chunks.
+ *
+ * @param text - The full text to emit.
+ * @param streamingState - Checked before every chunk; once
+ *   `currentlyStreaming` is false, everything not yet emitted is yielded as a
+ *   single string and the generator returns.
+ * @returns An async generator yielding the chunks in order.
+ */
+async function* streamTextNaturally(text: string, streamingState: StreamingState) {
+  // Split text into chunks that preserve CJK characters, URLs, and regular words
+  const chunks = splitTextIntoChunks(text);
+  let burstMode = false;
+  let consecutiveShortItems = 0;

-// Modified streamTextWordByWord function
-async function* streamTextWordByWord(text: string, streamingState: StreamingState) {
-  const words = text.split(/(\s+)/);
-  for (const word of words) {
-    if (streamingState.currentlyStreaming) {
-      const delay = Math.floor(Math.random() * 100);
-      await new Promise(resolve => setTimeout(resolve, delay));
-      yield word;
-    } else {
-      // If streaming was interrupted, yield all remaining words at once
-      const remainingWords = words.slice(words.indexOf(word)).join('');
-      yield remainingWords;
+  for (let i = 0; i < chunks.length; i++) {
+    const chunk = chunks[i];
+    if (!streamingState.currentlyStreaming) {
+      // Streaming was interrupted: flush everything from the current position.
+      // Slice by loop index, not indexOf(chunk): indexOf returns the FIRST equal
+      // chunk (e.g. an earlier " "), which would re-emit already-streamed text.
+      yield chunks.slice(i).join('');
      return;
    }
+
+    // The delay is computed with the burst state left by the previous chunks;
+    // the state update below only affects the chunks that follow.
+    const delay = calculateDelay(chunk, burstMode);
+
+    // Handle consecutive short items
+    if (getEffectiveLength(chunk) <= 3 && chunk.trim().length > 0) {
+      consecutiveShortItems++;
+      if (consecutiveShortItems >= 3) {
+        burstMode = true;
+      }
+    } else {
+      consecutiveShortItems = 0;
+      burstMode = false;
+    }
+
+    await new Promise(resolve => setTimeout(resolve, delay));
+    yield chunk;
  }
 }

+/**
+ * Splits text into the chunks used for streaming.
+ *
+ * Rules, applied character by character:
+ * - A URL (starting at `http://` or `https://`) is kept as one chunk and ends
+ *   when the next character is whitespace or one of `] ) } " '`, or at the end
+ *   of the text.
+ * - Each CJK character (including kana and hangul) is its own chunk.
+ * - Each whitespace character is its own chunk.
+ * - Other characters accumulate into a word; the word is closed right before
+ *   any of `. ! ? , ; :`, so the punctuation begins the next chunk.
+ *
+ * @param text - The text to split.
+ * @returns The non-empty chunks, in order.
+ */
+function splitTextIntoChunks(text: string): string[] {
+  const chunks: string[] = [];
+  let currentChunk = '';
+  let inURL = false;
+
+  // Flushes the word or URL accumulated so far, if there is one.
+  const pushCurrentChunk = () => {
+    if (currentChunk) {
+      chunks.push(currentChunk);
+      currentChunk = '';
+    }
+  };
+
+  for (let i = 0; i < text.length; i++) {
+    const char = text[i];
+    const nextChar = text[i + 1] || '';
+
+    // URL detection. The scheme must begin exactly at position i; an
+    // unanchored search would also fire on an "h" that merely precedes
+    // "http://" and split the preceding word in the wrong place.
+    if (text.startsWith('http://', i) || text.startsWith('https://', i)) {
+      pushCurrentChunk();
+      inURL = true;
+    }
+
+    if (inURL) {
+      currentChunk += char;
+      // End of URL detection (whitespace or certain punctuation)
+      if (/[\s\])}"']/.test(nextChar) || i === text.length - 1) {
+        pushCurrentChunk();
+        inURL = false;
+      }
+      continue;
+    }
+
+    // CJK character detection (including kana and hangul)
+    if (/[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]/.test(char)) {
+      pushCurrentChunk();
+      chunks.push(char);
+      continue;
+    }
+
+    // Whitespace handling
+    if (/\s/.test(char)) {
+      pushCurrentChunk();
+      chunks.push(char);
+      continue;
+    }
+
+    // Regular word building
+    currentChunk += char;
+
+    // Break on punctuation
+    if (/[.!?,;:]/.test(nextChar)) {
+      pushCurrentChunk();
+    }
+  }
+
+  pushCurrentChunk();
+  // Every push above adds a non-empty string, so no filtering is needed.
+  return chunks;
+}
+
+/**
+ * Picks a randomized delay to wait before emitting `chunk`.
+ *
+ * Whitespace, URLs and single CJK characters each have a fixed random range.
+ * Any other chunk starts from a base delay (a burst range when `burstMode` is
+ * set, otherwise a range that shrinks as the effective length grows) and then
+ * collects extras for a leading capital, for any character that is neither a
+ * letter nor whitespace, and for trailing punctuation.
+ *
+ * @param chunk - The chunk about to be emitted.
+ * @param burstMode - Whether the caller is currently in burst mode.
+ * @returns The delay to wait before the chunk.
+ */
+function calculateDelay(chunk: string, burstMode: boolean): number {
+  // Whitespace-only chunks get the shortest pause.
+  if (chunk.trim().length === 0) {
+    return Math.random() * 20 + 10;
+  }
+
+  // URLs are emitted as a single chunk with their own range.
+  if (/^https?:\/\//.test(chunk)) {
+    return Math.random() * 50 + 100; // Slower typing for URLs
+  }
+
+  // A lone CJK character gets a longer pause.
+  if (/^[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]$/.test(chunk)) {
+    return Math.random() * 100 + 150;
+  }
+
+  // Base delay: burst range, or a length-dependent range.
+  let total: number;
+  if (burstMode) {
+    total = Math.random() * 30 + 20;
+  } else {
+    const perCharacter = Math.max(10, 40 - getEffectiveLength(chunk) * 2);
+    total = Math.random() * perCharacter + perCharacter;
+  }
+
+  // Extra for a leading capital letter.
+  const startsWithCapital = /[A-Z]/.test(chunk.charAt(0));
+  if (startsWithCapital) {
+    total += Math.random() * 20 + 10;
+  }
+
+  // Extra when the chunk holds anything besides letters and whitespace.
+  const hasNonLetter = /[^a-zA-Z\s]/.test(chunk);
+  if (hasNonLetter) {
+    total += Math.random() * 30 + 15;
+  }
+
+  // Pause after punctuation: sentence enders first, then clause separators.
+  if (/[.!?]$/.test(chunk)) {
+    total += Math.random() * 350 + 200;
+  } else if (/[,;:]$/.test(chunk)) {
+    total += Math.random() * 150 + 100;
+  }
+
+  return total;
+}
+
+/**
+ * Returns the length of `chunk` with every CJK character (including kana and
+ * hangul) weighted as two units and every other UTF-16 code unit as one.
+ *
+ * @param chunk - The chunk to measure.
+ * @returns The weighted length.
+ */
+function getEffectiveLength(chunk: string): number {
+  const cjkMatches = chunk.match(/[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]/g);
+  const cjkTotal = cjkMatches ? cjkMatches.length : 0;
+  // Each CJK character is already counted once in chunk.length; add it again.
+  return chunk.length + cjkTotal;
+}
+
 // Helper function to emit remaining content immediately
 async function emitRemainingContent(
  res: Response,
@ -210,7 +335,7 @@ async function processQueue(streamingState: StreamingState, res: Response, reque
    streamingState.isEmitting = true;

    try {
-      for await (const word of streamTextWordByWord(current.content, streamingState)) {
+      for await (const word of streamTextNaturally(current.content, streamingState)) {
        const chunk: ChatCompletionChunk = {
          id: requestId,
          object: 'chat.completion.chunk',
--- a/src/tools/evaluator.ts
+++ b/src/tools/evaluator.ts
@ -414,17 +414,26 @@ async function performEvaluation(
  },
  tracker?: TokenTracker
 ): Promise<GenerateObjectResult<any>> {
-  const result = await generateObject({
-    model: params.model,
-    schema: params.schema,
-    prompt: params.prompt,
-    maxTokens: params.maxTokens
-  });
+  try {
+    const result = await generateObject({
+      model: params.model,
+      schema: params.schema,
+      prompt: params.prompt,
+      maxTokens: params.maxTokens
+    });

-  (tracker || new TokenTracker()).trackUsage('evaluator', result.usage);
-  console.log(`${evaluationType} Evaluation:`, result.object);
+    (tracker || new TokenTracker()).trackUsage('evaluator', result.usage);
+    console.log(`${evaluationType} Evaluation:`, result.object);

-  return result;
+    return result;
+  } catch (error) {
+    const errorResult = await handleGenerateObjectError<any>(error);
+    (tracker || new TokenTracker()).trackUsage('evaluator', errorResult.usage);
+    return {
+        object: errorResult.object,
+        usage: errorResult.usage
+    } as GenerateObjectResult<any>;
+  }
 }