fix: url datetime guessing

Han Xiao 2025-03-07 16:25:05 +08:00
parent 77da6c6ec5
commit 43e22cbd8d
4 changed files with 45 additions and 13 deletions

View File

@@ -1,5 +1,5 @@
import {ZodObject} from 'zod';
-import {CoreAssistantMessage, CoreUserMessage} from 'ai';
+import {CoreMessage} from 'ai';
import {SEARCH_PROVIDER, STEP_SLEEP} from "./config";
import {readUrl, removeAllLineBreaks} from "./tools/read";
import fs from 'fs/promises';
@@ -73,6 +73,14 @@ Using your training data and prior lessons learned, answer the user question wit
if (knowledge?.length) {
const knowledgeItems = knowledge
.map((k, i) => `
+<knowledge-0>
+<question>
+How can I get the last update time of a URL?
+</question>
+<answer>
+Just choose <action-visit> and put URL in it, it will fetch full text and estimate the last update datetime of that URL.
+</answer>
+</knowledge-0>
<knowledge-${i + 1}>
<question>
${k.question}
@@ -194,7 +202,8 @@ ${allKeywords.join('\n')}
if (allowAnswer) {
actionSections.push(`
<action-answer>
-- For greetings, casual conversation, or general knowledge questions, answer directly without references.
+- For greetings, casual conversation, general knowledge questions answer directly without references.
+- If user ask you to retrieve previous messages or chat history, remember you do have access to the chat history, answer directly without references.
- For all other questions, provide a verified answer with references. Each reference must include exactQuote, url and datetime.
- You provide deep, unexpected insights, identifying hidden patterns and connections, and creating "aha moments.".
- You break conventional thinking, establish unique cross-disciplinary connections, and bring new perspectives to the user.
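
The answer action above now requires every reference to include an exactQuote, a url, and a datetime, which is where the URL last-update estimate from the visit action ends up. A minimal sketch of what such a reference object could look like, using only the field names stated in the prompt; the type name and sample values are illustrative assumptions, not the repository's actual types:

// Hypothetical shape; the field names (exactQuote, url, datetime) come from
// the prompt above, everything else is made up for illustration.
interface AnswerReference {
  exactQuote: string;  // verbatim quote from the visited page
  url: string;         // source URL
  datetime: string;    // estimated last-update time of that URL
}

const ref: AnswerReference = {
  exactQuote: 'The feature was released in early March.',
  url: 'https://example.com/changelog',
  datetime: '2025-03-07T08:25:05Z',
};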
@@ -257,7 +266,7 @@ export async function getResponse(question?: string,
tokenBudget: number = 1_000_000,
maxBadAttempts: number = 3,
existingContext?: Partial<TrackerContext>,
-messages?: Array<CoreAssistantMessage | CoreUserMessage>
+messages?: Array<CoreMessage>
): Promise<{ result: StepAction; context: TrackerContext; visitedURLs: string[], readURLs: string[] }> {
let step = 0;
@@ -266,7 +275,14 @@ export async function getResponse(question?: string,
question = question?.trim() as string;
if (messages && messages.length > 0) {
-question = (messages[messages.length - 1]?.content as string).trim();
+// 2 cases
+const lastContent = messages[messages.length - 1].content;
+if (typeof lastContent === 'string') {
+question = lastContent.trim();
+} else if (typeof lastContent === 'object' && Array.isArray(lastContent)) {
+// find the very last sub content whose 'type' is 'text' and use 'text' as the question
+question = lastContent.filter(c => c.type === 'text').pop()?.text || '';
+}
} else {
messages = [{role: 'user', content: question.trim()}]
}
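
The hunk above covers the two shapes a message's content can take in the ai SDK: a plain string or an array of typed parts. A standalone sketch of the same extraction, assuming only the CoreMessage type already imported in this file; the helper name is hypothetical:

import {CoreMessage} from 'ai';

// Hypothetical helper mirroring the logic above: take the last message and
// return its text, preferring the last 'text' part when content is an array.
function lastTextContent(messages: CoreMessage[]): string {
  const content = messages[messages.length - 1]?.content;
  if (typeof content === 'string') {
    return content.trim();
  }
  if (Array.isArray(content)) {
    // walk backwards to find the last part whose type is 'text'
    for (let i = content.length - 1; i >= 0; i--) {
      const part = content[i];
      if (part.type === 'text') {
        return part.text.trim();
      }
    }
  }
  return '';
}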
@@ -401,7 +417,7 @@ export async function getResponse(question?: string,
console.log('Updated references:', thisStep.references)
-if (step === 1 && thisStep.references.length === 0 && thisStep.answer.length < 300) {
+if (step === 1 && thisStep.references.length === 0) {
// LLM is so confident and answer immediately, skip all evaluations
// however, if it does give any reference, it must be evaluated, case study: "How to configure a timeout when loading a huggingface dataset with python?"
thisStep.isFinal = true;

View File

@@ -388,11 +388,21 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
return res.status(400).json({error: 'Last message must be from user'});
}
-console.log('messages', JSON.stringify(body.messages));
// clean <think> from all assistant messages
body.messages?.filter(message => message.role === 'assistant').forEach(message => {
-message.content = (message.content as string).replace(/<think>[\s\S]*?<\/think>/g, '').trim();
+// 2 cases message.content can be a string or an array
+if (typeof message.content === 'string') {
+message.content = (message.content as string).replace(/<think>[\s\S]*?<\/think>/g, '').trim();
+} else if (Array.isArray(message.content)) {
+// find all type: text and clean <think> from .text
+message.content.forEach((content: any) => {
+if (content.type === 'text') {
+content.text = (content.text as string).replace(/<think>[\s\S]*?<\/think>/g, '').trim();
+}});
+}
});
+console.log('messages', body.messages);
let {tokenBudget, maxBadAttempts} = getTokenBudgetAndMaxAttempts(
body.reasoning_effort,

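The server applies the same string-or-array handling when scrubbing <think> blocks out of assistant messages before they are passed on. A self-contained sketch of that scrubbing, assuming CoreMessage-shaped messages; the helper name and in-place mutation are choices made for this sketch, not necessarily the route handler's exact structure:

import {CoreMessage} from 'ai';

const THINK_RE = /<think>[\s\S]*?<\/think>/g;

// Hypothetical helper: remove <think>...</think> spans from every assistant
// message, whether its content is a plain string or an array of parts.
function stripThinkTags(messages: CoreMessage[]): void {
  for (const message of messages) {
    if (message.role !== 'assistant') continue;
    if (typeof message.content === 'string') {
      message.content = message.content.replace(THINK_RE, '').trim();
    } else if (Array.isArray(message.content)) {
      for (const part of message.content) {
        if (part.type === 'text') {
          part.text = part.text.replace(THINK_RE, '').trim();
        }
      }
    }
  }
}
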
View File

@@ -1,5 +1,5 @@
// Action Types
-import {CoreAssistantMessage, CoreUserMessage, LanguageModelUsage} from "ai";
+import { CoreMessage, LanguageModelUsage} from "ai";
type BaseAction = {
action: "search" | "answer" | "reflect" | "visit" | "coding";
@@ -200,7 +200,7 @@ export type ResponseFormat = {
export interface ChatCompletionRequest {
model: string;
-messages: Array<CoreUserMessage | CoreAssistantMessage>;
+messages: Array<CoreMessage>;
stream?: boolean;
reasoning_effort?: 'low' | 'medium' | 'high';
max_completion_tokens?: number;
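
With messages widened to Array<CoreMessage>, a ChatCompletionRequest body may now mix plain-string messages with multi-part ones. A small illustrative payload using only fields visible in the interface above; the model id and message texts are placeholders:

import {CoreMessage} from 'ai';

// Placeholder conversation mixing string content and array-of-parts content.
const messages: CoreMessage[] = [
  {role: 'user', content: 'When was https://example.com/post last updated?'},
  {role: 'assistant', content: '<think>visit the page first</think>It looks recently updated.'},
  {
    role: 'user',
    content: [
      {type: 'text', text: 'Please give the exact datetime with a reference.'},
    ],
  },
];

const request = {
  model: 'some-model-id',        // hypothetical; accepted ids are not shown in this diff
  messages,
  stream: false,
  reasoning_effort: 'medium' as const,
};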

View File

@@ -1,5 +1,11 @@
-import { z } from 'zod';
-import {generateObject, LanguageModelUsage, NoObjectGeneratedError, Schema} from "ai";
+import {z} from 'zod';
+import {
+CoreMessage,
+generateObject,
+LanguageModelUsage,
+NoObjectGeneratedError,
+Schema
+} from "ai";
import {TokenTracker} from "./token-tracker";
import {getModel, ToolName, getToolConfig} from "../config";
@@ -12,8 +18,8 @@ interface GenerateOptions<T> {
model: ToolName;
schema: z.ZodType<T> | Schema<T>;
prompt?: string;
-system?:string;
-messages?: any;
+system?: string;
+messages?: CoreMessage[];
}
export class ObjectGeneratorSafe {
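
Typing messages as CoreMessage[] instead of any lines up with what the ai package's generateObject call accepts. A rough sketch of how such options might flow into that call, assuming getModel('agent') resolves to a language model instance as the config import above suggests; this is a sketch under those assumptions, not the repository's actual ObjectGeneratorSafe implementation:

import {z} from 'zod';
import {CoreMessage, generateObject} from 'ai';
import {getModel} from '../config'; // same import path as the hunk above

// Hedged sketch: pass the now-typed messages straight through to generateObject.
async function generateAnswerObject(messages: CoreMessage[]) {
  const {object, usage} = await generateObject({
    model: getModel('agent'),               // assumption: 'agent' is a valid ToolName
    schema: z.object({answer: z.string()}),
    system: 'Answer the user based only on the conversation.',
    messages,
  });
  return {object, usage};
}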