From dbeee0c8f5ef11292e7c20308e5e1dcb83bc8e34 Mon Sep 17 00:00:00 2001
From: Han Xiao
Date: Thu, 6 Mar 2025 17:15:46 +0800
Subject: [PATCH] fix: url datetime guessing

---
Changes since the original dbeee0c:
 * agent.ts: carry the model-provided dateTime through the reference mapping
   so the lookup has a fallback value to use.
 * url-tools.ts: declare the return type as Promise<string | null>, bound each
   lookup with a timeout, skip empty URLs, and only accept a non-empty string
   bestGuess.
 * Context-line indentation is a best-effort reconstruction; apply with
   --ignore-whitespace if it does not match. The index lines still carry the
   blob ids of the original commit.

 src/agent.ts           | 14 ++++++++++++--
 src/utils/schemas.ts   |  2 +-
 src/utils/url-tools.ts | 45 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/src/agent.ts b/src/agent.ts
index 4682c72..328f34c 100644
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -33,7 +33,7 @@ import {
   countUrlParts,
   getUnvisitedURLs,
   normalizeUrl, sampleMultinomial,
-  weightedURLToString
+  weightedURLToString, getLastModified
 } from "./utils/url-tools";
 import {buildMdFromAnswer, chooseK, removeExtraLineBreaks, removeHTMLtags} from "./utils/text-tools";
 import {MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas} from "./utils/schemas";
@@ -384,10 +384,20 @@ export async function getResponse(question?: string,
         return {
           exactQuote: ref?.exactQuote || '',
           title: normalizedUrl ? (allURLs[normalizedUrl]?.title || '') : '',
-          url: normalizedUrl
+          url: normalizedUrl,
+          // keep the model's own estimate so the datetime lookup below has a fallback
+          dateTime: ref?.dateTime || ''
         }
       });
 
+      // Guess every reference's datetime in parallel: prefer the detection API's
+      // answer and fall back to the model-provided value when the API has none.
+      await Promise.all(thisStep.references.map(async ref => {
+        ref.dateTime = (await getLastModified(ref.url)) || ref.dateTime || '';
+      }));
+
+      console.log('Updated references:', thisStep.references)
+
       if (step === 1 && thisStep.references.length === 0) {
         // LLM is so confident and answer immediately, skip all evaluations
         // however, if it does give any reference, it must be evaluated, case study: "How to configure a timeout when loading a huggingface dataset with python?"
diff --git a/src/utils/schemas.ts b/src/utils/schemas.ts
index ea10167..07663b3 100644
--- a/src/utils/schemas.ts
+++ b/src/utils/schemas.ts
@@ -220,7 +220,7 @@ export class Schemas {
           z.object({
             exactQuote: z.string().describe("Exact relevant quote from the document, must be a soundbite, short and to the point, no fluff").max(30),
             url: z.string().describe("source URL; must be copy directly from existing knowledge real URLs, avoid example.com or any placeholder fake URLs").max(100),
-            dateTime: z.string().describe("Apply this evidence hierarchy to determine the source timestamp: (1) Explicit dates in metadata/content, (2) Internal time references, (3) Contextual clues, (4) Version history if available. Format as YYYY-MM-DD when possible; otherwise provide narrowest defensible range with confidence level (High/Medium/Low).").max(16),
+            dateTime: z.string().describe("Apply this evidence hierarchy to determine the source timestamp: (1) Explicit dates in metadata/content, (2) Internal time references, (3) Contextual clues, (4) Version history if available. Format as YYYY-MM-DD when possible; otherwise provide narrowest defensible range.").max(16),
           }).required()
         ).describe("Required when action='answer'. Must be an array of references that support the answer, each reference must contain an exact quote and the URL of the document"),
         answer: z.string()
diff --git a/src/utils/url-tools.ts b/src/utils/url-tools.ts
index c7105b9..2a3750e 100644
--- a/src/utils/url-tools.ts
+++ b/src/utils/url-tools.ts
@@ -297,3 +297,48 @@ export function sampleMultinomial<T>(items: [T, number][]): T | null {
   // Fallback (should rarely happen due to floating point precision)
   return items[items.length - 1][0];
 }
+
+/** Upper bound on how long a single datetime lookup may take, in milliseconds. */
+const LAST_MODIFIED_TIMEOUT_MS = 10000;
+
+/**
+ * Asks the datetime detection API for its best guess of when a URL was last modified.
+ *
+ * Best-effort: network errors, timeouts, non-2xx responses and unexpected payloads are
+ * logged and reported as `null` instead of being thrown, so callers may fan this out
+ * with `Promise.all` without one slow or failing lookup breaking the batch.
+ *
+ * @param url The URL to check for last modified date
+ * @returns The API's `bestGuess` value, or `null` when it is missing or the lookup failed
+ */
+export async function getLastModified(url: string): Promise<string | null> {
+  if (!url) {
+    return null;
+  }
+
+  // Bound the request: callers await these lookups as a batch, so a single
+  // unresponsive one would otherwise hold up all of them.
+  const controller = new AbortController();
+  const timer = setTimeout(() => controller.abort(), LAST_MODIFIED_TIMEOUT_MS);
+
+  try {
+    // Call the API with proper encoding
+    const apiUrl = `https://api-beta-datetime.jina.ai?url=${encodeURIComponent(url)}`;
+    const response = await fetch(apiUrl, {signal: controller.signal});
+
+    if (!response.ok) {
+      throw new Error(`API returned ${response.status}`);
+    }
+
+    const data = await response.json();
+    const bestGuess: unknown = data?.bestGuess;
+
+    // Only trust a non-empty string; anything else counts as "not found"
+    return typeof bestGuess === 'string' && bestGuess ? bestGuess : null;
+  } catch (error) {
+    console.error('Failed to fetch last modified date:', error);
+    return null;
+  } finally {
+    clearTimeout(timer);
+  }
+}