mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2026-03-22 07:29:35 +08:00
fix: url datetime guessing
This commit is contained in:
11
src/agent.ts
11
src/agent.ts
@@ -33,7 +33,7 @@ import {
|
||||
countUrlParts,
|
||||
getUnvisitedURLs,
|
||||
normalizeUrl, sampleMultinomial,
|
||||
weightedURLToString
|
||||
weightedURLToString, getLastModified
|
||||
} from "./utils/url-tools";
|
||||
import {buildMdFromAnswer, chooseK, removeExtraLineBreaks, removeHTMLtags} from "./utils/text-tools";
|
||||
import {MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas} from "./utils/schemas";
|
||||
@@ -384,10 +384,17 @@ export async function getResponse(question?: string,
|
||||
return {
|
||||
exactQuote: ref?.exactQuote || '',
|
||||
title: normalizedUrl ? (allURLs[normalizedUrl]?.title || '') : '',
|
||||
url: normalizedUrl
|
||||
url: normalizedUrl,
|
||||
}
|
||||
});
|
||||
|
||||
// parallel process guess all url datetime
|
||||
await Promise.all(thisStep.references.map(async ref => {
|
||||
ref.dateTime = await getLastModified(ref.url) || ref?.dateTime || ''
|
||||
}));
|
||||
|
||||
console.log('Updated references:', thisStep.references)
|
||||
|
||||
if (step === 1 && thisStep.references.length === 0) {
|
||||
// LLM is so confident and answer immediately, skip all evaluations
|
||||
// however, if it does give any reference, it must be evaluated, case study: "How to configure a timeout when loading a huggingface dataset with python?"
|
||||
|
||||
@@ -220,7 +220,7 @@ export class Schemas {
|
||||
z.object({
|
||||
exactQuote: z.string().describe("Exact relevant quote from the document, must be a soundbite, short and to the point, no fluff").max(30),
|
||||
url: z.string().describe("source URL; must be copy directly from existing knowledge real URLs, avoid example.com or any placeholder fake URLs").max(100),
|
||||
dateTime: z.string().describe("Apply this evidence hierarchy to determine the source timestamp: (1) Explicit dates in metadata/content, (2) Internal time references, (3) Contextual clues, (4) Version history if available. Format as YYYY-MM-DD when possible; otherwise provide narrowest defensible range with confidence level (High/Medium/Low).").max(16),
|
||||
dateTime: z.string().describe("Apply this evidence hierarchy to determine the source timestamp: (1) Explicit dates in metadata/content, (2) Internal time references, (3) Contextual clues, (4) Version history if available. Format as YYYY-MM-DD when possible; otherwise provide narrowest defensible range.").max(16),
|
||||
}).required()
|
||||
).describe("Required when action='answer'. Must be an array of references that support the answer, each reference must contain an exact quote and the URL of the document"),
|
||||
answer: z.string()
|
||||
|
||||
@@ -297,3 +297,35 @@ export function sampleMultinomial<T>(items: [T, number][]): T | null {
|
||||
// Fallback (should rarely happen due to floating point precision)
|
||||
return items[items.length - 1][0];
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Asks the datetime detection API for its best guess of when a URL was last modified.
 *
 * This is a best-effort lookup: every failure (blank URL, network error, non-2xx
 * response, malformed payload) resolves to `null` instead of rejecting, so it is
 * safe to call inside `Promise.all` without extra error handling.
 *
 * @param url The URL to check for last modified date
 * @returns The API's `bestGuess` value when it is a non-empty string, otherwise `null`
 */
export async function getLastModified(url: string): Promise<string | null> {
  // Callers may hand over an empty/missing URL (e.g. a reference whose URL could
  // not be normalized); there is nothing to look up, so skip the network call.
  if (typeof url !== 'string' || url.trim() === '') {
    return null;
  }

  try {
    // Call the API with proper encoding
    const apiUrl = `https://api-beta-datetime.jina.ai?url=${encodeURIComponent(url)}`;
    const response = await fetch(apiUrl);

    if (!response.ok) {
      throw new Error(`API returned ${response.status}`);
    }

    // The payload is untrusted JSON: narrow it instead of trusting its shape.
    const payload: unknown = await response.json();
    const bestGuess = (payload as { bestGuess?: unknown } | null)?.bestGuess;

    // Only a non-empty string satisfies the declared return type.
    return typeof bestGuess === 'string' && bestGuess !== '' ? bestGuess : null;
  } catch (error) {
    console.error('Failed to fetch last modified date:', error);
    return null;
  }
}
|
||||
|
||||
Reference in New Issue
Block a user