mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2026-03-22 07:29:35 +08:00
fix: url datetime guessing
This commit is contained in:
11
src/agent.ts
11
src/agent.ts
@@ -33,7 +33,7 @@ import {
|
|||||||
countUrlParts,
|
countUrlParts,
|
||||||
getUnvisitedURLs,
|
getUnvisitedURLs,
|
||||||
normalizeUrl, sampleMultinomial,
|
normalizeUrl, sampleMultinomial,
|
||||||
weightedURLToString
|
weightedURLToString, getLastModified
|
||||||
} from "./utils/url-tools";
|
} from "./utils/url-tools";
|
||||||
import {buildMdFromAnswer, chooseK, removeExtraLineBreaks, removeHTMLtags} from "./utils/text-tools";
|
import {buildMdFromAnswer, chooseK, removeExtraLineBreaks, removeHTMLtags} from "./utils/text-tools";
|
||||||
import {MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas} from "./utils/schemas";
|
import {MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas} from "./utils/schemas";
|
||||||
@@ -384,10 +384,17 @@ export async function getResponse(question?: string,
|
|||||||
return {
|
return {
|
||||||
exactQuote: ref?.exactQuote || '',
|
exactQuote: ref?.exactQuote || '',
|
||||||
title: normalizedUrl ? (allURLs[normalizedUrl]?.title || '') : '',
|
title: normalizedUrl ? (allURLs[normalizedUrl]?.title || '') : '',
|
||||||
url: normalizedUrl
|
url: normalizedUrl,
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// parallel process guess all url datetime
|
||||||
|
await Promise.all(thisStep.references.map(async ref => {
|
||||||
|
ref.dateTime = await getLastModified(ref.url) || ref?.dateTime || ''
|
||||||
|
}));
|
||||||
|
|
||||||
|
console.log('Updated references:', thisStep.references)
|
||||||
|
|
||||||
if (step === 1 && thisStep.references.length === 0) {
|
if (step === 1 && thisStep.references.length === 0) {
|
||||||
// LLM is so confident and answer immediately, skip all evaluations
|
// LLM is so confident and answer immediately, skip all evaluations
|
||||||
// however, if it does give any reference, it must be evaluated, case study: "How to configure a timeout when loading a huggingface dataset with python?"
|
// however, if it does give any reference, it must be evaluated, case study: "How to configure a timeout when loading a huggingface dataset with python?"
|
||||||
|
|||||||
@@ -220,7 +220,7 @@ export class Schemas {
|
|||||||
z.object({
|
z.object({
|
||||||
exactQuote: z.string().describe("Exact relevant quote from the document, must be a soundbite, short and to the point, no fluff").max(30),
|
exactQuote: z.string().describe("Exact relevant quote from the document, must be a soundbite, short and to the point, no fluff").max(30),
|
||||||
url: z.string().describe("source URL; must be copy directly from existing knowledge real URLs, avoid example.com or any placeholder fake URLs").max(100),
|
url: z.string().describe("source URL; must be copy directly from existing knowledge real URLs, avoid example.com or any placeholder fake URLs").max(100),
|
||||||
dateTime: z.string().describe("Apply this evidence hierarchy to determine the source timestamp: (1) Explicit dates in metadata/content, (2) Internal time references, (3) Contextual clues, (4) Version history if available. Format as YYYY-MM-DD when possible; otherwise provide narrowest defensible range with confidence level (High/Medium/Low).").max(16),
|
dateTime: z.string().describe("Apply this evidence hierarchy to determine the source timestamp: (1) Explicit dates in metadata/content, (2) Internal time references, (3) Contextual clues, (4) Version history if available. Format as YYYY-MM-DD when possible; otherwise provide narrowest defensible range.").max(16),
|
||||||
}).required()
|
}).required()
|
||||||
).describe("Required when action='answer'. Must be an array of references that support the answer, each reference must contain an exact quote and the URL of the document"),
|
).describe("Required when action='answer'. Must be an array of references that support the answer, each reference must contain an exact quote and the URL of the document"),
|
||||||
answer: z.string()
|
answer: z.string()
|
||||||
|
|||||||
@@ -297,3 +297,35 @@ export function sampleMultinomial<T>(items: [T, number][]): T | null {
|
|||||||
// Fallback (should rarely happen due to floating point precision)
|
// Fallback (should rarely happen due to floating point precision)
|
||||||
return items[items.length - 1][0];
|
return items[items.length - 1][0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Fetches the last modified date for a URL using the datetime detection API.
 *
 * This is a best-effort lookup: an empty URL, a network failure, a timeout,
 * a non-2xx status, or a payload without a usable `bestGuess` all resolve to
 * `null` instead of rejecting, so callers can run many lookups in parallel
 * without one failure aborting the rest.
 *
 * @param url The URL to check for last modified date
 * @param timeoutMs Upper bound, in milliseconds, on the whole request
 *   (headers and body). Defaults to 30 seconds.
 * @returns Promise containing the API's `bestGuess` string, or null if not found
 */
export async function getLastModified(url: string, timeoutMs = 30000): Promise<string | null> {
  // Nothing to look up; avoid a pointless round trip with `?url=`.
  if (!url) {
    return null;
  }

  // Bound the request so a stalled connection cannot hang the caller forever.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);

  try {
    // Call the API with proper encoding
    const apiUrl = `https://api-beta-datetime.jina.ai?url=${encodeURIComponent(url)}`;
    const response = await fetch(apiUrl, {signal: controller.signal});

    if (!response.ok) {
      throw new Error(`API returned ${response.status}`);
    }

    // The payload is external input: treat it as unknown and only accept a
    // non-empty string, so the declared return type holds at runtime.
    const data: unknown = await response.json();
    const bestGuess = (data as { bestGuess?: unknown } | null)?.bestGuess;

    return typeof bestGuess === 'string' && bestGuess !== '' ? bestGuess : null;
  } catch (error) {
    console.error('Failed to fetch last modified date:', error);
    return null;
  } finally {
    // Always release the timer, otherwise it keeps the event loop alive.
    clearTimeout(timer);
  }
}
|
||||||
|
|||||||
Reference in New Issue
Block a user