mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2026-03-22 07:29:35 +08:00
fix: url datetime guessing
This commit is contained in:
@@ -693,9 +693,10 @@ You decided to think out of the box or cut from a completely different angle.
|
||||
const r: SearchSnippet = {
|
||||
title: link[0],
|
||||
url: normalizeUrl(link[1]),
|
||||
description: link[0]
|
||||
description: link[0],
|
||||
}
|
||||
addToAllURLs(r, allURLs);
|
||||
// in-page links get a lower initial weight compared to search-result links
|
||||
addToAllURLs(r, allURLs, 0.1);
|
||||
})
|
||||
|
||||
return {url, result: response};
|
||||
|
||||
@@ -223,7 +223,7 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers:
|
||||
const {hostname, path} = extractUrlParts(item.url);
|
||||
|
||||
// Base weight from original
|
||||
const freq = item.weight || 1.0; // Default to 1 if weight is missing
|
||||
const freq = item.weight || 0; // Default to 0 if weight is missing
|
||||
|
||||
// Hostname boost (normalized by total URLs)
|
||||
const hostnameFreq = normalizeCount(hostnameCount[hostname] || 0, totalUrls);
|
||||
@@ -264,12 +264,12 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers:
|
||||
}).sort((a, b) => b.finalScore - a.finalScore);
|
||||
};
|
||||
|
||||
export const addToAllURLs = (r: SearchSnippet, allURLs: Record<string, SearchSnippet>) => {
|
||||
export const addToAllURLs = (r: SearchSnippet, allURLs: Record<string, SearchSnippet>, weightDelta = 1) => {
|
||||
if (!allURLs[r.url]) {
|
||||
allURLs[r.url] = r;
|
||||
allURLs[r.url].weight = 1;
|
||||
allURLs[r.url].weight = weightDelta;
|
||||
} else {
|
||||
(allURLs[r.url].weight as number)++;
|
||||
(allURLs[r.url].weight as number)+= weightDelta;
|
||||
const curDesc = allURLs[r.url].description;
|
||||
allURLs[r.url].description = smartMergeStrings(curDesc, r.description);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user