mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2026-03-22 07:29:35 +08:00
fix: url datetime guessing
This commit is contained in:
@@ -693,9 +693,10 @@ You decided to think out of the box or cut from a completely different angle.
|
|||||||
const r: SearchSnippet = {
|
const r: SearchSnippet = {
|
||||||
title: link[0],
|
title: link[0],
|
||||||
url: normalizeUrl(link[1]),
|
url: normalizeUrl(link[1]),
|
||||||
description: link[0]
|
description: link[0],
|
||||||
}
|
}
|
||||||
addToAllURLs(r, allURLs);
|
// in-page links get a lower initial weight than search-result links
|
||||||
|
addToAllURLs(r, allURLs, 0.1);
|
||||||
})
|
})
|
||||||
|
|
||||||
return {url, result: response};
|
return {url, result: response};
|
||||||
|
|||||||
@@ -223,7 +223,7 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers:
|
|||||||
const {hostname, path} = extractUrlParts(item.url);
|
const {hostname, path} = extractUrlParts(item.url);
|
||||||
|
|
||||||
// Base weight from original
|
// Base weight from original
|
||||||
const freq = item.weight || 1.0; // Default to 1 if weight is missing
|
const freq = item.weight || 0; // Default to 0 if weight is missing
|
||||||
|
|
||||||
// Hostname boost (normalized by total URLs)
|
// Hostname boost (normalized by total URLs)
|
||||||
const hostnameFreq = normalizeCount(hostnameCount[hostname] || 0, totalUrls);
|
const hostnameFreq = normalizeCount(hostnameCount[hostname] || 0, totalUrls);
|
||||||
@@ -264,12 +264,12 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers:
|
|||||||
}).sort((a, b) => b.finalScore - a.finalScore);
|
}).sort((a, b) => b.finalScore - a.finalScore);
|
||||||
};
|
};
|
||||||
|
|
||||||
export const addToAllURLs = (r: SearchSnippet, allURLs: Record<string, SearchSnippet>) => {
|
export const addToAllURLs = (r: SearchSnippet, allURLs: Record<string, SearchSnippet>, weightDelta = 1) => {
|
||||||
if (!allURLs[r.url]) {
|
if (!allURLs[r.url]) {
|
||||||
allURLs[r.url] = r;
|
allURLs[r.url] = r;
|
||||||
allURLs[r.url].weight = 1;
|
allURLs[r.url].weight = weightDelta;
|
||||||
} else {
|
} else {
|
||||||
(allURLs[r.url].weight as number)++;
|
(allURLs[r.url].weight as number)+= weightDelta;
|
||||||
const curDesc = allURLs[r.url].description;
|
const curDesc = allURLs[r.url].description;
|
||||||
allURLs[r.url].description = smartMergeStrings(curDesc, r.description);
|
allURLs[r.url].description = smartMergeStrings(curDesc, r.description);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user