From 1e097a9eccf40ca85c6dc6f4986fab2abaa7a937 Mon Sep 17 00:00:00 2001 From: Han Xiao Date: Fri, 7 Mar 2025 14:32:47 +0800 Subject: [PATCH] fix: url datetime guessing --- src/agent.ts | 5 +++-- src/utils/url-tools.ts | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/agent.ts b/src/agent.ts index 6bc0d49..34538e0 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -693,9 +693,10 @@ You decided to think out of the box or cut from a completely different angle. const r: SearchSnippet = { title: link[0], url: normalizeUrl(link[1]), - description: link[0] + description: link[0], } - addToAllURLs(r, allURLs); + // in-page link has lower initial weight compared to search links + addToAllURLs(r, allURLs, 0.1); }) return {url, result: response}; diff --git a/src/utils/url-tools.ts b/src/utils/url-tools.ts index dd60db8..c13f6c6 100644 --- a/src/utils/url-tools.ts +++ b/src/utils/url-tools.ts @@ -223,7 +223,7 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers: const {hostname, path} = extractUrlParts(item.url); // Base weight from original - const freq = item.weight || 1.0; // Default to 1 if weight is missing + const freq = item.weight || 0; // Default to 0 if weight is missing // Hostname boost (normalized by total URLs) const hostnameFreq = normalizeCount(hostnameCount[hostname] || 0, totalUrls); @@ -264,12 +264,12 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers: }).sort((a, b) => b.finalScore - a.finalScore); }; -export const addToAllURLs = (r: SearchSnippet, allURLs: Record<string, SearchSnippet>) => { +export const addToAllURLs = (r: SearchSnippet, allURLs: Record<string, SearchSnippet>, weightDelta = 1) => { if (!allURLs[r.url]) { allURLs[r.url] = r; - allURLs[r.url].weight = 1; + allURLs[r.url].weight = weightDelta; } else { - (allURLs[r.url].weight as number)++; + (allURLs[r.url].weight as number)+= weightDelta; const curDesc = allURLs[r.url].description; allURLs[r.url].description = smartMergeStrings(curDesc, 
r.description); }