From ae95d74ca024472f39e4df741886a365bf806d0c Mon Sep 17 00:00:00 2001 From: Han Xiao Date: Tue, 4 Mar 2025 16:51:53 +0800 Subject: [PATCH] feat: add url ranking --- src/agent.ts | 5 ++--- src/tools/jina-dedup.ts | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/agent.ts b/src/agent.ts index 4851603..3c843f1 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -137,8 +137,7 @@ ${learnedStrategy} urlList = (weightedURLs) .filter(r => 'url' in r) .sort((a, b) => (b.boostedWeight || 0) - (a.boostedWeight || 0)) - .slice(0, 10) // save context window and reduce noise, only keep top 10 urls - .map(r => ` + weight: ${r.boostedWeight.toFixed(3)} "${r.url}": "${r.title}"`) + .map(r => ` + weight: ${r.boostedWeight.toFixed(2)} "${r.url}": "${r.title}"`) .join('\n'); } @@ -147,7 +146,7 @@ ${learnedStrategy} - Access and read full content from URLs - Must check URLs mentioned in if any ${urlList ? ` -- Choose and visit relevant URLs below for more knowledge. higher weight means more relevant and you should visit first: +- Choose and visit relevant URLs below for more knowledge. higher weight suggests more relevant: ${urlList} diff --git a/src/tools/jina-dedup.ts b/src/tools/jina-dedup.ts index f18768a..2679a76 100644 --- a/src/tools/jina-dedup.ts +++ b/src/tools/jina-dedup.ts @@ -3,7 +3,7 @@ import {TokenTracker} from "../utils/token-tracker"; import {JINA_API_KEY} from "../config"; const JINA_API_URL = 'https://api.jina.ai/v1/embeddings'; -const SIMILARITY_THRESHOLD = 0.888; // Adjustable threshold for cosine similarity +const SIMILARITY_THRESHOLD = 0.86; // Adjustable threshold for cosine similarity const JINA_API_CONFIG = { MODEL: 'jina-embeddings-v3',