From f1b37c4f34a05fe26b439bc1d73ee06a3b5c8f5c Mon Sep 17 00:00:00 2001
From: Han Xiao
Date: Fri, 7 Feb 2025 16:09:05 +0800
Subject: [PATCH] feat: improve dedup with jina embeddings

---
 src/tools/jina-dedup.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tools/jina-dedup.ts b/src/tools/jina-dedup.ts
index 2583e38..a40d051 100644
--- a/src/tools/jina-dedup.ts
+++ b/src/tools/jina-dedup.ts
@@ -3,7 +3,7 @@ import { TokenTracker } from "../utils/token-tracker";
 import {JINA_API_KEY} from "../config";
 
 const JINA_API_URL = 'https://api.jina.ai/v1/embeddings';
-const SIMILARITY_THRESHOLD = 0.85; // Adjustable threshold for cosine similarity
+const SIMILARITY_THRESHOLD = 0.95; // Adjustable threshold for cosine similarity
 
 // Types for Jina API
 interface JinaEmbeddingRequest {