mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2026-03-22 07:29:35 +08:00
feat: improve dedup with jina embeddings
This commit is contained in:
22
README.md
22
README.md
@@ -238,14 +238,14 @@ Plain `gemini-2.0-flash` can be run by setting `tokenBudget` to zero, skipping t
|
||||
|
||||
It should not be surprising that plain `gemini-2.0-flash` has a 0% pass rate, as I intentionally filtered out the questions that LLMs can answer.
|
||||
|
||||
| Metric | gemini-2.0-flash | gemini-2.0-flash + node-deepresearch |
|
||||
|--------|------------------|--------------------------------------|
|
||||
| Pass Rate | 0% | 60% |
|
||||
| Average Steps | 1 | 6 |
|
||||
| Maximum Steps | 1 | 21 |
|
||||
| Minimum Steps | 1 | 2 |
|
||||
| Median Steps | 1 | 3 |
|
||||
| Average Tokens | 428 | 67,650 |
|
||||
| Median Tokens | 434 | 19,800 |
|
||||
| Maximum Tokens | 463 | 374,903 |
|
||||
| Minimum Tokens | 374 | 7,347 |
|
||||
| Metric | gemini-2.0-flash | gemini-2.0-flash + node-deepresearch (#5e80ed4) |
|
||||
|--------|------------------|-------------------------------------------------|
|
||||
| Pass Rate | 0% | 60% |
|
||||
| Average Steps | 1 | 5 |
|
||||
| Maximum Steps | 1 | 13 |
|
||||
| Minimum Steps | 1 | 2 |
|
||||
| Median Steps | 1 | 3 |
|
||||
| Average Tokens | 428 | 59,408 |
|
||||
| Median Tokens | 434 | 16,001 |
|
||||
| Maximum Tokens | 463 | 347,222 |
|
||||
| Minimum Tokens | 374 | 5,594 |
|
||||
@@ -3,7 +3,7 @@ import { TokenTracker } from "../utils/token-tracker";
|
||||
import {JINA_API_KEY} from "../config";
|
||||
|
||||
const JINA_API_URL = 'https://api.jina.ai/v1/embeddings';
|
||||
const SIMILARITY_THRESHOLD = 0.90; // Adjustable threshold for cosine similarity
|
||||
const SIMILARITY_THRESHOLD = 0.93; // Adjustable threshold for cosine similarity
|
||||
|
||||
// Types for Jina API
|
||||
interface JinaEmbeddingRequest {
|
||||
|
||||
Reference in New Issue
Block a user