feat: add hostnames bw filter

2025-12-25 22:16:49 +08:00 · 2025-03-18 11:24:53 +08:00 · 2025-03-18 11:24:53 +08:00 · aac0db67e4
commit aac0db67e4
parent 51e8540b21
3 changed files with 16 additions and 32 deletions
--- a/README.md
+++ b/README.md
@ -36,12 +36,20 @@ Whether you like this implementation or not, I highly recommend you to read Deep
 - [中文微信公众号 第一讲](https://mp.weixin.qq.com/s/-pPhHDi2nz8hp5R3Lm_mww), [第二讲](https://mp.weixin.qq.com/s/apnorBj4TZs3-Mo23xUReQ)
 - [日本語: DeepSearch/DeepResearch 実装の実践ガイド](https://jina.ai/ja/news/a-practical-guide-to-implementing-deepsearch-deepresearch)
-## Test it Yourself
+## Try it Yourself
 We host an online deployment of this **exact** codebase, which allows you to do a vibe-check, or to use it as a daily productivity tool.
 https://search.jina.ai
 The official API is also available for you to use:
 ```
 https://deepsearch.jina.ai/v1/chat/completions
 ```
 Learn more about the API at https://jina.ai/deepsearch
@ -323,31 +331,3 @@ flowchart TD
    BeastMode --> FinalAnswer[Generate final answer] --> End
 ```
 ## Evaluation
 I kept the evaluation simple: LLM-as-a-judge, plus a collection of [ego questions](./src/evals/ego-questions.json). These are questions about Jina AI to which I know the answer with 100% certainty but LLMs do not.
 I mainly look at 3 things: total steps, total tokens, and the correctness of the final answer.
 ```bash
 npm run eval ./src/evals/questions.json
 ```
 Here's the table comparing plain `gemini-2.0-flash` and `gemini-2.0-flash + node-deepresearch` on the ego set.
 Plain `gemini-2.0-flash` can be run by setting `tokenBudget` to zero, skipping the while-loop and directly answering the question. 
 It should come as no surprise that plain `gemini-2.0-flash` has a 0% pass rate, as I intentionally filtered out the questions that LLMs can answer.
 | Metric | gemini-2.0-flash | #188f1bb |
 |--------|------------------|----------|
 | Pass Rate | 0% | 75%      |
 | Average Steps | 1 | 4        |
 | Maximum Steps | 1 | 13       |
 | Minimum Steps | 1 | 2        |
 | Median Steps | 1 | 3        |
 | Average Tokens | 428 | 68,574   |
 | Median Tokens | 434 | 31,541   |
 | Maximum Tokens | 463 | 363,655  |
 | Minimum Tokens | 374 | 7,963    |
--- a/src/agent.ts
+++ b/src/agent.ts
@ -279,7 +279,7 @@ async function executeSearchQueries(
  const newKnowledge: KnowledgeItem[] = [];
  const searchedQueries: string[] = [];
  context.actionTracker.trackThink('search_for', SchemaGen.languageCode, {keywords: uniqQOnly.join(', ')});
-
+  let utilityScore = 0;
  for (const query of keywordsQueries) {
    let results: SearchResult[] = [];
    const oldQuery = query.q;
@ -328,7 +328,7 @@ async function executeSearchQueries(
      .filter(Boolean) as SearchSnippet[]; // Filter out null entries and assert type
    minResults.forEach(r => {
-      addToAllURLs(r, allURLs);
+      utilityScore = utilityScore + addToAllURLs(r, allURLs);
    });
    searchedQueries.push(query.q)
@ -340,6 +340,8 @@ async function executeSearchQueries(
      updated: query.tbs ? formatDateRange(query) : undefined
    });
  }
  console.log(`Utility/Queries: ${utilityScore}/${searchedQueries.length}`);
  return {
    newKnowledge,
    searchedQueries
--- a/src/utils/url-tools.ts
+++ b/src/utils/url-tools.ts
@ -275,14 +275,16 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers:
 /**
  * Registers a search snippet in the `allURLs` map, keyed by its normalized URL.
  *
  * - If the URL is not yet present, the snippet object `r` itself is stored
  *   (not a copy) and its `weight` is set to `weightDelta`.
  * - If the URL is already present, the stored entry's `weight` is increased by
  *   `weightDelta` and its `description` is replaced with the result of
  *   `smartMergeStrings(existing, r.description)`.
  *
  * @param r - Snippet to register; mutated when it becomes the stored entry.
  * @param allURLs - Map from normalized URL to snippet; mutated in place.
  * @param weightDelta - Amount added to (or used to initialise) the weight; defaults to 1.
  * @returns 1 when a new URL was added to `allURLs`; 0 when the URL was already
  *   present or when `normalizeUrl` returned a falsy value.
  */
 export const addToAllURLs = (r: SearchSnippet, allURLs: Record<string, SearchSnippet>, weightDelta = 1) => {
  const nURL = normalizeUrl(r.url);
  // Nothing to record when normalization yields no usable URL; report "no new URL".
-  if (!nURL) return;
+  if (!nURL) return 0;
  if (!allURLs[nURL]) {
    // First sighting: store the snippet by reference, so the weight assignment below also mutates `r`.
    allURLs[nURL] = r;
    allURLs[nURL].weight = weightDelta;
    return 1;
  } else {
    // Repeat sighting: accumulate weight and merge the descriptions instead of overwriting the entry.
    (allURLs[nURL].weight as number) += weightDelta;
    const curDesc = allURLs[nURL].description;
    allURLs[nURL].description = smartMergeStrings(curDesc, r.description);
    return 0;
  }
 }