diff --git a/README.md b/README.md
index 6efb79b..e02e877 100644
--- a/README.md
+++ b/README.md
@@ -36,12 +36,20 @@ Whether you like this implementation or not, I highly recommend you to read Deep
 - [中文微信公众号 第一讲](https://mp.weixin.qq.com/s/-pPhHDi2nz8hp5R3Lm_mww), [第二讲](https://mp.weixin.qq.com/s/apnorBj4TZs3-Mo23xUReQ)
 - [日本語: DeepSearch/DeepResearch 実装の実践ガイド](https://jina.ai/ja/news/a-practical-guide-to-implementing-deepsearch-deepresearch)
 
-## Test it Yourself
+## Try it Yourself
 
 We host an online deployment of this **exact** codebase, which allows you to do a vibe-check; or use it as daily productivity tools.
 
 https://search.jina.ai
 
+The official API is also available for you to use:
+
+```
+https://deepsearch.jina.ai/v1/chat/completions
+```
+
+Learn more about the API at https://jina.ai/deepsearch
+
@@ -323,31 +331,3 @@ flowchart TD
     BeastMode --> FinalAnswer[Generate final answer] --> End
 ```
 
-
-## Evaluation
-
-I kept the evaluation simple, LLM-as-a-judge and collect some [ego questions](./src/evals/ego-questions.json) for evaluation. These are the questions about Jina AI that I know 100% the answer but LLMs do not.
-
-I mainly look at 3 things: total steps, total tokens, and the correctness of the final answer.
-
-```bash
-npm run eval ./src/evals/questions.json
-```
-
-Here's the table comparing plain `gemini-2.0-flash` and `gemini-2.0-flash + node-deepresearch` on the ego set.
-
-Plain `gemini-2.0-flash` can be run by setting `tokenBudget` to zero, skipping the while-loop and directly answering the question.
-
-It should not be surprised that plain `gemini-2.0-flash` has a 0% pass rate, as I intentionally filtered out the questions that LLMs can answer.
-
-| Metric | gemini-2.0-flash | #188f1bb |
-|--------|------------------|----------|
-| Pass Rate | 0% | 75% |
-| Average Steps | 1 | 4 |
-| Maximum Steps | 1 | 13 |
-| Minimum Steps | 1 | 2 |
-| Median Steps | 1 | 3 |
-| Average Tokens | 428 | 68,574 |
-| Median Tokens | 434 | 31,541 |
-| Maximum Tokens | 463 | 363,655 |
-| Minimum Tokens | 374 | 7,963 |
diff --git a/src/agent.ts b/src/agent.ts
index 9bd9d29..782e753 100644
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -279,7 +279,7 @@ async function executeSearchQueries(
   const newKnowledge: KnowledgeItem[] = [];
   const searchedQueries: string[] = [];
   context.actionTracker.trackThink('search_for', SchemaGen.languageCode, {keywords: uniqQOnly.join(', ')});
-
+  let utilityScore = 0;
   for (const query of keywordsQueries) {
     let results: SearchResult[] = [];
     const oldQuery = query.q;
@@ -328,7 +328,7 @@
       .filter(Boolean) as SearchSnippet[];  // Filter out null entries and assert type
 
     minResults.forEach(r => {
-      addToAllURLs(r, allURLs);
+      utilityScore = utilityScore + addToAllURLs(r, allURLs);
     });
 
     searchedQueries.push(query.q)
@@ -340,6 +340,8 @@
       updated: query.tbs ? formatDateRange(query) : undefined
     });
   }
+
+  console.log(`Utility/Queries: ${utilityScore}/${searchedQueries.length}`);
   return {
     newKnowledge,
     searchedQueries
diff --git a/src/utils/url-tools.ts b/src/utils/url-tools.ts
index 8c8a5d9..b52579f 100644
--- a/src/utils/url-tools.ts
+++ b/src/utils/url-tools.ts
@@ -275,14 +275,16 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers:
 
 export const addToAllURLs = (r: SearchSnippet, allURLs: Record<string, SearchSnippet>, weightDelta = 1) => {
   const nURL = normalizeUrl(r.url);
-  if (!nURL) return;
+  if (!nURL) return 0;
   if (!allURLs[nURL]) {
     allURLs[nURL] = r;
     allURLs[nURL].weight = weightDelta;
+    return 1;
   } else {
     (allURLs[nURL].weight as number) += weightDelta;
     const curDesc = allURLs[nURL].description;
     allURLs[nURL].description = smartMergeStrings(curDesc, r.description);
+    return 0;
   }
 }
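
Note for reviewers: the `src/agent.ts` and `src/utils/url-tools.ts` hunks together add a small dedup-utility metric. `addToAllURLs` now reports whether a URL was genuinely new to the map (`1`) or a duplicate/unparseable one (`0`), and `executeSearchQueries` sums those returns into the `Utility/Queries` log line. The sketch below shows that contract end to end; it is self-contained, so `SearchSnippet` is trimmed down and `normalizeUrl` / `smartMergeStrings` are simplified stand-ins, not the repo's implementations.

```ts
type SearchSnippet = { url: string; description: string; weight?: number };

// Stand-in: canonicalize via the URL constructor; the real normalizeUrl does more.
const normalizeUrl = (url: string): string | null => {
  try {
    return new URL(url).href;
  } catch {
    return null; // unparseable URLs contribute no utility
  }
};

// Stand-in: avoid embedding one description verbatim inside the other.
const smartMergeStrings = (a: string, b: string): string =>
  a.includes(b) ? a : `${a} ${b}`;

// Returns 1 only when the normalized URL is new to the map, so the caller
// can count how many previously unseen URLs each search query contributed.
const addToAllURLs = (
  r: SearchSnippet,
  allURLs: Record<string, SearchSnippet>,
  weightDelta = 1
): number => {
  const nURL = normalizeUrl(r.url);
  if (!nURL) return 0;
  if (!allURLs[nURL]) {
    allURLs[nURL] = { ...r, weight: weightDelta };
    return 1;
  }
  // Duplicate: bump its weight and merge descriptions instead of counting it.
  allURLs[nURL].weight = (allURLs[nURL].weight ?? 0) + weightDelta;
  allURLs[nURL].description = smartMergeStrings(allURLs[nURL].description, r.description);
  return 0;
};

// Mirrors the aggregation in executeSearchQueries: two unique URLs out of
// three results from a single query yields "Utility/Queries: 2/1".
const allURLs: Record<string, SearchSnippet> = {};
let utilityScore = 0;
const minResults: SearchSnippet[] = [
  { url: 'https://example.com/a', description: 'first hit' },
  { url: 'https://example.com/a', description: 'first hit, reworded' },
  { url: 'https://example.com/b', description: 'second hit' },
];
minResults.forEach(r => {
  utilityScore = utilityScore + addToAllURLs(r, allURLs);
});
console.log(`Utility/Queries: ${utilityScore}/1`); // -> Utility/Queries: 2/1
```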
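
Since the README hunk above now advertises the official endpoint, a quick smoke test could look like the sketch below. Everything beyond the URL itself is an assumption, not something this patch guarantees: the OpenAI-style request body is inferred from the `/v1/chat/completions` path, and the `jina-deepsearch-v1` model id, Bearer auth scheme, and `JINA_API_KEY` variable are guesses.

```ts
// Hypothetical smoke test for the endpoint added to the README.
async function smokeTest(): Promise<void> {
  const res = await fetch('https://deepsearch.jina.ai/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${process.env.JINA_API_KEY}`, // assumed auth scheme
    },
    body: JSON.stringify({
      model: 'jina-deepsearch-v1', // assumed model id
      messages: [{ role: 'user', content: 'When was Jina AI founded?' }],
    }),
  });
  console.log(JSON.stringify(await res.json(), null, 2));
}

smokeTest().catch(console.error);
```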