feat: add messages and urls for Jina AI blog posts

This commit is contained in:
Han Xiao 2025-05-10 06:31:20 -07:00
parent 16f01546f5
commit ab27cd053d
2 changed files with 973 additions and 0 deletions

106
messages.json Normal file

File diff suppressed because one or more lines are too long

867
urls.json Normal file
View File

@ -0,0 +1,867 @@
[
{
"title": "DeepSearch - Jina AI",
"url": "https://jina.ai/deepsearch",
"description": "What's the latest blog post from OpenAI? what is the idea behind node-deepresearch project? what exactly jina-DeepSearch API is free to use. By providing your API key, you can access a higher rate limit, and you key won't be charged. open_in_new ...",
"weight": 2.2,
"date": "Mar 13, 2025",
"jinaRerankBoost": 0.6395538330078125,
"freqBoost": 0.005314009661835749,
"hostnameBoost": 0.07004830917874397,
"pathBoost": 0.003864734299516908,
"finalScore": 0.7187808861479091
},
{
"title": "Jina AI (@JinaAI_) / X",
"url": "https://x.com/jinaai_?lang=en",
"description": "Jina AI's posts Model soups blend diversity into something greater than the sum of their parts. Learn more from our blog post below https://jina.ai/news/model-sou",
"weight": 2,
"jinaRerankBoost": 0.5302371978759766,
"freqBoost": 0.004830917874396135,
"hostnameBoost": 0.028985507246376812,
"pathBoost": 0.001932367149758454,
"finalScore": 0.565985990146508
},
{
"title": "jina-ai/node-DeepResearch - GitHub",
"url": "https://github.com/jina-ai/node-DeepResearch",
"description": "Blog Post. Whether you like this implementation or not, I highly recommend you to read DeepSearch/DeepResearch implementation guide I wrote, whichIf you are building a web/local/mobile client that uses Jina DeepSearch API , here are some design guidelines: Our API is fully compatible with OpenAI API ...GitHub - jina-ai/node-DeepResearch: Keep searching, reading webpages, reasoning until it finds the answer (or exceeding the token budget)Keep searching, reading webpages, reasoning until it finds the answer (or exceeding the token budget) - jina-ai/node-DeepResearchGitHubjina-ai",
"weight": 2.2,
"jinaRerankBoost": 0.48202352523803715,
"freqBoost": 0.005314009661835749,
"hostnameBoost": 0.016908212560386472,
"pathBoost": 0.00927536231884058,
"finalScore": 0.5135211097790999
},
{
"title": "Jina AI on X: \"Model soups blend diversity into something greater",
"url": "https://twitter.com/JinaAI_/status/1920189685399216546",
"description": "Model soups blend diversity into something greater than the sum of their parts. Learn more from our blog post below https://t.co/c1K4IAsnDi",
"weight": 1,
"date": "May 7, 2025",
"jinaRerankBoost": 0.45243091583251954,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.01932367149758454,
"pathBoost": 0.014840579710144929,
"finalScore": 0.48901062597744704
},
{
"title": "Jina AI - X",
"url": "https://x.com/JinaAI_/status/1920189685399216546",
"description": "Model soups blend diversity into something greater than the sum of their parts. Learn more from our blog post below https://jina.ai/news/model",
"weight": 1,
"date": "May 7, 2025",
"jinaRerankBoost": 0.42496752738952637,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.028985507246376812,
"pathBoost": 0.014840579710144929,
"finalScore": 0.4712090732832462
},
{
"title": "Newsroom - Jina AI",
"url": "https://jina.ai/news",
"description": "Read the latest news and updates from Jina AI.Newsroom",
"weight": 1.4000000000000004,
"jinaRerankBoost": 0.3604808807373047,
"freqBoost": 0.0033816425120772957,
"hostnameBoost": 0.07004830917874397,
"pathBoost": 0.025120772946859906,
"finalScore": 0.4590316053749859
},
{
"title": "Cookie Policy",
"url": "https://businesswire.com/cookie-policy",
"description": "Cookie Policy",
"weight": 0.1,
"jinaRerankBoost": 0.23993978500366211,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.11594202898550725,
"pathBoost": 0.001932367149758454,
"finalScore": 0.35805572703264765
},
{
"title": "Jina AI Releases Reader-LM 0.5b and 1.5b for converting HTML to",
"url": "https://reddit.com/r/LocalLLaMA/comments/1feiip0/jina_ai_releases_readerlm_05b_and_15b_for",
"description": "Jina AI just released Reader-LM, a new set of small language models designed to convert raw HTML into clean markdown.",
"weight": 1,
"date": "Sep 11, 2024",
"jinaRerankBoost": 0.31238067150115967,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.012077294685990338,
"pathBoost": 0.008428212560386474,
"finalScore": 0.33530163768473453
},
{
"title": "What Makes Jina.ai the Future of Web Scraping? - Free (for Now!)",
"url": "https://medium.com/@kawsarlog/what-makes-jina-ai-the-future-of-web-scraping-free-for-now-085f2f1b3fa4",
"description": "It's a smarter, AI-powered framework that makes data extraction easier and more powerful. By combining neural search and intelligent workflows, Jina.ai is",
"weight": 1,
"date": "Feb 21, 2025",
"jinaRerankBoost": 0.2976390361785889,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.012077294685990338,
"pathBoost": 0.005410628019323672,
"finalScore": 0.317542417821101
},
{
"title": "Jina.ai's Reader API: A Game Changer for Developers | by MD Kawsar",
"url": "https://medium.com/@kawsarlog/jina-ais-reader-api-a-game-changer-for-developers-be66154b2692",
"description": "Jina.ai's Reader API is a powerful tool that simplifies web scraping and content extraction. Whether you're a developer, researcher, or AI enthusiast, it's",
"weight": 2,
"date": "Feb 23, 2025",
"jinaRerankBoost": 0.29182205200195316,
"freqBoost": 0.004830917874396135,
"hostnameBoost": 0.012077294685990338,
"pathBoost": 0.005410628019323672,
"finalScore": 0.31414089258166333
},
{
"title": "jinaai/jina-embeddings-v3 - Hugging Face",
"url": "https://huggingface.co/jinaai/jina-embeddings-v3",
"description": "We're on a journey to advance and democratize artificial intelligence through open source and open science.",
"weight": 1,
"jinaRerankBoost": 0.26282353401184083,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.014492753623188406,
"pathBoost": 0.00927536231884058,
"finalScore": 0.2890071088910679
},
{
"title": "An explainer on DeepResearch by Jina AI : r/Rag",
"url": "https://reddit.com/r/Rag/comments/1k089kb/an_explainer_on_deepresearch_by_jina_ai",
"description": "The full guide at jina.ai has more details on system prompts, URL ranking, and web crawling that are worth checking out if you're building ...",
"weight": 1,
"date": "3 weeks ago",
"jinaRerankBoost": 0.2614466667175293,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.012077294685990338,
"pathBoost": 0.008428212560386474,
"finalScore": 0.2843676329011042
},
{
"title": "Elasticsearch Open Inference API now Supports Jina AI - Nasdaq",
"url": "https://nasdaq.com/press-release/elasticsearch-open-inference-api-now-supports-jina-ai-embeddings-and-rerank-model",
"description": "Developers using Elastic to build search and RAG applications can now use the latest Jina AI embedding and reranking models without additional integration",
"weight": 1,
"jinaRerankBoost": 0.25597658157348635,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.003478260869565217,
"finalScore": 0.2642857603174477
},
{
"title": "jina-ai/reader: Convert any URL to an LLM-friendly input - GitHub",
"url": "https://github.com/jina-ai/reader",
"description": "With Reader, http://s.jina.ai automatically fetches the content from the top 5 search result URLs for you (reusing the tech stack behind http://r.jina.ai ).",
"weight": 1,
"jinaRerankBoost": 0.2238627910614014,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.016908212560386472,
"pathBoost": 0.00927536231884058,
"finalScore": 0.25246182487782654
},
{
"title": "Elastic enhances search AI with Jina AI integration - Investing.com",
"url": "https://investing.com/news/company-news/elastic-enhances-search-ai-with-jina-ai-integration-93CH-3881283",
"description": "This collaboration enables developers to utilize Jina AI's advanced tools for building semantic search and rapid answer generation (RAG) applications.",
"weight": 1,
"date": "Feb 20, 2025",
"jinaRerankBoost": 0.2176196336746216,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.02790338164251208,
"finalScore": 0.2503539331915298
},
{
"title": "Cookie Policy",
"url": "https://support.twitter.com/articles/20170514",
"description": "Cookie Policy",
"weight": 0.1,
"jinaRerankBoost": 0.23993978500366211,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.006956521739130434,
"finalScore": 0.2495533115737104
},
{
"title": "Cookie Policy",
"url": "https://support.x.com/articles/20170514",
"description": "Cookie Policy",
"weight": 0.1,
"jinaRerankBoost": 0.23993978500366211,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.006956521739130434,
"finalScore": 0.2495533115737104
},
{
"title": "Blog",
"url": "https://businesswire.com/blog",
"description": "Blog",
"weight": 0.2,
"jinaRerankBoost": 0.11302512884140015,
"freqBoost": 0.00048309178743961357,
"hostnameBoost": 0.11594202898550725,
"pathBoost": 0.007729468599033816,
"finalScore": 0.23717971821338082
},
{
"title": "Cookie Use.",
"url": "https://help.x.com/rules-and-policies/twitter-cookies",
"description": "Cookie Use.",
"weight": 0.1,
"jinaRerankBoost": 0.22260544300079346,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.007246376811594203,
"pathBoost": 0.003478260869565217,
"finalScore": 0.23357162657567268
},
{
"title": "Jina AI - Elasticsearch Labs",
"url": "https://elastic.co/search-labs/integrations/jina",
"description": "Explore the Jina AI and Elasticsearch integration, Learn about Jina reranker, Jina clip, and embedding models integrated with Elasticsearch.",
"weight": 1,
"jinaRerankBoost": 0.21576809883117676,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.007246376811594203,
"pathBoost": 0.006647342995169083,
"finalScore": 0.2320772775751381
},
{
"title": "How to scrape the web for LLM in 2024: Jina AI (Reader API",
"url": "https://dicloak.com/blog-detail/how-to-scrape-the-web-for-llm-in-2024-jina-ai-reader-api-mendable-firecrawl-and-scrapegraph-ai",
"description": "The article discusses emerging trends and innovative tools in web scraping for 2024, highlighting startups like Mendable and technologies",
"weight": 2,
"date": "Jan 2, 2025",
"jinaRerankBoost": 0.2163841724395752,
"freqBoost": 0.004830917874396135,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.003478260869565217,
"finalScore": 0.2271088101207346
},
{
"title": "Click-Worthy Content with PromptPerfect: AI Marketing for Newsletters and Social Media",
"url": "https://t.co/UqqF1c81J5",
"description": "Click-Worthy Content with PromptPerfect: AI Marketing for Newsletters and Social MediaFrom jina.ai",
"weight": 0.2,
"jinaRerankBoost": 0.20786049365997317,
"freqBoost": 0.00048309178743961357,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.001932367149758454,
"finalScore": 0.2126914115343693
},
{
"title": "Elastic NV (ESTC) Enhances Elasticsearch with Jina AI Integration",
"url": "https://gurufocus.com/news/2709507/elastic-nv-estc-enhances-elasticsearch-with-jina-ai-integration",
"description": "Integration with Jina AI enhances Elasticsearch's capabilities for semantic search and RAG applications. Support for multilingual text embeddings and reranking",
"weight": 1,
"jinaRerankBoost": 0.17493548393249514,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.02790338164251208,
"finalScore": 0.20766978344940334
},
{
"title": "Elasticsearch Open Inference API adds support for Jina AI",
"url": "https://elastic.co/search-labs/blog/jina-ai-embeddings-rerank-model-open-inference-api",
"description": "Our friends at Jina AI added native integration for Jina AI's embedding models and reranking products to the Elasticsearch open Inference API.",
"weight": 2,
"date": "Feb 20, 2025",
"jinaRerankBoost": 0.18252519369125367,
"freqBoost": 0.004830917874396135,
"hostnameBoost": 0.007246376811594203,
"pathBoost": 0.006647342995169083,
"finalScore": 0.2012498313724131
},
{
"title": "Scrape Websites at No Cost with Jina :Get LLM-friendly input from a",
"url": "https://community.make.com/t/scrape-websites-at-no-cost-with-jina-get-llm-friendly-input-from-a-url/39143",
"description": "Web scraping can be a complex and expensive process, but Jina AI simplifies it by allowing you to extract valuable content from websites using just a URL.",
"weight": 1,
"date": "May 28, 2024",
"jinaRerankBoost": 0.18473582267761232,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.004714975845410628,
"finalScore": 0.1942817163974191
},
{
"title": "jinaai/jina-embeddings-v2-base-es - Hugging Face",
"url": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es",
"description": "jina-embeddings-v2-base-es is a Spanish/English bilingual text embedding model supporting 8192 sequence length. It is based on a BERT",
"weight": 1,
"date": "Sep 17, 2024",
"jinaRerankBoost": 0.16654313802719117,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.014492753623188406,
"pathBoost": 0.00927536231884058,
"finalScore": 0.19272671290641824
},
{
"title": "Elasticsearch Open Inference API Supports Jina AI Embeddings and",
"url": "https://apmdigest.com/elasticsearch-open-inference-api-supports-jina-ai-embeddings-and-rerank-model",
"description": "This integration includes support for multilingual text embeddings and multilingual reranking, and is optimized for retrieval, clustering, and classification. “",
"weight": 1,
"jinaRerankBoost": 0.18362817764282227,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.001932367149758454,
"finalScore": 0.19039146266697687
},
{
"title": "Jina AI - Model Soup's Recipe for Embeddings",
"url": "https://linkedin.com/posts/jinaai_model-soups-recipe-for-embeddings-activity-7325930994111246337-ZuXm",
"description": "Boost robustness and performance with model soups: averaging weights. No extra cost, better results. https://lnkd.in/ezAjU7VS.",
"weight": 1,
"date": "May 7, 2025",
"jinaRerankBoost": 0.16044615507125856,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.016908212560386472,
"pathBoost": 0.005410628019323672,
"finalScore": 0.1851804545881668
},
{
"title": "Elastic integrates Jina AI models into Elasticsearch API - SDx Central",
"url": "https://sdxcentral.com/news/elastic-integrates-jina-ai-models-into-elasticsearch-api",
"description": "Elastic integrates Jina AI's embedding models into its Open Inference API, enhancing search and retrieval capabilities.",
"weight": 1,
"date": "Feb 20, 2025",
"jinaRerankBoost": 0.13952821493148804,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.02666666666666667,
"finalScore": 0.17102579947255084
},
{
"title": "",
"url": "https://jinaai.cn/",
"description": "",
"weight": 0.1,
"jinaRerankBoost": 0.12082223892211914,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.04106280193236715,
"pathBoost": 0,
"finalScore": 0.1621265867482061
},
{
"title": "How to scrape the web for LLM in 2024: Jina AI (Reader - YouTube",
"url": "https://youtube.com/watch?v=QxHE4af5BQE",
"description": "How to scrape the web for LLM in 2024: Jina AI (Reader API), Mendable (firecrawl) and Scrapegraph-ai · Comments178.",
"weight": 1,
"date": "May 17, 2024",
"jinaRerankBoost": 0.14224869012832642,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.007246376811594203,
"pathBoost": 0.003864734299516908,
"finalScore": 0.1557752601766356
},
{
"title": "Jina AI - AWS Marketplace",
"url": "https://aws.amazon.com/marketplace/seller-profile?id=seller-stch2ludm6vgy",
"description": "Jina Embeddings v2 Base model is optimized for highly accurate embeddings - For speed of inference and memory efficiency use the Small model. jina-embeddings-v2",
"weight": 1,
"jinaRerankBoost": 0.14594042301177979,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.003478260869565217,
"finalScore": 0.15424960175574115
},
{
"title": "",
"url": "https://twitter.com/intent/tweet?url=https%3A%2F%2Fjina.ai%2Fnews%2Fmodel-soups-recipe-for-embeddings%2F",
"description": "",
"weight": 0.1,
"jinaRerankBoost": 0.12082223892211914,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.01932367149758454,
"pathBoost": 0.010434782608695653,
"finalScore": 0.15082223892211916
},
{
"title": "blog",
"url": "https://cts.businesswire.com/ct/CT?anchor=blog&esheet=54211070&id=smartlink&index=2&lan=en-US&md5=1277635ab8f2f599617706b118c9669d&newsitemid=20250220781575&url=https%3A%2F%2Fwww.elastic.co%2Fsearch-labs%2Fblog%2Fjina-ai-embeddings-rerank-model-open-inference-api",
"description": "blog",
"weight": 0.1,
"jinaRerankBoost": 0.1377374768257141,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.004830917874396135,
"pathBoost": 0.006956521739130434,
"finalScore": 0.14976646233296048
},
{
"title": "",
"url": "https://linkedin.com/sharing/share-offsite?url=https%3A%2F%2Fjina.ai%2Fnews%2Fmodel-soups-recipe-for-embeddings%2F",
"description": "",
"weight": 0.1,
"jinaRerankBoost": 0.12082223892211914,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.016908212560386472,
"pathBoost": 0.010434782608695653,
"finalScore": 0.14840677998492108
},
{
"title": "",
"url": "https://facebook.com/sharer/sharer.php?u=https%3A%2F%2Fjina.ai%2Fnews%2Fmodel-soups-recipe-for-embeddings%2F",
"description": "",
"weight": 0.1,
"jinaRerankBoost": 0.12082223892211914,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.00966183574879227,
"pathBoost": 0.010434782608695653,
"finalScore": 0.14116040317332687
},
{
"title": "",
"url": "https://facebook.com/sharer/sharer.php?u=https%3A%2F%2Fjina.ai%2Fnews%2Fa-practical-guide-to-implementing-deepsearch-deepresearch%2F",
"description": "",
"weight": 0.1,
"jinaRerankBoost": 0.12082223892211914,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.00966183574879227,
"pathBoost": 0.010434782608695653,
"finalScore": 0.14116040317332687
},
{
"title": "jina-embeddings-v3",
"url": "https://jinaai.cn/?model=jina-embeddings-v3&sui=",
"description": "jina-embeddings-v3",
"weight": 0.1,
"jinaRerankBoost": 0.09405770897865295,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.04106280193236715,
"pathBoost": 0,
"finalScore": 0.13536205680473992
},
{
"title": "",
"url": "https://app.eu.vanta.com/jinaai/trust/vz7f4mohp0847aho84lmva",
"description": "",
"weight": 0.30000000000000004,
"jinaRerankBoost": 0.12082223892211914,
"freqBoost": 0.0007246376811594204,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.01051207729468599,
"finalScore": 0.13447441283516262
},
{
"title": "",
"url": "https://news.ycombinator.com/submitlink?u=https%3A%2F%2Fjina.ai%2Fnews%2Fmodel-soups-recipe-for-embeddings%2F",
"description": "",
"weight": 0.1,
"jinaRerankBoost": 0.12082223892211914,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.007246376811594203,
"pathBoost": 0.005797101449275362,
"finalScore": 0.1341072630767085
},
{
"title": "",
"url": "https://news.ycombinator.com/submitlink?u=https%3A%2F%2Fjina.ai%2Fnews%2Fa-practical-guide-to-implementing-deepsearch-deepresearch%2F",
"description": "",
"weight": 0.1,
"jinaRerankBoost": 0.12082223892211914,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.007246376811594203,
"pathBoost": 0.005797101449275362,
"finalScore": 0.1341072630767085
},
{
"title": "",
"url": "https://youtube.com/user/businesswire",
"description": "",
"weight": 0.1,
"jinaRerankBoost": 0.12082223892211914,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.007246376811594203,
"pathBoost": 0.003478260869565217,
"finalScore": 0.13178842249699838
},
{
"title": "Jina AI + Elastic → Efficient Data Storage in the Cloud",
"url": "https://shubhamsaboo111.medium.com/jina-ai-elastic-efficient-data-storage-in-the-cloud-6878db47d92c",
"description": "Elasticsearch backend provides the much-needed scalability for Jina search applications and can be used as a document store for DocumentArray.",
"weight": 1,
"date": "Jun 7, 2022",
"jinaRerankBoost": 0.12162580490112306,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.001932367149758454,
"finalScore": 0.12838908992527764
},
{
"title": "",
"url": "https://instagram.com/businesswire",
"description": "",
"weight": 0.1,
"jinaRerankBoost": 0.12082223892211914,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.003864734299516908,
"finalScore": 0.12734397805255393
},
{
"title": "",
"url": "https://discord.jina.ai/",
"description": "",
"weight": 0.30000000000000004,
"jinaRerankBoost": 0.12082223892211914,
"freqBoost": 0.0007246376811594204,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0,
"finalScore": 0.12396233554047663
},
{
"title": "Jina Embeddings - Qdrant",
"url": "https://qdrant.tech/documentation/embeddings/jina-embeddings",
"description": "You can get a free trial key from Jina Embeddings to get embeddings. Qdrant users can receive a 10% discount on Jina AI APIs by using the code QDRANT. Technical",
"weight": 1,
"jinaRerankBoost": 0.11302512884140015,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.004714975845410628,
"finalScore": 0.12257102256120692
},
{
"title": "Jina AI Deep SearchAI deep search: read, reason, search until best answer found.",
"url": "https://search.jina.ai/",
"description": "Jina AI Deep SearchAI deep search: read, reason, search until best answer found.search.jina.ai",
"weight": 0.4,
"jinaRerankBoost": 0.1153190851211548,
"freqBoost": 0.0009661835748792271,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0,
"finalScore": 0.11870072763323208
},
{
"title": "API Status",
"url": "https://status.jina.ai/",
"description": "API Status",
"weight": 0.30000000000000004,
"jinaRerankBoost": 0.08070290088653564,
"freqBoost": 0.0007246376811594204,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0,
"finalScore": 0.08384299750489313
},
{
"title": "Izmailov et al. 2018",
"url": "https://auai.org/uai2018/proceedings/papers/313.pdf",
"description": "Izmailov et al. 2018",
"weight": 0.1,
"jinaRerankBoost": 0.07005107402801514,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.005704347826086957,
"finalScore": 0.07841242668501996
},
{
"title": "Vercel AI SDK",
"url": "https://sdk.vercel.ai/docs/introduction",
"description": "Vercel AI SDK",
"weight": 0.2,
"jinaRerankBoost": 0.06711230874061584,
"freqBoost": 0.00048309178743961357,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.003478260869565217,
"finalScore": 0.07348912033481875
},
{
"title": "DeepSeek-R1",
"url": "https://api-docs.deepseek.com/news/news250120",
"description": "DeepSeek-R1",
"weight": 0.2,
"jinaRerankBoost": 0.04335897862911225,
"freqBoost": 0.00048309178743961357,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.02666666666666667,
"finalScore": 0.0729241960204166
},
{
"title": "AI-Powered Web Scraping: The Future of Data Extraction",
"url": "https://madebyagents.com/blog/ai-powered-web-scraping-the-future-of-data-extraction",
"description": "How AI is revolutionizing web scraping. Discover cost-effective tools, build a robust scraping dashboard, and solve common data extraction",
"weight": 1,
"date": "Apr 26, 2025",
"jinaRerankBoost": 0.05403735637664795,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.00927536231884058,
"finalScore": 0.06814363656988466
},
{
"title": "Ads info",
"url": "https://business.twitter.com/en/help/troubleshooting/how-twitter-ads-work.html",
"description": "Ads info",
"weight": 0.1,
"jinaRerankBoost": 0.05097199678421021,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.011408695652173914,
"finalScore": 0.06503769726730199
},
{
"title": "Ads info",
"url": "https://business.x.com/en/help/troubleshooting/how-twitter-ads-work.html",
"description": "Ads info",
"weight": 0.1,
"jinaRerankBoost": 0.05097199678421021,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.011408695652173914,
"finalScore": 0.06503769726730199
},
{
"title": "Grok3",
"url": "https://x.ai/blog/grok-3",
"description": "Grok3Grok 3 Beta",
"weight": 0.4,
"jinaRerankBoost": 0.04532194137573242,
"freqBoost": 0.0009661835748792271,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.00927536231884058,
"finalScore": 0.0579789462066503
},
{
"title": "Jina Embeddings v2 Base - Azure Marketplace",
"url": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/jinaai.jina-embeddings-v2-base-en?tab=Overview",
"description": "Jina Embeddings v2 Base model is optimized for highly accurate embeddings - For speed of inference and memory efficiency use the Small model.",
"weight": 1,
"jinaRerankBoost": 0.04736820459365845,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.005704347826086957,
"finalScore": 0.05790347029414154
},
{
"title": "Help Center",
"url": "https://help.x.com/using-x/x-supported-browsers",
"description": "Help Center",
"weight": 0.1,
"jinaRerankBoost": 0.04400422871112824,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.007246376811594203,
"pathBoost": 0.003478260869565217,
"finalScore": 0.054970412286007464
},
{
"title": "Elastic",
"url": "https://cts.businesswire.com/ct/CT?anchor=Elastic&esheet=54211070&id=smartlink&index=1&lan=en-US&md5=3534c60f229ac2e04c8d4282425417d4&newsitemid=20250220781575&url=https%3A%2F%2Fwww.elastic.co%2F",
"description": "Elastic",
"weight": 0.1,
"jinaRerankBoost": 0.04086619019508362,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.004830917874396135,
"pathBoost": 0.006956521739130434,
"finalScore": 0.052895175702329995
},
{
"title": "OpenAI",
"url": "https://openai.com/index/introducing-deep-research",
"description": "OpenAIntroducing Deep Research",
"weight": 0.4,
"jinaRerankBoost": 0.04400422871112824,
"freqBoost": 0.0009661835748792271,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.003478260869565217,
"finalScore": 0.05086413209277075
},
{
"title": "Takuya et al. 2025",
"url": "https://doi.org/10.1038/s42256-024-00975-8",
"description": "Takuya et al. 2025",
"weight": 0.1,
"jinaRerankBoost": 0.04465851485729218,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.003478260869565217,
"finalScore": 0.05079378055777527
},
{
"title": "Conneau et al. 2020",
"url": "https://aclanthology.org/2020.acl-main.747",
"description": "Conneau et al. 2020",
"weight": 0.1,
"jinaRerankBoost": 0.04599462449550629,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.001932367149758454,
"finalScore": 0.05058399647618261
},
{
"title": "Zhang et al. 2023",
"url": "https://direct.mit.edu/tacl/article/doi/10.1162/tacl_a_00595/117438/MIRACL-A-Multilingual-Retrieval-Dataset-Covering",
"description": "Zhang et al. 2023",
"weight": 0.1,
"jinaRerankBoost": 0.03628206551074982,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.00763560193236715,
"finalScore": 0.04657467227403485
},
{
"title": "Baidu Integrates DeepSeek-R1",
"url": "https://chat.baidu.com/search?extParamsJson=%7B%22enter_type%22%3A%22ai_explore_home%22%7D&isShowHello=1&pd=csaitab&setype=csaitab&usedModel=%7B%22modelName%22%3A%22DeepSeek-R1%22%7D",
"description": "Baidu Integrates DeepSeek-R1",
"weight": 0.2,
"jinaRerankBoost": 0.04147637188434601,
"freqBoost": 0.00048309178743961357,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.001932367149758454,
"finalScore": 0.04630728975874215
},
{
"title": "Enevoldsen et al. 2025",
"url": "https://arxiv.org/abs/2502.13595",
"description": "Enevoldsen et al. 2025",
"weight": 0.1,
"jinaRerankBoost": 0.034692639112472536,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.004830917874396135,
"pathBoost": 0.005410628019323672,
"finalScore": 0.04517573089991215
},
{
"title": "API Documentation",
"url": "https://docs.jina.ai/",
"description": "API DocumentationAPI DocsAuto codegen for your copilot IDE or LLMopen_in_new",
"weight": 0.4,
"jinaRerankBoost": 0.039671221375465394,
"freqBoost": 0.0009661835748792271,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0,
"finalScore": 0.04305286388754269
},
{
"title": "Tencent Weixin Integrates DeepSeek",
"url": "https://reuters.com/technology/artificial-intelligence/tencents-messaging-app-weixin-launches-beta-testing-with-deepseek-2025-02-16",
"description": "Tencent Weixin Integrates DeepSeek",
"weight": 0.2,
"jinaRerankBoost": 0.03521491885185242,
"freqBoost": 0.00048309178743961357,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.004714975845410628,
"finalScore": 0.04282844542190073
},
{
"title": "Perplexity",
"url": "https://perplexity.ai/hub/blog/introducing-perplexity-deep-research",
"description": "Introducing Perplexity Deep Research",
"weight": 0.4,
"jinaRerankBoost": 0.03267657160758972,
"freqBoost": 0.0009661835748792271,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.004714975845410628,
"finalScore": 0.040773189965077644
},
{
"title": "Springer et al., 2025",
"url": "https://arxiv.org/abs/2503.19206v2",
"description": "Springer et al., 2025",
"weight": 0.1,
"jinaRerankBoost": 0.028976044058799746,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.004830917874396135,
"pathBoost": 0.005410628019323672,
"finalScore": 0.03945913584623936
},
{
"title": "Google",
"url": "https://blog.google/products/gemini/google-gemini-deep-research",
"description": "Google Gemini 2",
"weight": 0.4,
"jinaRerankBoost": 0.03031394183635712,
"freqBoost": 0.0009661835748792271,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.004714975845410628,
"finalScore": 0.03841056019384504
},
{
"title": "Join usopen_in_new",
"url": "https://app.dover.com/jobs/jinaai",
"description": "Join usopen_in_new",
"weight": 0.4,
"jinaRerankBoost": 0.026871606707572937,
"freqBoost": 0.0009661835748792271,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.003478260869565217,
"finalScore": 0.03373151008921545
},
{
"title": "Wortsman et al. (2022)",
"url": "https://proceedings.mlr.press/v162/wortsman22a.html",
"description": "Wortsman et al. (2022)Model soups: averaging weights of multiple fine-tuned models improves accuracy without increasing inference timeThe conventional recipe for maximizing model accuracy is to (1) train multiple models with various hyperparameters and (2) pick the individual model which performs best on a held-out validation set…PMLR(Wortsman et al. 2022)",
"weight": 0.4,
"jinaRerankBoost": 0.0241716668009758,
"freqBoost": 0.0009661835748792271,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.003478260869565217,
"finalScore": 0.031031570182618314
},
{
"title": "Baidu Search and Tencent WeChat Search",
"url": "https://scmp.com/tech/big-tech/article/3298981/baidu-adopts-deepseek-ai-models-chasing-tencent-race-embrace-hot-start",
"description": "Baidu Search and Tencent WeChat Search",
"weight": 0.2,
"jinaRerankBoost": 0.02108428329229355,
"freqBoost": 0.00048309178743961357,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.0064958454106280195,
"finalScore": 0.03047867942755925
},
{
"title": "delayed gratification",
"url": "https://en.wikipedia.org/wiki/Delayed_gratification",
"description": "delayed gratification",
"weight": 0.2,
"jinaRerankBoost": 0.02344978451728821,
"freqBoost": 0.00048309178743961357,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.003478260869565217,
"finalScore": 0.02982659611149111
},
{
"title": "A Practical Guide to Implementing DeepSearch ...",
"url": "https://simonwillison.net/2025/Mar/4/deepsearch-deepresearch",
"description": "DeepSearch runs through an iterative loop of searching, reading, and reasoning until it finds the optimal answer.",
"weight": 1,
"date": "4 Mar 2025",
"jinaRerankBoost": 0.018951575458049777,
"freqBoost": 0.0024154589371980675,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.005704347826086957,
"finalScore": 0.029486841158532867
},
{
"title": "Imprint",
"url": "https://legal.twitter.com/imprint.html",
"description": "Imprint",
"weight": 0.1,
"jinaRerankBoost": 0.02454064190387726,
"freqBoost": 0.00024154589371980678,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.001932367149758454,
"finalScore": 0.029130013884553586
},
{
"title": "Stanford NLP Labs released the STORM",
"url": "https://storm-project.stanford.edu/research/storm",
"description": "Stanford NLP Labs released the STORM",
"weight": 0.2,
"jinaRerankBoost": 0.018951575458049777,
"freqBoost": 0.00048309178743961357,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0.003478260869565217,
"finalScore": 0.025328387052252675
},
{
"title": "MIRACL benchmark",
"url": "https://project-miracl.github.io/",
"description": "MIRACL benchmark",
"weight": 0.2,
"jinaRerankBoost": 0.022406016290187836,
"freqBoost": 0.00048309178743961357,
"hostnameBoost": 0.0024154589371980675,
"pathBoost": 0,
"finalScore": 0.025304567014825516
}
]