From a9dfec9a41a2be9cc2b40db72b531fa3a18d5355 Mon Sep 17 00:00:00 2001 From: Han Xiao Date: Wed, 5 Feb 2025 15:15:17 +0800 Subject: [PATCH] feat: use s.jina.ai for search --- README.md | 3 +-- src/agent.ts | 9 +++++++-- src/config.ts | 2 +- src/tools/__tests__/read.test.ts | 4 ++-- src/tools/__tests__/search.test.ts | 6 +++--- src/tools/{search.ts => jinaSearch.ts} | 7 ++++--- src/tools/read.ts | 5 +++-- 7 files changed, 21 insertions(+), 15 deletions(-) rename src/tools/{search.ts => jinaSearch.ts} (85%) diff --git a/README.md b/README.md index 7b2bfbc..891afd7 100644 --- a/README.md +++ b/README.md @@ -25,12 +25,11 @@ flowchart LR ## Install -We use gemini for llm, brave/duckduckgo for search, [jina reader](https://jina.ai/reader) for reading a webpage. +We use gemini for llm, [jina reader](https://jina.ai/reader) for searching and reading webpages. ```bash export GEMINI_API_KEY=... # for gemini api, ask han export JINA_API_KEY=jina_... # free jina api key, get from https://jina.ai/reader -export BRAVE_API_KEY=... # (optional, when not given it uses duckduckgo) brave search provide free key, ask han git clone https://github.com/jina-ai/node-DeepResearch.git cd node-DeepResearch diff --git a/src/agent.ts b/src/agent.ts index 25d2368..5b12011 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -7,11 +7,12 @@ import {rewriteQuery} from "./tools/query-rewriter"; import {dedupQueries} from "./tools/dedup"; import {evaluateAnswer} from "./tools/evaluator"; import {analyzeSteps} from "./tools/error-analyzer"; -import {GEMINI_API_KEY, JINA_API_KEY, SEARCH_PROVIDER, STEP_SLEEP, modelConfigs} from "./config"; +import {GEMINI_API_KEY, SEARCH_PROVIDER, STEP_SLEEP, modelConfigs} from "./config"; import {TokenTracker} from "./utils/token-tracker"; import {ActionTracker} from "./utils/action-tracker"; import {StepAction, SchemaProperty, ResponseSchema, AnswerAction} from "./types"; import {TrackerContext} from "./types"; +import {jinaSearch} from "./tools/jinaSearch"; async function sleep(ms: number) { const seconds = Math.ceil(ms / 1000); @@ -541,6 +542,10 @@ But then you realized you have asked them before. You decided to to think out of let results; switch (SEARCH_PROVIDER) { + case 'jina': + // use jinaSearch + results = {results: (await jinaSearch(query, context.tokenTracker)).response?.data || []}; + break; case 'duck': results = await duckSearch(query, {safeSearch: SafeSearchType.STRICT}); break; @@ -625,7 +630,7 @@ You decided to think out of the box or cut from a completely different angle. const urlResults = await Promise.all( uniqueURLs.map(async (url: string) => { - const {response, tokens} = await readUrl(url, JINA_API_KEY, context.tokenTracker); + const {response, tokens} = await readUrl(url, context.tokenTracker); allKnowledge.push({ question: `What is in ${response.data?.url || 'the URL'}?`, answer: removeAllLineBreaks(response.data?.content || 'No content available'), diff --git a/src/config.ts b/src/config.ts index 5f9ba4f..3543b5f 100644 --- a/src/config.ts +++ b/src/config.ts @@ -32,7 +32,7 @@ if (process.env.https_proxy) { export const GEMINI_API_KEY = process.env.GEMINI_API_KEY as string; export const JINA_API_KEY = process.env.JINA_API_KEY as string; export const BRAVE_API_KEY = process.env.BRAVE_API_KEY as string; -export const SEARCH_PROVIDER = BRAVE_API_KEY ? 'brave' : 'duck'; +export const SEARCH_PROVIDER: 'brave' | 'jina' | 'duck' = 'jina' const DEFAULT_MODEL = 'gemini-1.5-flash'; diff --git a/src/tools/__tests__/read.test.ts b/src/tools/__tests__/read.test.ts index 51656e6..b2dbdcd 100644 --- a/src/tools/__tests__/read.test.ts +++ b/src/tools/__tests__/read.test.ts @@ -4,7 +4,7 @@ import { TokenTracker } from '../../utils/token-tracker'; describe('readUrl', () => { it.skip('should read and parse URL content (skipped due to insufficient balance)', async () => { const tokenTracker = new TokenTracker(); - const { response } = await readUrl('https://www.typescriptlang.org', process.env.JINA_API_KEY!, tokenTracker); + const { response } = await readUrl('https://www.typescriptlang.org', tokenTracker); expect(response).toHaveProperty('code'); expect(response).toHaveProperty('status'); expect(response.data).toHaveProperty('content'); @@ -12,7 +12,7 @@ describe('readUrl', () => { }, 15000); it.skip('should handle invalid URLs (skipped due to insufficient balance)', async () => { - await expect(readUrl('invalid-url', process.env.JINA_API_KEY!)).rejects.toThrow(); + await expect(readUrl('invalid-url')).rejects.toThrow(); }, 15000); beforeEach(() => { diff --git a/src/tools/__tests__/search.test.ts b/src/tools/__tests__/search.test.ts index 77736b2..98f3029 100644 --- a/src/tools/__tests__/search.test.ts +++ b/src/tools/__tests__/search.test.ts @@ -1,10 +1,10 @@ -import { search } from '../search'; +import { jinaSearch } from '../jinaSearch'; import { TokenTracker } from '../../utils/token-tracker'; describe('search', () => { it.skip('should perform search with Jina API (skipped due to insufficient balance)', async () => { const tokenTracker = new TokenTracker(); - const { response } = await search('TypeScript programming', process.env.JINA_API_KEY!, tokenTracker); + const { response } = await jinaSearch('TypeScript programming', process.env.JINA_API_KEY!, tokenTracker); expect(response).toBeDefined(); expect(response.data).toBeDefined(); if (response.data === null) { @@ -15,7 +15,7 @@ describe('search', () => { }, 15000); it('should handle empty query', async () => { - await expect(search('', process.env.JINA_API_KEY!)).rejects.toThrow(); + await expect(jinaSearch('', process.env.JINA_API_KEY!)).rejects.toThrow(); }, 15000); beforeEach(() => { diff --git a/src/tools/search.ts b/src/tools/jinaSearch.ts similarity index 85% rename from src/tools/search.ts rename to src/tools/jinaSearch.ts index c4d58c6..54d3c0e 100644 --- a/src/tools/search.ts +++ b/src/tools/jinaSearch.ts @@ -2,8 +2,9 @@ import https from 'https'; import { TokenTracker } from "../utils/token-tracker"; import { SearchResponse } from '../types'; +import {JINA_API_KEY} from "../config"; -export function search(query: string, token: string, tracker?: TokenTracker): Promise<{ response: SearchResponse, tokens: number }> { +export function jinaSearch(query: string, tracker?: TokenTracker): Promise<{ response: SearchResponse, tokens: number }> { return new Promise((resolve, reject) => { if (!query.trim()) { reject(new Error('Query cannot be empty')); @@ -13,11 +14,11 @@ export function search(query: string, token: string, tracker?: TokenTracker): Pr const options = { hostname: 's.jina.ai', port: 443, - path: `/${encodeURIComponent(query)}`, + path: `/${encodeURIComponent(query)}?count=0`, method: 'GET', headers: { 'Accept': 'application/json', - 'Authorization': `Bearer ${token}`, + 'Authorization': `Bearer ${JINA_API_KEY}`, 'X-Retain-Images': 'none' } }; diff --git a/src/tools/read.ts b/src/tools/read.ts index bbe2db0..ac8723e 100644 --- a/src/tools/read.ts +++ b/src/tools/read.ts @@ -2,8 +2,9 @@ import https from 'https'; import { TokenTracker } from "../utils/token-tracker"; import { ReadResponse } from '../types'; +import {JINA_API_KEY} from "../config"; -export function readUrl(url: string, token: string, tracker?: TokenTracker): Promise<{ response: ReadResponse, tokens: number }> { +export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response: ReadResponse, tokens: number }> { return new Promise((resolve, reject) => { const data = JSON.stringify({url}); @@ -14,7 +15,7 @@ export function readUrl(url: string, token: string, tracker?: TokenTracker): Pro method: 'POST', headers: { 'Accept': 'application/json', - 'Authorization': `Bearer ${token}`, + 'Authorization': `Bearer ${JINA_API_KEY}`, 'Content-Type': 'application/json', 'Content-Length': data.length, 'X-Retain-Images': 'none',