feat: use s.jina.ai for search

This commit is contained in:
Han Xiao 2025-02-05 15:15:17 +08:00
parent d8636d16ce
commit a9dfec9a41
7 changed files with 21 additions and 15 deletions

View File

@ -25,12 +25,11 @@ flowchart LR
## Install
We use gemini for llm, brave/duckduckgo for search, [jina reader](https://jina.ai/reader) for reading a webpage.
We use Gemini as the LLM and [Jina Reader](https://jina.ai/reader) for searching and reading webpages.
```bash
export GEMINI_API_KEY=... # for gemini api, ask han
export JINA_API_KEY=jina_... # free jina api key, get from https://jina.ai/reader
export BRAVE_API_KEY=... # (optional, when not given it uses duckduckgo) brave search provide free key, ask han
git clone https://github.com/jina-ai/node-DeepResearch.git
cd node-DeepResearch

View File

@ -7,11 +7,12 @@ import {rewriteQuery} from "./tools/query-rewriter";
import {dedupQueries} from "./tools/dedup";
import {evaluateAnswer} from "./tools/evaluator";
import {analyzeSteps} from "./tools/error-analyzer";
import {GEMINI_API_KEY, JINA_API_KEY, SEARCH_PROVIDER, STEP_SLEEP, modelConfigs} from "./config";
import {GEMINI_API_KEY, SEARCH_PROVIDER, STEP_SLEEP, modelConfigs} from "./config";
import {TokenTracker} from "./utils/token-tracker";
import {ActionTracker} from "./utils/action-tracker";
import {StepAction, SchemaProperty, ResponseSchema, AnswerAction} from "./types";
import {TrackerContext} from "./types";
import {jinaSearch} from "./tools/jinaSearch";
async function sleep(ms: number) {
const seconds = Math.ceil(ms / 1000);
@ -541,6 +542,10 @@ But then you realized you have asked them before. You decided to to think out of
let results;
switch (SEARCH_PROVIDER) {
case 'jina':
// use jinaSearch
results = {results: (await jinaSearch(query, context.tokenTracker)).response?.data || []};
break;
case 'duck':
results = await duckSearch(query, {safeSearch: SafeSearchType.STRICT});
break;
@ -625,7 +630,7 @@ You decided to think out of the box or cut from a completely different angle.
const urlResults = await Promise.all(
uniqueURLs.map(async (url: string) => {
const {response, tokens} = await readUrl(url, JINA_API_KEY, context.tokenTracker);
const {response, tokens} = await readUrl(url, context.tokenTracker);
allKnowledge.push({
question: `What is in ${response.data?.url || 'the URL'}?`,
answer: removeAllLineBreaks(response.data?.content || 'No content available'),

View File

@ -32,7 +32,7 @@ if (process.env.https_proxy) {
export const GEMINI_API_KEY = process.env.GEMINI_API_KEY as string;
export const JINA_API_KEY = process.env.JINA_API_KEY as string;
export const BRAVE_API_KEY = process.env.BRAVE_API_KEY as string;
export const SEARCH_PROVIDER = BRAVE_API_KEY ? 'brave' : 'duck';
export const SEARCH_PROVIDER: 'brave' | 'jina' | 'duck' = 'jina'
const DEFAULT_MODEL = 'gemini-1.5-flash';

View File

@ -4,7 +4,7 @@ import { TokenTracker } from '../../utils/token-tracker';
describe('readUrl', () => {
it.skip('should read and parse URL content (skipped due to insufficient balance)', async () => {
const tokenTracker = new TokenTracker();
const { response } = await readUrl('https://www.typescriptlang.org', process.env.JINA_API_KEY!, tokenTracker);
const { response } = await readUrl('https://www.typescriptlang.org', tokenTracker);
expect(response).toHaveProperty('code');
expect(response).toHaveProperty('status');
expect(response.data).toHaveProperty('content');
@ -12,7 +12,7 @@ describe('readUrl', () => {
}, 15000);
it.skip('should handle invalid URLs (skipped due to insufficient balance)', async () => {
await expect(readUrl('invalid-url', process.env.JINA_API_KEY!)).rejects.toThrow();
await expect(readUrl('invalid-url')).rejects.toThrow();
}, 15000);
beforeEach(() => {

View File

@ -1,10 +1,10 @@
import { search } from '../search';
import { jinaSearch } from '../jinaSearch';
import { TokenTracker } from '../../utils/token-tracker';
describe('search', () => {
it.skip('should perform search with Jina API (skipped due to insufficient balance)', async () => {
const tokenTracker = new TokenTracker();
const { response } = await search('TypeScript programming', process.env.JINA_API_KEY!, tokenTracker);
const { response } = await jinaSearch('TypeScript programming', process.env.JINA_API_KEY!, tokenTracker);
expect(response).toBeDefined();
expect(response.data).toBeDefined();
if (response.data === null) {
@ -15,7 +15,7 @@ describe('search', () => {
}, 15000);
it('should handle empty query', async () => {
await expect(search('', process.env.JINA_API_KEY!)).rejects.toThrow();
// jinaSearch has the signature (query, tracker?) and reads JINA_API_KEY from
// config itself, so the key must not be passed positionally: it would land in
// the TokenTracker slot and fail type-checking.
await expect(jinaSearch('')).rejects.toThrow();
}, 15000);
beforeEach(() => {

View File

@ -2,8 +2,9 @@ import https from 'https';
import { TokenTracker } from "../utils/token-tracker";
import { SearchResponse } from '../types';
import {JINA_API_KEY} from "../config";
export function search(query: string, token: string, tracker?: TokenTracker): Promise<{ response: SearchResponse, tokens: number }> {
export function jinaSearch(query: string, tracker?: TokenTracker): Promise<{ response: SearchResponse, tokens: number }> {
return new Promise((resolve, reject) => {
if (!query.trim()) {
reject(new Error('Query cannot be empty'));
@ -13,11 +14,11 @@ export function search(query: string, token: string, tracker?: TokenTracker): Pr
const options = {
hostname: 's.jina.ai',
port: 443,
path: `/${encodeURIComponent(query)}`,
path: `/${encodeURIComponent(query)}?count=0`,
method: 'GET',
headers: {
'Accept': 'application/json',
'Authorization': `Bearer ${token}`,
'Authorization': `Bearer ${JINA_API_KEY}`,
'X-Retain-Images': 'none'
}
};

View File

@ -2,8 +2,9 @@ import https from 'https';
import { TokenTracker } from "../utils/token-tracker";
import { ReadResponse } from '../types';
import {JINA_API_KEY} from "../config";
export function readUrl(url: string, token: string, tracker?: TokenTracker): Promise<{ response: ReadResponse, tokens: number }> {
export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response: ReadResponse, tokens: number }> {
return new Promise((resolve, reject) => {
const data = JSON.stringify({url});
@ -14,7 +15,7 @@ export function readUrl(url: string, token: string, tracker?: TokenTracker): Pro
method: 'POST',
headers: {
'Accept': 'application/json',
'Authorization': `Bearer ${token}`,
'Authorization': `Bearer ${JINA_API_KEY}`,
'Content-Type': 'application/json',
'Content-Length': data.length,
'X-Retain-Images': 'none',