mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
feat: use s.jina.ai for search
This commit is contained in:
parent
d8636d16ce
commit
a9dfec9a41
@ -25,12 +25,11 @@ flowchart LR
|
||||
|
||||
## Install
|
||||
|
||||
We use Gemini as the LLM, Brave/DuckDuckGo for search, and [Jina Reader](https://jina.ai/reader) for reading webpages.
|
||||
We use Gemini as the LLM and [Jina Reader](https://jina.ai/reader) for searching and reading webpages.
|
||||
|
||||
```bash
|
||||
export GEMINI_API_KEY=... # for gemini api, ask han
|
||||
export JINA_API_KEY=jina_... # free jina api key, get from https://jina.ai/reader
|
||||
export BRAVE_API_KEY=... # (optional; falls back to DuckDuckGo when not set) Brave Search provides a free key, ask han
|
||||
|
||||
git clone https://github.com/jina-ai/node-DeepResearch.git
|
||||
cd node-DeepResearch
|
||||
|
||||
@ -7,11 +7,12 @@ import {rewriteQuery} from "./tools/query-rewriter";
|
||||
import {dedupQueries} from "./tools/dedup";
|
||||
import {evaluateAnswer} from "./tools/evaluator";
|
||||
import {analyzeSteps} from "./tools/error-analyzer";
|
||||
import {GEMINI_API_KEY, JINA_API_KEY, SEARCH_PROVIDER, STEP_SLEEP, modelConfigs} from "./config";
|
||||
import {GEMINI_API_KEY, SEARCH_PROVIDER, STEP_SLEEP, modelConfigs} from "./config";
|
||||
import {TokenTracker} from "./utils/token-tracker";
|
||||
import {ActionTracker} from "./utils/action-tracker";
|
||||
import {StepAction, SchemaProperty, ResponseSchema, AnswerAction} from "./types";
|
||||
import {TrackerContext} from "./types";
|
||||
import {jinaSearch} from "./tools/jinaSearch";
|
||||
|
||||
async function sleep(ms: number) {
|
||||
const seconds = Math.ceil(ms / 1000);
|
||||
@ -541,6 +542,10 @@ But then you realized you have asked them before. You decided to to think out of
|
||||
|
||||
let results;
|
||||
switch (SEARCH_PROVIDER) {
|
||||
case 'jina':
|
||||
// use jinaSearch
|
||||
results = {results: (await jinaSearch(query, context.tokenTracker)).response?.data || []};
|
||||
break;
|
||||
case 'duck':
|
||||
results = await duckSearch(query, {safeSearch: SafeSearchType.STRICT});
|
||||
break;
|
||||
@ -625,7 +630,7 @@ You decided to think out of the box or cut from a completely different angle.
|
||||
|
||||
const urlResults = await Promise.all(
|
||||
uniqueURLs.map(async (url: string) => {
|
||||
const {response, tokens} = await readUrl(url, JINA_API_KEY, context.tokenTracker);
|
||||
const {response, tokens} = await readUrl(url, context.tokenTracker);
|
||||
allKnowledge.push({
|
||||
question: `What is in ${response.data?.url || 'the URL'}?`,
|
||||
answer: removeAllLineBreaks(response.data?.content || 'No content available'),
|
||||
|
||||
@ -32,7 +32,7 @@ if (process.env.https_proxy) {
|
||||
export const GEMINI_API_KEY = process.env.GEMINI_API_KEY as string;
|
||||
export const JINA_API_KEY = process.env.JINA_API_KEY as string;
|
||||
export const BRAVE_API_KEY = process.env.BRAVE_API_KEY as string;
|
||||
export const SEARCH_PROVIDER = BRAVE_API_KEY ? 'brave' : 'duck';
|
||||
export const SEARCH_PROVIDER: 'brave' | 'jina' | 'duck' = 'jina'
|
||||
|
||||
const DEFAULT_MODEL = 'gemini-1.5-flash';
|
||||
|
||||
|
||||
@ -4,7 +4,7 @@ import { TokenTracker } from '../../utils/token-tracker';
|
||||
describe('readUrl', () => {
|
||||
it.skip('should read and parse URL content (skipped due to insufficient balance)', async () => {
|
||||
const tokenTracker = new TokenTracker();
|
||||
const { response } = await readUrl('https://www.typescriptlang.org', process.env.JINA_API_KEY!, tokenTracker);
|
||||
const { response } = await readUrl('https://www.typescriptlang.org', tokenTracker);
|
||||
expect(response).toHaveProperty('code');
|
||||
expect(response).toHaveProperty('status');
|
||||
expect(response.data).toHaveProperty('content');
|
||||
@ -12,7 +12,7 @@ describe('readUrl', () => {
|
||||
}, 15000);
|
||||
|
||||
it.skip('should handle invalid URLs (skipped due to insufficient balance)', async () => {
|
||||
await expect(readUrl('invalid-url', process.env.JINA_API_KEY!)).rejects.toThrow();
|
||||
await expect(readUrl('invalid-url')).rejects.toThrow();
|
||||
}, 15000);
|
||||
|
||||
beforeEach(() => {
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
import { search } from '../search';
|
||||
import { jinaSearch } from '../jinaSearch';
|
||||
import { TokenTracker } from '../../utils/token-tracker';
|
||||
|
||||
describe('search', () => {
|
||||
it.skip('should perform search with Jina API (skipped due to insufficient balance)', async () => {
|
||||
const tokenTracker = new TokenTracker();
|
||||
const { response } = await search('TypeScript programming', process.env.JINA_API_KEY!, tokenTracker);
|
||||
const { response } = await jinaSearch('TypeScript programming', process.env.JINA_API_KEY!, tokenTracker);
|
||||
expect(response).toBeDefined();
|
||||
expect(response.data).toBeDefined();
|
||||
if (response.data === null) {
|
||||
@ -15,7 +15,7 @@ describe('search', () => {
|
||||
}, 15000);
|
||||
|
||||
it('should handle empty query', async () => {
|
||||
await expect(search('', process.env.JINA_API_KEY!)).rejects.toThrow();
|
||||
await expect(jinaSearch('', process.env.JINA_API_KEY!)).rejects.toThrow();
|
||||
}, 15000);
|
||||
|
||||
beforeEach(() => {
|
||||
|
||||
@ -2,8 +2,9 @@ import https from 'https';
|
||||
import { TokenTracker } from "../utils/token-tracker";
|
||||
|
||||
import { SearchResponse } from '../types';
|
||||
import {JINA_API_KEY} from "../config";
|
||||
|
||||
export function search(query: string, token: string, tracker?: TokenTracker): Promise<{ response: SearchResponse, tokens: number }> {
|
||||
export function jinaSearch(query: string, tracker?: TokenTracker): Promise<{ response: SearchResponse, tokens: number }> {
|
||||
return new Promise((resolve, reject) => {
|
||||
if (!query.trim()) {
|
||||
reject(new Error('Query cannot be empty'));
|
||||
@ -13,11 +14,11 @@ export function search(query: string, token: string, tracker?: TokenTracker): Pr
|
||||
const options = {
|
||||
hostname: 's.jina.ai',
|
||||
port: 443,
|
||||
path: `/${encodeURIComponent(query)}`,
|
||||
path: `/${encodeURIComponent(query)}?count=0`,
|
||||
method: 'GET',
|
||||
headers: {
|
||||
'Accept': 'application/json',
|
||||
'Authorization': `Bearer ${token}`,
|
||||
'Authorization': `Bearer ${JINA_API_KEY}`,
|
||||
'X-Retain-Images': 'none'
|
||||
}
|
||||
};
|
||||
@ -2,8 +2,9 @@ import https from 'https';
|
||||
import { TokenTracker } from "../utils/token-tracker";
|
||||
|
||||
import { ReadResponse } from '../types';
|
||||
import {JINA_API_KEY} from "../config";
|
||||
|
||||
export function readUrl(url: string, token: string, tracker?: TokenTracker): Promise<{ response: ReadResponse, tokens: number }> {
|
||||
export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response: ReadResponse, tokens: number }> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const data = JSON.stringify({url});
|
||||
|
||||
@ -14,7 +15,7 @@ export function readUrl(url: string, token: string, tracker?: TokenTracker): Pro
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Accept': 'application/json',
|
||||
'Authorization': `Bearer ${token}`,
|
||||
'Authorization': `Bearer ${JINA_API_KEY}`,
|
||||
'Content-Type': 'application/json',
|
||||
'Content-Length': data.length,
|
||||
'X-Retain-Images': 'none',
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user