Merge branch 'jina-ai:main' into main

This commit is contained in:
Paul Ascenzi
2025-02-06 22:03:32 -05:00
committed by GitHub
4 changed files with 115 additions and 49 deletions

View File

@@ -6,29 +6,28 @@ on:
jobs:
test:
if: !startsWith(github.event.head_commit.message, 'chore')
if: "!startsWith(github.event.head_commit.message, 'chore')"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Use Node.js
uses: actions/setup-node@v4
with:
node-version: '20.x'
cache: 'npm'
- name: Install dependencies
run: npm ci
- name: Run lint
run: npm run lint
- name: Run tests
env:
BRAVE_API_KEY: ${{ secrets.BRAVE_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
JINA_API_KEY: ${{ secrets.JINA_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GEMINI_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: npm test
- uses: actions/checkout@v4
- name: Use Node.js
uses: actions/setup-node@v4
with:
node-version: '20.x'
cache: 'npm'
- name: Install dependencies
run: npm ci
- name: Run lint
run: npm run lint
- name: Run tests
env:
BRAVE_API_KEY: ${{ secrets.BRAVE_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
JINA_API_KEY: ${{ secrets.JINA_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GEMINI_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: npm test

View File

@@ -1,6 +1,6 @@
# DeepResearch
Keep searching, reading webpages, reasoning until it finds the answer (or exceeding the token budget).
Keep searching, reading webpages, reasoning until an answer is found (or the token budget is exceeded).
```mermaid
---
@@ -33,6 +33,9 @@ npm install
[安装部署视频教程 on Youtube](https://youtu.be/vrpraFiPUyA)
It is also available on npm, but installing it from there is not recommended for now, as the code is still under active development.
## Usage
We use Gemini (latest `gemini-2.0-flash`) / OpenAI / [LocalLLM](#use-local-llm) for reasoning, and [Jina Reader](https://jina.ai/reader) for searching and reading webpages. You can get a free API key with 1M tokens from jina.ai.
@@ -95,6 +98,7 @@ If you use Ollama or LMStudio, you can redirect the reasoning request to your lo
```bash
export LLM_PROVIDER=openai # yes, that's right - for local llm we still use openai client
export OPENAI_BASE_URL=http://127.0.0.1:1234/v1 # your local llm endpoint
export OPENAI_API_KEY=whatever # random string would do, as we don't use it (unless your local LLM has authentication)
export DEFAULT_MODEL_NAME=qwen2.5-7b # your local llm model name
```

View File

@@ -1,8 +1,7 @@
import https from 'https';
import { TokenTracker } from "../utils/token-tracker";
import { SearchResponse } from '../types';
import {JINA_API_KEY} from "../config";
import { JINA_API_KEY } from "../config";
export function jinaSearch(query: string, tracker?: TokenTracker): Promise<{ response: SearchResponse, tokens: number }> {
return new Promise((resolve, reject) => {
@@ -25,17 +24,33 @@ export function jinaSearch(query: string, tracker?: TokenTracker): Promise<{ res
const req = https.request(options, (res) => {
let responseData = '';
res.on('data', (chunk) => responseData += chunk);
res.on('end', () => {
const response = JSON.parse(responseData) as SearchResponse;
if (!query.trim()) {
reject(new Error('Query cannot be empty'));
res.on('data', (chunk) => responseData += chunk);
res.on('end', () => {
// Check HTTP status code first
if (res.statusCode && res.statusCode >= 400) {
try {
// Try to parse error message from response if available
const errorResponse = JSON.parse(responseData);
if (res.statusCode === 402) {
reject(new Error(errorResponse.readableMessage || 'Insufficient balance'));
return;
}
reject(new Error(errorResponse.readableMessage || `HTTP Error ${res.statusCode}`));
} catch {
// If parsing fails, just return the status code
reject(new Error(`HTTP Error ${res.statusCode}`));
}
return;
}
if (response.code === 402) {
reject(new Error(response.readableMessage || 'Insufficient balance'));
// Only parse JSON for successful responses
let response: SearchResponse;
try {
response = JSON.parse(responseData) as SearchResponse;
} catch (error: unknown) {
reject(new Error(`Failed to parse response: ${error instanceof Error ? error.message : 'Unknown error'}`));
return;
}
@@ -46,12 +61,24 @@ export function jinaSearch(query: string, tracker?: TokenTracker): Promise<{ res
const totalTokens = response.data.reduce((sum, item) => sum + (item.usage?.tokens || 0), 0);
console.log('Total URLs:', response.data.length);
(tracker || new TokenTracker()).trackUsage('search', totalTokens);
const tokenTracker = tracker || new TokenTracker();
tokenTracker.trackUsage('search', totalTokens);
resolve({ response, tokens: totalTokens });
});
});
req.on('error', reject);
// Add timeout handling
req.setTimeout(30000, () => {
req.destroy();
reject(new Error('Request timed out'));
});
req.on('error', (error) => {
reject(new Error(`Request failed: ${error.message}`));
});
req.end();
});
}
}

View File

@@ -1,12 +1,16 @@
import https from 'https';
import { TokenTracker } from "../utils/token-tracker";
import { ReadResponse } from '../types';
import {JINA_API_KEY} from "../config";
import { JINA_API_KEY } from "../config";
export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response: ReadResponse, tokens: number }> {
return new Promise((resolve, reject) => {
const data = JSON.stringify({url});
if (!url.trim()) {
reject(new Error('URL cannot be empty'));
return;
}
const data = JSON.stringify({ url });
const options = {
hostname: 'r.jina.ai',
@@ -25,13 +29,33 @@ export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response
const req = https.request(options, (res) => {
let responseData = '';
res.on('data', (chunk) => responseData += chunk);
res.on('end', () => {
const response = JSON.parse(responseData) as ReadResponse;
// console.log('Raw read response:', response);
if (response.code === 402) {
reject(new Error(response.readableMessage || 'Insufficient balance'));
res.on('data', (chunk) => responseData += chunk);
res.on('end', () => {
// Check HTTP status code first
if (res.statusCode && res.statusCode >= 400) {
try {
// Try to parse error message from response if available
const errorResponse = JSON.parse(responseData);
if (res.statusCode === 402) {
reject(new Error(errorResponse.readableMessage || 'Insufficient balance'));
return;
}
reject(new Error(errorResponse.readableMessage || `HTTP Error ${res.statusCode}`));
} catch (error: unknown) {
// If parsing fails, just return the status code
reject(new Error(`HTTP Error ${res.statusCode}`));
}
return;
}
// Only parse JSON for successful responses
let response: ReadResponse;
try {
response = JSON.parse(responseData) as ReadResponse;
} catch (error: unknown) {
reject(new Error(`Failed to parse response: ${error instanceof Error ? error.message : 'Unknown error'}`));
return;
}
@@ -45,14 +69,26 @@ export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response
url: response.data.url,
tokens: response.data.usage?.tokens || 0
});
const tokens = response.data.usage?.tokens || 0;
(tracker || new TokenTracker()).trackUsage('read', tokens);
const tokenTracker = tracker || new TokenTracker();
tokenTracker.trackUsage('read', tokens);
resolve({ response, tokens });
});
});
req.on('error', reject);
// Add timeout handling
req.setTimeout(30000, () => {
req.destroy();
reject(new Error('Request timed out'));
});
req.on('error', (error: Error) => {
reject(new Error(`Request failed: ${error.message}`));
});
req.write(data);
req.end();
});
}
}