Merge branch 'jina-ai:main' into main

2026-03-22 07:29:35 +08:00 · 2025-02-06 22:03:32 -05:00
parent 57bf183697 0a6ba24b27
commit ec865b1650
4 changed files with 115 additions and 49 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -6,29 +6,28 @@ on:

 jobs:
  test:
-    if: !startsWith(github.event.head_commit.message, 'chore')
+    if: "!startsWith(github.event.head_commit.message, 'chore')"
    runs-on: ubuntu-latest
-
    steps:
-    - uses: actions/checkout@v4
-    
-    - name: Use Node.js
-      uses: actions/setup-node@v4
-      with:
-        node-version: '20.x'
-        cache: 'npm'
-    
-    - name: Install dependencies
-      run: npm ci
-    
-    - name: Run lint
-      run: npm run lint
-    
-    - name: Run tests
-      env:
-        BRAVE_API_KEY: ${{ secrets.BRAVE_API_KEY }}
-        GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
-        JINA_API_KEY: ${{ secrets.JINA_API_KEY }}
-        GOOGLE_API_KEY: ${{ secrets.GEMINI_API_KEY }}
-        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-      run: npm test
+      - uses: actions/checkout@v4
+
+      - name: Use Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20.x'
+          cache: 'npm'
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Run lint
+        run: npm run lint
+
+      - name: Run tests
+        env:
+          BRAVE_API_KEY: ${{ secrets.BRAVE_API_KEY }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          JINA_API_KEY: ${{ secrets.JINA_API_KEY }}
+          GOOGLE_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        run: npm test
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # DeepResearch

-Keep searching, reading webpages, reasoning until it finds the answer (or exceeding the token budget).
+Keep searching, reading webpages, reasoning until an answer is found (or the token budget is exceeded).

 ```mermaid
 ---
@@ -33,6 +33,9 @@ npm install

 [安装部署视频教程 on Youtube](https://youtu.be/vrpraFiPUyA)

+It is also available on npm, but installing it from there is not recommended for now because the code is still under active development.
+
+
 ## Usage

 We use Gemini (latest `gemini-2.0-flash`) / OpenAI / [LocalLLM](#use-local-llm) for reasoning, [Jina Reader](https://jina.ai/reader) for searching and reading webpages, you can get a free API key with 1M tokens from jina.ai. 
@@ -95,6 +98,7 @@ If you use Ollama or LMStudio, you can redirect the reasoning request to your lo
 ```bash
 export LLM_PROVIDER=openai  # yes, that's right - for local llm we still use openai client
 export OPENAI_BASE_URL=http://127.0.0.1:1234/v1  # your local llm endpoint
+export OPENAI_API_KEY=whatever  # any placeholder string works; it is not used unless your local LLM requires authentication
 export DEFAULT_MODEL_NAME=qwen2.5-7b  # your local llm model name
 ```

--- a/src/tools/jinaSearch.ts
+++ b/src/tools/jinaSearch.ts
@@ -1,8 +1,7 @@
 import https from 'https';
 import { TokenTracker } from "../utils/token-tracker";
-
 import { SearchResponse } from '../types';
-import {JINA_API_KEY} from "../config";
+import { JINA_API_KEY } from "../config";

 export function jinaSearch(query: string, tracker?: TokenTracker): Promise<{ response: SearchResponse, tokens: number }> {
  return new Promise((resolve, reject) => {
@@ -25,17 +24,33 @@ export function jinaSearch(query: string, tracker?: TokenTracker): Promise<{ res

    const req = https.request(options, (res) => {
      let responseData = '';
-      res.on('data', (chunk) => responseData += chunk);
-      res.on('end', () => {
-        const response = JSON.parse(responseData) as SearchResponse;

-        if (!query.trim()) {
-          reject(new Error('Query cannot be empty'));
+      res.on('data', (chunk) => responseData += chunk);
+
+      res.on('end', () => {
+        // Check HTTP status code first
+        if (res.statusCode && res.statusCode >= 400) {
+          try {
+            // Try to parse error message from response if available
+            const errorResponse = JSON.parse(responseData);
+            if (res.statusCode === 402) {
+              reject(new Error(errorResponse.readableMessage || 'Insufficient balance'));
+              return;
+            }
+            reject(new Error(errorResponse.readableMessage || `HTTP Error ${res.statusCode}`));
+          } catch {
+            // If parsing fails, just return the status code
+            reject(new Error(`HTTP Error ${res.statusCode}`));
+          }
          return;
        }

-        if (response.code === 402) {
-          reject(new Error(response.readableMessage || 'Insufficient balance'));
+        // Only parse JSON for successful responses
+        let response: SearchResponse;
+        try {
+          response = JSON.parse(responseData) as SearchResponse;
+        } catch (error: unknown) {
+          reject(new Error(`Failed to parse response: ${error instanceof Error ? error.message : 'Unknown error'}`));
          return;
        }

@@ -46,12 +61,24 @@ export function jinaSearch(query: string, tracker?: TokenTracker): Promise<{ res

        const totalTokens = response.data.reduce((sum, item) => sum + (item.usage?.tokens || 0), 0);
        console.log('Total URLs:', response.data.length);
-        (tracker || new TokenTracker()).trackUsage('search', totalTokens);
+
+        const tokenTracker = tracker || new TokenTracker();
+        tokenTracker.trackUsage('search', totalTokens);
+
        resolve({ response, tokens: totalTokens });
      });
    });

-    req.on('error', reject);
+    // Add timeout handling
+    req.setTimeout(30000, () => {
+      req.destroy();
+      reject(new Error('Request timed out'));
+    });
+
+    req.on('error', (error) => {
+      reject(new Error(`Request failed: ${error.message}`));
+    });
+
    req.end();
  });
-}
+}
--- a/src/tools/read.ts
+++ b/src/tools/read.ts
@@ -1,12 +1,16 @@
 import https from 'https';
 import { TokenTracker } from "../utils/token-tracker";
-
 import { ReadResponse } from '../types';
-import {JINA_API_KEY} from "../config";
+import { JINA_API_KEY } from "../config";

 export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response: ReadResponse, tokens: number }> {
  return new Promise((resolve, reject) => {
-    const data = JSON.stringify({url});
+    if (!url.trim()) {
+      reject(new Error('URL cannot be empty'));
+      return;
+    }
+
+    const data = JSON.stringify({ url });

    const options = {
      hostname: 'r.jina.ai',
@@ -25,13 +29,33 @@ export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response

    const req = https.request(options, (res) => {
      let responseData = '';
-      res.on('data', (chunk) => responseData += chunk);
-      res.on('end', () => {
-        const response = JSON.parse(responseData) as ReadResponse;
-        // console.log('Raw read response:', response);

-        if (response.code === 402) {
-          reject(new Error(response.readableMessage || 'Insufficient balance'));
+      res.on('data', (chunk) => responseData += chunk);
+
+      res.on('end', () => {
+        // Check HTTP status code first
+        if (res.statusCode && res.statusCode >= 400) {
+          try {
+            // Try to parse error message from response if available
+            const errorResponse = JSON.parse(responseData);
+            if (res.statusCode === 402) {
+              reject(new Error(errorResponse.readableMessage || 'Insufficient balance'));
+              return;
+            }
+            reject(new Error(errorResponse.readableMessage || `HTTP Error ${res.statusCode}`));
+          } catch (error: unknown) {
+            // If parsing fails, just return the status code
+            reject(new Error(`HTTP Error ${res.statusCode}`));
+          }
+          return;
+        }
+
+        // Only parse JSON for successful responses
+        let response: ReadResponse;
+        try {
+          response = JSON.parse(responseData) as ReadResponse;
+        } catch (error: unknown) {
+          reject(new Error(`Failed to parse response: ${error instanceof Error ? error.message : 'Unknown error'}`));
          return;
        }

@@ -45,14 +69,26 @@ export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response
          url: response.data.url,
          tokens: response.data.usage?.tokens || 0
        });
+
        const tokens = response.data.usage?.tokens || 0;
-        (tracker || new TokenTracker()).trackUsage('read', tokens);
+        const tokenTracker = tracker || new TokenTracker();
+        tokenTracker.trackUsage('read', tokens);
+
        resolve({ response, tokens });
      });
    });

-    req.on('error', reject);
+    // Add timeout handling
+    req.setTimeout(30000, () => {
+      req.destroy();
+      reject(new Error('Request timed out'));
+    });
+
+    req.on('error', (error: Error) => {
+      reject(new Error(`Request failed: ${error.message}`));
+    });
+
    req.write(data);
    req.end();
  });
-}
+}