mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
fix: optimize webContent for references, add axios client
This commit is contained in:
parent
a03e20f0bf
commit
5674402bb1
@ -346,7 +346,7 @@ async function executeSearchQueries(
|
||||
utilityScore = utilityScore + addToAllURLs(r, allURLs);
|
||||
webContents[r.url] = {
|
||||
title: r.title,
|
||||
full: r.description,
|
||||
// full: r.description,
|
||||
chunks: [r.description],
|
||||
chunk_positions: [[0, r.description?.length]],
|
||||
}
|
||||
|
||||
@ -371,13 +371,19 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
|
||||
}
|
||||
}
|
||||
|
||||
const clientIp = req.headers['cf-connecting-ip'] ||
|
||||
req.headers['x-forwarded-for'] ||
|
||||
req.ip ||
|
||||
req.socket.remoteAddress ||
|
||||
'unknown';
|
||||
// Log request details (excluding sensitive data)
|
||||
console.log('[chat/completions] Request:', {
|
||||
model: req.body.model,
|
||||
stream: req.body.stream,
|
||||
messageCount: req.body.messages?.length,
|
||||
hasAuth: !!req.headers.authorization,
|
||||
requestId: Date.now().toString()
|
||||
requestId: Date.now().toString(),
|
||||
clientIp: clientIp,
|
||||
});
|
||||
|
||||
const body = req.body as ChatCompletionRequest;
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
import axios from 'axios';
|
||||
import {BRAVE_API_KEY} from "../config";
|
||||
import axiosClient from "../utils/axios-client";
|
||||
|
||||
import { BraveSearchResponse } from '../types';
|
||||
|
||||
export async function braveSearch(query: string): Promise<{ response: BraveSearchResponse }> {
|
||||
const response = await axios.get<BraveSearchResponse>('https://api.search.brave.com/res/v1/web/search', {
|
||||
const response = await axiosClient.get<BraveSearchResponse>('https://api.search.brave.com/res/v1/web/search', {
|
||||
params: {
|
||||
q: query,
|
||||
count: 10,
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import {JINA_API_KEY} from "../config";
|
||||
import {JinaEmbeddingRequest, JinaEmbeddingResponse} from "../types";
|
||||
import axios, {AxiosError} from "axios";
|
||||
import axiosClient from "../utils/axios-client";
|
||||
|
||||
const BATCH_SIZE = 128;
|
||||
const API_URL = "https://api.jina.ai/v1/embeddings";
|
||||
@ -101,7 +101,7 @@ async function getBatchEmbeddingsWithRetry(
|
||||
if (options.embedding_type) request.embedding_type = options.embedding_type;
|
||||
|
||||
try {
|
||||
const response = await axios.post<JinaEmbeddingResponse>(
|
||||
const response = await axiosClient.post<JinaEmbeddingResponse>(
|
||||
API_URL,
|
||||
request,
|
||||
{
|
||||
@ -179,9 +179,9 @@ async function getBatchEmbeddingsWithRetry(
|
||||
// Increment retry count and log
|
||||
retryCount++;
|
||||
console.log(`[embeddings] Batch ${currentBatch}/${batchCount} - Retrying ${textsToProcess.length} texts (attempt ${retryCount}/${MAX_RETRIES})`);
|
||||
} catch (error) {
|
||||
} catch (error: any) {
|
||||
console.error('Error calling Jina Embeddings API:', error);
|
||||
if (error instanceof AxiosError && error.response?.status === 402) {
|
||||
if (error.response?.status === 402 || error.message.includes('InsufficientBalanceError') || error.message.includes('insufficient balance')) {
|
||||
return { batchEmbeddings: [], batchTokens: 0 };
|
||||
}
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import axios from 'axios';
|
||||
import { TokenTracker } from "../utils/token-tracker";
|
||||
import { JINA_API_KEY } from "../config";
|
||||
import axiosClient from "../utils/axios-client";
|
||||
|
||||
const JINA_API_URL = 'https://api.jina.ai/v1/classify';
|
||||
|
||||
@ -49,7 +49,7 @@ export async function classifyText(
|
||||
});
|
||||
|
||||
// Make the API request with axios
|
||||
const apiRequestPromise = axios.post<JinaClassifyResponse>(
|
||||
const apiRequestPromise = axiosClient.post<JinaClassifyResponse>(
|
||||
JINA_API_URL,
|
||||
request,
|
||||
{
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import axios from 'axios';
|
||||
import {TokenTracker} from "../utils/token-tracker";
|
||||
import {JINA_API_KEY} from "../config";
|
||||
import axiosClient from '../utils/axios-client';
|
||||
|
||||
const JINA_API_URL = 'https://api.jina.ai/v1/rerank';
|
||||
|
||||
@ -57,7 +57,7 @@ export async function rerankDocuments(
|
||||
documents: batchDocuments
|
||||
};
|
||||
|
||||
const response = await axios.post<JinaRerankResponse>(
|
||||
const response = await axiosClient.post<JinaRerankResponse>(
|
||||
JINA_API_URL,
|
||||
request,
|
||||
{
|
||||
|
||||
@ -1,14 +1,14 @@
|
||||
import axios from 'axios';
|
||||
import { TokenTracker } from "../utils/token-tracker";
|
||||
import { SearchResponse, SERPQuery } from '../types';
|
||||
import { JINA_API_KEY } from "../config";
|
||||
import axiosClient from '../utils/axios-client';
|
||||
|
||||
export async function search(
|
||||
query: SERPQuery,
|
||||
tracker?: TokenTracker
|
||||
): Promise<{ response: SearchResponse }> {
|
||||
try {
|
||||
const { data } = await axios.post<SearchResponse>(
|
||||
const { data } = await axiosClient.post<SearchResponse>(
|
||||
`https://s.jina.ai/`,
|
||||
query,
|
||||
{
|
||||
@ -42,21 +42,7 @@ export async function search(
|
||||
|
||||
return { response: data };
|
||||
} catch (error) {
|
||||
if (axios.isAxiosError(error)) {
|
||||
if (error.response) {
|
||||
const status = error.response.status;
|
||||
const errorData = error.response.data as any;
|
||||
|
||||
if (status === 402) {
|
||||
throw new Error(errorData?.readableMessage || 'Insufficient balance');
|
||||
}
|
||||
throw new Error(errorData?.readableMessage || `HTTP Error ${status}`);
|
||||
} else if (error.request) {
|
||||
throw new Error('No response received from server');
|
||||
} else {
|
||||
throw new Error(`Request failed: ${error.message}`);
|
||||
}
|
||||
}
|
||||
console.error('Error in jina search:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
@ -1,7 +1,7 @@
|
||||
import axios from 'axios';
|
||||
import { TokenTracker } from "../utils/token-tracker";
|
||||
import { ReadResponse } from '../types';
|
||||
import { JINA_API_KEY } from "../config";
|
||||
import axiosClient from "../utils/axios-client";
|
||||
|
||||
export async function readUrl(
|
||||
url: string,
|
||||
@ -30,7 +30,7 @@ export async function readUrl(
|
||||
|
||||
try {
|
||||
// Use axios which handles encoding properly
|
||||
const { data } = await axios.post<ReadResponse>(
|
||||
const { data } = await axiosClient.post<ReadResponse>(
|
||||
'https://r.jina.ai/',
|
||||
{ url },
|
||||
{
|
||||
@ -59,28 +59,8 @@ export async function readUrl(
|
||||
});
|
||||
|
||||
return { response: data };
|
||||
} catch (error) {
|
||||
// Handle axios errors with better type safety
|
||||
if (axios.isAxiosError(error)) {
|
||||
if (error.response) {
|
||||
// The request was made and the server responded with a status code
|
||||
// that falls out of the range of 2xx
|
||||
const status = error.response.status;
|
||||
const errorData = error.response.data as any;
|
||||
|
||||
if (status === 402) {
|
||||
throw new Error(errorData?.readableMessage || 'Insufficient balance');
|
||||
}
|
||||
throw new Error(errorData?.readableMessage || `HTTP Error ${status}`);
|
||||
} else if (error.request) {
|
||||
// The request was made but no response was received
|
||||
throw new Error('No response received from server');
|
||||
} else {
|
||||
// Something happened in setting up the request
|
||||
throw new Error(`Request failed: ${error.message}`);
|
||||
}
|
||||
}
|
||||
// For non-axios errors
|
||||
} catch (error: any) {
|
||||
console.error(`Error reading URL: ${error.message}`);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
@ -1,7 +1,7 @@
|
||||
import axios from 'axios';
|
||||
import {TokenTracker} from "../utils/token-tracker";
|
||||
import {JINA_API_KEY} from "../config";
|
||||
import {TrackerContext} from "../types";
|
||||
import axiosClient from "../utils/axios-client";
|
||||
|
||||
export async function segmentText(
|
||||
content: string,
|
||||
@ -39,7 +39,7 @@ export async function segmentText(
|
||||
console.log(`[Segment] Processing batch ${i + 1}/${batches.length} (size: ${batch.length})`);
|
||||
|
||||
try {
|
||||
const {data} = await axios.post(
|
||||
const {data} = await axiosClient.post(
|
||||
'https://api.jina.ai/v1/segment',
|
||||
{
|
||||
content: batch,
|
||||
@ -84,8 +84,9 @@ export async function segmentText(
|
||||
positions: adjustedPositions,
|
||||
tokens: data.usage?.tokens || 0
|
||||
};
|
||||
} catch (error) {
|
||||
handleSegmentationError(error);
|
||||
} catch (error: any) {
|
||||
console.error(`Error processing batch ${i + 1}: ${error.message}`);
|
||||
throw error;
|
||||
}
|
||||
});
|
||||
|
||||
@ -178,26 +179,4 @@ function findLastSentenceBreak(text: string, startIndex: number, endIndex: numbe
|
||||
}
|
||||
}
|
||||
return -1; // No sentence break found
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles errors from the segmentation API
|
||||
*/
|
||||
function handleSegmentationError(error: any): never {
|
||||
if (axios.isAxiosError(error)) {
|
||||
if (error.response) {
|
||||
const status = error.response.status;
|
||||
const errorData = error.response.data;
|
||||
|
||||
if (status === 402) {
|
||||
throw new Error(errorData?.readableMessage || 'Insufficient balance');
|
||||
}
|
||||
throw new Error(errorData?.readableMessage || `HTTP Error ${status}`);
|
||||
} else if (error.request) {
|
||||
throw new Error('No response received from server');
|
||||
} else {
|
||||
throw new Error(`Request failed: ${error.message}`);
|
||||
}
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
@ -1,11 +1,11 @@
|
||||
import axios from 'axios';
|
||||
import {SERPER_API_KEY} from "../config";
|
||||
import axiosClient from "../utils/axios-client";
|
||||
|
||||
import {SerperSearchResponse, SERPQuery} from '../types';
|
||||
|
||||
|
||||
export async function serperSearch(query: SERPQuery): Promise<{ response: SerperSearchResponse }> {
|
||||
const response = await axios.post<SerperSearchResponse>('https://google.serper.dev/search', {
|
||||
const response = await axiosClient.post<SerperSearchResponse>('https://google.serper.dev/search', {
|
||||
...query,
|
||||
autocorrect: false,
|
||||
}, {
|
||||
@ -26,7 +26,7 @@ export async function serperSearch(query: SERPQuery): Promise<{ response: Serper
|
||||
|
||||
|
||||
export async function serperSearchOld(query: string): Promise<{ response: SerperSearchResponse }> {
|
||||
const response = await axios.post<SerperSearchResponse>('https://google.serper.dev/search', {
|
||||
const response = await axiosClient.post<SerperSearchResponse>('https://google.serper.dev/search', {
|
||||
q: query,
|
||||
autocorrect: false,
|
||||
}, {
|
||||
|
||||
@ -196,7 +196,7 @@ export type SearchSnippet = UnNormalizedSearchSnippet & {
|
||||
};
|
||||
|
||||
export type WebContent = {
|
||||
full: string,
|
||||
full?: string,
|
||||
chunks: string[]
|
||||
chunk_positions: number[][],
|
||||
title: string
|
||||
|
||||
80
src/utils/axios-client.ts
Normal file
80
src/utils/axios-client.ts
Normal file
@ -0,0 +1,80 @@
|
||||
import axios, { AxiosRequestConfig } from 'axios';
|
||||
// import { JINA_API_KEY, SERPER_API_KEY, BRAVE_API_KEY } from "../config";
|
||||
|
||||
// Default per-request timeout in milliseconds (used as the axios `timeout` option in baseConfig)
|
||||
const DEFAULT_TIMEOUT = 30000;
|
||||
|
||||
// Maximum response content length in bytes, to prevent OOM issues (10 * 1024 * 1024 = 10 MiB)
|
||||
const MAX_CONTENT_LENGTH = 10 * 1024 * 1024;
|
||||
|
||||
// Maximum number of redirects to follow
|
||||
const MAX_REDIRECTS = 5;
|
||||
|
||||
// Maximum number of concurrent sockets each agent may open per host (agent `maxSockets`)
|
||||
const MAX_SOCKETS = 50;
|
||||
|
||||
// Maximum number of idle sockets each agent keeps open per host (agent `maxFreeSockets`)
|
||||
const MAX_FREE_SOCKETS = 10;
|
||||
|
||||
// Socket timeout in milliseconds. NOTE: despite the name, this is passed as each agent's `timeout` option, not `keepAliveMsecs`
|
||||
const KEEP_ALIVE_TIMEOUT = 30000;
|
||||
|
||||
// Scheduling strategy the agents apply when picking the next free socket to reuse ('fifo' or 'lifo'); not HTTP/2-specific
|
||||
// LIFO (last in, first out) reuses the most recently used socket, lowering the risk of picking one the server already closed for inactivity
|
||||
const SCHEDULING = 'lifo';
|
||||
|
||||
// Options shared by the HTTP and HTTPS agents: bounded socket pools with
// keep-alive enabled, a per-socket timeout, and the chosen scheduling strategy.
const sharedAgentOptions = {
  maxSockets: MAX_SOCKETS,
  maxFreeSockets: MAX_FREE_SOCKETS,
  keepAlive: true,
  timeout: KEEP_ALIVE_TIMEOUT,
  scheduling: SCHEDULING,
};

// Connection-pooling agents, created once and reused by every request.
// The HTTPS agent is built first, matching the original evaluation order.
const pooledHttpsAgent = new (require('https').Agent)({ ...sharedAgentOptions });
const pooledHttpAgent = new (require('http').Agent)({ ...sharedAgentOptions });

// Base configuration applied to the shared axios instance: request timeout,
// response size cap, redirect limit, pooled agents and JSON default headers.
const baseConfig: AxiosRequestConfig = {
  timeout: DEFAULT_TIMEOUT,
  maxContentLength: MAX_CONTENT_LENGTH,
  maxRedirects: MAX_REDIRECTS,
  httpsAgent: pooledHttpsAgent,
  httpAgent: pooledHttpAgent,
  headers: {
    'Accept': 'application/json',
    'Content-Type': 'application/json',
  },
};
|
||||
|
||||
// Create a single axios instance with the base configuration
const axiosClient = axios.create(baseConfig);

/**
 * Builds the plain `Error` that the response interceptor throws in place of an
 * axios error, while keeping the details callers need to branch on.
 *
 * The original axios error is attached as `cause`; `code`, `status` and
 * `response` are copied over when present, so checks such as
 * `error.response?.status === 402` keep working after normalization.
 *
 * @param message - Human-readable message for the new error.
 * @param source - The axios error being wrapped.
 * @returns An `Error` carrying `message` plus the diagnostic fields above.
 */
function wrapAxiosError(
  message: string,
  source: { code?: string; response?: { status: number } },
): Error {
  return Object.assign(new Error(message), {
    code: source.code,
    status: source.response?.status,
    response: source.response,
    cause: source,
  });
}

/**
 * Response interceptor for consistent error handling.
 *
 * Successful responses pass through untouched. Axios errors are rethrown as
 * plain `Error`s whose message is, in order of preference:
 *  - the server-provided `readableMessage` from the response body,
 *  - `'Insufficient balance'` for HTTP 402 or `HTTP Error <status>` otherwise,
 *  - `No response received from server: ...` when the request got no response,
 *  - `Request failed: ...` when the request could not be set up.
 * Non-axios errors are rethrown unchanged.
 */
axiosClient.interceptors.response.use(
  (response) => response,
  (error: unknown) => {
    if (!axios.isAxiosError(error)) {
      throw error;
    }

    // On ECONNABORTED, destroy the underlying request if it exposes destroy().
    if (error.code === 'ECONNABORTED') {
      error.request?.destroy?.();
    }

    if (error.response) {
      const { status, data } = error.response;
      const readableMessage = (data as { readableMessage?: string } | null | undefined)
        ?.readableMessage;
      const fallback = status === 402 ? 'Insufficient balance' : `HTTP Error ${status}`;
      // `||` (not `??`) on purpose: an empty readableMessage falls back too.
      throw wrapAxiosError(readableMessage || fallback, error);
    }

    if (error.request) {
      throw wrapAxiosError(`No response received from server: ${error.message}`, error);
    }

    throw wrapAxiosError(`Request failed: ${error.message}`, error);
  }
);

export default axiosClient;
|
||||
@ -515,9 +515,18 @@ export async function processURLs(
|
||||
}
|
||||
|
||||
// add to web contents
|
||||
const {chunks, chunk_positions } = await segmentText(data.content, context)
|
||||
const {chunks, chunk_positions } = await segmentText(data.content, context);
|
||||
// filter out the chunks that are too short, minChunkLength is 80
|
||||
const minChunkLength = 80;
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
if (chunks[i].length < minChunkLength) {
|
||||
chunks.splice(i, 1);
|
||||
chunk_positions.splice(i, 1);
|
||||
i--;
|
||||
}
|
||||
}
|
||||
webContents[data.url] = {
|
||||
full: data.content,
|
||||
// full: data.content,
|
||||
chunks,
|
||||
chunk_positions,
|
||||
title: data.title
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user