feat: use new jina serp api

This commit is contained in:
Han Xiao
2025-06-09 18:38:17 -07:00
parent 10b084ce08
commit ae2e7c7fc6
4 changed files with 22 additions and 67 deletions

View File

@@ -42,7 +42,6 @@ import { MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas
import { formatDateBasedOnType, formatDateRange } from "./utils/date-tools"; import { formatDateBasedOnType, formatDateRange } from "./utils/date-tools";
import { reviseAnswer } from "./tools/md-fixer"; import { reviseAnswer } from "./tools/md-fixer";
import { buildReferences } from "./tools/build-ref"; import { buildReferences } from "./tools/build-ref";
import { arxivSearch } from './tools/arxiv-search';
async function sleep(ms: number) { async function sleep(ms: number) {
const seconds = Math.ceil(ms / 1000); const seconds = Math.ceil(ms / 1000);
@@ -297,7 +296,8 @@ async function executeSearchQueries(
console.log('Search query:', query); console.log('Search query:', query);
switch (searchProvider || SEARCH_PROVIDER) { switch (searchProvider || SEARCH_PROVIDER) {
case 'jina': case 'jina':
results = (await search(query, context.tokenTracker)).response?.data || []; case 'arxiv':
results = (await search(query, searchProvider, 30, context.tokenTracker)).response.results || [];
break; break;
case 'duck': case 'duck':
results = (await duckSearch(query.q, { safeSearch: SafeSearchType.STRICT })).results; results = (await duckSearch(query.q, { safeSearch: SafeSearchType.STRICT })).results;
@@ -305,9 +305,6 @@ async function executeSearchQueries(
case 'brave': case 'brave':
results = (await braveSearch(query.q)).response.web?.results || []; results = (await braveSearch(query.q)).response.web?.results || [];
break; break;
case 'arxiv':
results = (await arxivSearch(query)).response.results || [];
break;
case 'serper': case 'serper':
results = (await serperSearch(query)).response.organic || []; results = (await serperSearch(query)).response.organic || [];
break; break;

View File

@@ -1,46 +0,0 @@
import { TokenTracker } from "../utils/token-tracker";
import { ArxivSearchResponse, SERPQuery } from '../types';
import { JINA_API_KEY } from "../config";
import axiosClient from '../utils/axios-client';
/**
 * Runs a search restricted to the `arxiv` domain against the Jina SERP endpoint.
 *
 * The query fields are sent as the POST body together with `domain: 'arxiv'`.
 * After a successful call, the credits reported in `meta.credits` are recorded
 * on the token tracker under the `'search'` label.
 *
 * @param query - Search query; its fields are spread into the request body.
 * @param tracker - Optional tracker that receives the usage record. When it is
 *   omitted, a new `TokenTracker` is created just for this call.
 * @returns The response payload wrapped as `{ response }`.
 * @throws Error when the request fails or the payload has no `results` array.
 *   The failure is logged and re-thrown as a new `Error` carrying only the message.
 */
export async function arxivSearch(
  query: SERPQuery,
  tracker?: TokenTracker
): Promise<{ response: ArxivSearchResponse }> {
  try {
    // Pin the search to arXiv regardless of any `domain` carried by the query.
    const payload = { ...query, domain: 'arxiv' };

    const reply = await axiosClient.post<ArxivSearchResponse>(
      `https://svip.jina.ai/`,
      payload,
      {
        headers: {
          'Accept': 'application/json',
          'Authorization': `Bearer ${JINA_API_KEY}`,
        },
        timeout: 10000,
        responseType: 'json'
      }
    );
    const body = reply.data;

    // A missing or non-array `results` field is treated as a malformed response.
    if (!Array.isArray(body.results)) {
      throw new Error('Invalid response format');
    }

    console.log('Total URLs:', body.meta.num_results);

    const usage = {
      totalTokens: body.meta.credits,
      promptTokens: query.q.length,
      completionTokens: 0
    };
    (tracker || new TokenTracker()).trackUsage('search', usage);

    return { response: body };
  } catch (error) {
    // Normalise anything thrown (including the format error above) to a plain message.
    const message = error instanceof Error ? error.message : 'Unknown error occurred';
    console.error('Error in arxiv search:', message);
    throw new Error(message);
  }
}

View File

@@ -1,43 +1,47 @@
import { TokenTracker } from "../utils/token-tracker"; import { TokenTracker } from "../utils/token-tracker";
import { SearchResponse, SERPQuery } from '../types'; import { JinaSearchResponse, SERPQuery } from '../types';
import { JINA_API_KEY } from "../config"; import { JINA_API_KEY } from "../config";
import axiosClient from '../utils/axios-client'; import axiosClient from '../utils/axios-client';
export async function search( export async function search(
query: SERPQuery, query: SERPQuery,
domain?: string,
num?: number,
tracker?: TokenTracker tracker?: TokenTracker
): Promise<{ response: SearchResponse }> { ): Promise<{ response: JinaSearchResponse }> {
try { try {
const { data } = await axiosClient.post<SearchResponse>( if (domain !== 'arxiv') {
`https://s.jina.ai/`, domain = undefined; // default to general search
query, }
const { data } = await axiosClient.post<JinaSearchResponse>(
`https://svip.jina.ai/`,
{
...query,
domain,
num
},
{ {
headers: { headers: {
'Accept': 'application/json', 'Accept': 'application/json',
'Authorization': `Bearer ${JINA_API_KEY}`, 'Authorization': `Bearer ${JINA_API_KEY}`,
'X-Respond-With': 'no-content',
}, },
timeout: 10000, timeout: 10000,
responseType: 'json' responseType: 'json'
} }
); );
if (!data.data || !Array.isArray(data.data)) { if (!data.results || !Array.isArray(data.results)) {
throw new Error('Invalid response format'); throw new Error('Invalid response format');
} }
const totalTokens = data.data.reduce( console.log('Search results meta:', data.meta);
(sum, item) => sum + (item.usage?.tokens || 0),
0
);
console.log('Total URLs:', data.data.length);
const tokenTracker = tracker || new TokenTracker(); const tokenTracker = tracker || new TokenTracker();
tokenTracker.trackUsage('search', { tokenTracker.trackUsage('search', {
totalTokens, totalTokens: data.meta.credits,
promptTokens: query.q.length, promptTokens: query.q.length,
completionTokens: totalTokens completionTokens: 0
}); });
return { response: data }; return { response: data };

View File

@@ -75,7 +75,7 @@ export interface TokenUsage {
usage: LanguageModelUsage; usage: LanguageModelUsage;
} }
export interface ArxivSearchResponse { export interface JinaSearchResponse {
results: Array<{ results: Array<{
title: string; title: string;
snippet: string; snippet: string;