From d5a31bce526a53c45b155275915777f16f3bb9c9 Mon Sep 17 00:00:00 2001 From: Han Xiao Date: Mon, 9 Jun 2025 13:29:52 -0700 Subject: [PATCH] chore: update .gitignore, format config.json, add arxiv search functionality (#113) --- .gitignore | 2 ++ config.json | 64 +++++++++++++++++++++++++++++---------- src/agent.ts | 6 +++- src/tools/arxiv-search.ts | 46 ++++++++++++++++++++++++++++ src/types.ts | 14 +++++++++ 5 files changed, 115 insertions(+), 17 deletions(-) create mode 100644 src/tools/arxiv-search.ts diff --git a/.gitignore b/.gitignore index 4fa8a89..7a5e126 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ .DS_Store +messages.json +urls.json # Files tasks/ diff --git a/config.json b/config.json index d963fbd..14e2ce4 100644 --- a/config.json +++ b/config.json @@ -33,15 +33,33 @@ "maxTokens": 2000 }, "tools": { - "coder": { "temperature": 0.7 }, - "searchGrounding": { "temperature": 0 }, - "dedup": { "temperature": 0.1 }, - "evaluator": {"temperature": 0.6, "maxTokens": 200}, + "coder": { + "temperature": 0.7 + }, + "searchGrounding": { + "temperature": 0 + }, + "dedup": { + "temperature": 0.1 + }, + "evaluator": { + "temperature": 0.6, + "maxTokens": 200 + }, "errorAnalyzer": {}, - "queryRewriter": { "temperature": 0.1 }, - "agent": { "temperature": 0.7 }, - "agentBeastMode": { "temperature": 0.7 }, - "fallback": {"maxTokens": 8000, "model": "gemini-2.0-flash-lite"} + "queryRewriter": { + "temperature": 0.1 + }, + "agent": { + "temperature": 0.7 + }, + "agentBeastMode": { + "temperature": 0.7 + }, + "fallback": { + "maxTokens": 8000, + "model": "gemini-2.0-flash-lite" + } } }, "openai": { @@ -51,16 +69,30 @@ "maxTokens": 8000 }, "tools": { - "coder": { "temperature": 0.7 }, - "searchGrounding": { "temperature": 0 }, - "dedup": { "temperature": 0.1 }, + "coder": { + "temperature": 0.7 + }, + "searchGrounding": { + "temperature": 0 + }, + "dedup": { + "temperature": 0.1 + }, "evaluator": {}, "errorAnalyzer": {}, - "queryRewriter": { "temperature": 0.1 }, - "agent": { "temperature": 0.7 }, - "agentBeastMode": { "temperature": 0.7 }, - "fallback": { "temperature": 0 } + "queryRewriter": { + "temperature": 0.1 + }, + "agent": { + "temperature": 0.7 + }, + "agentBeastMode": { + "temperature": 0.7 + }, + "fallback": { + "temperature": 0 + } } } } -} +} \ No newline at end of file diff --git a/src/agent.ts b/src/agent.ts index f56456f..13134a0 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -43,6 +43,7 @@ import { formatDateBasedOnType, formatDateRange } from "./utils/date-tools"; import { repairUnknownChars } from "./tools/broken-ch-fixer"; import { reviseAnswer } from "./tools/md-fixer"; import { buildReferences } from "./tools/build-ref"; +import { arxivSearch } from './tools/arxiv-search'; async function sleep(ms: number) { const seconds = Math.ceil(ms / 1000); @@ -304,6 +305,9 @@ async function executeSearchQueries( case 'brave': results = (await braveSearch(query.q)).response.web?.results || []; break; + case 'arxiv': + results = (await arxivSearch(query)).response.results || []; + break; case 'serper': results = (await serperSearch(query)).response.organic || []; break; @@ -395,7 +399,7 @@ export async function getResponse(question?: string, let totalStep = 0; const allContext: StepAction[] = []; // all steps in the current session, including those leads to wrong results - const updateContext = function(step: any) { + const updateContext = function (step: any) { allContext.push(step); } diff --git a/src/tools/arxiv-search.ts b/src/tools/arxiv-search.ts new file mode 100644 index 0000000..21f7589 --- /dev/null +++ b/src/tools/arxiv-search.ts @@ -0,0 +1,46 @@ +import { TokenTracker } from "../utils/token-tracker"; +import { ArxivSearchResponse, SERPQuery } from '../types'; +import { JINA_API_KEY } from "../config"; +import axiosClient from '../utils/axios-client'; + +export async function arxivSearch( + query: SERPQuery, + tracker?: TokenTracker +): Promise<{ response: ArxivSearchResponse }> { + try { + const { data } = await axiosClient.post( + `https://svip.jina.ai/`, + { + q: query.q, + domain: 'arxiv', + }, + { + headers: { + 'Accept': 'application/json', + 'Authorization': `Bearer ${JINA_API_KEY}`, + }, + timeout: 10000, + responseType: 'json' + } + ); + + if (!data.results || !Array.isArray(data.results)) { + throw new Error('Invalid response format'); + } + + + console.log('Total URLs:', data.meta.num_results); + + const tokenTracker = tracker || new TokenTracker(); + tokenTracker.trackUsage('search', { + totalTokens: data.meta.credits, + promptTokens: query.q.length, + completionTokens: 0 + }); + + return { response: data }; + } catch (error) { + console.error('Error in arxiv search:', error instanceof Error ? error.message : 'Unknown error occurred'); + throw new Error(error instanceof Error ? error.message : 'Unknown error occurred'); + } +} \ No newline at end of file diff --git a/src/types.ts b/src/types.ts index 4e8cf56..cbbe1ea 100644 --- a/src/types.ts +++ b/src/types.ts @@ -75,6 +75,20 @@ export interface TokenUsage { usage: LanguageModelUsage; } +export interface ArxivSearchResponse { + results: Array<{ + title: string; + snippet: string; + url: string; + }>; + meta: { + query: string; + num_results: number; + latency: number; + credits: number; + } +} + export interface SearchResponse { code: number; status: number;