Paul Ascenzi 2025-02-07 09:10:09 -05:00
commit 825419d0b9
21 changed files with 495 additions and 313 deletions

1
.gitignore vendored
View File

@@ -5,6 +5,7 @@ knowledge.json
prompt-*.txt
queries.json
questions.json
eval-*.json
# Logs
logs

Dockerfile
View File

@@ -28,7 +28,8 @@ COPY package*.json ./
# Install production dependencies only
RUN npm install --production --ignore-scripts
# Copy built files from the build stage
# Copy config.json and built files from builder
COPY --from=builder /app/config.json ./
COPY --from=builder /app/dist ./dist
# Set environment variables (Recommended to set at runtime, avoid hardcoding)
@@ -41,4 +42,4 @@ ENV BRAVE_API_KEY=${BRAVE_API_KEY}
EXPOSE 3000
# Set startup command
CMD ["node", "./dist/server.js"]
CMD ["node", "./dist/server.js"]

README.md
View File

@@ -224,10 +224,28 @@ flowchart TD
## Evaluation
I kept the evaluation simple, LLM-as-a-judge and collect some ego questions (i.e. questions about Jina AI that I know 100% the answer) for evaluation.
I kept the evaluation simple: LLM-as-a-judge over some [ego questions](./src/evals/ego-questions.json). These are questions about Jina AI whose answers I know with 100% certainty, but LLMs do not.
I mainly look at 3 things: total steps, total tokens, and the correctness of the final answer.
```bash
npm run eval ./src/evals/ego-questions.json
```
Here's the table comparing plain `gemini-2.0-flash` and `gemini-2.0-flash + node-deepresearch` on the ego set.
Plain `gemini-2.0-flash` can be run by setting `tokenBudget` to zero, which skips the while-loop and answers the question directly.
It should not be surprising that plain `gemini-2.0-flash` has a 0% pass rate, as I intentionally filtered out the questions that LLMs can already answer.
| Metric | gemini-2.0-flash | gemini-2.0-flash + node-deepresearch #5e80ed4 |
|--------|------------------|-------------------------------------------------|
| Pass Rate | 0% | 60% |
| Average Steps | 1 | 5 |
| Maximum Steps | 1 | 13 |
| Minimum Steps | 1 | 2 |
| Median Steps | 1 | 3 |
| Average Tokens | 428 | 59,408 |
| Median Tokens | 434 | 16,001 |
| Maximum Tokens | 463 | 347,222 |
| Minimum Tokens | 374 | 5,594 |
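For reference, here's a minimal sketch of how the two columns above can be reproduced programmatically. Only the exported `getResponse(question, tokenBudget)` entry point from `src/agent.ts` is real; the comparison harness itself is hypothetical.

```typescript
import { getResponse } from './src/agent';

// Hypothetical comparison harness: a zero token budget skips the
// search-read-reason loop and answers directly (the plain baseline),
// while the default budget runs the full agent loop.
async function compare(question: string) {
  const plain = await getResponse(question, 0);         // plain gemini-2.0-flash
  const agent = await getResponse(question, 1_000_000); // + node-deepresearch
  console.log('plain:', plain.result);
  console.log('agent:', agent.result);
}
```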

59
config.json Normal file
View File

@@ -0,0 +1,59 @@
{
"env": {
"https_proxy": "",
"OPENAI_BASE_URL": "",
"GEMINI_API_KEY": "",
"OPENAI_API_KEY": "",
"JINA_API_KEY": "",
"BRAVE_API_KEY": "",
"DEFAULT_MODEL_NAME": ""
},
"defaults": {
"search_provider": "jina",
"llm_provider": "gemini",
"step_sleep": 1000
},
"providers": {
"gemini": {
"createClient": "createGoogleGenerativeAI"
},
"openai": {
"createClient": "createOpenAI",
"clientConfig": {
"compatibility": "strict"
}
}
},
"models": {
"gemini": {
"default": {
"model": "gemini-2.0-flash",
"temperature": 0,
"maxTokens": 8000
},
"tools": {
"dedup": { "temperature": 0.1 },
"evaluator": {},
"errorAnalyzer": {},
"queryRewriter": { "temperature": 0.1 },
"agent": { "temperature": 0.7 },
"agentBeastMode": { "temperature": 0.7 }
}
},
"openai": {
"default": {
"model": "gpt-4o-mini",
"temperature": 0,
"maxTokens": 8000
},
"tools": {
"dedup": { "temperature": 0.1 },
"evaluator": {},
"errorAnalyzer": {},
"queryRewriter": { "temperature": 0.1 },
"agent": { "temperature": 0.7 },
"agentBeastMode": { "temperature": 0.7 }
}
}
}
}
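How these values are resolved at runtime is defined by the `src/config.ts` changes later in this commit: entries in the `env` block are overridden by `process.env`, and per-tool entries override the provider-wide `default`. Below is a minimal sketch of that resolution; `resolveToolConfig` is a hypothetical stand-in for the real `getToolConfig`.

```typescript
import configJson from './config.json';

type Provider = 'gemini' | 'openai';
type ToolName = keyof typeof configJson.models.gemini.tools;

// Sketch of the merge order: tool-level overrides fall back to the
// provider default, and DEFAULT_MODEL_NAME from the environment
// overrides the model name from config.json.
function resolveToolConfig(provider: Provider, toolName: ToolName) {
  const { default: defaults, tools } = configJson.models[provider];
  const overrides = tools[toolName] as { temperature?: number; maxTokens?: number };
  return {
    model: process.env.DEFAULT_MODEL_NAME || defaults.model,
    temperature: overrides.temperature ?? defaults.temperature,
    maxTokens: overrides.maxTokens ?? defaults.maxTokens,
  };
}

// resolveToolConfig('gemini', 'dedup')
// -> { model: 'gemini-2.0-flash', temperature: 0.1, maxTokens: 8000 }
```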

package.json
View File

@@ -18,7 +18,7 @@
"lint:fix": "eslint . --ext .ts --fix",
"serve": "ts-node src/server.ts",
"eval": "ts-node src/evals/batch-evals.ts",
"test": "jest",
"test": "jest --testTimeout=30000",
"test:watch": "jest --watch"
},
"keywords": [],

src/__tests__/agent.test.ts
View File

@@ -1,11 +1,15 @@
import { getResponse } from '../agent';
describe('getResponse', () => {
afterEach(() => {
jest.useRealTimers();
});
it('should handle search action', async () => {
const result = await getResponse('What is TypeScript?', 1000);
const result = await getResponse('What is TypeScript?', 10000);
expect(result.result.action).toBeDefined();
expect(result.context).toBeDefined();
expect(result.context.tokenTracker).toBeDefined();
expect(result.context.actionTracker).toBeDefined();
});
}, 30000);
});

src/__tests__/cli.test.ts
View File

@@ -1,40 +0,0 @@
import { exec } from 'child_process';
import { promisify } from 'util';
const execAsync = promisify(exec);
// Mock environment variables
process.env.GEMINI_API_KEY = 'test-key';
process.env.JINA_API_KEY = 'test-key';
jest.mock('../agent', () => ({
getResponse: jest.fn().mockResolvedValue({
result: {
action: 'answer',
answer: 'Test answer',
references: []
}
})
}));
describe('CLI', () => {
test('shows version', async () => {
const { stdout } = await execAsync('ts-node src/cli.ts --version');
expect(stdout.trim()).toMatch(/\d+\.\d+\.\d+/);
});
test('shows help', async () => {
const { stdout } = await execAsync('ts-node src/cli.ts --help');
expect(stdout).toContain('deepresearch');
expect(stdout).toContain('AI-powered research assistant');
});
test('handles invalid token budget', async () => {
try {
await execAsync('ts-node src/cli.ts -t invalid "test query"');
fail('Should have thrown');
} catch (error) {
expect((error as { stderr: string }).stderr).toContain('Invalid token budget: must be a number');
}
});
});

src/agent.ts
View File

@@ -7,14 +7,14 @@ import fs from 'fs/promises';
import {SafeSearchType, search as duckSearch} from "duck-duck-scrape";
import {braveSearch} from "./tools/brave-search";
import {rewriteQuery} from "./tools/query-rewriter";
import {dedupQueries} from "./tools/dedup";
import {dedupQueries} from "./tools/jina-dedup";
import {evaluateAnswer} from "./tools/evaluator";
import {analyzeSteps} from "./tools/error-analyzer";
import {TokenTracker} from "./utils/token-tracker";
import {ActionTracker} from "./utils/action-tracker";
import {StepAction, AnswerAction} from "./types";
import {TrackerContext} from "./types";
import {jinaSearch} from "./tools/jinaSearch";
import {search} from "./tools/jina-search";
async function sleep(ms: number) {
const seconds = Math.ceil(ms / 1000);
@@ -32,7 +32,7 @@ function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boole
if (allowSearch) {
actions.push("search");
properties.searchQuery = z.string().max(30)
.describe("Only required when choosing 'search' action, must be a short, keyword-based query that BM25, tf-idf based search engines can understand.").optional();
.describe("Only required when choosing 'search' action, must be a short, keyword-based query that BM25, tf-idf based search engines can understand. Existing queries must be avoided").optional();
}
if (allowAnswer) {
@@ -75,6 +75,7 @@ function getPrompt(
question: string,
context?: string[],
allQuestions?: string[],
allKeywords?: string[],
allowReflect: boolean = true,
allowAnswer: boolean = true,
allowRead: boolean = true,
@@ -190,11 +191,18 @@ ${urlList}
}
if (allowSearch) {
actionSections.push(`
<action-search>
- Query external sources using a public search engine
- Focus on solving one specific aspect of the question
- Only give keywords search query, not full sentences
${allKeywords?.length ? `
- Avoid the searched queries below as they do not give any useful information, you need to think out of the box and propose queries from a completely different angle:
<bad-queries>
${allKeywords.join('\n')}
</bad-queries>
`.trim() : ''}
- Propose some unique new queries that might help you find the answer to the question
- Focus on solving one specific aspect of the original question
- Only use keywords, not full sentences
</action-search>
`);
}
@@ -249,7 +257,11 @@ Critical Requirements:
- Exclude all non-JSON text, markdown, or explanations
- Maintain strict JSON syntax`);
return sections.join('\n\n');
return removeExtraLineBreaks(sections.join('\n\n'));
}
const removeExtraLineBreaks = (text: string) => {
return text.replace(/\n{2,}/gm, '\n\n');
}
const allContext: StepAction[] = []; // all steps in the current session, including those leads to wrong results
@@ -314,6 +326,7 @@ export async function getResponse(question: string, tokenBudget: number = 1_000_
currentQuestion,
diaryContext,
allQuestions,
allKeywords,
allowReflect,
allowAnswer,
allowRead,
@@ -497,7 +510,7 @@ But then you realized you have asked them before. You decided to think out of
switch (SEARCH_PROVIDER) {
case 'jina':
// use jinaSearch
results = {results: (await jinaSearch(query, context.tokenTracker)).response?.data || []};
results = {results: (await search(query, context.tokenTracker)).response?.data || []};
break;
case 'duck':
results = await duckSearch(query, {safeSearch: SafeSearchType.STRICT});
@@ -640,6 +653,7 @@ You decided to think out of the box or cut from a completely different angle.`);
question,
diaryContext,
allQuestions,
allKeywords,
false,
false,
false,
@ -652,7 +666,7 @@ You decided to think out of the box or cut from a completely different angle.`);
const model = getModel('agentBeastMode');
let object;
let totalTokens = 0;
let totalTokens;
try {
const result = await generateObject({
model,
@@ -667,10 +681,10 @@ You decided to think out of the box or cut from a completely different angle.`);
object = result.object;
totalTokens = result.totalTokens;
}
context.tokenTracker.trackUsage('agent', totalTokens);
await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
thisStep = object as StepAction;
context.actionTracker.trackAction({totalStep, thisStep, gaps, badAttempts});
context.tokenTracker.trackUsage('agent', totalTokens);
console.log(thisStep)
return {result: thisStep, context};
}

src/config.ts
View File

@@ -1,50 +1,48 @@
import dotenv from 'dotenv';
import { ProxyAgent, setGlobalDispatcher } from 'undici';
import { createGoogleGenerativeAI } from '@ai-sdk/google';
import {createOpenAI, OpenAIProviderSettings} from '@ai-sdk/openai';
export type LLMProvider = 'openai' | 'gemini';
export type ToolName = keyof ToolConfigs;
function isValidProvider(provider: string): provider is LLMProvider {
return provider === 'openai' || provider === 'gemini';
}
function validateModelConfig(config: ModelConfig, toolName: string): ModelConfig {
if (typeof config.model !== 'string' || config.model.length === 0) {
throw new Error(`Invalid model name for ${toolName}`);
}
if (typeof config.temperature !== 'number' || config.temperature < 0 || config.temperature > 1) {
throw new Error(`Invalid temperature for ${toolName}`);
}
if (typeof config.maxTokens !== 'number' || config.maxTokens <= 0) {
throw new Error(`Invalid maxTokens for ${toolName}`);
}
return config;
}
export interface ModelConfig {
model: string;
temperature: number;
maxTokens: number;
}
export interface ToolConfigs {
dedup: ModelConfig;
evaluator: ModelConfig;
errorAnalyzer: ModelConfig;
queryRewriter: ModelConfig;
agent: ModelConfig;
agentBeastMode: ModelConfig;
}
import { createOpenAI, OpenAIProviderSettings } from '@ai-sdk/openai';
import configJson from '../config.json';
// Load environment variables
dotenv.config();
// Setup the proxy globally if present
if (process.env.https_proxy) {
// Types
export type LLMProvider = 'openai' | 'gemini';
export type ToolName = keyof typeof configJson.models.gemini.tools;
// Type definitions for our config structure
type EnvConfig = typeof configJson.env;
interface ProviderConfigBase {
createClient: string;
}
interface OpenAIProviderConfig extends ProviderConfigBase {
clientConfig: {
compatibility: "strict" | "compatible";
};
}
interface GeminiProviderConfig extends ProviderConfigBase {}
type ProviderConfig = {
openai: OpenAIProviderConfig;
gemini: GeminiProviderConfig;
};
// Environment setup
const env: EnvConfig = { ...configJson.env };
(Object.keys(env) as (keyof EnvConfig)[]).forEach(key => {
if (process.env[key]) {
env[key] = process.env[key] || env[key];
}
});
// Setup proxy if present
if (env.https_proxy) {
try {
const proxyUrl = new URL(process.env.https_proxy).toString();
const proxyUrl = new URL(env.https_proxy).toString();
const dispatcher = new ProxyAgent({ uri: proxyUrl });
setGlobalDispatcher(dispatcher);
} catch (error) {
@@ -52,79 +50,73 @@ if (process.env.https_proxy) {
}
}
export const OPENAI_BASE_URL = process.env.OPENAI_BASE_URL;
export const GEMINI_API_KEY = process.env.GEMINI_API_KEY as string;
export const OPENAI_API_KEY = process.env.OPENAI_API_KEY as string;
export const JINA_API_KEY = process.env.JINA_API_KEY as string;
export const BRAVE_API_KEY = process.env.BRAVE_API_KEY as string;
export const SEARCH_PROVIDER: 'brave' | 'jina' | 'duck' = 'jina';
// Export environment variables
export const OPENAI_BASE_URL = env.OPENAI_BASE_URL;
export const GEMINI_API_KEY = env.GEMINI_API_KEY;
export const OPENAI_API_KEY = env.OPENAI_API_KEY;
export const JINA_API_KEY = env.JINA_API_KEY;
export const BRAVE_API_KEY = env.BRAVE_API_KEY;
export const SEARCH_PROVIDER = configJson.defaults.search_provider;
export const STEP_SLEEP = configJson.defaults.step_sleep;
// Determine LLM provider
export const LLM_PROVIDER: LLMProvider = (() => {
const provider = process.env.LLM_PROVIDER || 'gemini';
const provider = process.env.LLM_PROVIDER || configJson.defaults.llm_provider;
if (!isValidProvider(provider)) {
throw new Error(`Invalid LLM provider: ${provider}`);
}
return provider;
})();
const DEFAULT_GEMINI_MODEL = process.env.DEFAULT_MODEL_NAME || 'gemini-2.0-flash';
const DEFAULT_OPENAI_MODEL = process.env.DEFAULT_MODEL_NAME || 'gpt-4o-mini';
function isValidProvider(provider: string): provider is LLMProvider {
return provider === 'openai' || provider === 'gemini';
}
const defaultGeminiConfig: ModelConfig = {
model: DEFAULT_GEMINI_MODEL,
temperature: 0,
maxTokens: 8000
};
interface ToolConfig {
model: string;
temperature: number;
maxTokens: number;
}
const defaultOpenAIConfig: ModelConfig = {
model: DEFAULT_OPENAI_MODEL,
temperature: 0,
maxTokens: 8000
};
interface ToolOverrides {
temperature?: number;
maxTokens?: number;
}
export const modelConfigs: Record<LLMProvider, ToolConfigs> = {
gemini: {
dedup: validateModelConfig({ ...defaultGeminiConfig, temperature: 0.1 }, 'dedup'),
evaluator: validateModelConfig({ ...defaultGeminiConfig, temperature: 0 }, 'evaluator'),
errorAnalyzer: validateModelConfig({ ...defaultGeminiConfig, temperature: 0 }, 'errorAnalyzer'),
queryRewriter: validateModelConfig({ ...defaultGeminiConfig, temperature: 0.1 }, 'queryRewriter'),
agent: validateModelConfig({ ...defaultGeminiConfig, temperature: 0.7 }, 'agent'),
agentBeastMode: validateModelConfig({ ...defaultGeminiConfig, temperature: 0.7 }, 'agentBeastMode')
},
openai: {
dedup: validateModelConfig({ ...defaultOpenAIConfig, temperature: 0.1 }, 'dedup'),
evaluator: validateModelConfig({ ...defaultOpenAIConfig, temperature: 0 }, 'evaluator'),
errorAnalyzer: validateModelConfig({ ...defaultOpenAIConfig, temperature: 0 }, 'errorAnalyzer'),
queryRewriter: validateModelConfig({ ...defaultOpenAIConfig, temperature: 0.1 }, 'queryRewriter'),
agent: validateModelConfig({ ...defaultOpenAIConfig, temperature: 0.7 }, 'agent'),
agentBeastMode: validateModelConfig({ ...defaultOpenAIConfig, temperature: 0.7 }, 'agentBeastMode')
}
};
// Get tool configuration
export function getToolConfig(toolName: ToolName): ToolConfig {
const providerConfig = configJson.models[LLM_PROVIDER];
const defaultConfig = providerConfig.default;
const toolOverrides = providerConfig.tools[toolName] as ToolOverrides;
export function getToolConfig(toolName: ToolName): ModelConfig {
if (!modelConfigs[LLM_PROVIDER][toolName]) {
throw new Error(`Invalid tool name: ${toolName}`);
}
return modelConfigs[LLM_PROVIDER][toolName];
return {
model: process.env.DEFAULT_MODEL_NAME || defaultConfig.model,
temperature: toolOverrides.temperature ?? defaultConfig.temperature,
maxTokens: toolOverrides.maxTokens ?? defaultConfig.maxTokens
};
}
export function getMaxTokens(toolName: ToolName): number {
return getToolConfig(toolName).maxTokens;
}
// Get model instance
export function getModel(toolName: ToolName) {
const config = getToolConfig(toolName);
const providerConfig = configJson.providers[LLM_PROVIDER] as ProviderConfig[typeof LLM_PROVIDER];
if (LLM_PROVIDER === 'openai') {
if (!OPENAI_API_KEY) {
throw new Error('OPENAI_API_KEY not found');
}
const opt: OpenAIProviderSettings = {
apiKey: OPENAI_API_KEY,
compatibility: 'strict'
}
compatibility: (providerConfig as OpenAIProviderConfig).clientConfig.compatibility
};
if (OPENAI_BASE_URL) {
opt.baseURL = OPENAI_BASE_URL
opt.baseURL = OPENAI_BASE_URL;
}
return createOpenAI(opt)(config.model);
@@ -133,19 +125,36 @@ export function getModel(toolName: ToolName) {
if (!GEMINI_API_KEY) {
throw new Error('GEMINI_API_KEY not found');
}
return createGoogleGenerativeAI({ apiKey: GEMINI_API_KEY })(config.model);
}
export const STEP_SLEEP = 1000;
// Validate required environment variables
if (LLM_PROVIDER === 'gemini' && !GEMINI_API_KEY) throw new Error("GEMINI_API_KEY not found");
if (LLM_PROVIDER === 'openai' && !OPENAI_API_KEY) throw new Error("OPENAI_API_KEY not found");
if (!JINA_API_KEY) throw new Error("JINA_API_KEY not found");
console.log('LLM Provider:', LLM_PROVIDER)
if (LLM_PROVIDER === 'openai') {
console.log('OPENAI_BASE_URL', OPENAI_BASE_URL)
console.log('Default Model', DEFAULT_OPENAI_MODEL)
} else {
console.log('Default Model', DEFAULT_GEMINI_MODEL)
}
// Log all configurations
const configSummary = {
provider: {
name: LLM_PROVIDER,
model: LLM_PROVIDER === 'openai'
? configJson.models.openai.default.model
: configJson.models.gemini.default.model,
...(LLM_PROVIDER === 'openai' && { baseUrl: OPENAI_BASE_URL })
},
search: {
provider: SEARCH_PROVIDER
},
tools: Object.fromEntries(
Object.keys(configJson.models[LLM_PROVIDER].tools).map(name => [
name,
getToolConfig(name as ToolName)
])
),
defaults: {
stepSleep: STEP_SLEEP
}
};
console.log('Configuration Summary:', JSON.stringify(configSummary, null, 2));

src/evals/batch-evals.ts
View File

@@ -3,9 +3,10 @@ import {exec} from 'child_process';
import {promisify} from 'util';
import {getResponse} from '../agent';
import {generateObject} from 'ai';
import {getModel, getMaxTokens} from '../config';
import {GEMINI_API_KEY} from '../config';
import {z} from 'zod';
import {AnswerAction, TrackerContext} from "../types";
import {createGoogleGenerativeAI} from "@ai-sdk/google";
const execAsync = promisify(exec);
@@ -24,6 +25,63 @@ interface EvaluationResult {
actual_answer: string;
}
interface EvaluationStats {
model_name: string;
pass_rate: number;
avg_steps: number;
max_steps: number;
min_steps: number;
median_steps: number;
avg_tokens: number;
median_tokens: number;
max_tokens: number;
min_tokens: number;
}
function calculateMedian(numbers: number[]): number {
const sorted = [...numbers].sort((a, b) => a - b);
const middle = Math.floor(sorted.length / 2);
if (sorted.length % 2 === 0) {
return (sorted[middle - 1] + sorted[middle]) / 2;
}
return sorted[middle];
}
function calculateStats(results: EvaluationResult[], modelName: string): EvaluationStats {
const steps = results.map(r => r.total_steps);
const tokens = results.map(r => r.total_tokens);
const passCount = results.filter(r => r.pass).length;
return {
model_name: modelName,
pass_rate: (passCount / results.length) * 100,
avg_steps: steps.reduce((a, b) => a + b, 0) / steps.length,
max_steps: Math.max(...steps),
min_steps: Math.min(...steps),
median_steps: calculateMedian(steps),
avg_tokens: tokens.reduce((a, b) => a + b, 0) / tokens.length,
median_tokens: calculateMedian(tokens),
max_tokens: Math.max(...tokens),
min_tokens: Math.min(...tokens)
};
}
function printStats(stats: EvaluationStats): void {
console.log('\n=== Evaluation Statistics ===');
console.log(`Model: ${stats.model_name}`);
console.log(`Pass Rate: ${stats.pass_rate.toFixed(0)}%`);
console.log(`Average Steps: ${stats.avg_steps.toFixed(0)}`);
console.log(`Maximum Steps: ${stats.max_steps}`);
console.log(`Minimum Steps: ${stats.min_steps}`);
console.log(`Median Steps: ${stats.median_steps.toFixed(0)}`);
console.log(`Average Tokens: ${stats.avg_tokens.toFixed(0)}`);
console.log(`Median Tokens: ${stats.median_tokens.toFixed(0)}`);
console.log(`Maximum Tokens: ${stats.max_tokens}`);
console.log(`Minimum Tokens: ${stats.min_tokens}`);
console.log('===========================\n');
}
async function getCurrentGitCommit(): Promise<string> {
try {
const {stdout} = await execAsync('git rev-parse --short HEAD');
@@ -49,10 +107,10 @@ Minor wording differences are acceptable as long as the core information of the
try {
const result = await generateObject({
model: getModel('evaluator'),
model: createGoogleGenerativeAI({ apiKey: GEMINI_API_KEY })('gemini-2.0-flash'), // fix to gemini-2.0-flash for evaluation
schema,
prompt,
maxTokens: getMaxTokens('evaluator'),
maxTokens: 1000,
temperature: 0 // Setting temperature to 0 for deterministic output
});
@@ -71,7 +129,9 @@ async function batchEvaluate(inputFile: string): Promise<void> {
const questions: Question[] = JSON.parse(await fs.readFile(inputFile, 'utf-8'));
const results: EvaluationResult[] = [];
const gitCommit = await getCurrentGitCommit();
const outputFile = `eval-${gitCommit}.json`;
const modelName = process.env.DEFAULT_MODEL_NAME || 'unknown';
const outputFile = `eval-${gitCommit}-${modelName}.json`;
// Process each question
for (let i = 0; i < questions.length; i++) {
const {question, answer: expectedAnswer} = questions[i];
@@ -113,12 +173,19 @@
actual_answer: 'Error occurred'
});
}
// Save results
await fs.writeFile(outputFile, JSON.stringify(results, null, 2));
console.log(`\nEvaluation results saved to ${outputFile}`);
}
// Calculate and print statistics
const stats = calculateStats(results, modelName);
printStats(stats);
// Save results
await fs.writeFile(outputFile, JSON.stringify({
results,
statistics: stats
}, null, 2));
console.log(`\nEvaluation results saved to ${outputFile}`);
}
// Run batch evaluation if this is the main module

src/evals/ego-questions.json
View File

@@ -1,7 +1,7 @@
[
{
"question": "what is jina ai ceo's twitter account",
"answer": "hxiao"
"question": "what did jina ai ceo say about deepseek that went viral and become a meme?",
"answer": "a side project"
},
{
"question": "when was jina ai founded?",
@@ -12,7 +12,7 @@
"answer": "ReaderLM-2.0"
},
{
"question": "what is the lastest blog post that jina ai published?",
"question": "what is the latest blog post that jina ai published?",
"answer": "A Practical Guide to Deploying Search Foundation Models in Production"
},
{
@@ -24,19 +24,59 @@
"answer": "30"
},
{
"question": "how much rate limit for r.jina.ai api without an api key?",
"answer": "20 RPM (requests per minute)"
"question": "when was jina reader released?",
"answer": "April 2024"
},
{
"question": "How many offices do Jina AI have and where are they?",
"answer": "four: sunnyvale, berlin, beijing, shenzhen"
},
{
"question": "Does jina reranker v2 support multilingual?",
"answer": "Yes"
"question": "what exactly jina-colbert-v2 improves over jina-colbert-v1?",
"answer": "v2 add multilingual support"
},
{
"question": "who are the authors of jina-clip-v2 paper?",
"answer": "Andreas Koukounas, Georgios Mastrapas, Bo Wang, Mohammad Kalim Akram, Sedigheh Eslami, Michael Günther, Isabelle Mohr, Saba Sturua, Scott Martens, Nan Wang, Han Xiao"
},
{
"question": "who created the node-deepresearch project?",
"answer": "Han Xiao / jina ai"
},
{
"question": "Which countries are the investors of Jina AI from?",
"answer": "USA and China only, no German investors"
},
{
"question": "what is the grounding api endpoint of jina ai?",
"answer": "g.jina.ai"
},
{
"question": "which of the following models do not support Matryoshka representation? jina-embeddings-v3, jina-embeddings-v2-base-en, jina-clip-v2, jina-clip-v1",
"answer": "jina-embeddings-v2-base-en and jina-clip-v1"
},
{
"question": "Can I purchase the 2024 yearbook that jina ai published today?",
"answer": "No it is sold out."
},
{
"question": "How many free tokens do you get from a new jina api key?",
"answer": "1 million."
},
{
"question": "Who is the legal signatory of Jina AI gmbh?",
"answer": "Jiao Liu"
},
{
"question": "what is the key idea behind node-deepresearch project?",
"answer": "It keeps searching, reading webpages, reasoning until an answer is found."
},
{
"question": "what is the name of the jina ai's mascot?",
"answer": "No, Jina AI does not have a mascot."
},
{
"question": "Does late chunking work with cls pooling?",
"answer": "No. late chunking only works with mean pooling."
}
]

src/tools/__tests__/brave-search.test.ts
View File

@@ -1,12 +0,0 @@
import { braveSearch } from '../brave-search';
describe('braveSearch', () => {
it('should return search results', async () => {
const { response } = await braveSearch('test query');
expect(response.web.results).toBeDefined();
expect(response.web.results.length).toBeGreaterThan(0);
expect(response.web.results[0]).toHaveProperty('title');
expect(response.web.results[0]).toHaveProperty('url');
expect(response.web.results[0]).toHaveProperty('description');
});
});

src/tools/__tests__/dedup.test.ts
View File

@@ -1,37 +0,0 @@
import { dedupQueries } from '../dedup';
import { LLMProvider } from '../../config';
describe('dedupQueries', () => {
const providers: Array<LLMProvider> = ['openai', 'gemini'];
const originalEnv = process.env;
beforeEach(() => {
jest.resetModules();
process.env = { ...originalEnv };
});
afterEach(() => {
process.env = originalEnv;
});
providers.forEach(provider => {
describe(`with ${provider} provider`, () => {
beforeEach(() => {
process.env.LLM_PROVIDER = provider;
});
it('should remove duplicate queries', async () => {
jest.setTimeout(10000);
const queries = ['typescript tutorial', 'typescript tutorial', 'javascript basics'];
const { unique_queries } = await dedupQueries(queries, []);
expect(unique_queries).toHaveLength(2);
expect(unique_queries).toContain('javascript basics');
});
it('should handle empty input', async () => {
const { unique_queries } = await dedupQueries([], []);
expect(unique_queries).toHaveLength(0);
});
});
});
});

src/tools/__tests__/error-analyzer.test.ts
View File

@@ -25,7 +25,7 @@ describe('analyzeSteps', () => {
expect(response).toHaveProperty('recap');
expect(response).toHaveProperty('blame');
expect(response).toHaveProperty('improvement');
});
}, 30000);
});
});
});

src/tools/__tests__/evaluator.test.ts
View File

@@ -32,25 +32,6 @@ describe('evaluateAnswer', () => {
expect(response).toHaveProperty('pass');
expect(response).toHaveProperty('think');
expect(response.type).toBe('definitive');
expect(response.pass).toBe(true);
});
it('should evaluate answer freshness', async () => {
const tokenTracker = new TokenTracker();
const { response } = await evaluateAnswer(
'What is the latest version of Node.js?',
'The latest version of Node.js is 14.0.0, released in April 2020.',
['freshness'],
tokenTracker
);
expect(response).toHaveProperty('pass');
expect(response).toHaveProperty('think');
expect(response.type).toBe('freshness');
expect(response.freshness_analysis).toBeDefined();
expect(response.freshness_analysis?.likely_outdated).toBe(true);
expect(response.freshness_analysis?.dates_mentioned).toContain('2020-04');
expect(response.freshness_analysis?.current_time).toBeDefined();
expect(response.pass).toBe(false);
});
it('should evaluate answer plurality', async () => {
@@ -64,38 +45,7 @@ describe('evaluateAnswer', () => {
expect(response).toHaveProperty('pass');
expect(response).toHaveProperty('think');
expect(response.type).toBe('plurality');
expect(response.plurality_analysis).toBeDefined();
expect(response.plurality_analysis?.expects_multiple).toBe(true);
expect(response.plurality_analysis?.provides_multiple).toBe(false);
expect(response.plurality_analysis?.count_expected).toBe(3);
expect(response.plurality_analysis?.count_provided).toBe(1);
expect(response.pass).toBe(false);
});
it('should evaluate in order and stop at first failure', async () => {
const tokenTracker = new TokenTracker();
const { response } = await evaluateAnswer(
'List the latest Node.js versions.',
'I am not sure about the Node.js versions.',
['definitive', 'freshness', 'plurality'],
tokenTracker
);
expect(response.type).toBe('definitive');
expect(response.pass).toBe(false);
expect(response.freshness_analysis).toBeUndefined();
expect(response.plurality_analysis).toBeUndefined();
});
it('should track token usage', async () => {
const tokenTracker = new TokenTracker();
const spy = jest.spyOn(tokenTracker, 'trackUsage');
await evaluateAnswer(
'What is TypeScript?',
'TypeScript is a strongly typed programming language that builds on JavaScript.',
['definitive', 'freshness', 'plurality'],
tokenTracker
);
expect(spy).toHaveBeenCalledWith('evaluator', expect.any(Number));
});
});
});

src/tools/__tests__/query-rewriter.test.ts
View File

@@ -1,34 +0,0 @@
import { rewriteQuery } from '../query-rewriter';
import { LLMProvider } from '../../config';
describe('rewriteQuery', () => {
const providers: Array<LLMProvider> = ['openai', 'gemini'];
const originalEnv = process.env;
beforeEach(() => {
jest.resetModules();
process.env = { ...originalEnv };
});
afterEach(() => {
process.env = originalEnv;
});
providers.forEach(provider => {
describe(`with ${provider} provider`, () => {
beforeEach(() => {
process.env.LLM_PROVIDER = provider;
});
it('should rewrite search query', async () => {
const { queries } = await rewriteQuery({
action: 'search',
searchQuery: 'how does typescript work',
think: 'Understanding TypeScript basics'
});
expect(Array.isArray(queries)).toBe(true);
expect(queries.length).toBeGreaterThan(0);
});
});
});
});

src/tools/__tests__/jina-search.test.ts
View File

@@ -1,10 +1,10 @@
import { jinaSearch } from '../jinaSearch';
import { search } from '../jina-search';
import { TokenTracker } from '../../utils/token-tracker';
describe('search', () => {
it.skip('should perform search with Jina API (skipped due to insufficient balance)', async () => {
const tokenTracker = new TokenTracker();
const { response } = await jinaSearch('TypeScript programming', tokenTracker);
const { response } = await search('TypeScript programming', tokenTracker);
expect(response).toBeDefined();
expect(response.data).toBeDefined();
if (response.data === null) {
@@ -15,7 +15,7 @@ describe('search', () => {
}, 15000);
it('should handle empty query', async () => {
await expect(jinaSearch('')).rejects.toThrow();
await expect(search('')).rejects.toThrow();
}, 15000);
beforeEach(() => {

src/tools/evaluator.ts
View File

@@ -285,14 +285,13 @@ export async function evaluateAnswer(
break;
}
} catch (error) {
console.error(`Error in ${evaluationType} evaluation:`, error);
const errorResult = await handleGenerateObjectError<EvaluationResponse>(error);
(tracker || new TokenTracker()).trackUsage('evaluator', errorResult.totalTokens || 0);
if (!errorResult.object.pass) {
return { response: errorResult.object };
}
// Always return from catch block to prevent undefined result
return { response: errorResult.object };
}
}
// Only reach this point if all evaluations pass
return { response: result!.object };
}

143
src/tools/jina-dedup.ts Normal file
View File

@@ -0,0 +1,143 @@
import axios from 'axios';
import { TokenTracker } from "../utils/token-tracker";
import {JINA_API_KEY} from "../config";
const JINA_API_URL = 'https://api.jina.ai/v1/embeddings';
const SIMILARITY_THRESHOLD = 0.93; // Adjustable threshold for cosine similarity
// Types for Jina API
interface JinaEmbeddingRequest {
model: string;
input: string[];
}
interface JinaEmbeddingResponse {
model: string;
object: string;
usage: {
total_tokens: number;
prompt_tokens: number;
};
data: Array<{
object: string;
index: number;
embedding: number[];
}>;
}
// Compute cosine similarity between two vectors
function cosineSimilarity(vecA: number[], vecB: number[]): number {
const dotProduct = vecA.reduce((sum, a, i) => sum + a * vecB[i], 0);
const normA = Math.sqrt(vecA.reduce((sum, a) => sum + a * a, 0));
const normB = Math.sqrt(vecB.reduce((sum, b) => sum + b * b, 0));
return dotProduct / (normA * normB);
}
// Get embeddings for all queries in one batch
async function getEmbeddings(queries: string[]): Promise<{ embeddings: number[][], tokens: number }> {
if (!JINA_API_KEY) {
throw new Error('JINA_API_KEY is not set');
}
const request: JinaEmbeddingRequest = {
model: 'jina-embeddings-v3',
input: queries
};
try {
const response = await axios.post<JinaEmbeddingResponse>(
JINA_API_URL,
request,
{
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${JINA_API_KEY}`
}
}
);
// Sort embeddings by index to maintain original order
const embeddings = response.data.data
.sort((a, b) => a.index - b.index)
.map(item => item.embedding);
return {
embeddings,
tokens: response.data.usage.total_tokens
};
} catch (error) {
console.error('Error getting embeddings from Jina:', error);
throw error;
}
}
export async function dedupQueries(
newQueries: string[],
existingQueries: string[],
tracker?: TokenTracker
): Promise<{ unique_queries: string[], tokens: number }> {
try {
// Quick return for single new query with no existing queries
if (newQueries.length === 1 && existingQueries.length === 0) {
console.log('Dedup (quick return):', newQueries);
return {
unique_queries: newQueries,
tokens: 0 // No tokens used since we didn't call the API
};
}
// Get embeddings for all queries in one batch
const allQueries = [...newQueries, ...existingQueries];
const { embeddings: allEmbeddings, tokens } = await getEmbeddings(allQueries);
// Split embeddings back into new and existing
const newEmbeddings = allEmbeddings.slice(0, newQueries.length);
const existingEmbeddings = allEmbeddings.slice(newQueries.length);
const uniqueQueries: string[] = [];
const usedIndices = new Set<number>();
// Compare each new query against existing queries and already accepted queries
for (let i = 0; i < newQueries.length; i++) {
let isUnique = true;
// Check against existing queries
for (let j = 0; j < existingQueries.length; j++) {
const similarity = cosineSimilarity(newEmbeddings[i], existingEmbeddings[j]);
if (similarity >= SIMILARITY_THRESHOLD) {
isUnique = false;
break;
}
}
// Check against already accepted queries
if (isUnique) {
for (const usedIndex of usedIndices) {
const similarity = cosineSimilarity(newEmbeddings[i], newEmbeddings[usedIndex]);
if (similarity >= SIMILARITY_THRESHOLD) {
isUnique = false;
break;
}
}
}
// Add to unique queries if passed all checks
if (isUnique) {
uniqueQueries.push(newQueries[i]);
usedIndices.add(i);
}
}
// Track token usage from the API
(tracker || new TokenTracker()).trackUsage('dedup', tokens);
console.log('Dedup:', uniqueQueries);
return {
unique_queries: uniqueQueries,
tokens
};
} catch (error) {
console.error('Error in deduplication analysis:', error);
throw error;
}
}
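For illustration, a usage sketch of the new embedding-based dedup; the query strings are made up.

```typescript
import { dedupQueries } from './src/tools/jina-dedup';

async function demo() {
  // A new candidate is dropped when its cosine similarity to an existing
  // query (or to an already-accepted candidate) reaches the 0.93 threshold.
  const { unique_queries, tokens } = await dedupQueries(
    ['jina ai founding year', 'typescript generics tutorial'], // new candidates
    ['when was jina ai founded']                               // already-searched queries
  );
  console.log(unique_queries, tokens); // likely ['typescript generics tutorial']
}
```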

src/tools/jina-search.ts
View File

@@ -3,7 +3,7 @@ import { TokenTracker } from "../utils/token-tracker";
import { SearchResponse } from '../types';
import { JINA_API_KEY } from "../config";
export function jinaSearch(query: string, tracker?: TokenTracker): Promise<{ response: SearchResponse, tokens: number }> {
export function search(query: string, tracker?: TokenTracker): Promise<{ response: SearchResponse, tokens: number }> {
return new Promise((resolve, reject) => {
if (!query.trim()) {
reject(new Error('Query cannot be empty'));

src/tools/query-rewriter.ts
View File

@@ -18,7 +18,7 @@ const responseSchema = z.object({
function getPrompt(action: SearchAction): string {
return `You are an expert Information Retrieval Assistant. Transform user queries into precise keyword combinations with strategic reasoning and appropriate search operators.
return `You are an expert Information Retrieval query optimizer. Optimize user queries into precise keyword combinations with strategic reasoning and appropriate search operators.
<rules>
1. Generate search queries that directly include appropriate operators
@@ -61,7 +61,7 @@ Input Query: How to fix a leaking kitchen faucet?
<think>
This is a how-to query seeking practical solutions. User likely wants step-by-step guidance and visual demonstrations for DIY repair. We'll target both video tutorials and written guides.
</think>
Queries: [
Output Queries: [
"kitchen faucet leak repair",
"faucet drip fix site:youtube.com",
"how to repair faucet "
@@ -71,7 +71,7 @@ Input Query: What are healthy breakfast options for type 2 diabetes?
<think>
This is a health-specific informational query. User needs authoritative medical advice combined with practical meal suggestions. Splitting into medical guidelines and recipes will provide comprehensive coverage.
</think>
Queries: [
Output Queries: [
"what to eat for type 2 diabetes",
"type 2 diabetes breakfast guidelines",
"diabetic breakfast recipes"
@@ -81,7 +81,7 @@ Input Query: Latest AWS Lambda features for serverless applications
<think>
This is a product research query focused on recent updates. User wants current information about specific technology features, likely for implementation purposes. We'll target official docs and community insights.
</think>
Queries: [
Output Queries: [
"aws lambda features site:aws.amazon.com intitle:2025",
"new features lambda serverless"
]