mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2026-03-22 07:29:35 +08:00
chore: update readme
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
import {z, ZodObject} from 'zod';
|
import {z, ZodObject} from 'zod';
|
||||||
import {generateObject} from 'ai';
|
import {generateObject} from 'ai';
|
||||||
import {getModel, getMaxTokens, SEARCH_PROVIDER, STEP_SLEEP} from "./config";
|
import {getModel, getMaxTokens, SEARCH_PROVIDER, STEP_SLEEP} from "./config";
|
||||||
import {readUrl} from "./tools/read";
|
import {readUrl, removeAllLineBreaks} from "./tools/read";
|
||||||
import {handleGenerateObjectError} from './utils/error-handling';
|
import {handleGenerateObjectError} from './utils/error-handling';
|
||||||
import fs from 'fs/promises';
|
import fs from 'fs/promises';
|
||||||
import {SafeSearchType, search as duckSearch} from "duck-duck-scrape";
|
import {SafeSearchType, search as duckSearch} from "duck-duck-scrape";
|
||||||
@@ -273,9 +273,7 @@ function updateContext(step: any) {
|
|||||||
allContext.push(step)
|
allContext.push(step)
|
||||||
}
|
}
|
||||||
|
|
||||||
function removeAllLineBreaks(text: string) {
|
|
||||||
return text.replace(/(\r\n|\n|\r)/gm, " ");
|
|
||||||
}
|
|
||||||
|
|
||||||
function removeHTMLtags(text: string) {
|
function removeHTMLtags(text: string) {
|
||||||
return text.replace(/<[^>]*>?/gm, '');
|
return text.replace(/<[^>]*>?/gm, '');
|
||||||
@@ -390,7 +388,7 @@ export async function getResponse(question: string, tokenBudget: number = 1_000_
|
|||||||
...thisStep,
|
...thisStep,
|
||||||
});
|
});
|
||||||
|
|
||||||
const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep.answer,
|
const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep,
|
||||||
evaluationMetrics[currentQuestion], context.tokenTracker);
|
evaluationMetrics[currentQuestion], context.tokenTracker);
|
||||||
|
|
||||||
if (currentQuestion === question) {
|
if (currentQuestion === question) {
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ export async function braveSearch(query: string): Promise<{ response: BraveSearc
|
|||||||
const response = await axios.get<BraveSearchResponse>('https://api.search.brave.com/res/v1/web/search', {
|
const response = await axios.get<BraveSearchResponse>('https://api.search.brave.com/res/v1/web/search', {
|
||||||
params: {
|
params: {
|
||||||
q: query,
|
q: query,
|
||||||
count: 5,
|
count: 10,
|
||||||
safesearch: 'off'
|
safesearch: 'off'
|
||||||
},
|
},
|
||||||
headers: {
|
headers: {
|
||||||
|
|||||||
@@ -1,13 +1,14 @@
|
|||||||
import { z } from 'zod';
|
import {z} from 'zod';
|
||||||
import { generateObject } from 'ai';
|
import {generateObject} from 'ai';
|
||||||
import { getModel, getMaxTokens } from "../config";
|
import {getModel, getMaxTokens} from "../config";
|
||||||
import { TokenTracker } from "../utils/token-tracker";
|
import {TokenTracker} from "../utils/token-tracker";
|
||||||
import { EvaluationResponse } from '../types';
|
import {AnswerAction, EvaluationResponse} from '../types';
|
||||||
import { handleGenerateObjectError } from '../utils/error-handling';
|
import {handleGenerateObjectError} from '../utils/error-handling';
|
||||||
|
import {readUrl, removeAllLineBreaks} from "./read";
|
||||||
|
|
||||||
const model = getModel('evaluator');
|
const model = getModel('evaluator');
|
||||||
|
|
||||||
type EvaluationType = 'definitive' | 'freshness' | 'plurality';
|
type EvaluationType = 'definitive' | 'freshness' | 'plurality' | 'attribution';
|
||||||
|
|
||||||
const baseSchema = {
|
const baseSchema = {
|
||||||
pass: z.boolean().describe('Whether the answer passes the evaluation criteria defined by the evaluator'),
|
pass: z.boolean().describe('Whether the answer passes the evaluation criteria defined by the evaluator'),
|
||||||
@@ -41,6 +42,73 @@ const pluralitySchema = z.object({
|
|||||||
})
|
})
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const attributionSchema = z.object({
|
||||||
|
...baseSchema,
|
||||||
|
type: z.literal('attribution'),
|
||||||
|
attribution_analysis: z.object({
|
||||||
|
sources_provided: z.boolean().describe('Whether the answer provides source references'),
|
||||||
|
sources_verified: z.boolean().describe('Whether the provided sources contain the claimed information'),
|
||||||
|
quotes_accurate: z.boolean().describe('Whether the quotes accurately represent the source content')
|
||||||
|
})
|
||||||
|
});
|
||||||
|
|
||||||
|
function getAttributionPrompt(question: string, answer: string, sourceContent: string): string {
|
||||||
|
return `You are an evaluator that verifies if answer content is properly attributed to and supported by the provided sources.
|
||||||
|
|
||||||
|
<rules>
|
||||||
|
1. Source Verification:
|
||||||
|
- Check if answer claims are supported by the provided source content
|
||||||
|
- Verify that quotes are accurate and in proper context
|
||||||
|
- Ensure numerical data and statistics match the source
|
||||||
|
- Flag any claims that go beyond what the sources support
|
||||||
|
|
||||||
|
2. Attribution Analysis:
|
||||||
|
- Check if answer properly references its sources
|
||||||
|
- Verify that important claims have clear source attribution
|
||||||
|
- Ensure quotes are properly marked and cited
|
||||||
|
- Check for any unsupported generalizations
|
||||||
|
|
||||||
|
3. Accuracy Requirements:
|
||||||
|
- Direct quotes must match source exactly
|
||||||
|
- Paraphrasing must maintain original meaning
|
||||||
|
- Statistics and numbers must be precise
|
||||||
|
- Context must be preserved
|
||||||
|
</rules>
|
||||||
|
|
||||||
|
<examples>
|
||||||
|
Question: "What are Jina AI's main products?"
|
||||||
|
Answer: "According to Jina AI's website, their main products are DocArray and Jina Framework."
|
||||||
|
Source Content: "Jina AI's flagship products include DocArray, Jina Framework, and JCloud, offering a complete ecosystem for neural search applications."
|
||||||
|
Evaluation: {
|
||||||
|
"pass": false,
|
||||||
|
"think": "The answer omits JCloud which is mentioned as a main product in the source. The information provided is incomplete and potentially misleading as it fails to mention a significant product from the company's ecosystem.",
|
||||||
|
"attribution_analysis": {
|
||||||
|
"sources_provided": true,
|
||||||
|
"sources_verified": false,
|
||||||
|
"quotes_accurate": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Question: "When was Python first released?"
|
||||||
|
Answer: "Python was first released in 1991 by Guido van Rossum."
|
||||||
|
Source Content: "Python was first released in 1991 by Guido van Rossum while working at CWI."
|
||||||
|
Evaluation: {
|
||||||
|
"pass": true,
|
||||||
|
"think": "The answer accurately reflects the core information from the source about Python's release date and creator, though it omits the additional context about CWI which isn't essential to the question.",
|
||||||
|
"attribution_analysis": {
|
||||||
|
"sources_provided": true,
|
||||||
|
"sources_verified": true,
|
||||||
|
"quotes_accurate": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</examples>
|
||||||
|
|
||||||
|
Now evaluate this pair:
|
||||||
|
Question: ${JSON.stringify(question)}
|
||||||
|
Answer: ${JSON.stringify(answer)}
|
||||||
|
Source Content: ${JSON.stringify(sourceContent)}`;
|
||||||
|
}
|
||||||
|
|
||||||
function getDefinitivePrompt(question: string, answer: string): string {
|
function getDefinitivePrompt(question: string, answer: string): string {
|
||||||
return `You are an evaluator of answer definitiveness. Analyze if the given answer provides a definitive response or not.
|
return `You are an evaluator of answer definitiveness. Analyze if the given answer provides a definitive response or not.
|
||||||
|
|
||||||
@@ -332,67 +400,161 @@ export async function evaluateQuestion(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Helper function to handle common evaluation logic
|
||||||
|
async function performEvaluation(
|
||||||
|
evaluationType: EvaluationType,
|
||||||
|
params: {
|
||||||
|
model: any;
|
||||||
|
schema: z.ZodType<any>;
|
||||||
|
prompt: string;
|
||||||
|
maxTokens: number;
|
||||||
|
},
|
||||||
|
tracker?: TokenTracker
|
||||||
|
): Promise<GenerateObjectResult> {
|
||||||
|
const result = await generateObject({
|
||||||
|
model: params.model,
|
||||||
|
schema: params.schema,
|
||||||
|
prompt: params.prompt,
|
||||||
|
maxTokens: params.maxTokens
|
||||||
|
});
|
||||||
|
|
||||||
|
(tracker || new TokenTracker()).trackUsage('evaluator', result.usage?.totalTokens || 0);
|
||||||
|
console.log(`${evaluationType} Evaluation:`, result.object);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface GenerateObjectResult {
|
||||||
|
object: EvaluationResponse;
|
||||||
|
usage?: {
|
||||||
|
totalTokens: number;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Main evaluation function
|
||||||
export async function evaluateAnswer(
|
export async function evaluateAnswer(
|
||||||
question: string,
|
question: string,
|
||||||
answer: string,
|
action: AnswerAction,
|
||||||
evaluationOrder: EvaluationType[] = ['definitive', 'freshness', 'plurality'],
|
evaluationOrder: EvaluationType[] = ['definitive', 'freshness', 'plurality'],
|
||||||
tracker?: TokenTracker
|
tracker?: TokenTracker
|
||||||
): Promise<{ response: EvaluationResponse }> {
|
): Promise<{ response: EvaluationResponse }> {
|
||||||
let result;
|
let result: GenerateObjectResult;
|
||||||
|
|
||||||
|
// Only add attribution if we have valid references
|
||||||
|
if (action.references && action.references.length > 0) {
|
||||||
|
evaluationOrder = ['attribution', ...evaluationOrder];
|
||||||
|
}
|
||||||
|
|
||||||
for (const evaluationType of evaluationOrder) {
|
for (const evaluationType of evaluationOrder) {
|
||||||
try {
|
try {
|
||||||
switch (evaluationType) {
|
switch (evaluationType) {
|
||||||
case 'definitive':
|
case 'attribution': {
|
||||||
result = await generateObject({
|
// Safely handle references and ensure we have content
|
||||||
model,
|
const urls = action.references?.map(ref => ref.url) ?? [];
|
||||||
schema: definitiveSchema,
|
const uniqueURLs = [...new Set(urls)];
|
||||||
prompt: getDefinitivePrompt(question, answer),
|
const allKnowledge = await fetchSourceContent(uniqueURLs, tracker);
|
||||||
maxTokens: getMaxTokens('evaluator')
|
|
||||||
});
|
if (!allKnowledge.trim()) {
|
||||||
(tracker || new TokenTracker()).trackUsage('evaluator', result.usage?.totalTokens || 0);
|
return {
|
||||||
console.log('Evaluation:', result.object);
|
response: {
|
||||||
if (!result.object.pass) {
|
pass: false,
|
||||||
return { response: result.object };
|
think: "The answer does not provide any valid attribution references that could be verified. No accessible source content was found to validate the claims made in the answer.",
|
||||||
|
type: 'attribution',
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
result = await performEvaluation(
|
||||||
|
'attribution',
|
||||||
|
{
|
||||||
|
model,
|
||||||
|
schema: attributionSchema,
|
||||||
|
prompt: getAttributionPrompt(question, action.answer, allKnowledge),
|
||||||
|
maxTokens: getMaxTokens('evaluator')
|
||||||
|
},
|
||||||
|
tracker
|
||||||
|
);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case 'definitive':
|
||||||
|
result = await performEvaluation(
|
||||||
|
'definitive',
|
||||||
|
{
|
||||||
|
model,
|
||||||
|
schema: definitiveSchema,
|
||||||
|
prompt: getDefinitivePrompt(question, action.answer),
|
||||||
|
maxTokens: getMaxTokens('evaluator')
|
||||||
|
},
|
||||||
|
tracker
|
||||||
|
);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'freshness':
|
case 'freshness':
|
||||||
result = await generateObject({
|
result = await performEvaluation(
|
||||||
model,
|
'freshness',
|
||||||
schema: freshnessSchema,
|
{
|
||||||
prompt: getFreshnessPrompt(question, answer, new Date().toISOString()),
|
model,
|
||||||
maxTokens: getMaxTokens('evaluator')
|
schema: freshnessSchema,
|
||||||
});
|
prompt: getFreshnessPrompt(question, action.answer, new Date().toISOString()),
|
||||||
(tracker || new TokenTracker()).trackUsage('evaluator', result.usage?.totalTokens || 0);
|
maxTokens: getMaxTokens('evaluator')
|
||||||
console.log('Evaluation:', result.object);
|
},
|
||||||
if (!result.object.pass) {
|
tracker
|
||||||
return { response: result.object };
|
);
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'plurality':
|
case 'plurality':
|
||||||
result = await generateObject({
|
result = await performEvaluation(
|
||||||
model,
|
'plurality',
|
||||||
schema: pluralitySchema,
|
{
|
||||||
prompt: getPluralityPrompt(question, answer),
|
model,
|
||||||
maxTokens: getMaxTokens('evaluator')
|
schema: pluralitySchema,
|
||||||
});
|
prompt: getPluralityPrompt(question, action.answer),
|
||||||
(tracker || new TokenTracker()).trackUsage('evaluator', result.usage?.totalTokens || 0);
|
maxTokens: getMaxTokens('evaluator')
|
||||||
console.log('Evaluation:', result.object);
|
},
|
||||||
if (!result.object.pass) {
|
tracker
|
||||||
return { response: result.object };
|
);
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!result?.object.pass) {
|
||||||
|
return {response: result.object};
|
||||||
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
const errorResult = await handleGenerateObjectError<EvaluationResponse>(error);
|
const errorResult = await handleGenerateObjectError<EvaluationResponse>(error);
|
||||||
(tracker || new TokenTracker()).trackUsage('evaluator', errorResult.totalTokens || 0);
|
(tracker || new TokenTracker()).trackUsage('evaluator', errorResult.totalTokens || 0);
|
||||||
// Always return from catch block to prevent undefined result
|
return {response: errorResult.object};
|
||||||
return { response: errorResult.object };
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only reach this point if all evaluations pass
|
return {response: result!.object};
|
||||||
return { response: result!.object };
|
}
|
||||||
|
|
||||||
|
// Helper function to fetch and combine source content
|
||||||
|
async function fetchSourceContent(urls: string[], tracker?: TokenTracker): Promise<string> {
|
||||||
|
if (!urls.length) return '';
|
||||||
|
|
||||||
|
try {
|
||||||
|
const results = await Promise.all(
|
||||||
|
urls.map(async (url) => {
|
||||||
|
try {
|
||||||
|
const {response} = await readUrl(url, tracker);
|
||||||
|
const content = response?.data?.content || '';
|
||||||
|
return removeAllLineBreaks(content);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error reading URL:', error);
|
||||||
|
return '';
|
||||||
|
}
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
// Filter out empty results and join with proper separation
|
||||||
|
return results
|
||||||
|
.filter(content => content.trim())
|
||||||
|
.join('\n\n');
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error fetching source content:', error);
|
||||||
|
return '';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@@ -91,4 +91,8 @@ export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response
|
|||||||
req.write(data);
|
req.write(data);
|
||||||
req.end();
|
req.end();
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
export function removeAllLineBreaks(text: string) {
|
||||||
|
return text.replace(/(\r\n|\n|\r)/gm, " ");
|
||||||
}
|
}
|
||||||
@@ -100,7 +100,7 @@ export interface ReadResponse {
|
|||||||
export type EvaluationResponse = {
|
export type EvaluationResponse = {
|
||||||
pass: boolean;
|
pass: boolean;
|
||||||
think: string;
|
think: string;
|
||||||
type?: 'definitive' | 'freshness' | 'plurality';
|
type?: 'definitive' | 'freshness' | 'plurality' | 'attribution';
|
||||||
freshness_analysis?: {
|
freshness_analysis?: {
|
||||||
likely_outdated: boolean;
|
likely_outdated: boolean;
|
||||||
dates_mentioned: string[];
|
dates_mentioned: string[];
|
||||||
|
|||||||
Reference in New Issue
Block a user