mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2026-03-22 07:29:35 +08:00
fix: update EvaluationResponse type and add comprehensive tests
Co-Authored-By: Han Xiao <han.xiao@jina.ai>
This commit is contained in:
@@ -31,6 +31,59 @@ describe('evaluateAnswer', () => {
|
|||||||
);
|
);
|
||||||
expect(response).toHaveProperty('pass');
|
expect(response).toHaveProperty('pass');
|
||||||
expect(response).toHaveProperty('think');
|
expect(response).toHaveProperty('think');
|
||||||
|
expect(response.type).toBe('definitive');
|
||||||
|
expect(response.pass).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should evaluate answer freshness', async () => {
|
||||||
|
const tokenTracker = new TokenTracker();
|
||||||
|
const { response } = await evaluateAnswer(
|
||||||
|
'What is the latest version of Node.js?',
|
||||||
|
'The latest version of Node.js is 14.0.0, released in April 2020.',
|
||||||
|
['freshness'],
|
||||||
|
tokenTracker
|
||||||
|
);
|
||||||
|
expect(response).toHaveProperty('pass');
|
||||||
|
expect(response).toHaveProperty('think');
|
||||||
|
expect(response.type).toBe('freshness');
|
||||||
|
expect(response.freshness_analysis).toBeDefined();
|
||||||
|
expect(response.freshness_analysis?.likely_outdated).toBe(true);
|
||||||
|
expect(response.freshness_analysis?.dates_mentioned).toContain('2020-04');
|
||||||
|
expect(response.freshness_analysis?.current_time).toBeDefined();
|
||||||
|
expect(response.pass).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should evaluate answer plurality', async () => {
|
||||||
|
const tokenTracker = new TokenTracker();
|
||||||
|
const { response } = await evaluateAnswer(
|
||||||
|
'List three programming languages.',
|
||||||
|
'Python is a programming language.',
|
||||||
|
['plurality'],
|
||||||
|
tokenTracker
|
||||||
|
);
|
||||||
|
expect(response).toHaveProperty('pass');
|
||||||
|
expect(response).toHaveProperty('think');
|
||||||
|
expect(response.type).toBe('plurality');
|
||||||
|
expect(response.plurality_analysis).toBeDefined();
|
||||||
|
expect(response.plurality_analysis?.expects_multiple).toBe(true);
|
||||||
|
expect(response.plurality_analysis?.provides_multiple).toBe(false);
|
||||||
|
expect(response.plurality_analysis?.count_expected).toBe(3);
|
||||||
|
expect(response.plurality_analysis?.count_provided).toBe(1);
|
||||||
|
expect(response.pass).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should evaluate in order and stop at first failure', async () => {
|
||||||
|
const tokenTracker = new TokenTracker();
|
||||||
|
const { response } = await evaluateAnswer(
|
||||||
|
'List the latest Node.js versions.',
|
||||||
|
'I am not sure about the Node.js versions.',
|
||||||
|
['definitive', 'freshness', 'plurality'],
|
||||||
|
tokenTracker
|
||||||
|
);
|
||||||
|
expect(response.type).toBe('definitive');
|
||||||
|
expect(response.pass).toBe(false);
|
||||||
|
expect(response.freshness_analysis).toBeUndefined();
|
||||||
|
expect(response.plurality_analysis).toBeUndefined();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should track token usage', async () => {
|
it('should track token usage', async () => {
|
||||||
@@ -39,7 +92,7 @@ describe('evaluateAnswer', () => {
|
|||||||
await evaluateAnswer(
|
await evaluateAnswer(
|
||||||
'What is TypeScript?',
|
'What is TypeScript?',
|
||||||
'TypeScript is a strongly typed programming language that builds on JavaScript.',
|
'TypeScript is a strongly typed programming language that builds on JavaScript.',
|
||||||
['definitive'],
|
['definitive', 'freshness', 'plurality'],
|
||||||
tokenTracker
|
tokenTracker
|
||||||
);
|
);
|
||||||
expect(spy).toHaveBeenCalledWith('evaluator', expect.any(Number));
|
expect(spy).toHaveBeenCalledWith('evaluator', expect.any(Number));
|
||||||
|
|||||||
13
src/types.ts
13
src/types.ts
@@ -87,6 +87,19 @@ export interface ReadResponse {
|
|||||||
export type EvaluationResponse = {
|
export type EvaluationResponse = {
|
||||||
pass: boolean;
|
pass: boolean;
|
||||||
think: string;
|
think: string;
|
||||||
|
type?: 'definitive' | 'freshness' | 'plurality';
|
||||||
|
freshness_analysis?: {
|
||||||
|
likely_outdated: boolean;
|
||||||
|
dates_mentioned: string[];
|
||||||
|
current_time: string;
|
||||||
|
max_age_days?: number;
|
||||||
|
};
|
||||||
|
plurality_analysis?: {
|
||||||
|
expects_multiple: boolean;
|
||||||
|
provides_multiple: boolean;
|
||||||
|
count_expected?: number;
|
||||||
|
count_provided: number;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
export type ErrorAnalysisResponse = {
|
export type ErrorAnalysisResponse = {
|
||||||
|
|||||||
Reference in New Issue
Block a user