fix: update EvaluationResponse type and add comprehensive tests

Co-Authored-By: Han Xiao <han.xiao@jina.ai>
This commit is contained in:
Devin AI 2025-02-06 13:47:58 +00:00
parent fa142df14e
commit bf92f40e9a
2 changed files with 67 additions and 1 deletions

View File

@ -31,6 +31,59 @@ describe('evaluateAnswer', () => {
);
expect(response).toHaveProperty('pass');
expect(response).toHaveProperty('think');
expect(response.type).toBe('definitive');
expect(response.pass).toBe(true);
});
// Freshness criterion: the answer cites a release dated April 2020, and the test
// expects the evaluator to flag it as likely outdated and fail it.
it('should evaluate answer freshness', async () => {
  const tracker = new TokenTracker();
  const outcome = await evaluateAnswer(
    'What is the latest version of Node.js?',
    'The latest version of Node.js is 14.0.0, released in April 2020.',
    ['freshness'],
    tracker
  );
  const verdict = outcome.response;

  // Basic shape of every evaluation response.
  for (const key of ['pass', 'think']) {
    expect(verdict).toHaveProperty(key);
  }
  expect(verdict.type).toBe('freshness');

  // Details specific to the freshness check.
  const analysis = verdict.freshness_analysis;
  expect(analysis).toBeDefined();
  expect(analysis?.likely_outdated).toBe(true);
  // NOTE(review): assumes the evaluator reports dates as 'YYYY-MM' — confirm against the evaluator's output format.
  expect(analysis?.dates_mentioned).toContain('2020-04');
  expect(analysis?.current_time).toBeDefined();

  expect(verdict.pass).toBe(false);
});
// Plurality criterion: the question asks for three items but the answer names one,
// so the test expects a count mismatch (3 expected, 1 provided) and a failing verdict.
it('should evaluate answer plurality', async () => {
  const tracker = new TokenTracker();
  const outcome = await evaluateAnswer(
    'List three programming languages.',
    'Python is a programming language.',
    ['plurality'],
    tracker
  );
  const verdict = outcome.response;

  // Basic shape of every evaluation response.
  for (const key of ['pass', 'think']) {
    expect(verdict).toHaveProperty(key);
  }
  expect(verdict.type).toBe('plurality');

  // Details specific to the plurality check.
  const analysis = verdict.plurality_analysis;
  expect(analysis).toBeDefined();
  expect(analysis?.expects_multiple).toBe(true);
  expect(analysis?.provides_multiple).toBe(false);
  expect(analysis?.count_expected).toBe(3);
  expect(analysis?.count_provided).toBe(1);

  expect(verdict.pass).toBe(false);
});
// Three criteria are requested together with a non-committal answer. The test expects
// the returned result to be the failed 'definitive' check, with no freshness or
// plurality analysis attached.
it('should evaluate in order and stop at first failure', async () => {
  const tracker = new TokenTracker();
  const outcome = await evaluateAnswer(
    'List the latest Node.js versions.',
    'I am not sure about the Node.js versions.',
    ['definitive', 'freshness', 'plurality'],
    tracker
  );
  const verdict = outcome.response;

  expect(verdict.type).toBe('definitive');
  expect(verdict.pass).toBe(false);

  // Analyses belonging to the later criteria must be absent.
  expect(verdict.freshness_analysis).toBeUndefined();
  expect(verdict.plurality_analysis).toBeUndefined();
});
it('should track token usage', async () => {
@ -39,7 +92,7 @@ describe('evaluateAnswer', () => {
await evaluateAnswer(
'What is TypeScript?',
'TypeScript is a strongly typed programming language that builds on JavaScript.',
['definitive'],
['definitive', 'freshness', 'plurality'],
tokenTracker
);
expect(spy).toHaveBeenCalledWith('evaluator', expect.any(Number));

View File

@ -87,6 +87,19 @@ export interface ReadResponse {
/**
 * Result of evaluating an answer.
 *
 * `pass` and `think` are always present; `type` and the two `*_analysis`
 * objects are optional and may be absent on any given response.
 */
export type EvaluationResponse = {
  /** Whether the answer passed the evaluation. */
  pass: boolean;
  /** Free-form text accompanying the verdict (presumably the evaluator's reasoning — confirm). */
  think: string;
  /** Which evaluation criterion this response belongs to, when reported. */
  type?: 'definitive' | 'freshness' | 'plurality';
  /** Details attached to a freshness evaluation, when present. */
  freshness_analysis?: {
    /** True when the answer is judged likely to be out of date. */
    likely_outdated: boolean;
    /** Dates found in the answer (string format not fixed by this type). */
    dates_mentioned: Array<string>;
    /** Reference time used for the judgement (string format not fixed by this type). */
    current_time: string;
    /** Optional age threshold in days — NOTE(review): exact semantics not visible here; confirm. */
    max_age_days?: number;
  };
  /** Details attached to a plurality evaluation, when present. */
  plurality_analysis?: {
    /** True when the question asks for more than one item. */
    expects_multiple: boolean;
    /** True when the answer supplies more than one item. */
    provides_multiple: boolean;
    /** Number of items the question asks for, if it could be determined. */
    count_expected?: number;
    /** Number of items the answer supplies. */
    count_provided: number;
  };
};
export type ErrorAnalysisResponse = {