mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
fix: update evaluator tests to match latest implementation (#34)
* fix: update evaluator tests to match latest implementation

  Co-Authored-By: Han Xiao <han.xiao@jina.ai>

* fix: update EvaluationResponse type and add comprehensive tests

  Co-Authored-By: Han Xiao <han.xiao@jina.ai>

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Han Xiao <han.xiao@jina.ai>
This commit is contained in:
parent
0c74746bb7
commit
df992517fe
@ -26,22 +26,76 @@ describe('evaluateAnswer', () => {
|
||||
const { response } = await evaluateAnswer(
|
||||
'What is TypeScript?',
|
||||
'TypeScript is a strongly typed programming language that builds on JavaScript.',
|
||||
['definitive'],
|
||||
tokenTracker
|
||||
);
|
||||
expect(response).toHaveProperty('is_definitive');
|
||||
expect(response).toHaveProperty('reasoning');
|
||||
expect(response).toHaveProperty('pass');
|
||||
expect(response).toHaveProperty('think');
|
||||
expect(response.type).toBe('definitive');
|
||||
expect(response.pass).toBe(true);
|
||||
});
|
||||
|
||||
// Freshness criterion: an answer that cites an April 2020 release is expected
// to be reported as likely outdated and therefore to fail the check.
it('should evaluate answer freshness', async () => {
  const question = 'What is the latest version of Node.js?';
  const answer = 'The latest version of Node.js is 14.0.0, released in April 2020.';
  const tracker = new TokenTracker();

  const result = await evaluateAnswer(question, answer, ['freshness'], tracker);
  const verdict = result.response;

  // Fields every evaluation response carries.
  expect(verdict).toHaveProperty('pass');
  expect(verdict).toHaveProperty('think');
  expect(verdict.type).toBe('freshness');

  // Freshness-specific analysis payload.
  const freshness = verdict.freshness_analysis;
  expect(freshness).toBeDefined();
  expect(freshness?.likely_outdated).toBe(true);
  expect(freshness?.dates_mentioned).toContain('2020-04');
  expect(freshness?.current_time).toBeDefined();

  expect(verdict.pass).toBe(false);
});
|
||||
|
||||
// Plurality criterion: the question asks for three items but the answer names
// one, so the analysis should report the mismatch and the check should fail.
it('should evaluate answer plurality', async () => {
  const question = 'List three programming languages.';
  const answer = 'Python is a programming language.';
  const tracker = new TokenTracker();

  const result = await evaluateAnswer(question, answer, ['plurality'], tracker);
  const verdict = result.response;

  // Fields every evaluation response carries.
  expect(verdict).toHaveProperty('pass');
  expect(verdict).toHaveProperty('think');
  expect(verdict.type).toBe('plurality');

  // Plurality-specific analysis payload.
  const plurality = verdict.plurality_analysis;
  expect(plurality).toBeDefined();
  expect(plurality?.expects_multiple).toBe(true);
  expect(plurality?.provides_multiple).toBe(false);
  expect(plurality?.count_expected).toBe(3);
  expect(plurality?.count_provided).toBe(1);

  expect(verdict.pass).toBe(false);
});
|
||||
|
||||
// With several criteria requested, a non-committal answer is expected to fail
// on the first one ('definitive'); the later criteria must leave no analysis
// payload on the response.
it('should evaluate in order and stop at first failure', async () => {
  const question = 'List the latest Node.js versions.';
  const answer = 'I am not sure about the Node.js versions.';
  const tracker = new TokenTracker();

  const result = await evaluateAnswer(
    question,
    answer,
    ['definitive', 'freshness', 'plurality'],
    tracker
  );
  const verdict = result.response;

  expect(verdict.type).toBe('definitive');
  expect(verdict.pass).toBe(false);

  // Neither of the later criteria should have produced output.
  expect(verdict.freshness_analysis).toBeUndefined();
  expect(verdict.plurality_analysis).toBeUndefined();
});
|
||||
|
||||
// Verifies that running the evaluator reports usage to the supplied
// TokenTracker under the 'evaluator' label with a numeric token count.
//
// The rendered diff had merged the pre-change and post-change lines of this
// test (two `evaluateAnswer(` openers and the superseded `tokens` assertions).
// This is the post-change version: the call's return value is no longer
// inspected, only the tracker spy.
it('should track token usage', async () => {
  const tokenTracker = new TokenTracker();
  const spy = jest.spyOn(tokenTracker, 'trackUsage');

  await evaluateAnswer(
    'What is TypeScript?',
    'TypeScript is a strongly typed programming language that builds on JavaScript.',
    ['definitive', 'freshness', 'plurality'],
    tokenTracker
  );

  expect(spy).toHaveBeenCalledWith('evaluator', expect.any(Number));
});
|
||||
});
|
||||
});
|
||||
|
||||
13
src/types.ts
13
src/types.ts
@ -87,6 +87,19 @@ export interface ReadResponse {
|
||||
/**
 * Outcome of evaluating a candidate answer against one criterion.
 *
 * Only `pass` and `think` are required. `type` and the two `*_analysis`
 * payloads are optional, so consumers must handle their absence.
 */
export type EvaluationResponse = {
  /** Whether the answer passed the evaluation. */
  pass: boolean;
  /** Free-text reasoning accompanying the verdict (presumably produced by the evaluator model; confirm against the evaluator). */
  think: string;
  /** Criterion this response belongs to, when reported. */
  type?: 'definitive' | 'freshness' | 'plurality';
  /** Details of a freshness check; expected only for `type: 'freshness'` (not enforced by this type). */
  freshness_analysis?: {
    /** True when the answer is judged likely to be out of date. */
    likely_outdated: boolean;
    /** Dates found in the answer; string format is not fixed by this type (the tests look for values like '2020-04'). */
    dates_mentioned: string[];
    /** Reference time used for the check; format not specified here (TODO confirm). */
    current_time: string;
    /** Presumably the maximum acceptable age in days (TODO confirm against the evaluator). */
    max_age_days?: number;
  };
  /** Details of a plurality check; expected only for `type: 'plurality'` (not enforced by this type). */
  plurality_analysis?: {
    /** True when the question asks for more than one item. */
    expects_multiple: boolean;
    /** True when the answer supplies more than one item. */
    provides_multiple: boolean;
    /** Number of items the question asks for, when it can be determined. */
    count_expected?: number;
    /** Number of items the answer actually supplies. */
    count_provided: number;
  };
};
|
||||
|
||||
export type ErrorAnalysisResponse = {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user