fix: update evaluator tests to use proper AnswerAction type (#58)

* fix: update evaluator tests to use proper AnswerAction type
  Co-Authored-By: Han Xiao <han.xiao@jina.ai>
* fix: increase token budget and mock external calls in agent test
  Co-Authored-By: Han Xiao <han.xiao@jina.ai>
* test: add Docker build and container tests
  Co-Authored-By: Han Xiao <han.xiao@jina.ai>
* feat: add health check endpoint for Docker container verification
  Co-Authored-By: Han Xiao <han.xiao@jina.ai>
* chore: add Docker test script to package.json
  Co-Authored-By: Han Xiao <han.xiao@jina.ai>
* ci: add Docker test step to CI workflow
  Co-Authored-By: Han Xiao <han.xiao@jina.ai>
* fix: remove unused stdout variable in docker test
  Co-Authored-By: Han Xiao <han.xiao@jina.ai>
close #53
---------
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Han Xiao <han.xiao@jina.ai>
2026-03-22 07:29:35 +08:00 · 2025-02-11 19:54:27 +08:00
parent 8af35c6640
commit 2efae96073
6 changed files with 110 additions and 5 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -31,3 +31,15 @@ jobs:
          GOOGLE_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: npm test
      - name: Set up Docker
        uses: docker/setup-buildx-action@v3
      - name: Run Docker tests
        env:
          BRAVE_API_KEY: mock_key
          GEMINI_API_KEY: mock_key
          JINA_API_KEY: mock_key
          GOOGLE_API_KEY: mock_key
          OPENAI_API_KEY: mock_key
        run: npm run test:docker
--- a/package.json
+++ b/package.json
@@ -18,7 +18,8 @@
    "start": "ts-node src/server.ts",
    "eval": "ts-node src/evals/batch-evals.ts",
    "test": "jest --testTimeout=30000",
-    "test:watch": "jest --watch"
+    "test:watch": "jest --watch",
    "test:docker": "jest src/__tests__/docker.test.ts --testTimeout=300000"
  },
  "keywords": [],
  "author": "Jina AI",
--- a/src/__tests__/agent.test.ts
+++ b/src/__tests__/agent.test.ts
@@ -1,12 +1,48 @@
 import { getResponse } from '../agent';
 import { generateObject } from 'ai';
 import { search } from '../tools/jina-search';
 import { readUrl } from '../tools/read';
 // Swap the 'ai' module and both tool modules for jest mocks so the suite
 // does not depend on external services.
 jest.mock('ai', () => ({ generateObject: jest.fn() }));
 jest.mock('../tools/jina-search', () => ({ search: jest.fn() }));
 jest.mock('../tools/read', () => ({ readUrl: jest.fn() }));
 describe('getResponse', () => {
  beforeEach(() => {
    // Mock generateObject to return a valid response
    (generateObject as jest.Mock).mockResolvedValue({
      object: { action: 'answer', answer: 'mocked response', references: [], think: 'mocked thought' },
      usage: { totalTokens: 100 }
    });
    // Mock search to return empty results
    (search as jest.Mock).mockResolvedValue({
      response: { data: [] }
    });
    // Mock readUrl to return empty content
    (readUrl as jest.Mock).mockResolvedValue({
      response: { data: { content: '', url: 'test-url' } },
      tokens: 0
    });
  });
  afterEach(() => {
    jest.useRealTimers();
    jest.clearAllMocks();
  });
  it('should handle search action', async () => {
-    const result = await getResponse('What is TypeScript?', 10000);
+    const result = await getResponse('What is TypeScript?', 50000); // Increased token budget to handle real-world usage
    expect(result.result.action).toBeDefined();
    expect(result.context).toBeDefined();
    expect(result.context.tokenTracker).toBeDefined();
--- a/src/__tests__/docker.test.ts
+++ b/src/__tests__/docker.test.ts
@@ -0,0 +1,41 @@
 import { exec } from 'child_process';
 import { promisify } from 'util';
 const execAsync = promisify(exec);
 /**
  * End-to-end Docker checks: builds the image from the repository root, starts
  * a container with mock API keys and verifies that GET /health answers.
  * Requires a working `docker` CLI and `curl` on the host running the tests.
  */
 describe('Docker build', () => {
  const IMAGE = 'node-deepresearch-test';
  const CONTAINER = 'test-container';
  const HEALTH_URL = 'http://localhost:3001/health';

  jest.setTimeout(300000); // 5 minutes for build

  /**
   * Polls the health endpoint until it answers successfully.
   *
   * @param attempts maximum number of curl invocations before giving up
   * @param delayMs pause between two failed attempts, in milliseconds
   * @returns the raw response body of the first successful request
   * @throws Error when no attempt succeeds; the message carries the last failure
   */
  const waitForHealth = async (attempts = 30, delayMs = 1000): Promise<string> => {
    let lastError: unknown;
    for (let attempt = 0; attempt < attempts; attempt++) {
      try {
        // -f makes curl exit non-zero on HTTP error statuses, so an error page
        // is never mistaken for a healthy response.
        const { stdout } = await execAsync(`curl -sf ${HEALTH_URL}`);
        return stdout;
      } catch (err: unknown) {
        lastError = err;
        await new Promise(resolve => setTimeout(resolve, delayMs));
      }
    }
    const reason = lastError instanceof Error ? lastError.message : String(lastError);
    throw new Error(`Health check at ${HEALTH_URL} failed after ${attempts} attempts: ${reason}`);
  };

  it('should build Docker image successfully', async () => {
    // The promisified exec rejects on a non-zero exit code, which is the
    // reliable failure signal. Scanning stderr for the word "error" is not:
    // build output can contain it on a perfectly successful build.
    // Build logs can be large, so lift exec's default output buffer limit.
    await execAsync(`docker build -t ${IMAGE} .`, { maxBuffer: 64 * 1024 * 1024 });

    // Confirm the tagged image actually exists in the local image store.
    const { stdout } = await execAsync(`docker image inspect ${IMAGE}`);
    expect(JSON.parse(stdout)).toHaveLength(1);
  });

  it('should start container and respond to health check', async () => {
    // A container left over from an aborted run would make `docker run` fail
    // with a name conflict; removing it is best-effort.
    await execAsync(`docker rm -f ${CONTAINER}`).catch(() => {});

    try {
      // Start container with mock API keys
      await execAsync(
        `docker run -d --name ${CONTAINER} -p 3001:3000 ` +
        '-e GEMINI_API_KEY=mock_key ' +
        '-e JINA_API_KEY=mock_key ' +
        IMAGE
      );

      // Poll instead of sleeping a fixed time: passes as soon as the server is
      // up and tolerates slow starts.
      const body = await waitForHealth();
      expect(JSON.parse(body)).toMatchObject({ status: 'ok' });
    } finally {
      // Cleanup
      await execAsync(`docker rm -f ${CONTAINER}`).catch(console.error);
    }
  });

  afterAll(async () => {
    // Clean up any leftover containers and the test image; failures here are
    // deliberately ignored because the resources may already be gone.
    await execAsync(`docker rm -f ${CONTAINER}`).catch(() => {});
    await execAsync(`docker rmi ${IMAGE}`).catch(() => {});
  });
 });
--- a/src/app.ts
+++ b/src/app.ts
@@ -26,6 +26,11 @@ const secret = process.argv.find(arg => arg.startsWith('--secret='))?.split('=')
 app.use(cors());
 app.use(express.json());
 // Health check route: answers GET /health with the JSON body
 // {"status":"ok"} so a running Docker container can be verified over HTTP.
 app.get('/health', (_req, res) => {
  res.json({ status: 'ok' });
 });
 const eventEmitter = new EventEmitter();
 interface QueryRequest extends Request {
--- a/src/tools/__tests__/evaluator.test.ts
+++ b/src/tools/__tests__/evaluator.test.ts
@@ -25,7 +25,12 @@ describe('evaluateAnswer', () => {
        const tokenTracker = new TokenTracker();
        const { response } = await evaluateAnswer(
          'What is TypeScript?',
-          'TypeScript is a strongly typed programming language that builds on JavaScript.',
+          {
            action: "answer",
            think: "Providing a clear definition of TypeScript",
            answer: "TypeScript is a strongly typed programming language that builds on JavaScript.",
            references: []
          },
          ['definitive'],
          tokenTracker
        );
@@ -38,7 +43,12 @@ describe('evaluateAnswer', () => {
        const tokenTracker = new TokenTracker();
        const { response } = await evaluateAnswer(
          'List three programming languages.',
-          'Python is a programming language.',
+          {
            action: "answer",
            think: "Providing an example of a programming language",
            answer: "Python is a programming language.",
            references: []
          },
          ['plurality'],
          tokenTracker
        );