jina-ai: billing for saas service (#55)

* wip: jina billing * wip * fix: build issues * ci: cd gh action * fix: make ci happy
2025-12-25 22:16:49 +08:00 · 2025-02-11 18:27:15 +08:00 · 2025-02-11 18:27:15 +08:00 · 8af35c6640
commit 8af35c6640
parent c4639a2e92
26 changed files with 6150 additions and 647 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -0,0 +1 @@
+node_modules
--- a/.eslintrc.js
+++ b/.eslintrc.js
@ -16,5 +16,6 @@ module.exports = {
  rules: {
    'no-console': ['error', { allow: ['log', 'error'] }],
    '@typescript-eslint/no-explicit-any': 'off'
-  }
+  },
+  ignorePatterns: ["jina-ai/**/*"]
 };
--- a/.github/workflows/cd.yml
+++ b/.github/workflows/cd.yml
@ -0,0 +1,66 @@
+run-name: Build push and deploy (CD)
+on:
+  push:
+    branches:
+      - main
+      - ci-debug
+    tags:
+      - '*'
+
+jobs:
+  build-and-push-to-gcr:
+    runs-on: ubuntu-latest
+    concurrency:
+      group: ${{ github.ref_type == 'branch' && github.ref }}
+      cancel-in-progress: true
+    permissions:
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          lfs: true
+      - uses: 'google-github-actions/auth@v2'
+        with:
+           credentials_json: '${{ secrets.GCLOUD_SERVICE_ACCOUNT_SECRET_JSON }}'
+      - name: 'Set up Cloud SDK'
+        uses: 'google-github-actions/setup-gcloud@v2'
+      - name: "Docker auth"
+        run: |-
+          gcloud auth configure-docker us-docker.pkg.dev --quiet
+      - name: Set controller release version
+        run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: 22.12.0
+          cache: npm
+
+      - name: npm install
+        run: npm ci
+      - name: build application
+        run: npm run build
+      - name: Set package version
+        run: npm version --no-git-tag-version ${{ env.RELEASE_VERSION }}
+        if: github.ref_type == 'tag'
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            us-docker.pkg.dev/research-df067/deepresearch/node-deep-research
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Build and push
+        id: container
+        uses: docker/build-push-action@v6
+        with:
+          file: jina-ai/Dockerfile
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+      - name: Deploy with Tag
+        run: |
+          gcloud run deploy node-deep-research --image us-docker.pkg.dev/research-df067/deepresearch/node-deep-research@${{steps.container.outputs.imageid}} --tag ${{ env.RELEASE_VERSION }} --region us-central1 --async
+      
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -0,0 +1,24 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+      {
+        "name": "Attach",
+        "port": 9229,
+        "request": "attach",
+        "skipFiles": [
+          "<node_internals>/**"
+        ],
+        "type": "node"
+      },
+      {
+        "name": "Attach by Process ID",
+        "processId": "${command:PickProcess}",
+        "request": "attach",
+        "skipFiles": [
+          "<node_internals>/**"
+        ],
+        "type": "node"
+      }
+    ]
+  }
+  
--- a/jina-ai/.dockerignore
+++ b/jina-ai/.dockerignore
@ -0,0 +1 @@
+node_modules
--- a/jina-ai/Dockerfile
+++ b/jina-ai/Dockerfile
@ -0,0 +1,50 @@
+# ---- BUILD STAGE ----
+FROM node:20-slim AS builder
+
+# Set working directory
+WORKDIR /app
+
+# Copy package.json and package-lock.json
+COPY ./package*.json ./
+COPY ./jina-ai/package*.json ./jina-ai/
+
+# Install dependencies
+RUN npm ci
+WORKDIR /app/jina-ai
+RUN npm ci
+
+WORKDIR /app
+
+# Copy application code
+COPY ./src ./src
+COPY ./config.json ./
+COPY ./tsconfig.json ./tsconfig.json
+RUN npm run build
+
+COPY ./jina-ai/src ./jina-ai/src
+COPY ./jina-ai/tsconfig.json ./jina-ai/tsconfig.json
+WORKDIR /app/jina-ai
+RUN npm run build
+
+# ---- PRODUCTION STAGE ----
+FROM node:20 AS production
+
+# Set working directory
+WORKDIR /app
+
+COPY --from=builder /app ./
+# Copy config.json and built files from builder
+
+WORKDIR /app/jina-ai
+
+# Set environment variables (Recommended to set at runtime, avoid hardcoding)
+ENV GEMINI_API_KEY=${GEMINI_API_KEY}
+ENV OPENAI_API_KEY=${OPENAI_API_KEY}
+ENV JINA_API_KEY=${JINA_API_KEY}
+ENV BRAVE_API_KEY=${BRAVE_API_KEY}
+
+# Expose the port the app runs on
+EXPOSE 3000
+
+# Set startup command
+CMD ["node", "./dist/server.js"]
--- a/jina-ai/package-lock.json
+++ b/jina-ai/package-lock.json
--- a/jina-ai/package.json
+++ b/jina-ai/package.json
@ -0,0 +1,38 @@
+{
+  "name": "@jina-ai/node-deepresearch",
+  "version": "1.0.0",
+  "main": "dist/app.js",
+  "files": [
+    "dist",
+    "README.md",
+    "LICENSE"
+  ],
+  "scripts": {
+    "build": "tsc",
+    "dev": "npx ts-node src/agent.ts",
+    "search": "npx ts-node src/test-duck.ts",
+    "rewrite": "npx ts-node src/tools/query-rewriter.ts",
+    "lint": "eslint . --ext .ts",
+    "lint:fix": "eslint . --ext .ts --fix",
+    "serve": "ts-node src/server.ts",
+    "eval": "ts-node src/evals/batch-evals.ts",
+    "test": "jest --testTimeout=30000",
+    "test:watch": "jest --watch"
+  },
+  "keywords": [],
+  "author": "Jina AI",
+  "license": "Apache-2.0",
+  "description": "",
+  "dependencies": {
+    "@google-cloud/firestore": "^7.11.0",
+    "civkit": "^0.8.3-15926cb",
+    "dayjs": "^1.11.13",
+    "lodash": "^4.17.21",
+    "reflect-metadata": "^0.2.2",
+    "tsyringe": "^4.8.0"
+  },
+  "devDependencies": {
+    "@types/lodash": "^4.17.15",
+    "pino-pretty": "^13.0.0"
+  }
+}
--- a/jina-ai/src/dto/jina-embeddings-auth.ts
+++ b/jina-ai/src/dto/jina-embeddings-auth.ts
@ -0,0 +1,347 @@
+import {
+    Also, AuthenticationFailedError, AuthenticationRequiredError,
+    DownstreamServiceFailureError, RPC_CALL_ENVIRONMENT,
+    ArrayOf, AutoCastable, Prop
+} from 'civkit/civ-rpc';
+import { parseJSONText } from 'civkit/vectorize';
+import { htmlEscape } from 'civkit/escape';
+import { marshalErrorLike } from 'civkit/lang';
+
+import type express from 'express';
+
+import logger from '../lib/logger';
+import { AsyncLocalContext } from '../lib/async-context';
+import { InjectProperty } from '../lib/registry';
+import { JinaEmbeddingsDashboardHTTP } from '../lib/billing';
+import envConfig from '../lib/env-config';
+
+import { FirestoreRecord } from '../lib/firestore';
+import _ from 'lodash';
+import { RateLimitDesc } from '../rate-limit';
+
+export class JinaWallet extends AutoCastable {
+    @Prop({
+        default: ''
+    })
+    user_id!: string;
+
+    @Prop({
+        default: 0
+    })
+    trial_balance!: number;
+
+    @Prop()
+    trial_start?: Date;
+
+    @Prop()
+    trial_end?: Date;
+
+    @Prop({
+        default: 0
+    })
+    regular_balance!: number;
+
+    @Prop({
+        default: 0
+    })
+    total_balance!: number;
+}
+
+export class JinaEmbeddingsTokenAccount extends FirestoreRecord {
+    static override collectionName = 'embeddingsTokenAccounts';
+
+    override _id!: string;
+
+    @Prop({
+        required: true
+    })
+    user_id!: string;
+
+    @Prop({
+        nullable: true,
+        type: String,
+    })
+    email?: string;
+
+    @Prop({
+        nullable: true,
+        type: String,
+    })
+    full_name?: string;
+
+    @Prop({
+        nullable: true,
+        type: String,
+    })
+    customer_id?: string;
+
+    @Prop({
+        nullable: true,
+        type: String,
+    })
+    avatar_url?: string;
+
+    // Not keeping sensitive info for now
+    // @Prop()
+    // billing_address?: object;
+
+    // @Prop()
+    // payment_method?: object;
+
+    @Prop({
+        required: true
+    })
+    wallet!: JinaWallet;
+
+    @Prop({
+        type: Object
+    })
+    metadata?: { [k: string]: any; };
+
+    @Prop({
+        defaultFactory: () => new Date()
+    })
+    lastSyncedAt!: Date;
+
+    @Prop({
+        dictOf: [ArrayOf(RateLimitDesc)]
+    })
+    customRateLimits?: { [k: string]: RateLimitDesc[]; };
+
+    static patchedFields = [
+    ];
+
+    static override from(input: any) {
+        for (const field of this.patchedFields) {
+            if (typeof input[field] === 'string') {
+                input[field] = parseJSONText(input[field]);
+            }
+        }
+
+        return super.from(input) as JinaEmbeddingsTokenAccount;
+    }
+
+    override degradeForFireStore() {
+        const copy: any = {
+            ...this,
+            wallet: { ...this.wallet },
+            // Firebase disability
+            customRateLimits: _.mapValues(this.customRateLimits, (v) => v.map((x) => ({ ...x }))),
+        };
+
+        for (const field of (this.constructor as typeof JinaEmbeddingsTokenAccount).patchedFields) {
+            if (typeof copy[field] === 'object') {
+                copy[field] = JSON.stringify(copy[field]) as any;
+            }
+        }
+
+        return copy;
+    }
+
+    [k: string]: any;
+}
+
+
+const authDtoLogger = logger.child({ service: 'JinaAuthDTO' });
+
+export interface FireBaseHTTPCtx {
+    req: express.Request,
+    res: express.Response,
+}
+
+const THE_VERY_SAME_JINA_EMBEDDINGS_CLIENT = new JinaEmbeddingsDashboardHTTP(envConfig.JINA_EMBEDDINGS_DASHBOARD_API_KEY);
+
+@Also({
+    openapi: {
+        operation: {
+            parameters: {
+                'Authorization': {
+                    description: htmlEscape`Jina Token for authentication.\n\n` +
+                        htmlEscape`- Member of <JinaEmbeddingsAuthDTO>\n\n` +
+                        `- Authorization: Bearer {YOUR_JINA_TOKEN}`
+                    ,
+                    in: 'header',
+                    schema: {
+                        anyOf: [
+                            { type: 'string', format: 'token' }
+                        ]
+                    }
+                }
+            }
+        }
+    }
+})
+export class JinaEmbeddingsAuthDTO extends AutoCastable {
+    uid?: string;
+    bearerToken?: string;
+    user?: JinaEmbeddingsTokenAccount;
+
+    @InjectProperty(AsyncLocalContext)
+    ctxMgr!: AsyncLocalContext;
+
+    jinaEmbeddingsDashboard = THE_VERY_SAME_JINA_EMBEDDINGS_CLIENT;
+
+    static override from(input: any) {
+        const instance = super.from(input) as JinaEmbeddingsAuthDTO;
+
+        const ctx = input[RPC_CALL_ENVIRONMENT];
+
+        const req = (ctx.rawRequest || ctx.req) as express.Request | undefined;
+
+        if (req) {
+            const authorization = req.get('authorization');
+
+            if (authorization) {
+                const authToken = authorization.split(' ')[1] || authorization;
+                instance.bearerToken = authToken;
+            }
+
+        }
+
+        if (!instance.bearerToken && input._token) {
+            instance.bearerToken = input._token;
+        }
+
+        return instance;
+    }
+
+    async getBrief(ignoreCache?: boolean | string) {
+        if (!this.bearerToken) {
+            throw new AuthenticationRequiredError({
+                message: 'Absence of bearer token'
+            });
+        }
+
+        let account;
+        try {
+            account = await JinaEmbeddingsTokenAccount.fromFirestore(this.bearerToken);
+        } catch (err) {
+            // FireStore would not accept any string as input and may throw if not happy with it
+            void 0;
+        }
+
+
+        const age = account?.lastSyncedAt ? Date.now() - account.lastSyncedAt.getTime() : Infinity;
+
+        if (account && !ignoreCache) {
+            if (account && age < 180_000) {
+                this.user = account;
+                this.uid = this.user?.user_id;
+
+                return account;
+            }
+        }
+
+        try {
+            const r = await this.jinaEmbeddingsDashboard.validateToken(this.bearerToken);
+            const brief = r.data;
+            const draftAccount = JinaEmbeddingsTokenAccount.from({
+                ...account, ...brief, _id: this.bearerToken,
+                lastSyncedAt: new Date()
+            });
+            await JinaEmbeddingsTokenAccount.save(draftAccount.degradeForFireStore(), undefined, { merge: true });
+
+            this.user = draftAccount;
+            this.uid = this.user?.user_id;
+
+            return draftAccount;
+        } catch (err: any) {
+            authDtoLogger.warn(`Failed to get user brief: ${err}`, { err: marshalErrorLike(err) });
+
+            if (err?.status === 401) {
+                throw new AuthenticationFailedError({
+                    message: 'Invalid bearer token'
+                });
+            }
+
+            if (account) {
+                this.user = account;
+                this.uid = this.user?.user_id;
+
+                return account;
+            }
+
+
+            throw new DownstreamServiceFailureError(`Failed to authenticate: ${err}`);
+        }
+    }
+
+    async reportUsage(tokenCount: number, mdl: string, endpoint: string = '/encode') {
+        const user = await this.assertUser();
+        const uid = user.user_id;
+        user.wallet.total_balance -= tokenCount;
+
+        return this.jinaEmbeddingsDashboard.reportUsage(this.bearerToken!, {
+            model_name: mdl,
+            api_endpoint: endpoint,
+            consumer: {
+                id: uid,
+                user_id: uid,
+            },
+            usage: {
+                total_tokens: tokenCount
+            },
+            labels: {
+                model_name: mdl
+            }
+        }).then((r) => {
+            JinaEmbeddingsTokenAccount.COLLECTION.doc(this.bearerToken!)
+                .update({ 'wallet.total_balance': JinaEmbeddingsTokenAccount.OPS.increment(-tokenCount) })
+                .catch((err) => {
+                    authDtoLogger.warn(`Failed to update cache for ${uid}: ${err}`, { err: marshalErrorLike(err) });
+                });
+
+            return r;
+        }).catch((err) => {
+            user.wallet.total_balance += tokenCount;
+            authDtoLogger.warn(`Failed to report usage for ${uid}: ${err}`, { err: marshalErrorLike(err) });
+        });
+    }
+
+    async solveUID() {
+        if (this.uid) {
+            this.ctxMgr.set('uid', this.uid);
+
+            return this.uid;
+        }
+
+        if (this.bearerToken) {
+            await this.getBrief();
+            this.ctxMgr.set('uid', this.uid);
+
+            return this.uid;
+        }
+
+        return undefined;
+    }
+
+    async assertUID() {
+        const uid = await this.solveUID();
+
+        if (!uid) {
+            throw new AuthenticationRequiredError('Authentication failed');
+        }
+
+        return uid;
+    }
+
+    async assertUser() {
+        if (this.user) {
+            return this.user;
+        }
+
+        await this.getBrief();
+
+        return this.user!;
+    }
+
+    getRateLimits(...tags: string[]) {
+        const descs = tags.map((x) => this.user?.customRateLimits?.[x] || []).flat().filter((x) => x.isEffective());
+
+        if (descs.length) {
+            return descs;
+        }
+
+        return undefined;
+    }
+}
--- a/jina-ai/src/lib/async-context.ts
+++ b/jina-ai/src/lib/async-context.ts
@ -0,0 +1,9 @@
+import { GlobalAsyncContext } from 'civkit/async-context';
+import { container, singleton } from 'tsyringe';
+
+@singleton()
+export class AsyncLocalContext extends GlobalAsyncContext {}
+
+const instance = container.resolve(AsyncLocalContext);
+Reflect.set(process, 'asyncLocalContext', instance);
+export default instance;
--- a/jina-ai/src/lib/billing.ts
+++ b/jina-ai/src/lib/billing.ts
@ -0,0 +1,102 @@
+import { HTTPService } from 'civkit';
+import _ from 'lodash';
+
+
+export interface JinaWallet {
+    trial_balance: number;
+    trial_start: Date;
+    trial_end: Date;
+    regular_balance: number;
+    total_balance: number;
+}
+
+
+export interface JinaUserBrief {
+    user_id: string;
+    email: string | null;
+    full_name: string | null;
+    customer_id: string | null;
+    avatar_url?: string;
+    billing_address: Partial<{
+        address: string;
+        city: string;
+        state: string;
+        country: string;
+        postal_code: string;
+    }>;
+    payment_method: Partial<{
+        brand: string;
+        last4: string;
+        exp_month: number;
+        exp_year: number;
+    }>;
+    wallet: JinaWallet;
+    metadata: {
+        [k: string]: any;
+    };
+}
+
+export interface JinaUsageReport {
+    model_name: string;
+    api_endpoint: string;
+    consumer: {
+        user_id: string;
+        customer_plan?: string;
+        [k: string]: any;
+    };
+    usage: {
+        total_tokens: number;
+    };
+    labels: {
+        user_type?: string;
+        model_name?: string;
+        [k: string]: any;
+    };
+}
+
+export class JinaEmbeddingsDashboardHTTP extends HTTPService {
+    name = 'JinaEmbeddingsDashboardHTTP';
+
+    constructor(
+        public apiKey: string,
+        public baseUri: string = 'https://embeddings-dashboard-api.jina.ai/api'
+    ) {
+        super(baseUri);
+
+        this.baseOptions.timeout = 30_000;  // 30 sec
+    }
+
+    async authorization(token: string) {
+        const r = await this.get<JinaUserBrief>('/v1/authorization', {
+            headers: {
+                Authorization: `Bearer ${token}`
+            },
+            responseType: 'json',
+        });
+
+        return r;
+    }
+
+    async validateToken(token: string) {
+        const r = await this.getWithSearchParams<JinaUserBrief>('/v1/api_key/user', {
+            api_key: token,
+        }, {
+            responseType: 'json',
+        });
+
+        return r;
+    }
+
+    async reportUsage(token: string, query: JinaUsageReport) {
+        const r = await this.postJson('/v1/usage', query, {
+            headers: {
+                Authorization: `Bearer ${token}`,
+                'x-api-key': this.apiKey,
+            },
+            responseType: 'text',
+        });
+
+        return r;
+    }
+
+}
--- a/jina-ai/src/lib/env-config.ts
+++ b/jina-ai/src/lib/env-config.ts
@ -0,0 +1,59 @@
+import { container, singleton } from 'tsyringe';
+
+export const SPECIAL_COMBINED_ENV_KEY = 'ENV_COMBINED';
+const CONF_ENV = [
+    'OPENAI_API_KEY',
+
+    'ANTHROPIC_API_KEY',
+
+    'REPLICATE_API_KEY',
+
+    'GOOGLE_AI_STUDIO_API_KEY',
+
+    'JINA_EMBEDDINGS_API_KEY',
+
+    'JINA_EMBEDDINGS_DASHBOARD_API_KEY',
+
+    'BRAVE_SEARCH_API_KEY',
+
+] as const;
+
+
+@singleton()
+export class EnvConfig {
+    dynamic!: Record<string, string>;
+
+    combined: Record<string, string> = {};
+    originalEnv: Record<string, string | undefined> = { ...process.env };
+
+    constructor() {
+        if (process.env[SPECIAL_COMBINED_ENV_KEY]) {
+            Object.assign(this.combined, JSON.parse(
+                Buffer.from(process.env[SPECIAL_COMBINED_ENV_KEY]!, 'base64').toString('utf-8')
+            ));
+            delete process.env[SPECIAL_COMBINED_ENV_KEY];
+        }
+
+        // Static config
+        for (const x of CONF_ENV) {
+            const s = this.combined[x] || process.env[x] || '';
+            Reflect.set(this, x, s);
+            if (x in process.env) {
+                delete process.env[x];
+            }
+        }
+
+        // Dynamic config
+        this.dynamic = new Proxy({
+            get: (_target: any, prop: string) => {
+                return this.combined[prop] || process.env[prop] || '';
+            }
+        }, {}) as any;
+    }
+}
+
+// eslint-disable-next-line @typescript-eslint/no-empty-interface
+export interface EnvConfig extends Record<typeof CONF_ENV[number], string> { }
+
+const instance = container.resolve(EnvConfig);
+export default instance;
--- a/jina-ai/src/lib/errors.ts
+++ b/jina-ai/src/lib/errors.ts
@ -0,0 +1,70 @@
+import { ApplicationError, Prop, RPC_TRANSFER_PROTOCOL_META_SYMBOL, StatusCode } from 'civkit';
+import _ from 'lodash';
+import dayjs from 'dayjs';
+import utc from 'dayjs/plugin/utc';
+
+dayjs.extend(utc);
+
+@StatusCode(50301)
+export class ServiceDisabledError extends ApplicationError { }
+
+@StatusCode(50302)
+export class ServiceCrashedError extends ApplicationError { }
+
+@StatusCode(50303)
+export class ServiceNodeResourceDrainError extends ApplicationError { }
+
+@StatusCode(40104)
+export class EmailUnverifiedError extends ApplicationError { }
+
+@StatusCode(40201)
+export class InsufficientCreditsError extends ApplicationError { }
+
+@StatusCode(40202)
+export class FreeFeatureLimitError extends ApplicationError { }
+
+@StatusCode(40203)
+export class InsufficientBalanceError extends ApplicationError { }
+
+@StatusCode(40903)
+export class LockConflictError extends ApplicationError { }
+
+@StatusCode(40904)
+export class BudgetExceededError extends ApplicationError { }
+
+@StatusCode(45101)
+export class HarmfulContentError extends ApplicationError { }
+
+@StatusCode(45102)
+export class SecurityCompromiseError extends ApplicationError { }
+
+@StatusCode(41201)
+export class BatchSizeTooLargeError extends ApplicationError { }
+
+
+@StatusCode(42903)
+export class RateLimitTriggeredError extends ApplicationError {
+
+    @Prop({
+        desc: 'Retry after seconds',
+    })
+    retryAfter?: number;
+
+    @Prop({
+        desc: 'Retry after date',
+    })
+    retryAfterDate?: Date;
+
+    protected override get [RPC_TRANSFER_PROTOCOL_META_SYMBOL]() {
+        const retryAfter = this.retryAfter || this.retryAfterDate;
+        if (!retryAfter) {
+            return super[RPC_TRANSFER_PROTOCOL_META_SYMBOL];
+        }
+
+        return _.merge(_.cloneDeep(super[RPC_TRANSFER_PROTOCOL_META_SYMBOL]), {
+            headers: {
+                'Retry-After': `${retryAfter instanceof Date ? dayjs(retryAfter).utc().format('ddd, DD MMM YYYY HH:mm:ss [GMT]') : retryAfter}`,
+            }
+        });
+    }
+}
--- a/jina-ai/src/lib/firestore.ts
+++ b/jina-ai/src/lib/firestore.ts
@ -0,0 +1,223 @@
+import _ from 'lodash';
+import { AutoCastable, Prop, RPC_MARSHAL } from 'civkit/civ-rpc';
+import {
+    Firestore, FieldValue, DocumentReference,
+    Query, Timestamp, SetOptions, DocumentSnapshot,
+} from '@google-cloud/firestore';
+
+// Firestore doesn't support JavaScript objects with custom prototypes (i.e. objects that were created via the \"new\" operator)
+function patchFireStoreArrogance(func: Function) {
+    return function (this: unknown) {
+        const origObjectGetPrototype = Object.getPrototypeOf;
+        Object.getPrototypeOf = function (x) {
+            const r = origObjectGetPrototype.call(this, x);
+            if (!r) {
+                return r;
+            }
+            return Object.prototype;
+        };
+        try {
+            return func.call(this, ...arguments);
+        } finally {
+            Object.getPrototypeOf = origObjectGetPrototype;
+        }
+    };
+}
+
+Reflect.set(DocumentReference.prototype, 'set', patchFireStoreArrogance(Reflect.get(DocumentReference.prototype, 'set')));
+Reflect.set(DocumentSnapshot, 'fromObject', patchFireStoreArrogance(Reflect.get(DocumentSnapshot, 'fromObject')));
+
+function mapValuesDeep(v: any, fn: (i: any) => any): any {
+    if (_.isPlainObject(v)) {
+        return _.mapValues(v, (i) => mapValuesDeep(i, fn));
+    } else if (_.isArray(v)) {
+        return v.map((i) => mapValuesDeep(i, fn));
+    } else {
+        return fn(v);
+    }
+}
+
+export type Constructor<T> = { new(...args: any[]): T; };
+export type Constructed<T> = T extends Partial<infer U> ? U : T extends object ? T : object;
+
+export function fromFirestore<T extends FirestoreRecord>(
+    this: Constructor<T>, id: string, overrideCollection?: string
+): Promise<T | undefined>;
+export async function fromFirestore(
+    this: any, id: string, overrideCollection?: string
+) {
+    const collection = overrideCollection || this.collectionName;
+    if (!collection) {
+        throw new Error(`Missing collection name to construct ${this.name}`);
+    }
+
+    const ref = this.DB.collection(overrideCollection || this.collectionName).doc(id);
+
+    const ptr = await ref.get();
+
+    if (!ptr.exists) {
+        return undefined;
+    }
+
+    const doc = this.from(
+        // Fixes non-native firebase types
+        mapValuesDeep(ptr.data(), (i: any) => {
+            if (i instanceof Timestamp) {
+                return i.toDate();
+            }
+
+            return i;
+        })
+    );
+
+    Object.defineProperty(doc, '_ref', { value: ref, enumerable: false });
+    Object.defineProperty(doc, '_id', { value: ptr.id, enumerable: true });
+
+    return doc;
+}
+
+export function fromFirestoreQuery<T extends FirestoreRecord>(
+    this: Constructor<T>, query: Query
+): Promise<T[]>;
+export async function fromFirestoreQuery(this: any, query: Query) {
+    const ptr = await query.get();
+
+    if (ptr.docs.length) {
+        return ptr.docs.map(doc => {
+            const r = this.from(
+                mapValuesDeep(doc.data(), (i: any) => {
+                    if (i instanceof Timestamp) {
+                        return i.toDate();
+                    }
+
+                    return i;
+                })
+            );
+            Object.defineProperty(r, '_ref', { value: doc.ref, enumerable: false });
+            Object.defineProperty(r, '_id', { value: doc.id, enumerable: true });
+
+            return r;
+        });
+    }
+
+    return [];
+}
+
+export function setToFirestore<T extends FirestoreRecord>(
+    this: Constructor<T>, doc: T, overrideCollection?: string, setOptions?: SetOptions
+): Promise<T>;
+export async function setToFirestore(
+    this: any, doc: any, overrideCollection?: string, setOptions?: SetOptions
+) {
+    let ref: DocumentReference<any> = doc._ref;
+    if (!ref) {
+        const collection = overrideCollection || this.collectionName;
+        if (!collection) {
+            throw new Error(`Missing collection name to construct ${this.name}`);
+        }
+
+        const predefinedId = doc._id || undefined;
+        const hdl = this.DB.collection(overrideCollection || this.collectionName);
+        ref = predefinedId ? hdl.doc(predefinedId) : hdl.doc();
+
+        Object.defineProperty(doc, '_ref', { value: ref, enumerable: false });
+        Object.defineProperty(doc, '_id', { value: ref.id, enumerable: true });
+    }
+
+    await ref.set(doc, { merge: true, ...setOptions });
+
+    return doc;
+}
+
+export function deleteQueryBatch<T extends FirestoreRecord>(
+    this: Constructor<T>, query: Query
+): Promise<T>;
+export async function deleteQueryBatch(this: any, query: Query) {
+    const snapshot = await query.get();
+
+    const batchSize = snapshot.size;
+    if (batchSize === 0) {
+        return;
+    }
+
+    // Delete documents in a batch
+    const batch = this.DB.batch();
+    snapshot.docs.forEach((doc) => {
+        batch.delete(doc.ref);
+    });
+    await batch.commit();
+
+    process.nextTick(() => {
+        this.deleteQueryBatch(query);
+    });
+};
+
+export function fromFirestoreDoc<T extends FirestoreRecord>(
+    this: Constructor<T>, snapshot: DocumentSnapshot,
+): T | undefined;
+export function fromFirestoreDoc(
+    this: any, snapshot: DocumentSnapshot,
+) {
+    const doc = this.from(
+        // Fixes non-native firebase types
+        mapValuesDeep(snapshot.data(), (i: any) => {
+            if (i instanceof Timestamp) {
+                return i.toDate();
+            }
+
+            return i;
+        })
+    );
+
+    Object.defineProperty(doc, '_ref', { value: snapshot.ref, enumerable: false });
+    Object.defineProperty(doc, '_id', { value: snapshot.id, enumerable: true });
+
+    return doc;
+}
+const defaultFireStore = new Firestore({
+    projectId: process.env.GCLOUD_PROJECT,
+});
+export class FirestoreRecord extends AutoCastable {
+    static collectionName?: string;
+    static OPS = FieldValue;
+    static DB = defaultFireStore;
+    static get COLLECTION() {
+        if (!this.collectionName) {
+            throw new Error('Not implemented');
+        }
+
+        return this.DB.collection(this.collectionName);
+    }
+
+    @Prop()
+    _id?: string;
+    _ref?: DocumentReference<Partial<Omit<this, '_ref' | '_id'>>>;
+
+    static fromFirestore = fromFirestore;
+    static fromFirestoreDoc = fromFirestoreDoc;
+    static fromFirestoreQuery = fromFirestoreQuery;
+
+    static save = setToFirestore;
+    static deleteQueryBatch = deleteQueryBatch;
+
+    [RPC_MARSHAL]() {
+        return {
+            ...this,
+            _id: this._id,
+            _ref: this._ref?.path
+        };
+    }
+
+    degradeForFireStore(): this {
+        return JSON.parse(JSON.stringify(this, function (k, v) {
+            if (k === '') {
+                return v;
+            }
+            if (typeof v === 'object' && v && (typeof v.degradeForFireStore === 'function')) {
+                return v.degradeForFireStore();
+            }
+
+            return v;
+        }));
+    }
+}
--- a/jina-ai/src/lib/logger.ts
+++ b/jina-ai/src/lib/logger.ts
@ -0,0 +1,56 @@
+import { AbstractPinoLogger } from 'civkit/pino-logger';
+import { singleton, container } from 'tsyringe';
+import { threadId } from 'node:worker_threads';
+import { getTraceCtx } from 'civkit/async-context';
+
+
+const levelToSeverityMap: { [k: string]: string | undefined; } = {
+    trace: 'DEFAULT',
+    debug: 'DEBUG',
+    info: 'INFO',
+    warn: 'WARNING',
+    error: 'ERROR',
+    fatal: 'CRITICAL',
+};
+
+@singleton()
+export class GlobalLogger extends AbstractPinoLogger {
+    loggerOptions = {
+        level: 'debug',
+        base: {
+            tid: threadId,
+        }
+    };
+
+    override init(): void {
+        if (process.env['NODE_ENV']?.startsWith('prod')) {
+            super.init(process.stdout);
+        } else {
+            const PinoPretty = require('pino-pretty').PinoPretty;
+            super.init(PinoPretty({
+                singleLine: true,
+                colorize: true,
+                messageFormat(log: any, messageKey: any) {
+                    return `${log['tid'] ? `[${log['tid']}]` : ''}[${log['service'] || 'ROOT'}] ${log[messageKey]}`;
+                },
+            }));
+        }
+
+
+        this.emit('ready');
+    }
+
+    override log(...args: any[]) {
+        const [levelObj, ...rest] = args;
+        const severity = levelToSeverityMap[levelObj?.level];
+        const traceCtx = getTraceCtx();
+        const patched: any= { ...levelObj, severity };
+        if (traceCtx?.traceId && process.env['GCLOUD_PROJECT']) {
+            patched['logging.googleapis.com/trace'] = `projects/${process.env['GCLOUD_PROJECT']}/traces/${traceCtx.traceId}`;
+        }
+        return super.log(patched, ...rest);
+    }
+}
+
+const instance = container.resolve(GlobalLogger);
+export default instance;
--- a/jina-ai/src/lib/registry.ts
+++ b/jina-ai/src/lib/registry.ts
@ -0,0 +1,4 @@
+import { container } from 'tsyringe';
+import { propertyInjectorFactory } from 'civkit/property-injector';
+
+export const InjectProperty = propertyInjectorFactory(container);
--- a/jina-ai/src/patch-express.ts
+++ b/jina-ai/src/patch-express.ts
@ -0,0 +1,93 @@
+import { ApplicationError, RPC_CALL_ENVIRONMENT } from "civkit/civ-rpc";
+import { marshalErrorLike } from "civkit/lang";
+import { randomUUID } from "crypto";
+import { once } from "events";
+import type { NextFunction, Request, Response } from "express";
+
+import { JinaEmbeddingsAuthDTO } from "./dto/jina-embeddings-auth";
+import rateLimitControl, { API_CALL_STATUS, RateLimitDesc } from "./rate-limit";
+import asyncLocalContext from "./lib/async-context";
+import globalLogger from "./lib/logger";
+
+globalLogger.serviceReady();
+const logger = globalLogger.child({ service: 'BillingMiddleware' });
+
+const appName = 'DEEPRESEARCH';
+export const jinaAiBillingMiddleware = (req: Request, res: Response, next: NextFunction) => {
+    if (req.path === '/ping') {
+        res.status(200).end('pone');
+        return;
+    }
+    if (req.method !== 'POST' && req.method !== 'GET') {
+        next();
+        return;
+    }
+    asyncLocalContext.run(async () => {
+        const googleTraceId = req.get('x-cloud-trace-context')?.split('/')?.[0];
+        const ctx = asyncLocalContext.ctx;
+        ctx.traceId = req.get('x-request-id') || req.get('request-id') || googleTraceId || randomUUID();
+        ctx.traceT0 = new Date();
+        ctx.ip = req?.ip;
+
+        try {
+            const authDto = JinaEmbeddingsAuthDTO.from({
+                [RPC_CALL_ENVIRONMENT]: { req, res }
+            });
+
+            const user = await authDto.assertUser();
+            await rateLimitControl.serviceReady();
+            const rateLimitPolicy = authDto.getRateLimits(appName) || [
+                parseInt(user.metadata?.speed_level) >= 2 ?
+                    RateLimitDesc.from({
+                        occurrence: 30,
+                        periodSeconds: 60
+                    }) :
+                    RateLimitDesc.from({
+                        occurrence: 10,
+                        periodSeconds: 60
+                    })
+            ];
+            const criterions = rateLimitPolicy.map((c) => rateLimitControl.rateLimitDescToCriterion(c));
+            await Promise.all(criterions.map(([pointInTime, n]) => rateLimitControl.assertUidPeriodicLimit(user._id, pointInTime, n, appName)));
+
+            const apiRoll = rateLimitControl.record({ uid: user._id, tags: [appName] })
+            apiRoll.save().catch((err) => logger.warn(`Failed to save rate limit record`, { err: marshalErrorLike(err) }));
+
+            const pResClose = once(res, 'close');
+
+            next();
+
+            await pResClose;
+            const chargeAmount = ctx.chargeAmount;
+            if (chargeAmount) {
+                authDto.reportUsage(chargeAmount, `reader-${appName}`).catch((err) => {
+                    logger.warn(`Unable to report usage for ${user._id}`, { err: marshalErrorLike(err) });
+                });
+                apiRoll.chargeAmount = chargeAmount;
+            }
+            apiRoll.status = res.statusCode === 200 ? API_CALL_STATUS.SUCCESS : API_CALL_STATUS.ERROR;
+            apiRoll.save().catch((err) => logger.warn(`Failed to save rate limit record`, { err: marshalErrorLike(err) }));
+            logger.info(`HTTP ${res.statusCode} for request ${ctx.traceId} after ${Date.now() - ctx.traceT0.valueOf()}ms`, {
+                uid: user._id,
+                chargeAmount,
+            });
+
+        } catch (err: any) {
+            if (!res.headersSent) {
+                if (err instanceof ApplicationError) {
+                    res.status(parseInt(err.code as string) || 500).json({ error: err.message });
+
+                    return;
+                }
+
+                res.status(500).json({ error: 'Internal' });
+            }
+
+            logger.error(`Error in billing middleware`, { err: marshalErrorLike(err) });
+            if (err.stack) {
+                logger.error(err.stack);
+            }
+        }
+
+    });
+}
--- a/jina-ai/src/rate-limit.ts
+++ b/jina-ai/src/rate-limit.ts
@ -0,0 +1,278 @@
+import { AutoCastable, ResourcePolicyDenyError, Also, Prop } from 'civkit/civ-rpc';
+import { AsyncService } from 'civkit/async-service';
+import { getTraceId } from 'civkit/async-context';
+import { singleton, container } from 'tsyringe';
+
+import { RateLimitTriggeredError } from './lib/errors';
+import { FirestoreRecord } from './lib/firestore';
+import { GlobalLogger } from './lib/logger';
+
+export enum API_CALL_STATUS {
+    SUCCESS = 'success',
+    ERROR = 'error',
+    PENDING = 'pending',
+}
+
+@Also({ dictOf: Object })
+export class APICall extends FirestoreRecord {
+    static override collectionName = 'apiRoll';
+
+    @Prop({
+        required: true,
+        defaultFactory: () => getTraceId()
+    })
+    traceId!: string;
+
+    @Prop()
+    uid?: string;
+
+    @Prop()
+    ip?: string;
+
+    @Prop({
+        arrayOf: String,
+        default: [],
+    })
+    tags!: string[];
+
+    @Prop({
+        required: true,
+        defaultFactory: () => new Date(),
+    })
+    createdAt!: Date;
+
+    @Prop()
+    completedAt?: Date;
+
+    @Prop({
+        required: true,
+        default: API_CALL_STATUS.PENDING,
+    })
+    status!: API_CALL_STATUS;
+
+    @Prop({
+        required: true,
+        defaultFactory: () => new Date(Date.now() + 1000 * 60 * 60 * 24 * 90),
+    })
+    expireAt!: Date;
+
+    [k: string]: any;
+
+    tag(...tags: string[]) {
+        for (const t of tags) {
+            if (!this.tags.includes(t)) {
+                this.tags.push(t);
+            }
+        }
+    }
+
+    save() {
+        return (this.constructor as typeof APICall).save(this);
+    }
+}
+
+
+export class RateLimitDesc extends AutoCastable {
+    @Prop({
+        default: 1000
+    })
+    occurrence!: number;
+
+    @Prop({
+        default: 3600
+    })
+    periodSeconds!: number;
+
+    @Prop()
+    notBefore?: Date;
+
+    @Prop()
+    notAfter?: Date;
+
+    isEffective() {
+        const now = new Date();
+        if (this.notBefore && this.notBefore > now) {
+            return false;
+        }
+        if (this.notAfter && this.notAfter < now) {
+            return false;
+        }
+
+        return true;
+    }
+}
+
+
+@singleton()
+export class RateLimitControl extends AsyncService {
+
+    logger = this.globalLogger.child({ service: this.constructor.name });
+
+    constructor(
+        protected globalLogger: GlobalLogger,
+    ) {
+        super(...arguments);
+    }
+
+    override async init() {
+        await this.dependencyReady();
+
+        this.emit('ready');
+    }
+
+    async queryByUid(uid: string, pointInTime: Date, ...tags: string[]) {
+        let q = APICall.COLLECTION
+            .orderBy('createdAt', 'asc')
+            .where('createdAt', '>=', pointInTime)
+            .where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
+            .where('uid', '==', uid);
+        if (tags.length) {
+            q = q.where('tags', 'array-contains-any', tags);
+        }
+
+        return APICall.fromFirestoreQuery(q);
+    }
+
+    async queryByIp(ip: string, pointInTime: Date, ...tags: string[]) {
+        let q = APICall.COLLECTION
+            .orderBy('createdAt', 'asc')
+            .where('createdAt', '>=', pointInTime)
+            .where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
+            .where('ip', '==', ip);
+        if (tags.length) {
+            q = q.where('tags', 'array-contains-any', tags);
+        }
+
+        return APICall.fromFirestoreQuery(q);
+    }
+
+    async assertUidPeriodicLimit(uid: string, pointInTime: Date, limit: number, ...tags: string[]) {
+        if (limit <= 0) {
+            throw new ResourcePolicyDenyError(`This UID(${uid}) is not allowed to call this endpoint (rate limit quota is 0).`);
+        }
+
+        let q = APICall.COLLECTION
+            .orderBy('createdAt', 'asc')
+            .where('createdAt', '>=', pointInTime)
+            .where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
+            .where('uid', '==', uid);
+        if (tags.length) {
+            q = q.where('tags', 'array-contains-any', tags);
+        }
+        const count = (await q.count().get()).data().count;
+
+        if (count >= limit) {
+            const r = await APICall.fromFirestoreQuery(q.limit(1));
+            const [r1] = r;
+
+            const dtMs = Math.abs(r1.createdAt?.valueOf() - pointInTime.valueOf());
+            const dtSec = Math.ceil(dtMs / 1000);
+
+            throw RateLimitTriggeredError.from({
+                message: `Per UID rate limit exceeded (${tags.join(',') || 'called'} ${limit} times since ${pointInTime})`,
+                retryAfter: dtSec,
+            });
+        }
+
+        return count + 1;
+    }
+
+    async assertIPPeriodicLimit(ip: string, pointInTime: Date, limit: number, ...tags: string[]) {
+        let q = APICall.COLLECTION
+            .orderBy('createdAt', 'asc')
+            .where('createdAt', '>=', pointInTime)
+            .where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
+            .where('ip', '==', ip);
+        if (tags.length) {
+            q = q.where('tags', 'array-contains-any', tags);
+        }
+
+        const count = (await q.count().get()).data().count;
+
+        if (count >= limit) {
+            const r = await APICall.fromFirestoreQuery(q.limit(1));
+            const [r1] = r;
+
+            const dtMs = Math.abs(r1.createdAt?.valueOf() - pointInTime.valueOf());
+            const dtSec = Math.ceil(dtMs / 1000);
+
+            throw RateLimitTriggeredError.from({
+                message: `Per IP rate limit exceeded (${tags.join(',') || 'called'} ${limit} times since ${pointInTime})`,
+                retryAfter: dtSec,
+            });
+        }
+
+        return count + 1;
+    }
+
+    record(partialRecord: Partial<APICall>) {
+        const record = APICall.from(partialRecord);
+        const newId = APICall.COLLECTION.doc().id;
+        record._id = newId;
+
+        return record;
+    }
+
+    // async simpleRPCUidBasedLimit(rpcReflect: RPCReflection, uid: string, tags: string[] = [],
+    //     ...inputCriterion: RateLimitDesc[] | [Date, number][]) {
+    //     const criterion = inputCriterion.map((c) => { return Array.isArray(c) ? c : this.rateLimitDescToCriterion(c); });
+
+    //     await Promise.all(criterion.map(([pointInTime, n]) =>
+    //         this.assertUidPeriodicLimit(uid, pointInTime, n, ...tags)));
+
+    //     const r = this.record({
+    //         uid,
+    //         tags,
+    //     });
+
+    //     r.save().catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
+    //     rpcReflect.then(() => {
+    //         r.status = API_CALL_STATUS.SUCCESS;
+    //         r.save()
+    //             .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
+    //     });
+    //     rpcReflect.catch((err) => {
+    //         r.status = API_CALL_STATUS.ERROR;
+    //         r.error = err.toString();
+    //         r.save()
+    //             .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
+    //     });
+
+    //     return r;
+    // }
+
+    rateLimitDescToCriterion(rateLimitDesc: RateLimitDesc) {
+        return [new Date(Date.now() - rateLimitDesc.periodSeconds * 1000), rateLimitDesc.occurrence] as [Date, number];
+    }
+
+    // async simpleRpcIPBasedLimit(rpcReflect: RPCReflection, ip: string, tags: string[] = [],
+    //     ...inputCriterion: RateLimitDesc[] | [Date, number][]) {
+    //     const criterion = inputCriterion.map((c) => { return Array.isArray(c) ? c : this.rateLimitDescToCriterion(c); });
+    //     await Promise.all(criterion.map(([pointInTime, n]) =>
+    //         this.assertIPPeriodicLimit(ip, pointInTime, n, ...tags)));
+
+    //     const r = this.record({
+    //         ip,
+    //         tags,
+    //     });
+
+    //     r.save().catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
+    //     rpcReflect.then(() => {
+    //         r.status = API_CALL_STATUS.SUCCESS;
+    //         r.save()
+    //             .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
+    //     });
+    //     rpcReflect.catch((err) => {
+    //         r.status = API_CALL_STATUS.ERROR;
+    //         r.error = err.toString();
+    //         r.save()
+    //             .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
+    //     });
+
+    //     return r;
+    // }
+}
+
+const instance = container.resolve(RateLimitControl);
+
+export default instance;
--- a/jina-ai/src/server.ts
+++ b/jina-ai/src/server.ts
@ -0,0 +1,56 @@
+import 'reflect-metadata'
+import express from 'express';
+import { jinaAiBillingMiddleware } from "./patch-express";
+import { Server } from 'http';
+
+const app = require('../..').default;
+
+const rootApp = express();
+rootApp.use(jinaAiBillingMiddleware, app);
+
+
+const port = process.env.PORT || 3000;
+
+let server: Server | undefined;
+// Export server startup function for better testing
+export function startServer() {
+  return rootApp.listen(port, () => {
+    console.log(`Server running at http://localhost:${port}`);
+  });
+}
+
+// Start server if running directly
+if (process.env.NODE_ENV !== 'test') {
+  server = startServer();
+}
+
+process.on('unhandledRejection', (_err) => `Is false alarm`);
+
+process.on('uncaughtException', (err) => {
+  console.log('Uncaught exception', err);
+
+  // Looks like Firebase runtime does not handle error properly.
+  // Make sure to quit the process.
+  process.nextTick(() => process.exit(1));
+  console.error('Uncaught exception, process quit.');
+  throw err;
+});
+
+const sigHandler = (signal: string) => {
+  console.log(`Received ${signal}, exiting...`);
+  if (server && server.listening) {
+    console.log(`Shutting down gracefully...`);
+    console.log(`Waiting for the server to drain and close...`);
+    server.close((err) => {
+      if (err) {
+        console.error('Error while closing server', err);
+        return;
+      }
+      process.exit(0);
+    });
+    server.closeIdleConnections();
+  }
+
+}
+process.on('SIGTERM', sigHandler);
+process.on('SIGINT', sigHandler);
--- a/jina-ai/tsconfig.json
+++ b/jina-ai/tsconfig.json
@ -0,0 +1,17 @@
+{
+    "compilerOptions": {
+      "target": "ES2020",
+      "module": "node16",
+      "outDir": "./dist",
+      "rootDir": "./src",
+      "sourceMap": true,
+      "esModuleInterop": true,
+      "skipLibCheck": true,
+      "forceConsistentCasingInFileNames": true,
+      "strict": true,
+      "experimentalDecorators": true,
+      "emitDecoratorMetadata": true,
+      "resolveJsonModule": true
+    }
+  }
+  
--- a/package.json
+++ b/package.json
@ -1,15 +1,13 @@
 {
  "name": "node-deepresearch",
  "version": "1.0.0",
-  "main": "dist/index.js",
-  "types": "dist/index.d.ts",
+  "main": "dist/app.js",
  "files": [
    "dist",
    "README.md",
    "LICENSE"
  ],
  "scripts": {
-    "prepare": "npm run build",
    "build": "tsc",
    "dev": "npx ts-node src/agent.ts",
    "search": "npx ts-node src/test-duck.ts",
@ -17,6 +15,7 @@
    "lint": "eslint . --ext .ts",
    "lint:fix": "eslint . --ext .ts --fix",
    "serve": "ts-node src/server.ts",
+    "start": "ts-node src/server.ts",
    "eval": "ts-node src/evals/batch-evals.ts",
    "test": "jest --testTimeout=30000",
    "test:watch": "jest --watch"
--- a/src/tests/server.test.ts
+++ b/src/tests/server.test.ts
@ -22,7 +22,7 @@ describe('/v1/chat/completions', () => {
    process.argv.push(`--secret=${TEST_SECRET}`);
    
    // Import server module (jest.resetModules() is called automatically before each test)
-    const { default: serverModule } = await import('../server');
+    const { default: serverModule } = await require('../app');
    app = serverModule;
  });
  
@ -67,7 +67,7 @@ describe('/v1/chat/completions', () => {
    jest.resetModules();
    
    // Reload server module without secret
-    const { default: serverModule } = await import('../server');
+    const { default: serverModule } = await require('../app');
    app = serverModule;
    
    const response = await request(app)
--- a/src/app.ts
+++ b/src/app.ts
@ -0,0 +1,647 @@
+import express, {Request, Response, RequestHandler} from 'express';
+import cors from 'cors';
+import {EventEmitter} from 'events';
+import {getResponse} from './agent';
+import {
+  StepAction,
+  StreamMessage,
+  TrackerContext,
+  ChatCompletionRequest,
+  ChatCompletionResponse,
+  ChatCompletionChunk,
+  AnswerAction,
+  TOKEN_CATEGORIES,
+  Model
+} from './types';
+import fs from 'fs/promises';
+import path from 'path';
+import {TokenTracker} from "./utils/token-tracker";
+import {ActionTracker} from "./utils/action-tracker";
+
+const app = express();
+
+// Get secret from command line args for optional authentication
+const secret = process.argv.find(arg => arg.startsWith('--secret='))?.split('=')[1];
+
+app.use(cors());
+app.use(express.json());
+
+const eventEmitter = new EventEmitter();
+
+interface QueryRequest extends Request {
+  body: {
+    q: string;
+    budget?: number;
+    maxBadAttempt?: number;
+  };
+}
+
+function buildMdFromAnswer(answer: AnswerAction) {
+  let refStr = '';
+  if (answer.references?.length > 0) {
+    refStr = `
+
+## References
+${answer.references.map((ref, i) => `
+${i + 1}. [${ref.exactQuote}](${ref.url})`).join('')}`;
+  }
+  return `${answer.answer.replace(/\(REF_(\d+)\)/g, (_, num) => `[^${num}]`)}${refStr}`;
+}
+
+
+// Modified streamTextWordByWord function
+async function* streamTextWordByWord(text: string, streamingState: StreamingState) {
+  const words = text.split(/(\s+)/);
+  for (const word of words) {
+    if (streamingState.currentlyStreaming) {
+      const delay = Math.floor(Math.random() * 100);
+      await new Promise(resolve => setTimeout(resolve, delay));
+      yield word;
+    } else {
+      // If streaming was interrupted, yield all remaining words at once
+      const remainingWords = words.slice(words.indexOf(word)).join('');
+      yield remainingWords;
+      return;
+    }
+  }
+}
+
+// Helper function to emit remaining content immediately
+async function emitRemainingContent(
+  res: Response,
+  requestId: string,
+  model: string,
+  content: string
+) {
+  if (!content) return;
+
+  const chunk: ChatCompletionChunk = {
+    id: requestId,
+    object: 'chat.completion.chunk',
+    created: Math.floor(Date.now() / 1000),
+    model: model,
+    system_fingerprint: 'fp_' + requestId,
+    choices: [{
+      index: 0,
+      delta: {content},
+      logprobs: null,
+      finish_reason: null
+    }]
+  };
+  res.write(`data: ${JSON.stringify(chunk)}\n\n`);
+}
+
+interface StreamingState {
+  currentlyStreaming: boolean;
+  currentGenerator: AsyncGenerator<string> | null;
+  remainingContent: string;
+}
+
+async function completeCurrentStreaming(
+  streamingState: StreamingState,
+  res: Response,
+  requestId: string,
+  model: string
+) {
+  if (streamingState.currentlyStreaming && streamingState.remainingContent) {
+    // Force completion of current streaming
+    await emitRemainingContent(
+      res,
+      requestId,
+      model,
+      streamingState.remainingContent
+    );
+    // Reset streaming state
+    streamingState.currentlyStreaming = false;
+    streamingState.remainingContent = '';
+    streamingState.currentGenerator = null;
+  }
+}
+
+// OpenAI-compatible chat completions endpoint
+// Models API endpoints
+app.get('/v1/models', (async (_req: Request, res: Response) => {
+  const models: Model[] = [{
+    id: 'jina-deepsearch-v1',
+    object: 'model',
+    created: 1686935002,
+    owned_by: 'jina-ai'
+  }];
+
+  res.json({
+    object: 'list',
+    data: models
+  });
+}) as RequestHandler);
+
+app.get('/v1/models/:model', (async (req: Request, res: Response) => {
+  const modelId = req.params.model;
+  
+  if (modelId === 'jina-deepsearch-v1') {
+    res.json({
+      id: 'jina-deepsearch-v1',
+      object: 'model',
+      created: 1686935002,
+      owned_by: 'jina-ai'
+    });
+  } else {
+    res.status(404).json({
+      error: {
+        message: `Model '${modelId}' not found`,
+        type: 'invalid_request_error',
+        param: null,
+        code: 'model_not_found'
+      }
+    });
+  }
+}) as RequestHandler);
+
+if (secret) {
+  // Check authentication only if secret is set
+  app.use((req, res, next) => {
+    const authHeader = req.headers.authorization;
+    if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
+      console.log('[chat/completions] Unauthorized request');
+      res.status(401).json({ error: 'Unauthorized' });
+      return;
+    }
+
+    return next();
+  });
+}
+
+app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
+  // Check authentication only if secret is set
+  if (secret) {
+    const authHeader = req.headers.authorization;
+    if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
+      console.log('[chat/completions] Unauthorized request');
+      res.status(401).json({error: 'Unauthorized'});
+      return;
+    }
+  }
+
+  // Log request details (excluding sensitive data)
+  console.log('[chat/completions] Request:', {
+    model: req.body.model,
+    stream: req.body.stream,
+    messageCount: req.body.messages?.length,
+    hasAuth: !!req.headers.authorization,
+    requestId: Date.now().toString()
+  });
+
+  const body = req.body as ChatCompletionRequest;
+  if (!body.messages?.length) {
+    return res.status(400).json({error: 'Messages array is required and must not be empty'});
+  }
+  const lastMessage = body.messages[body.messages.length - 1];
+  if (lastMessage.role !== 'user') {
+    return res.status(400).json({error: 'Last message must be from user'});
+  }
+
+  const requestId = Date.now().toString();
+  const context: TrackerContext = {
+    tokenTracker: new TokenTracker(),
+    actionTracker: new ActionTracker()
+  };
+
+  // Track prompt tokens for the initial message
+  // Use Vercel's token counting convention - 1 token per message
+  const messageTokens = body.messages.length;
+  context.tokenTracker.trackUsage('agent', messageTokens, TOKEN_CATEGORIES.PROMPT);
+
+  // Add this inside the chat completions endpoint, before setting up the action listener
+  const streamingState: StreamingState = {
+    currentlyStreaming: false,
+    currentGenerator: null,
+    remainingContent: ''
+  };
+
+  if (body.stream) {
+    res.setHeader('Content-Type', 'text/event-stream');
+    res.setHeader('Cache-Control', 'no-cache');
+    res.setHeader('Connection', 'keep-alive');
+
+
+    // Send initial chunk with opening think tag
+    const initialChunk: ChatCompletionChunk = {
+      id: requestId,
+      object: 'chat.completion.chunk',
+      created: Math.floor(Date.now() / 1000),
+      model: body.model,
+      system_fingerprint: 'fp_' + requestId,
+      choices: [{
+        index: 0,
+        delta: {role: 'assistant', content: '<think>'},
+        logprobs: null,
+        finish_reason: null
+      }]
+    };
+    res.write(`data: ${JSON.stringify(initialChunk)}\n\n`);
+
+    // Set up progress listener with cleanup
+    const actionListener = async (action: any) => {
+      if (action.thisStep.think) {
+        // Complete any ongoing streaming first
+        await completeCurrentStreaming(streamingState, res, requestId, body.model);
+
+        // Start new streaming session
+        streamingState.currentlyStreaming = true;
+        streamingState.remainingContent = action.thisStep.think;
+
+        try {
+          for await (const word of streamTextWordByWord(action.thisStep.think, streamingState)) {
+            if (!streamingState.currentlyStreaming) {
+              break;
+            }
+
+            // Update remaining content
+            streamingState.remainingContent = streamingState.remainingContent.slice(word.length);
+
+            const chunk: ChatCompletionChunk = {
+              id: requestId,
+              object: 'chat.completion.chunk',
+              created: Math.floor(Date.now() / 1000),
+              model: body.model,
+              system_fingerprint: 'fp_' + requestId,
+              choices: [{
+                index: 0,
+                delta: {content: word},
+                logprobs: null,
+                finish_reason: null
+              }]
+            };
+            res.write(`data: ${JSON.stringify(chunk)}\n\n`);
+          }
+
+          // Only add newline if this streaming completed normally
+          if (streamingState.currentlyStreaming) {
+            const newlineChunk: ChatCompletionChunk = {
+              id: requestId,
+              object: 'chat.completion.chunk',
+              created: Math.floor(Date.now() / 1000),
+              model: body.model,
+              system_fingerprint: 'fp_' + requestId,
+              choices: [{
+                index: 0,
+                delta: {content: '\n'},
+                logprobs: null,
+                finish_reason: null
+              }]
+            };
+            res.write(`data: ${JSON.stringify(newlineChunk)}\n\n`);
+          }
+        } catch (error) {
+          console.error('Error in streaming:', error);
+          await completeCurrentStreaming(streamingState, res, requestId, body.model);
+        }
+      }
+    };
+    context.actionTracker.on('action', actionListener);
+
+    // Make sure to update the cleanup code
+    res.on('finish', () => {
+      streamingState.currentlyStreaming = false;
+      streamingState.currentGenerator = null;
+      streamingState.remainingContent = '';
+      context.actionTracker.removeListener('action', actionListener);
+    });
+  }
+
+  try {
+    // Track initial query tokens - already tracked above
+    // const queryTokens = Buffer.byteLength(lastMessage.content, 'utf-8');
+    // context.tokenTracker.trackUsage('agent', queryTokens, 'prompt');
+
+    let result;
+    try {
+      ({result} = await getResponse(lastMessage.content, undefined, undefined, context));
+    } catch (error: any) {
+      // If deduplication fails, retry without it
+      if (error?.response?.status === 402) {
+        // If deduplication fails, retry with maxBadAttempt=3 to skip dedup
+        ({result} = await getResponse(lastMessage.content, undefined, 3, context));
+      } else {
+        throw error;
+      }
+    }
+
+    // Track tokens based on action type
+    if (result.action === 'answer') {
+      // Track accepted prediction tokens for the final answer using Vercel's convention
+      const answerTokens = 1; // Default to 1 token per answer
+      context.tokenTracker.trackUsage('evaluator', answerTokens, TOKEN_CATEGORIES.ACCEPTED);
+    } else {
+      // Track rejected prediction tokens for non-answer responses
+      const rejectedTokens = 1; // Default to 1 token per rejected response
+      context.tokenTracker.trackUsage('evaluator', rejectedTokens, TOKEN_CATEGORIES.REJECTED);
+    }
+
+    if (body.stream) {
+      // Complete any ongoing streaming before sending final answer
+      await completeCurrentStreaming(streamingState, res, requestId, body.model);
+
+      // Send closing think tag
+      const closeThinkChunk: ChatCompletionChunk = {
+        id: requestId,
+        object: 'chat.completion.chunk',
+        created: Math.floor(Date.now() / 1000),
+        model: body.model,
+        system_fingerprint: 'fp_' + requestId,
+        choices: [{
+          index: 0,
+          delta: {content: `</think>\n\n`},
+          logprobs: null,
+          finish_reason: null
+        }]
+      };
+      res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);
+
+      // Send final answer as separate chunk
+      const answerChunk: ChatCompletionChunk = {
+        id: requestId,
+        object: 'chat.completion.chunk',
+        created: Math.floor(Date.now() / 1000),
+        model: body.model,
+        system_fingerprint: 'fp_' + requestId,
+        choices: [{
+          index: 0,
+          delta: {content: result.action === 'answer' ? buildMdFromAnswer(result) : result.think},
+          logprobs: null,
+          finish_reason: 'stop'
+        }]
+      };
+      res.write(`data: ${JSON.stringify(answerChunk)}\n\n`);
+      res.end();
+    } else {
+      const usage = context.tokenTracker.getUsageDetails();
+      const response: ChatCompletionResponse = {
+        id: requestId,
+        object: 'chat.completion',
+        created: Math.floor(Date.now() / 1000),
+        model: body.model,
+        system_fingerprint: 'fp_' + requestId,
+        choices: [{
+          index: 0,
+          message: {
+            role: 'assistant',
+            content: result.action === 'answer' ? buildMdFromAnswer(result) : result.think
+          },
+          logprobs: null,
+          finish_reason: 'stop'
+        }],
+        usage
+      };
+
+      // Log final response (excluding full content for brevity)
+      console.log('[chat/completions] Response:', {
+        id: response.id,
+        status: 200,
+        contentLength: response.choices[0].message.content.length,
+        usage: response.usage
+      });
+
+      res.json(response);
+    }
+  } catch (error: any) {
+    // Log error details
+    console.error('[chat/completions] Error:', {
+      message: error?.message || 'An error occurred',
+      stack: error?.stack,
+      type: error?.constructor?.name,
+      requestId
+    });
+
+    // Track error as rejected tokens with Vercel token counting
+    const errorMessage = error?.message || 'An error occurred';
+    // Default to 1 token for errors as per Vercel AI SDK convention
+    const errorTokens = 1;
+    context.tokenTracker.trackUsage('evaluator', errorTokens, TOKEN_CATEGORIES.REJECTED);
+
+    // Clean up event listeners
+    context.actionTracker.removeAllListeners('action');
+
+    // Get token usage in OpenAI API format
+    const usage = context.tokenTracker.getUsageDetails();
+
+    if (body.stream && res.headersSent) {
+      // For streaming responses that have already started, send error as a chunk
+      // First send closing think tag if we're in the middle of thinking
+      const closeThinkChunk: ChatCompletionChunk = {
+        id: requestId,
+        object: 'chat.completion.chunk',
+        created: Math.floor(Date.now() / 1000),
+        model: body.model,
+        system_fingerprint: 'fp_' + requestId,
+        choices: [{
+          index: 0,
+          delta: {content: '</think>'},
+          logprobs: null,
+          finish_reason: null
+        }]
+      };
+      res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);
+
+      // Track error token and send error message
+      context.tokenTracker.trackUsage('evaluator', 1, TOKEN_CATEGORIES.REJECTED);
+      const errorChunk: ChatCompletionChunk = {
+        id: requestId,
+        object: 'chat.completion.chunk',
+        created: Math.floor(Date.now() / 1000),
+        model: body.model,
+        system_fingerprint: 'fp_' + requestId,
+        choices: [{
+          index: 0,
+          delta: {content: errorMessage},
+          logprobs: null,
+          finish_reason: 'stop'
+        }]
+      };
+      res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
+      res.end();
+    } else {
+      // For non-streaming or not-yet-started responses, send error as JSON
+      const response: ChatCompletionResponse = {
+        id: requestId,
+        object: 'chat.completion',
+        created: Math.floor(Date.now() / 1000),
+        model: body.model,
+        system_fingerprint: 'fp_' + requestId,
+        choices: [{
+          index: 0,
+          message: {
+            role: 'assistant',
+            content: `Error: ${errorMessage}`
+          },
+          logprobs: null,
+          finish_reason: 'stop'
+        }],
+        usage
+      };
+      res.json(response);
+    }
+  }
+}) as RequestHandler);
+
+interface StreamResponse extends Response {
+  write: (chunk: string) => boolean;
+}
+
+function createProgressEmitter(requestId: string, budget: number | undefined, context: TrackerContext) {
+  return () => {
+    const state = context.actionTracker.getState();
+    const budgetInfo = {
+      used: context.tokenTracker.getTotalUsage(),
+      total: budget || 1_000_000,
+      percentage: ((context.tokenTracker.getTotalUsage() / (budget || 1_000_000)) * 100).toFixed(2)
+    };
+
+    eventEmitter.emit(`progress-${requestId}`, {
+      type: 'progress',
+      data: {...state.thisStep, totalStep: state.totalStep},
+      step: state.totalStep,
+      budget: budgetInfo,
+      trackers: {
+        tokenUsage: context.tokenTracker.getTotalUsage(),
+        actionState: context.actionTracker.getState()
+      }
+    });
+  };
+}
+
+function cleanup(requestId: string) {
+  const context = trackers.get(requestId);
+  if (context) {
+    context.actionTracker.removeAllListeners();
+    context.tokenTracker.removeAllListeners();
+    trackers.delete(requestId);
+  }
+}
+
+function emitTrackerUpdate(requestId: string, context: TrackerContext) {
+  const trackerData = {
+    tokenUsage: context.tokenTracker.getTotalUsage(),
+    tokenBreakdown: context.tokenTracker.getUsageBreakdown(),
+    actionState: context.actionTracker.getState().thisStep,
+    step: context.actionTracker.getState().totalStep,
+    badAttempts: context.actionTracker.getState().badAttempts,
+    gaps: context.actionTracker.getState().gaps
+  };
+
+  eventEmitter.emit(`progress-${requestId}`, {
+    type: 'progress',
+    trackers: trackerData
+  });
+}
+
+// Store the trackers for each request
+const trackers = new Map<string, TrackerContext>();
+
+app.post('/api/v1/query', (async (req: QueryRequest, res: Response) => {
+  const {q, budget, maxBadAttempt} = req.body;
+  if (!q) {
+    return res.status(400).json({error: 'Query (q) is required'});
+  }
+
+  const requestId = Date.now().toString();
+
+  // Create new trackers for this request
+  const context: TrackerContext = {
+    tokenTracker: new TokenTracker(),
+    actionTracker: new ActionTracker()
+  };
+  trackers.set(requestId, context);
+
+  // Set up listeners immediately for both trackers
+  context.actionTracker.on('action', () => emitTrackerUpdate(requestId, context));
+  // context.tokenTracker.on('usage', () => emitTrackerUpdate(requestId, context));
+
+  res.json({requestId});
+
+  try {
+    const {result} = await getResponse(q, budget, maxBadAttempt, context);
+    const emitProgress = createProgressEmitter(requestId, budget, context);
+    context.actionTracker.on('action', emitProgress);
+    await storeTaskResult(requestId, result);
+    eventEmitter.emit(`progress-${requestId}`, {
+      type: 'answer',
+      data: result,
+      trackers: {
+        tokenUsage: context.tokenTracker.getTotalUsage(),
+        actionState: context.actionTracker.getState()
+      }
+    });
+    cleanup(requestId);
+  } catch (error: any) {
+    eventEmitter.emit(`progress-${requestId}`, {
+      type: 'error',
+      data: error?.message || 'Unknown error',
+      status: 500,
+      trackers: {
+        tokenUsage: context.tokenTracker.getTotalUsage(),
+        actionState: context.actionTracker.getState()
+      }
+    });
+    cleanup(requestId);
+  }
+}) as RequestHandler);
+
+app.get('/api/v1/stream/:requestId', (async (req: Request, res: StreamResponse) => {
+  const requestId = req.params.requestId;
+  const context = trackers.get(requestId);
+
+  res.setHeader('Content-Type', 'text/event-stream');
+  res.setHeader('Cache-Control', 'no-cache');
+  res.setHeader('Connection', 'keep-alive');
+
+  const listener = (data: StreamMessage) => {
+    // The trackers are now included in all event types
+    // We don't need to add them here as they're already part of the data
+    res.write(`data: ${JSON.stringify(data)}\n\n`);
+  };
+
+  eventEmitter.on(`progress-${requestId}`, listener);
+
+  // Handle client disconnection
+  req.on('close', () => {
+    eventEmitter.removeListener(`progress-${requestId}`, listener);
+  });
+
+  // Send initial connection confirmation with tracker state
+  const initialData = {
+    type: 'connected',
+    requestId,
+    trackers: context ? {
+      tokenUsage: context.tokenTracker.getTotalUsage(),
+      actionState: context.actionTracker.getState()
+    } : null
+  };
+  res.write(`data: ${JSON.stringify(initialData)}\n\n`);
+}) as RequestHandler);
+
+async function storeTaskResult(requestId: string, result: StepAction) {
+  try {
+    const taskDir = path.join(process.cwd(), 'tasks');
+    await fs.mkdir(taskDir, {recursive: true});
+    await fs.writeFile(
+      path.join(taskDir, `${requestId}.json`),
+      JSON.stringify(result, null, 2)
+    );
+  } catch (error) {
+    console.error('Task storage failed:', error);
+    throw new Error('Failed to store task result');
+  }
+}
+
+app.get('/api/v1/task/:requestId', (async (req: Request, res: Response) => {
+  const requestId = req.params.requestId;
+  try {
+    const taskPath = path.join(process.cwd(), 'tasks', `${requestId}.json`);
+    const taskData = await fs.readFile(taskPath, 'utf-8');
+    res.json(JSON.parse(taskData));
+  } catch (error) {
+    res.status(404).json({error: 'Task not found'});
+  }
+}) as RequestHandler);
+
+export default app;
--- a/src/server.ts
+++ b/src/server.ts
@ -1,637 +1,7 @@
-import express, {Request, Response, RequestHandler} from 'express';
-import cors from 'cors';
-import {EventEmitter} from 'events';
-import {getResponse} from './agent';
-import {
-  StepAction,
-  StreamMessage,
-  TrackerContext,
-  ChatCompletionRequest,
-  ChatCompletionResponse,
-  ChatCompletionChunk,
-  AnswerAction,
-  TOKEN_CATEGORIES,
-  Model
-} from './types';
-import fs from 'fs/promises';
-import path from 'path';
-import {TokenTracker} from "./utils/token-tracker";
-import {ActionTracker} from "./utils/action-tracker";
+import app from "./app";

-const app = express();
 const port = process.env.PORT || 3000;

-// Get secret from command line args for optional authentication
-const secret = process.argv.find(arg => arg.startsWith('--secret='))?.split('=')[1];
-
-app.use(cors());
-app.use(express.json());
-
-const eventEmitter = new EventEmitter();
-
-interface QueryRequest extends Request {
-  body: {
-    q: string;
-    budget?: number;
-    maxBadAttempt?: number;
-  };
-}
-
-function buildMdFromAnswer(answer: AnswerAction) {
-  let refStr = '';
-  if (answer.references?.length > 0) {
-    refStr = `
-
-## References
-${answer.references.map((ref, i) => `
-${i + 1}. [${ref.exactQuote}](${ref.url})`).join('')}`;
-  }
-  return `${answer.answer.replace(/\(REF_(\d+)\)/g, (_, num) => `[^${num}]`)}${refStr}`;
-}
-
-
-// Modified streamTextWordByWord function
-async function* streamTextWordByWord(text: string, streamingState: StreamingState) {
-  const words = text.split(/(\s+)/);
-  for (const word of words) {
-    if (streamingState.currentlyStreaming) {
-      const delay = Math.floor(Math.random() * 100);
-      await new Promise(resolve => setTimeout(resolve, delay));
-      yield word;
-    } else {
-      // If streaming was interrupted, yield all remaining words at once
-      const remainingWords = words.slice(words.indexOf(word)).join('');
-      yield remainingWords;
-      return;
-    }
-  }
-}
-
-// Helper function to emit remaining content immediately
-async function emitRemainingContent(
-  res: Response,
-  requestId: string,
-  model: string,
-  content: string
-) {
-  if (!content) return;
-
-  const chunk: ChatCompletionChunk = {
-    id: requestId,
-    object: 'chat.completion.chunk',
-    created: Math.floor(Date.now() / 1000),
-    model: model,
-    system_fingerprint: 'fp_' + requestId,
-    choices: [{
-      index: 0,
-      delta: {content},
-      logprobs: null,
-      finish_reason: null
-    }]
-  };
-  res.write(`data: ${JSON.stringify(chunk)}\n\n`);
-}
-
-interface StreamingState {
-  currentlyStreaming: boolean;
-  currentGenerator: AsyncGenerator<string> | null;
-  remainingContent: string;
-}
-
-async function completeCurrentStreaming(
-  streamingState: StreamingState,
-  res: Response,
-  requestId: string,
-  model: string
-) {
-  if (streamingState.currentlyStreaming && streamingState.remainingContent) {
-    // Force completion of current streaming
-    await emitRemainingContent(
-      res,
-      requestId,
-      model,
-      streamingState.remainingContent
-    );
-    // Reset streaming state
-    streamingState.currentlyStreaming = false;
-    streamingState.remainingContent = '';
-    streamingState.currentGenerator = null;
-  }
-}
-
-// OpenAI-compatible chat completions endpoint
-// Models API endpoints
-app.get('/v1/models', (async (_req: Request, res: Response) => {
-  const models: Model[] = [{
-    id: 'jina-deepsearch-v1',
-    object: 'model',
-    created: 1686935002,
-    owned_by: 'jina-ai'
-  }];
-
-  res.json({
-    object: 'list',
-    data: models
-  });
-}) as RequestHandler);
-
-app.get('/v1/models/:model', (async (req: Request, res: Response) => {
-  const modelId = req.params.model;
-  
-  if (modelId === 'jina-deepsearch-v1') {
-    res.json({
-      id: 'jina-deepsearch-v1',
-      object: 'model',
-      created: 1686935002,
-      owned_by: 'jina-ai'
-    });
-  } else {
-    res.status(404).json({
-      error: {
-        message: `Model '${modelId}' not found`,
-        type: 'invalid_request_error',
-        param: null,
-        code: 'model_not_found'
-      }
-    });
-  }
-}) as RequestHandler);
-
-
-app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
-  // Check authentication only if secret is set
-  if (secret) {
-    const authHeader = req.headers.authorization;
-    if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
-      console.log('[chat/completions] Unauthorized request');
-      res.status(401).json({error: 'Unauthorized'});
-      return;
-    }
-  }
-
-  // Log request details (excluding sensitive data)
-  console.log('[chat/completions] Request:', {
-    model: req.body.model,
-    stream: req.body.stream,
-    messageCount: req.body.messages?.length,
-    hasAuth: !!req.headers.authorization,
-    requestId: Date.now().toString()
-  });
-
-  const body = req.body as ChatCompletionRequest;
-  if (!body.messages?.length) {
-    return res.status(400).json({error: 'Messages array is required and must not be empty'});
-  }
-  const lastMessage = body.messages[body.messages.length - 1];
-  if (lastMessage.role !== 'user') {
-    return res.status(400).json({error: 'Last message must be from user'});
-  }
-
-  const requestId = Date.now().toString();
-  const context: TrackerContext = {
-    tokenTracker: new TokenTracker(),
-    actionTracker: new ActionTracker()
-  };
-
-  // Track prompt tokens for the initial message
-  // Use Vercel's token counting convention - 1 token per message
-  const messageTokens = body.messages.length;
-  context.tokenTracker.trackUsage('agent', messageTokens, TOKEN_CATEGORIES.PROMPT);
-
-  // Add this inside the chat completions endpoint, before setting up the action listener
-  const streamingState: StreamingState = {
-    currentlyStreaming: false,
-    currentGenerator: null,
-    remainingContent: ''
-  };
-
-  if (body.stream) {
-    res.setHeader('Content-Type', 'text/event-stream');
-    res.setHeader('Cache-Control', 'no-cache');
-    res.setHeader('Connection', 'keep-alive');
-
-
-    // Send initial chunk with opening think tag
-    const initialChunk: ChatCompletionChunk = {
-      id: requestId,
-      object: 'chat.completion.chunk',
-      created: Math.floor(Date.now() / 1000),
-      model: body.model,
-      system_fingerprint: 'fp_' + requestId,
-      choices: [{
-        index: 0,
-        delta: {role: 'assistant', content: '<think>'},
-        logprobs: null,
-        finish_reason: null
-      }]
-    };
-    res.write(`data: ${JSON.stringify(initialChunk)}\n\n`);
-
-    // Set up progress listener with cleanup
-    const actionListener = async (action: any) => {
-      if (action.thisStep.think) {
-        // Complete any ongoing streaming first
-        await completeCurrentStreaming(streamingState, res, requestId, body.model);
-
-        // Start new streaming session
-        streamingState.currentlyStreaming = true;
-        streamingState.remainingContent = action.thisStep.think;
-
-        try {
-          for await (const word of streamTextWordByWord(action.thisStep.think, streamingState)) {
-            if (!streamingState.currentlyStreaming) {
-              break;
-            }
-
-            // Update remaining content
-            streamingState.remainingContent = streamingState.remainingContent.slice(word.length);
-
-            const chunk: ChatCompletionChunk = {
-              id: requestId,
-              object: 'chat.completion.chunk',
-              created: Math.floor(Date.now() / 1000),
-              model: body.model,
-              system_fingerprint: 'fp_' + requestId,
-              choices: [{
-                index: 0,
-                delta: {content: word},
-                logprobs: null,
-                finish_reason: null
-              }]
-            };
-            res.write(`data: ${JSON.stringify(chunk)}\n\n`);
-          }
-
-          // Only add newline if this streaming completed normally
-          if (streamingState.currentlyStreaming) {
-            const newlineChunk: ChatCompletionChunk = {
-              id: requestId,
-              object: 'chat.completion.chunk',
-              created: Math.floor(Date.now() / 1000),
-              model: body.model,
-              system_fingerprint: 'fp_' + requestId,
-              choices: [{
-                index: 0,
-                delta: {content: '\n'},
-                logprobs: null,
-                finish_reason: null
-              }]
-            };
-            res.write(`data: ${JSON.stringify(newlineChunk)}\n\n`);
-          }
-        } catch (error) {
-          console.error('Error in streaming:', error);
-          await completeCurrentStreaming(streamingState, res, requestId, body.model);
-        }
-      }
-    };
-    context.actionTracker.on('action', actionListener);
-
-    // Make sure to update the cleanup code
-    res.on('finish', () => {
-      streamingState.currentlyStreaming = false;
-      streamingState.currentGenerator = null;
-      streamingState.remainingContent = '';
-      context.actionTracker.removeListener('action', actionListener);
-    });
-  }
-
-  try {
-    // Track initial query tokens - already tracked above
-    // const queryTokens = Buffer.byteLength(lastMessage.content, 'utf-8');
-    // context.tokenTracker.trackUsage('agent', queryTokens, 'prompt');
-
-    let result;
-    try {
-      ({result} = await getResponse(lastMessage.content, undefined, undefined, context));
-    } catch (error: any) {
-      // If deduplication fails, retry without it
-      if (error?.response?.status === 402) {
-        // If deduplication fails, retry with maxBadAttempt=3 to skip dedup
-        ({result} = await getResponse(lastMessage.content, undefined, 3, context));
-      } else {
-        throw error;
-      }
-    }
-
-    // Track tokens based on action type
-    if (result.action === 'answer') {
-      // Track accepted prediction tokens for the final answer using Vercel's convention
-      const answerTokens = 1; // Default to 1 token per answer
-      context.tokenTracker.trackUsage('evaluator', answerTokens, TOKEN_CATEGORIES.ACCEPTED);
-    } else {
-      // Track rejected prediction tokens for non-answer responses
-      const rejectedTokens = 1; // Default to 1 token per rejected response
-      context.tokenTracker.trackUsage('evaluator', rejectedTokens, TOKEN_CATEGORIES.REJECTED);
-    }
-
-    if (body.stream) {
-      // Complete any ongoing streaming before sending final answer
-      await completeCurrentStreaming(streamingState, res, requestId, body.model);
-
-      // Send closing think tag
-      const closeThinkChunk: ChatCompletionChunk = {
-        id: requestId,
-        object: 'chat.completion.chunk',
-        created: Math.floor(Date.now() / 1000),
-        model: body.model,
-        system_fingerprint: 'fp_' + requestId,
-        choices: [{
-          index: 0,
-          delta: {content: `</think>\n\n`},
-          logprobs: null,
-          finish_reason: null
-        }]
-      };
-      res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);
-
-      // Send final answer as separate chunk
-      const answerChunk: ChatCompletionChunk = {
-        id: requestId,
-        object: 'chat.completion.chunk',
-        created: Math.floor(Date.now() / 1000),
-        model: body.model,
-        system_fingerprint: 'fp_' + requestId,
-        choices: [{
-          index: 0,
-          delta: {content: result.action === 'answer' ? buildMdFromAnswer(result) : result.think},
-          logprobs: null,
-          finish_reason: 'stop'
-        }]
-      };
-      res.write(`data: ${JSON.stringify(answerChunk)}\n\n`);
-      res.end();
-    } else {
-      const usage = context.tokenTracker.getUsageDetails();
-      const response: ChatCompletionResponse = {
-        id: requestId,
-        object: 'chat.completion',
-        created: Math.floor(Date.now() / 1000),
-        model: body.model,
-        system_fingerprint: 'fp_' + requestId,
-        choices: [{
-          index: 0,
-          message: {
-            role: 'assistant',
-            content: result.action === 'answer' ? buildMdFromAnswer(result) : result.think
-          },
-          logprobs: null,
-          finish_reason: 'stop'
-        }],
-        usage
-      };
-
-      // Log final response (excluding full content for brevity)
-      console.log('[chat/completions] Response:', {
-        id: response.id,
-        status: 200,
-        contentLength: response.choices[0].message.content.length,
-        usage: response.usage
-      });
-
-      res.json(response);
-    }
-  } catch (error: any) {
-    // Log error details
-    console.error('[chat/completions] Error:', {
-      message: error?.message || 'An error occurred',
-      stack: error?.stack,
-      type: error?.constructor?.name,
-      requestId
-    });
-
-    // Track error as rejected tokens with Vercel token counting
-    const errorMessage = error?.message || 'An error occurred';
-    // Default to 1 token for errors as per Vercel AI SDK convention
-    const errorTokens = 1;
-    context.tokenTracker.trackUsage('evaluator', errorTokens, TOKEN_CATEGORIES.REJECTED);
-
-    // Clean up event listeners
-    context.actionTracker.removeAllListeners('action');
-
-    // Get token usage in OpenAI API format
-    const usage = context.tokenTracker.getUsageDetails();
-
-    if (body.stream && res.headersSent) {
-      // For streaming responses that have already started, send error as a chunk
-      // First send closing think tag if we're in the middle of thinking
-      const closeThinkChunk: ChatCompletionChunk = {
-        id: requestId,
-        object: 'chat.completion.chunk',
-        created: Math.floor(Date.now() / 1000),
-        model: body.model,
-        system_fingerprint: 'fp_' + requestId,
-        choices: [{
-          index: 0,
-          delta: {content: '</think>'},
-          logprobs: null,
-          finish_reason: null
-        }]
-      };
-      res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);
-
-      // Track error token and send error message
-      context.tokenTracker.trackUsage('evaluator', 1, TOKEN_CATEGORIES.REJECTED);
-      const errorChunk: ChatCompletionChunk = {
-        id: requestId,
-        object: 'chat.completion.chunk',
-        created: Math.floor(Date.now() / 1000),
-        model: body.model,
-        system_fingerprint: 'fp_' + requestId,
-        choices: [{
-          index: 0,
-          delta: {content: errorMessage},
-          logprobs: null,
-          finish_reason: 'stop'
-        }]
-      };
-      res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
-      res.end();
-    } else {
-      // For non-streaming or not-yet-started responses, send error as JSON
-      const response: ChatCompletionResponse = {
-        id: requestId,
-        object: 'chat.completion',
-        created: Math.floor(Date.now() / 1000),
-        model: body.model,
-        system_fingerprint: 'fp_' + requestId,
-        choices: [{
-          index: 0,
-          message: {
-            role: 'assistant',
-            content: `Error: ${errorMessage}`
-          },
-          logprobs: null,
-          finish_reason: 'stop'
-        }],
-        usage
-      };
-      res.json(response);
-    }
-  }
-}) as RequestHandler);
-
-interface StreamResponse extends Response {
-  write: (chunk: string) => boolean;
-}
-
-function createProgressEmitter(requestId: string, budget: number | undefined, context: TrackerContext) {
-  return () => {
-    const state = context.actionTracker.getState();
-    const budgetInfo = {
-      used: context.tokenTracker.getTotalUsage(),
-      total: budget || 1_000_000,
-      percentage: ((context.tokenTracker.getTotalUsage() / (budget || 1_000_000)) * 100).toFixed(2)
-    };
-
-    eventEmitter.emit(`progress-${requestId}`, {
-      type: 'progress',
-      data: {...state.thisStep, totalStep: state.totalStep},
-      step: state.totalStep,
-      budget: budgetInfo,
-      trackers: {
-        tokenUsage: context.tokenTracker.getTotalUsage(),
-        actionState: context.actionTracker.getState()
-      }
-    });
-  };
-}
-
-function cleanup(requestId: string) {
-  const context = trackers.get(requestId);
-  if (context) {
-    context.actionTracker.removeAllListeners();
-    context.tokenTracker.removeAllListeners();
-    trackers.delete(requestId);
-  }
-}
-
-function emitTrackerUpdate(requestId: string, context: TrackerContext) {
-  const trackerData = {
-    tokenUsage: context.tokenTracker.getTotalUsage(),
-    tokenBreakdown: context.tokenTracker.getUsageBreakdown(),
-    actionState: context.actionTracker.getState().thisStep,
-    step: context.actionTracker.getState().totalStep,
-    badAttempts: context.actionTracker.getState().badAttempts,
-    gaps: context.actionTracker.getState().gaps
-  };
-
-  eventEmitter.emit(`progress-${requestId}`, {
-    type: 'progress',
-    trackers: trackerData
-  });
-}
-
-// Store the trackers for each request
-const trackers = new Map<string, TrackerContext>();
-
-app.post('/api/v1/query', (async (req: QueryRequest, res: Response) => {
-  const {q, budget, maxBadAttempt} = req.body;
-  if (!q) {
-    return res.status(400).json({error: 'Query (q) is required'});
-  }
-
-  const requestId = Date.now().toString();
-
-  // Create new trackers for this request
-  const context: TrackerContext = {
-    tokenTracker: new TokenTracker(),
-    actionTracker: new ActionTracker()
-  };
-  trackers.set(requestId, context);
-
-  // Set up listeners immediately for both trackers
-  context.actionTracker.on('action', () => emitTrackerUpdate(requestId, context));
-  // context.tokenTracker.on('usage', () => emitTrackerUpdate(requestId, context));
-
-  res.json({requestId});
-
-  try {
-    const {result} = await getResponse(q, budget, maxBadAttempt, context);
-    const emitProgress = createProgressEmitter(requestId, budget, context);
-    context.actionTracker.on('action', emitProgress);
-    await storeTaskResult(requestId, result);
-    eventEmitter.emit(`progress-${requestId}`, {
-      type: 'answer',
-      data: result,
-      trackers: {
-        tokenUsage: context.tokenTracker.getTotalUsage(),
-        actionState: context.actionTracker.getState()
-      }
-    });
-    cleanup(requestId);
-  } catch (error: any) {
-    eventEmitter.emit(`progress-${requestId}`, {
-      type: 'error',
-      data: error?.message || 'Unknown error',
-      status: 500,
-      trackers: {
-        tokenUsage: context.tokenTracker.getTotalUsage(),
-        actionState: context.actionTracker.getState()
-      }
-    });
-    cleanup(requestId);
-  }
-}) as RequestHandler);
-
-app.get('/api/v1/stream/:requestId', (async (req: Request, res: StreamResponse) => {
-  const requestId = req.params.requestId;
-  const context = trackers.get(requestId);
-
-  res.setHeader('Content-Type', 'text/event-stream');
-  res.setHeader('Cache-Control', 'no-cache');
-  res.setHeader('Connection', 'keep-alive');
-
-  const listener = (data: StreamMessage) => {
-    // The trackers are now included in all event types
-    // We don't need to add them here as they're already part of the data
-    res.write(`data: ${JSON.stringify(data)}\n\n`);
-  };
-
-  eventEmitter.on(`progress-${requestId}`, listener);
-
-  // Handle client disconnection
-  req.on('close', () => {
-    eventEmitter.removeListener(`progress-${requestId}`, listener);
-  });
-
-  // Send initial connection confirmation with tracker state
-  const initialData = {
-    type: 'connected',
-    requestId,
-    trackers: context ? {
-      tokenUsage: context.tokenTracker.getTotalUsage(),
-      actionState: context.actionTracker.getState()
-    } : null
-  };
-  res.write(`data: ${JSON.stringify(initialData)}\n\n`);
-}) as RequestHandler);
-
-async function storeTaskResult(requestId: string, result: StepAction) {
-  try {
-    const taskDir = path.join(process.cwd(), 'tasks');
-    await fs.mkdir(taskDir, {recursive: true});
-    await fs.writeFile(
-      path.join(taskDir, `${requestId}.json`),
-      JSON.stringify(result, null, 2)
-    );
-  } catch (error) {
-    console.error('Task storage failed:', error);
-    throw new Error('Failed to store task result');
-  }
-}
-
-app.get('/api/v1/task/:requestId', (async (req: Request, res: Response) => {
-  const requestId = req.params.requestId;
-  try {
-    const taskPath = path.join(process.cwd(), 'tasks', `${requestId}.json`);
-    const taskData = await fs.readFile(taskPath, 'utf-8');
-    res.json(JSON.parse(taskData));
-  } catch (error) {
-    res.status(404).json({error: 'Task not found'});
-  }
-}) as RequestHandler);
-
 // Export server startup function for better testing
 export function startServer() {
  return app.listen(port, () => {
@ -642,6 +12,4 @@ export function startServer() {
 // Start server if running directly
 if (process.env.NODE_ENV !== 'test') {
  startServer();
-}
-
-export default app;
+}
--- a/src/utils/token-tracker.ts
+++ b/src/utils/token-tracker.ts
@ -9,6 +9,16 @@ export class TokenTracker extends EventEmitter {
  constructor(budget?: number) {
    super();
    this.budget = budget;
+
+    if ('asyncLocalContext' in process) {
+      const asyncLocalContext = process.asyncLocalContext as any;
+      this.on('usage', () => {
+        if (asyncLocalContext.available()) {
+          asyncLocalContext.ctx.chargeAmount = this.getTotalUsage();
+        }
+      });
+      
+    }
  }

  trackUsage(tool: string, tokens: number, category?: TokenCategory) {
@ -53,9 +63,9 @@ export class TokenTracker extends EventEmitter {
    }, {} as Record<string, number>);

    const prompt_tokens = categoryBreakdown.prompt || 0;
-    const completion_tokens = 
-      (categoryBreakdown.reasoning || 0) + 
-      (categoryBreakdown.accepted || 0) + 
+    const completion_tokens =
+      (categoryBreakdown.reasoning || 0) +
+      (categoryBreakdown.accepted || 0) +
      (categoryBreakdown.rejected || 0);

    return {
--- a/tsconfig.json
+++ b/tsconfig.json
@ -1,14 +1,18 @@
 {
  "compilerOptions": {
    "target": "ES2020",
-    "module": "commonjs",
+    "module": "node16",
    "outDir": "./dist",
    "rootDir": "./src",
+    "sourceMap": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true,
    "strict": true,
-    "resolveJsonModule": true,
-    "moduleResolution": "node"
-  }
+    "experimentalDecorators": true,
+    "emitDecoratorMetadata": true,
+    "resolveJsonModule": true
+  },
+  "include": ["src/**/*"],
+  "exclude": ["jina-ai/**/*", "**/__tests__/**/*"],
 }