jina-ai: billing for SaaS service (#55)

* wip: jina billing

* wip

* fix: build issues

* ci: cd gh action

* fix: make ci happy
Yanlong Wang 2025-02-11 18:27:15 +08:00 committed by GitHub
parent c4639a2e92
commit 8af35c6640
26 changed files with 6150 additions and 647 deletions

1
.dockerignore Normal file

@@ -0,0 +1 @@
node_modules

View File

@@ -16,5 +16,6 @@ module.exports = {
rules: {
'no-console': ['error', { allow: ['log', 'error'] }],
'@typescript-eslint/no-explicit-any': 'off'
}
},
ignorePatterns: ["jina-ai/**/*"]
};

66
.github/workflows/cd.yml vendored Normal file

@@ -0,0 +1,66 @@
run-name: Build push and deploy (CD)
on:
push:
branches:
- main
- ci-debug
tags:
- '*'
jobs:
build-and-push-to-gcr:
runs-on: ubuntu-latest
concurrency:
group: ${{ github.ref_type == 'branch' && github.ref }}
cancel-in-progress: true
permissions:
contents: read
steps:
- uses: actions/checkout@v4
with:
lfs: true
- uses: 'google-github-actions/auth@v2'
with:
credentials_json: '${{ secrets.GCLOUD_SERVICE_ACCOUNT_SECRET_JSON }}'
- name: 'Set up Cloud SDK'
uses: 'google-github-actions/setup-gcloud@v2'
- name: "Docker auth"
run: |-
gcloud auth configure-docker us-docker.pkg.dev --quiet
- name: Set controller release version
run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: 22.12.0
cache: npm
- name: npm install
run: npm ci
- name: build application
run: npm run build
- name: Set package version
run: npm version --no-git-tag-version ${{ env.RELEASE_VERSION }}
if: github.ref_type == 'tag'
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: |
us-docker.pkg.dev/research-df067/deepresearch/node-deep-research
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build and push
id: container
uses: docker/build-push-action@v6
with:
file: jina-ai/Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
- name: Deploy with Tag
run: |
gcloud run deploy node-deep-research --image us-docker.pkg.dev/research-df067/deepresearch/node-deep-research@${{steps.container.outputs.imageid}} --tag ${{ env.RELEASE_VERSION }} --region us-central1 --async

24
.vscode/launch.json vendored Normal file

@@ -0,0 +1,24 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Attach",
"port": 9229,
"request": "attach",
"skipFiles": [
"<node_internals>/**"
],
"type": "node"
},
{
"name": "Attach by Process ID",
"processId": "${command:PickProcess}",
"request": "attach",
"skipFiles": [
"<node_internals>/**"
],
"type": "node"
}
]
}

1
jina-ai/.dockerignore Normal file

@@ -0,0 +1 @@
node_modules

50
jina-ai/Dockerfile Normal file

@@ -0,0 +1,50 @@
# ---- BUILD STAGE ----
FROM node:20-slim AS builder
# Set working directory
WORKDIR /app
# Copy package.json and package-lock.json
COPY ./package*.json ./
COPY ./jina-ai/package*.json ./jina-ai/
# Install dependencies
RUN npm ci
WORKDIR /app/jina-ai
RUN npm ci
WORKDIR /app
# Copy application code
COPY ./src ./src
COPY ./config.json ./
COPY ./tsconfig.json ./tsconfig.json
RUN npm run build
COPY ./jina-ai/src ./jina-ai/src
COPY ./jina-ai/tsconfig.json ./jina-ai/tsconfig.json
WORKDIR /app/jina-ai
RUN npm run build
# ---- PRODUCTION STAGE ----
FROM node:20 AS production
# Set working directory
WORKDIR /app
COPY --from=builder /app ./
# Copy config.json and built files from builder
WORKDIR /app/jina-ai
# Set environment variables (recommended to supply these at runtime rather than hardcoding)
ENV GEMINI_API_KEY=${GEMINI_API_KEY}
ENV OPENAI_API_KEY=${OPENAI_API_KEY}
ENV JINA_API_KEY=${JINA_API_KEY}
ENV BRAVE_API_KEY=${BRAVE_API_KEY}
# Expose the port the app runs on
EXPOSE 3000
# Set startup command
CMD ["node", "./dist/server.js"]

3980
jina-ai/package-lock.json generated Normal file

File diff suppressed because it is too large.

38
jina-ai/package.json Normal file

@@ -0,0 +1,38 @@
{
"name": "@jina-ai/node-deepresearch",
"version": "1.0.0",
"main": "dist/app.js",
"files": [
"dist",
"README.md",
"LICENSE"
],
"scripts": {
"build": "tsc",
"dev": "npx ts-node src/agent.ts",
"search": "npx ts-node src/test-duck.ts",
"rewrite": "npx ts-node src/tools/query-rewriter.ts",
"lint": "eslint . --ext .ts",
"lint:fix": "eslint . --ext .ts --fix",
"serve": "ts-node src/server.ts",
"eval": "ts-node src/evals/batch-evals.ts",
"test": "jest --testTimeout=30000",
"test:watch": "jest --watch"
},
"keywords": [],
"author": "Jina AI",
"license": "Apache-2.0",
"description": "",
"dependencies": {
"@google-cloud/firestore": "^7.11.0",
"civkit": "^0.8.3-15926cb",
"dayjs": "^1.11.13",
"lodash": "^4.17.21",
"reflect-metadata": "^0.2.2",
"tsyringe": "^4.8.0"
},
"devDependencies": {
"@types/lodash": "^4.17.15",
"pino-pretty": "^13.0.0"
}
}

347
jina-ai/src/dto/jina-embeddings-auth.ts Normal file

@@ -0,0 +1,347 @@
import {
Also, AuthenticationFailedError, AuthenticationRequiredError,
DownstreamServiceFailureError, RPC_CALL_ENVIRONMENT,
ArrayOf, AutoCastable, Prop
} from 'civkit/civ-rpc';
import { parseJSONText } from 'civkit/vectorize';
import { htmlEscape } from 'civkit/escape';
import { marshalErrorLike } from 'civkit/lang';
import type express from 'express';
import logger from '../lib/logger';
import { AsyncLocalContext } from '../lib/async-context';
import { InjectProperty } from '../lib/registry';
import { JinaEmbeddingsDashboardHTTP } from '../lib/billing';
import envConfig from '../lib/env-config';
import { FirestoreRecord } from '../lib/firestore';
import _ from 'lodash';
import { RateLimitDesc } from '../rate-limit';
export class JinaWallet extends AutoCastable {
@Prop({
default: ''
})
user_id!: string;
@Prop({
default: 0
})
trial_balance!: number;
@Prop()
trial_start?: Date;
@Prop()
trial_end?: Date;
@Prop({
default: 0
})
regular_balance!: number;
@Prop({
default: 0
})
total_balance!: number;
}
export class JinaEmbeddingsTokenAccount extends FirestoreRecord {
static override collectionName = 'embeddingsTokenAccounts';
override _id!: string;
@Prop({
required: true
})
user_id!: string;
@Prop({
nullable: true,
type: String,
})
email?: string;
@Prop({
nullable: true,
type: String,
})
full_name?: string;
@Prop({
nullable: true,
type: String,
})
customer_id?: string;
@Prop({
nullable: true,
type: String,
})
avatar_url?: string;
// Not keeping sensitive info for now
// @Prop()
// billing_address?: object;
// @Prop()
// payment_method?: object;
@Prop({
required: true
})
wallet!: JinaWallet;
@Prop({
type: Object
})
metadata?: { [k: string]: any; };
@Prop({
defaultFactory: () => new Date()
})
lastSyncedAt!: Date;
@Prop({
dictOf: [ArrayOf(RateLimitDesc)]
})
customRateLimits?: { [k: string]: RateLimitDesc[]; };
static patchedFields = [
];
static override from(input: any) {
for (const field of this.patchedFields) {
if (typeof input[field] === 'string') {
input[field] = parseJSONText(input[field]);
}
}
return super.from(input) as JinaEmbeddingsTokenAccount;
}
override degradeForFireStore() {
const copy: any = {
...this,
wallet: { ...this.wallet },
// Firestore can't store class instances; convert nested values to plain objects
customRateLimits: _.mapValues(this.customRateLimits, (v) => v.map((x) => ({ ...x }))),
};
for (const field of (this.constructor as typeof JinaEmbeddingsTokenAccount).patchedFields) {
if (typeof copy[field] === 'object') {
copy[field] = JSON.stringify(copy[field]) as any;
}
}
return copy;
}
[k: string]: any;
}
const authDtoLogger = logger.child({ service: 'JinaAuthDTO' });
export interface FireBaseHTTPCtx {
req: express.Request,
res: express.Response,
}
const THE_VERY_SAME_JINA_EMBEDDINGS_CLIENT = new JinaEmbeddingsDashboardHTTP(envConfig.JINA_EMBEDDINGS_DASHBOARD_API_KEY);
@Also({
openapi: {
operation: {
parameters: {
'Authorization': {
description: htmlEscape`Jina Token for authentication.\n\n` +
htmlEscape`- Member of <JinaEmbeddingsAuthDTO>\n\n` +
`- Authorization: Bearer {YOUR_JINA_TOKEN}`
,
in: 'header',
schema: {
anyOf: [
{ type: 'string', format: 'token' }
]
}
}
}
}
}
})
export class JinaEmbeddingsAuthDTO extends AutoCastable {
uid?: string;
bearerToken?: string;
user?: JinaEmbeddingsTokenAccount;
@InjectProperty(AsyncLocalContext)
ctxMgr!: AsyncLocalContext;
jinaEmbeddingsDashboard = THE_VERY_SAME_JINA_EMBEDDINGS_CLIENT;
static override from(input: any) {
const instance = super.from(input) as JinaEmbeddingsAuthDTO;
const ctx = input[RPC_CALL_ENVIRONMENT];
const req = (ctx.rawRequest || ctx.req) as express.Request | undefined;
if (req) {
const authorization = req.get('authorization');
if (authorization) {
const authToken = authorization.split(' ')[1] || authorization;
instance.bearerToken = authToken;
}
}
if (!instance.bearerToken && input._token) {
instance.bearerToken = input._token;
}
return instance;
}
async getBrief(ignoreCache?: boolean | string) {
if (!this.bearerToken) {
throw new AuthenticationRequiredError({
message: 'Absence of bearer token'
});
}
let account;
try {
account = await JinaEmbeddingsTokenAccount.fromFirestore(this.bearerToken);
} catch (err) {
// Firestore does not accept arbitrary strings as document IDs and may throw
void 0;
}
const age = account?.lastSyncedAt ? Date.now() - account.lastSyncedAt.getTime() : Infinity;
if (account && !ignoreCache) {
if (account && age < 180_000) {
this.user = account;
this.uid = this.user?.user_id;
return account;
}
}
try {
const r = await this.jinaEmbeddingsDashboard.validateToken(this.bearerToken);
const brief = r.data;
const draftAccount = JinaEmbeddingsTokenAccount.from({
...account, ...brief, _id: this.bearerToken,
lastSyncedAt: new Date()
});
await JinaEmbeddingsTokenAccount.save(draftAccount.degradeForFireStore(), undefined, { merge: true });
this.user = draftAccount;
this.uid = this.user?.user_id;
return draftAccount;
} catch (err: any) {
authDtoLogger.warn(`Failed to get user brief: ${err}`, { err: marshalErrorLike(err) });
if (err?.status === 401) {
throw new AuthenticationFailedError({
message: 'Invalid bearer token'
});
}
if (account) {
this.user = account;
this.uid = this.user?.user_id;
return account;
}
throw new DownstreamServiceFailureError(`Failed to authenticate: ${err}`);
}
}
async reportUsage(tokenCount: number, mdl: string, endpoint: string = '/encode') {
const user = await this.assertUser();
const uid = user.user_id;
user.wallet.total_balance -= tokenCount;
return this.jinaEmbeddingsDashboard.reportUsage(this.bearerToken!, {
model_name: mdl,
api_endpoint: endpoint,
consumer: {
id: uid,
user_id: uid,
},
usage: {
total_tokens: tokenCount
},
labels: {
model_name: mdl
}
}).then((r) => {
JinaEmbeddingsTokenAccount.COLLECTION.doc(this.bearerToken!)
.update({ 'wallet.total_balance': JinaEmbeddingsTokenAccount.OPS.increment(-tokenCount) })
.catch((err) => {
authDtoLogger.warn(`Failed to update cache for ${uid}: ${err}`, { err: marshalErrorLike(err) });
});
return r;
}).catch((err) => {
user.wallet.total_balance += tokenCount;
authDtoLogger.warn(`Failed to report usage for ${uid}: ${err}`, { err: marshalErrorLike(err) });
});
}
async solveUID() {
if (this.uid) {
this.ctxMgr.set('uid', this.uid);
return this.uid;
}
if (this.bearerToken) {
await this.getBrief();
this.ctxMgr.set('uid', this.uid);
return this.uid;
}
return undefined;
}
async assertUID() {
const uid = await this.solveUID();
if (!uid) {
throw new AuthenticationRequiredError('Authentication failed');
}
return uid;
}
async assertUser() {
if (this.user) {
return this.user;
}
await this.getBrief();
return this.user!;
}
getRateLimits(...tags: string[]) {
const descs = tags.map((x) => this.user?.customRateLimits?.[x] || []).flat().filter((x) => x.isEffective());
if (descs.length) {
return descs;
}
return undefined;
}
}
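
For orientation, a minimal sketch of how this DTO is consumed by the billing middleware later in this commit (the import path and the charge value are illustrative):

import { RPC_CALL_ENVIRONMENT } from 'civkit/civ-rpc';
import type { Request, Response } from 'express';
import { JinaEmbeddingsAuthDTO } from './jina-embeddings-auth';

async function authenticateAndCharge(req: Request, res: Response, tokenCount: number) {
  // Build the DTO from the raw request; the bearer token is read from the Authorization header
  const authDto = JinaEmbeddingsAuthDTO.from({ [RPC_CALL_ENVIRONMENT]: { req, res } });
  // Resolves the account via the Firestore cache or the dashboard API; throws on missing/invalid tokens
  const user = await authDto.assertUser();
  // Reports consumption and optimistically decrements the cached wallet balance
  await authDto.reportUsage(tokenCount, 'reader-DEEPRESEARCH');
  return user.user_id;
}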

9
jina-ai/src/lib/async-context.ts Normal file

@@ -0,0 +1,9 @@
import { GlobalAsyncContext } from 'civkit/async-context';
import { container, singleton } from 'tsyringe';
@singleton()
export class AsyncLocalContext extends GlobalAsyncContext {}
const instance = container.resolve(AsyncLocalContext);
Reflect.set(process, 'asyncLocalContext', instance);
export default instance;

102
jina-ai/src/lib/billing.ts Normal file

@@ -0,0 +1,102 @@
import { HTTPService } from 'civkit';
import _ from 'lodash';
export interface JinaWallet {
trial_balance: number;
trial_start: Date;
trial_end: Date;
regular_balance: number;
total_balance: number;
}
export interface JinaUserBrief {
user_id: string;
email: string | null;
full_name: string | null;
customer_id: string | null;
avatar_url?: string;
billing_address: Partial<{
address: string;
city: string;
state: string;
country: string;
postal_code: string;
}>;
payment_method: Partial<{
brand: string;
last4: string;
exp_month: number;
exp_year: number;
}>;
wallet: JinaWallet;
metadata: {
[k: string]: any;
};
}
export interface JinaUsageReport {
model_name: string;
api_endpoint: string;
consumer: {
user_id: string;
customer_plan?: string;
[k: string]: any;
};
usage: {
total_tokens: number;
};
labels: {
user_type?: string;
model_name?: string;
[k: string]: any;
};
}
export class JinaEmbeddingsDashboardHTTP extends HTTPService {
name = 'JinaEmbeddingsDashboardHTTP';
constructor(
public apiKey: string,
public baseUri: string = 'https://embeddings-dashboard-api.jina.ai/api'
) {
super(baseUri);
this.baseOptions.timeout = 30_000; // 30 sec
}
async authorization(token: string) {
const r = await this.get<JinaUserBrief>('/v1/authorization', {
headers: {
Authorization: `Bearer ${token}`
},
responseType: 'json',
});
return r;
}
async validateToken(token: string) {
const r = await this.getWithSearchParams<JinaUserBrief>('/v1/api_key/user', {
api_key: token,
}, {
responseType: 'json',
});
return r;
}
async reportUsage(token: string, query: JinaUsageReport) {
const r = await this.postJson('/v1/usage', query, {
headers: {
Authorization: `Bearer ${token}`,
'x-api-key': this.apiKey,
},
responseType: 'text',
});
return r;
}
}
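
A rough usage sketch for this client, mirroring how the auth DTO drives it; the model name and endpoint below are placeholders, not values fixed by this commit:

import envConfig from './env-config';
import { JinaEmbeddingsDashboardHTTP } from './billing';

const dashboard = new JinaEmbeddingsDashboardHTTP(envConfig.JINA_EMBEDDINGS_DASHBOARD_API_KEY);

async function chargeTokens(bearerToken: string, totalTokens: number) {
  // Resolve the account behind the token
  const brief = await dashboard.validateToken(bearerToken);
  const uid = brief.data.user_id;
  // Report consumption; the dashboard decrements the wallet server-side
  await dashboard.reportUsage(bearerToken, {
    model_name: 'jina-deepsearch-v1',
    api_endpoint: '/v1/chat/completions',
    consumer: { id: uid, user_id: uid },
    usage: { total_tokens: totalTokens },
    labels: { model_name: 'jina-deepsearch-v1' },
  });
}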

59
jina-ai/src/lib/env-config.ts Normal file

@@ -0,0 +1,59 @@
import { container, singleton } from 'tsyringe';
export const SPECIAL_COMBINED_ENV_KEY = 'ENV_COMBINED';
const CONF_ENV = [
'OPENAI_API_KEY',
'ANTHROPIC_API_KEY',
'REPLICATE_API_KEY',
'GOOGLE_AI_STUDIO_API_KEY',
'JINA_EMBEDDINGS_API_KEY',
'JINA_EMBEDDINGS_DASHBOARD_API_KEY',
'BRAVE_SEARCH_API_KEY',
] as const;
@singleton()
export class EnvConfig {
dynamic!: Record<string, string>;
combined: Record<string, string> = {};
originalEnv: Record<string, string | undefined> = { ...process.env };
constructor() {
if (process.env[SPECIAL_COMBINED_ENV_KEY]) {
Object.assign(this.combined, JSON.parse(
Buffer.from(process.env[SPECIAL_COMBINED_ENV_KEY]!, 'base64').toString('utf-8')
));
delete process.env[SPECIAL_COMBINED_ENV_KEY];
}
// Static config
for (const x of CONF_ENV) {
const s = this.combined[x] || process.env[x] || '';
Reflect.set(this, x, s);
if (x in process.env) {
delete process.env[x];
}
}
// Dynamic config
// Proxy handler: resolve dynamic keys from the combined blob first, then the live environment
this.dynamic = new Proxy({}, {
get: (_target: any, prop: string) => {
return this.combined[prop] || process.env[prop] || '';
}
}) as any;
}
}
// eslint-disable-next-line @typescript-eslint/no-empty-interface
export interface EnvConfig extends Record<typeof CONF_ENV[number], string> { }
const instance = container.resolve(EnvConfig);
export default instance;
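
ENV_COMBINED implies the deploy side packs several secrets into a single base64-encoded JSON blob; a sketch of producing that value (key names are just examples taken from CONF_ENV):

// Hypothetical deploy-side helper matching what the EnvConfig constructor unpacks
const secrets = {
  OPENAI_API_KEY: process.env.OPENAI_API_KEY ?? '',
  JINA_EMBEDDINGS_DASHBOARD_API_KEY: process.env.JINA_EMBEDDINGS_DASHBOARD_API_KEY ?? '',
};
const combined = Buffer.from(JSON.stringify(secrets), 'utf-8').toString('base64');
console.log(`ENV_COMBINED=${combined}`);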

70
jina-ai/src/lib/errors.ts Normal file

@@ -0,0 +1,70 @@
import { ApplicationError, Prop, RPC_TRANSFER_PROTOCOL_META_SYMBOL, StatusCode } from 'civkit';
import _ from 'lodash';
import dayjs from 'dayjs';
import utc from 'dayjs/plugin/utc';
dayjs.extend(utc);
@StatusCode(50301)
export class ServiceDisabledError extends ApplicationError { }
@StatusCode(50302)
export class ServiceCrashedError extends ApplicationError { }
@StatusCode(50303)
export class ServiceNodeResourceDrainError extends ApplicationError { }
@StatusCode(40104)
export class EmailUnverifiedError extends ApplicationError { }
@StatusCode(40201)
export class InsufficientCreditsError extends ApplicationError { }
@StatusCode(40202)
export class FreeFeatureLimitError extends ApplicationError { }
@StatusCode(40203)
export class InsufficientBalanceError extends ApplicationError { }
@StatusCode(40903)
export class LockConflictError extends ApplicationError { }
@StatusCode(40904)
export class BudgetExceededError extends ApplicationError { }
@StatusCode(45101)
export class HarmfulContentError extends ApplicationError { }
@StatusCode(45102)
export class SecurityCompromiseError extends ApplicationError { }
@StatusCode(41201)
export class BatchSizeTooLargeError extends ApplicationError { }
@StatusCode(42903)
export class RateLimitTriggeredError extends ApplicationError {
@Prop({
desc: 'Retry after seconds',
})
retryAfter?: number;
@Prop({
desc: 'Retry after date',
})
retryAfterDate?: Date;
protected override get [RPC_TRANSFER_PROTOCOL_META_SYMBOL]() {
const retryAfter = this.retryAfter || this.retryAfterDate;
if (!retryAfter) {
return super[RPC_TRANSFER_PROTOCOL_META_SYMBOL];
}
return _.merge(_.cloneDeep(super[RPC_TRANSFER_PROTOCOL_META_SYMBOL]), {
headers: {
'Retry-After': `${retryAfter instanceof Date ? dayjs(retryAfter).utc().format('ddd, DD MMM YYYY HH:mm:ss [GMT]') : retryAfter}`,
}
});
}
}

223
jina-ai/src/lib/firestore.ts Normal file

@@ -0,0 +1,223 @@
import _ from 'lodash';
import { AutoCastable, Prop, RPC_MARSHAL } from 'civkit/civ-rpc';
import {
Firestore, FieldValue, DocumentReference,
Query, Timestamp, SetOptions, DocumentSnapshot,
} from '@google-cloud/firestore';
// Firestore doesn't support JavaScript objects with custom prototypes (i.e. objects that were created via the "new" operator)
function patchFireStoreArrogance(func: Function) {
return function (this: unknown) {
const origObjectGetPrototype = Object.getPrototypeOf;
Object.getPrototypeOf = function (x) {
const r = origObjectGetPrototype.call(this, x);
if (!r) {
return r;
}
return Object.prototype;
};
try {
return func.call(this, ...arguments);
} finally {
Object.getPrototypeOf = origObjectGetPrototype;
}
};
}
Reflect.set(DocumentReference.prototype, 'set', patchFireStoreArrogance(Reflect.get(DocumentReference.prototype, 'set')));
Reflect.set(DocumentSnapshot, 'fromObject', patchFireStoreArrogance(Reflect.get(DocumentSnapshot, 'fromObject')));
function mapValuesDeep(v: any, fn: (i: any) => any): any {
if (_.isPlainObject(v)) {
return _.mapValues(v, (i) => mapValuesDeep(i, fn));
} else if (_.isArray(v)) {
return v.map((i) => mapValuesDeep(i, fn));
} else {
return fn(v);
}
}
export type Constructor<T> = { new(...args: any[]): T; };
export type Constructed<T> = T extends Partial<infer U> ? U : T extends object ? T : object;
export function fromFirestore<T extends FirestoreRecord>(
this: Constructor<T>, id: string, overrideCollection?: string
): Promise<T | undefined>;
export async function fromFirestore(
this: any, id: string, overrideCollection?: string
) {
const collection = overrideCollection || this.collectionName;
if (!collection) {
throw new Error(`Missing collection name to construct ${this.name}`);
}
const ref = this.DB.collection(overrideCollection || this.collectionName).doc(id);
const ptr = await ref.get();
if (!ptr.exists) {
return undefined;
}
const doc = this.from(
// Fixes non-native firebase types
mapValuesDeep(ptr.data(), (i: any) => {
if (i instanceof Timestamp) {
return i.toDate();
}
return i;
})
);
Object.defineProperty(doc, '_ref', { value: ref, enumerable: false });
Object.defineProperty(doc, '_id', { value: ptr.id, enumerable: true });
return doc;
}
export function fromFirestoreQuery<T extends FirestoreRecord>(
this: Constructor<T>, query: Query
): Promise<T[]>;
export async function fromFirestoreQuery(this: any, query: Query) {
const ptr = await query.get();
if (ptr.docs.length) {
return ptr.docs.map(doc => {
const r = this.from(
mapValuesDeep(doc.data(), (i: any) => {
if (i instanceof Timestamp) {
return i.toDate();
}
return i;
})
);
Object.defineProperty(r, '_ref', { value: doc.ref, enumerable: false });
Object.defineProperty(r, '_id', { value: doc.id, enumerable: true });
return r;
});
}
return [];
}
export function setToFirestore<T extends FirestoreRecord>(
this: Constructor<T>, doc: T, overrideCollection?: string, setOptions?: SetOptions
): Promise<T>;
export async function setToFirestore(
this: any, doc: any, overrideCollection?: string, setOptions?: SetOptions
) {
let ref: DocumentReference<any> = doc._ref;
if (!ref) {
const collection = overrideCollection || this.collectionName;
if (!collection) {
throw new Error(`Missing collection name to construct ${this.name}`);
}
const predefinedId = doc._id || undefined;
const hdl = this.DB.collection(overrideCollection || this.collectionName);
ref = predefinedId ? hdl.doc(predefinedId) : hdl.doc();
Object.defineProperty(doc, '_ref', { value: ref, enumerable: false });
Object.defineProperty(doc, '_id', { value: ref.id, enumerable: true });
}
await ref.set(doc, { merge: true, ...setOptions });
return doc;
}
export function deleteQueryBatch<T extends FirestoreRecord>(
this: Constructor<T>, query: Query
): Promise<T>;
export async function deleteQueryBatch(this: any, query: Query) {
const snapshot = await query.get();
const batchSize = snapshot.size;
if (batchSize === 0) {
return;
}
// Delete documents in a batch
const batch = this.DB.batch();
snapshot.docs.forEach((doc) => {
batch.delete(doc.ref);
});
await batch.commit();
process.nextTick(() => {
this.deleteQueryBatch(query);
});
};
export function fromFirestoreDoc<T extends FirestoreRecord>(
this: Constructor<T>, snapshot: DocumentSnapshot,
): T | undefined;
export function fromFirestoreDoc(
this: any, snapshot: DocumentSnapshot,
) {
const doc = this.from(
// Fixes non-native firebase types
mapValuesDeep(snapshot.data(), (i: any) => {
if (i instanceof Timestamp) {
return i.toDate();
}
return i;
})
);
Object.defineProperty(doc, '_ref', { value: snapshot.ref, enumerable: false });
Object.defineProperty(doc, '_id', { value: snapshot.id, enumerable: true });
return doc;
}
const defaultFireStore = new Firestore({
projectId: process.env.GCLOUD_PROJECT,
});
export class FirestoreRecord extends AutoCastable {
static collectionName?: string;
static OPS = FieldValue;
static DB = defaultFireStore;
static get COLLECTION() {
if (!this.collectionName) {
throw new Error('Not implemented');
}
return this.DB.collection(this.collectionName);
}
@Prop()
_id?: string;
_ref?: DocumentReference<Partial<Omit<this, '_ref' | '_id'>>>;
static fromFirestore = fromFirestore;
static fromFirestoreDoc = fromFirestoreDoc;
static fromFirestoreQuery = fromFirestoreQuery;
static save = setToFirestore;
static deleteQueryBatch = deleteQueryBatch;
[RPC_MARSHAL]() {
return {
...this,
_id: this._id,
_ref: this._ref?.path
};
}
degradeForFireStore(): this {
return JSON.parse(JSON.stringify(this, function (k, v) {
if (k === '') {
return v;
}
if (typeof v === 'object' && v && (typeof v.degradeForFireStore === 'function')) {
return v.degradeForFireStore();
}
return v;
}));
}
}
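
To illustrate the base class, a minimal record in the same style as JinaEmbeddingsTokenAccount and APICall elsewhere in this commit (the collection and field names here are made up):

import { Prop } from 'civkit/civ-rpc';
import { FirestoreRecord } from './firestore';

class ExampleRecord extends FirestoreRecord {
  static override collectionName = 'examples';

  @Prop({ required: true })
  name!: string;
}

async function demo() {
  // Load by document id; resolves to undefined when the document does not exist
  const existing = await ExampleRecord.fromFirestore('some-id');
  // Upsert with merge semantics, flattening nested instances for Firestore first
  const draft = ExampleRecord.from({ _id: 'some-id', name: 'hello' });
  await ExampleRecord.save(draft.degradeForFireStore(), undefined, { merge: true });
  return existing;
}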

56
jina-ai/src/lib/logger.ts Normal file

@@ -0,0 +1,56 @@
import { AbstractPinoLogger } from 'civkit/pino-logger';
import { singleton, container } from 'tsyringe';
import { threadId } from 'node:worker_threads';
import { getTraceCtx } from 'civkit/async-context';
const levelToSeverityMap: { [k: string]: string | undefined; } = {
trace: 'DEFAULT',
debug: 'DEBUG',
info: 'INFO',
warn: 'WARNING',
error: 'ERROR',
fatal: 'CRITICAL',
};
@singleton()
export class GlobalLogger extends AbstractPinoLogger {
loggerOptions = {
level: 'debug',
base: {
tid: threadId,
}
};
override init(): void {
if (process.env['NODE_ENV']?.startsWith('prod')) {
super.init(process.stdout);
} else {
const PinoPretty = require('pino-pretty').PinoPretty;
super.init(PinoPretty({
singleLine: true,
colorize: true,
messageFormat(log: any, messageKey: any) {
return `${log['tid'] ? `[${log['tid']}]` : ''}[${log['service'] || 'ROOT'}] ${log[messageKey]}`;
},
}));
}
this.emit('ready');
}
override log(...args: any[]) {
const [levelObj, ...rest] = args;
const severity = levelToSeverityMap[levelObj?.level];
const traceCtx = getTraceCtx();
const patched: any= { ...levelObj, severity };
if (traceCtx?.traceId && process.env['GCLOUD_PROJECT']) {
patched['logging.googleapis.com/trace'] = `projects/${process.env['GCLOUD_PROJECT']}/traces/${traceCtx.traceId}`;
}
return super.log(patched, ...rest);
}
}
const instance = container.resolve(GlobalLogger);
export default instance;

4
jina-ai/src/lib/registry.ts Normal file

@@ -0,0 +1,4 @@
import { container } from 'tsyringe';
import { propertyInjectorFactory } from 'civkit/property-injector';
export const InjectProperty = propertyInjectorFactory(container);

93
jina-ai/src/patch-express.ts Normal file

@@ -0,0 +1,93 @@
import { ApplicationError, RPC_CALL_ENVIRONMENT } from "civkit/civ-rpc";
import { marshalErrorLike } from "civkit/lang";
import { randomUUID } from "crypto";
import { once } from "events";
import type { NextFunction, Request, Response } from "express";
import { JinaEmbeddingsAuthDTO } from "./dto/jina-embeddings-auth";
import rateLimitControl, { API_CALL_STATUS, RateLimitDesc } from "./rate-limit";
import asyncLocalContext from "./lib/async-context";
import globalLogger from "./lib/logger";
globalLogger.serviceReady();
const logger = globalLogger.child({ service: 'BillingMiddleware' });
const appName = 'DEEPRESEARCH';
export const jinaAiBillingMiddleware = (req: Request, res: Response, next: NextFunction) => {
if (req.path === '/ping') {
res.status(200).end('pone');
return;
}
if (req.method !== 'POST' && req.method !== 'GET') {
next();
return;
}
asyncLocalContext.run(async () => {
const googleTraceId = req.get('x-cloud-trace-context')?.split('/')?.[0];
const ctx = asyncLocalContext.ctx;
ctx.traceId = req.get('x-request-id') || req.get('request-id') || googleTraceId || randomUUID();
ctx.traceT0 = new Date();
ctx.ip = req?.ip;
try {
const authDto = JinaEmbeddingsAuthDTO.from({
[RPC_CALL_ENVIRONMENT]: { req, res }
});
const user = await authDto.assertUser();
await rateLimitControl.serviceReady();
const rateLimitPolicy = authDto.getRateLimits(appName) || [
parseInt(user.metadata?.speed_level) >= 2 ?
RateLimitDesc.from({
occurrence: 30,
periodSeconds: 60
}) :
RateLimitDesc.from({
occurrence: 10,
periodSeconds: 60
})
];
const criterions = rateLimitPolicy.map((c) => rateLimitControl.rateLimitDescToCriterion(c));
await Promise.all(criterions.map(([pointInTime, n]) => rateLimitControl.assertUidPeriodicLimit(user._id, pointInTime, n, appName)));
const apiRoll = rateLimitControl.record({ uid: user._id, tags: [appName] })
apiRoll.save().catch((err) => logger.warn(`Failed to save rate limit record`, { err: marshalErrorLike(err) }));
const pResClose = once(res, 'close');
next();
await pResClose;
const chargeAmount = ctx.chargeAmount;
if (chargeAmount) {
authDto.reportUsage(chargeAmount, `reader-${appName}`).catch((err) => {
logger.warn(`Unable to report usage for ${user._id}`, { err: marshalErrorLike(err) });
});
apiRoll.chargeAmount = chargeAmount;
}
apiRoll.status = res.statusCode === 200 ? API_CALL_STATUS.SUCCESS : API_CALL_STATUS.ERROR;
apiRoll.save().catch((err) => logger.warn(`Failed to save rate limit record`, { err: marshalErrorLike(err) }));
logger.info(`HTTP ${res.statusCode} for request ${ctx.traceId} after ${Date.now() - ctx.traceT0.valueOf()}ms`, {
uid: user._id,
chargeAmount,
});
} catch (err: any) {
if (!res.headersSent) {
if (err instanceof ApplicationError) {
res.status(parseInt(err.code as string) || 500).json({ error: err.message });
return;
}
res.status(500).json({ error: 'Internal' });
}
logger.error(`Error in billing middleware`, { err: marshalErrorLike(err) });
if (err.stack) {
logger.error(err.stack);
}
}
});
}
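
The middleware only reads ctx.chargeAmount after the response closes; how that value gets set is left to downstream handlers. A sketch of the assumed convention:

import asyncLocalContext from './lib/async-context';

// Assumption: a downstream handler records its token cost on the shared
// async-local context so the middleware above can bill it via reportUsage().
export function recordCharge(totalTokens: number) {
  const ctx = asyncLocalContext.ctx;
  if (ctx) {
    ctx.chargeAmount = (ctx.chargeAmount ?? 0) + totalTokens;
  }
}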

278
jina-ai/src/rate-limit.ts Normal file

@@ -0,0 +1,278 @@
import { AutoCastable, ResourcePolicyDenyError, Also, Prop } from 'civkit/civ-rpc';
import { AsyncService } from 'civkit/async-service';
import { getTraceId } from 'civkit/async-context';
import { singleton, container } from 'tsyringe';
import { RateLimitTriggeredError } from './lib/errors';
import { FirestoreRecord } from './lib/firestore';
import { GlobalLogger } from './lib/logger';
export enum API_CALL_STATUS {
SUCCESS = 'success',
ERROR = 'error',
PENDING = 'pending',
}
@Also({ dictOf: Object })
export class APICall extends FirestoreRecord {
static override collectionName = 'apiRoll';
@Prop({
required: true,
defaultFactory: () => getTraceId()
})
traceId!: string;
@Prop()
uid?: string;
@Prop()
ip?: string;
@Prop({
arrayOf: String,
default: [],
})
tags!: string[];
@Prop({
required: true,
defaultFactory: () => new Date(),
})
createdAt!: Date;
@Prop()
completedAt?: Date;
@Prop({
required: true,
default: API_CALL_STATUS.PENDING,
})
status!: API_CALL_STATUS;
@Prop({
required: true,
defaultFactory: () => new Date(Date.now() + 1000 * 60 * 60 * 24 * 90),
})
expireAt!: Date;
[k: string]: any;
tag(...tags: string[]) {
for (const t of tags) {
if (!this.tags.includes(t)) {
this.tags.push(t);
}
}
}
save() {
return (this.constructor as typeof APICall).save(this);
}
}
export class RateLimitDesc extends AutoCastable {
@Prop({
default: 1000
})
occurrence!: number;
@Prop({
default: 3600
})
periodSeconds!: number;
@Prop()
notBefore?: Date;
@Prop()
notAfter?: Date;
isEffective() {
const now = new Date();
if (this.notBefore && this.notBefore > now) {
return false;
}
if (this.notAfter && this.notAfter < now) {
return false;
}
return true;
}
}
@singleton()
export class RateLimitControl extends AsyncService {
logger = this.globalLogger.child({ service: this.constructor.name });
constructor(
protected globalLogger: GlobalLogger,
) {
super(...arguments);
}
override async init() {
await this.dependencyReady();
this.emit('ready');
}
async queryByUid(uid: string, pointInTime: Date, ...tags: string[]) {
let q = APICall.COLLECTION
.orderBy('createdAt', 'asc')
.where('createdAt', '>=', pointInTime)
.where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
.where('uid', '==', uid);
if (tags.length) {
q = q.where('tags', 'array-contains-any', tags);
}
return APICall.fromFirestoreQuery(q);
}
async queryByIp(ip: string, pointInTime: Date, ...tags: string[]) {
let q = APICall.COLLECTION
.orderBy('createdAt', 'asc')
.where('createdAt', '>=', pointInTime)
.where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
.where('ip', '==', ip);
if (tags.length) {
q = q.where('tags', 'array-contains-any', tags);
}
return APICall.fromFirestoreQuery(q);
}
async assertUidPeriodicLimit(uid: string, pointInTime: Date, limit: number, ...tags: string[]) {
if (limit <= 0) {
throw new ResourcePolicyDenyError(`This UID(${uid}) is not allowed to call this endpoint (rate limit quota is 0).`);
}
let q = APICall.COLLECTION
.orderBy('createdAt', 'asc')
.where('createdAt', '>=', pointInTime)
.where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
.where('uid', '==', uid);
if (tags.length) {
q = q.where('tags', 'array-contains-any', tags);
}
const count = (await q.count().get()).data().count;
if (count >= limit) {
const r = await APICall.fromFirestoreQuery(q.limit(1));
const [r1] = r;
const dtMs = Math.abs(r1.createdAt?.valueOf() - pointInTime.valueOf());
const dtSec = Math.ceil(dtMs / 1000);
throw RateLimitTriggeredError.from({
message: `Per UID rate limit exceeded (${tags.join(',') || 'called'} ${limit} times since ${pointInTime})`,
retryAfter: dtSec,
});
}
return count + 1;
}
async assertIPPeriodicLimit(ip: string, pointInTime: Date, limit: number, ...tags: string[]) {
let q = APICall.COLLECTION
.orderBy('createdAt', 'asc')
.where('createdAt', '>=', pointInTime)
.where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
.where('ip', '==', ip);
if (tags.length) {
q = q.where('tags', 'array-contains-any', tags);
}
const count = (await q.count().get()).data().count;
if (count >= limit) {
const r = await APICall.fromFirestoreQuery(q.limit(1));
const [r1] = r;
const dtMs = Math.abs(r1.createdAt?.valueOf() - pointInTime.valueOf());
const dtSec = Math.ceil(dtMs / 1000);
throw RateLimitTriggeredError.from({
message: `Per IP rate limit exceeded (${tags.join(',') || 'called'} ${limit} times since ${pointInTime})`,
retryAfter: dtSec,
});
}
return count + 1;
}
record(partialRecord: Partial<APICall>) {
const record = APICall.from(partialRecord);
const newId = APICall.COLLECTION.doc().id;
record._id = newId;
return record;
}
// async simpleRPCUidBasedLimit(rpcReflect: RPCReflection, uid: string, tags: string[] = [],
// ...inputCriterion: RateLimitDesc[] | [Date, number][]) {
// const criterion = inputCriterion.map((c) => { return Array.isArray(c) ? c : this.rateLimitDescToCriterion(c); });
// await Promise.all(criterion.map(([pointInTime, n]) =>
// this.assertUidPeriodicLimit(uid, pointInTime, n, ...tags)));
// const r = this.record({
// uid,
// tags,
// });
// r.save().catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
// rpcReflect.then(() => {
// r.status = API_CALL_STATUS.SUCCESS;
// r.save()
// .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
// });
// rpcReflect.catch((err) => {
// r.status = API_CALL_STATUS.ERROR;
// r.error = err.toString();
// r.save()
// .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
// });
// return r;
// }
rateLimitDescToCriterion(rateLimitDesc: RateLimitDesc) {
return [new Date(Date.now() - rateLimitDesc.periodSeconds * 1000), rateLimitDesc.occurrence] as [Date, number];
}
// async simpleRpcIPBasedLimit(rpcReflect: RPCReflection, ip: string, tags: string[] = [],
// ...inputCriterion: RateLimitDesc[] | [Date, number][]) {
// const criterion = inputCriterion.map((c) => { return Array.isArray(c) ? c : this.rateLimitDescToCriterion(c); });
// await Promise.all(criterion.map(([pointInTime, n]) =>
// this.assertIPPeriodicLimit(ip, pointInTime, n, ...tags)));
// const r = this.record({
// ip,
// tags,
// });
// r.save().catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
// rpcReflect.then(() => {
// r.status = API_CALL_STATUS.SUCCESS;
// r.save()
// .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
// });
// rpcReflect.catch((err) => {
// r.status = API_CALL_STATUS.ERROR;
// r.error = err.toString();
// r.save()
// .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
// });
// return r;
// }
}
const instance = container.resolve(RateLimitControl);
export default instance;
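
Usage mirrors the billing middleware earlier in this commit; a condensed sketch (the tag and quota values are examples):

import rateLimitControl, { RateLimitDesc } from './rate-limit';

async function enforceLimit(uid: string) {
  await rateLimitControl.serviceReady();
  // e.g. at most 10 calls in a rolling 60-second window
  const desc = RateLimitDesc.from({ occurrence: 10, periodSeconds: 60 });
  const [pointInTime, n] = rateLimitControl.rateLimitDescToCriterion(desc);
  await rateLimitControl.assertUidPeriodicLimit(uid, pointInTime, n, 'DEEPRESEARCH');
  // Record this call so later windows can count it
  const call = rateLimitControl.record({ uid, tags: ['DEEPRESEARCH'] });
  await call.save();
}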

56
jina-ai/src/server.ts Normal file

@@ -0,0 +1,56 @@
import 'reflect-metadata'
import express from 'express';
import { jinaAiBillingMiddleware } from "./patch-express";
import { Server } from 'http';
const app = require('../..').default;
const rootApp = express();
rootApp.use(jinaAiBillingMiddleware, app);
const port = process.env.PORT || 3000;
let server: Server | undefined;
// Export server startup function for better testing
export function startServer() {
return rootApp.listen(port, () => {
console.log(`Server running at http://localhost:${port}`);
});
}
// Start server if running directly
if (process.env.NODE_ENV !== 'test') {
server = startServer();
}
process.on('unhandledRejection', (_err) => `Is false alarm`);
process.on('uncaughtException', (err) => {
console.log('Uncaught exception', err);
// Looks like Firebase runtime does not handle error properly.
// Make sure to quit the process.
process.nextTick(() => process.exit(1));
console.error('Uncaught exception, process quit.');
throw err;
});
const sigHandler = (signal: string) => {
console.log(`Received ${signal}, exiting...`);
if (server && server.listening) {
console.log(`Shutting down gracefully...`);
console.log(`Waiting for the server to drain and close...`);
server.close((err) => {
if (err) {
console.error('Error while closing server', err);
return;
}
process.exit(0);
});
server.closeIdleConnections();
}
}
process.on('SIGTERM', sigHandler);
process.on('SIGINT', sigHandler);

17
jina-ai/tsconfig.json Normal file

@@ -0,0 +1,17 @@
{
"compilerOptions": {
"target": "ES2020",
"module": "node16",
"outDir": "./dist",
"rootDir": "./src",
"sourceMap": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"strict": true,
"experimentalDecorators": true,
"emitDecoratorMetadata": true,
"resolveJsonModule": true
}
}

package.json

@@ -1,15 +1,13 @@
{
"name": "node-deepresearch",
"version": "1.0.0",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"main": "dist/app.js",
"files": [
"dist",
"README.md",
"LICENSE"
],
"scripts": {
"prepare": "npm run build",
"build": "tsc",
"dev": "npx ts-node src/agent.ts",
"search": "npx ts-node src/test-duck.ts",
@@ -17,6 +15,7 @@
"lint": "eslint . --ext .ts",
"lint:fix": "eslint . --ext .ts --fix",
"serve": "ts-node src/server.ts",
"start": "ts-node src/server.ts",
"eval": "ts-node src/evals/batch-evals.ts",
"test": "jest --testTimeout=30000",
"test:watch": "jest --watch"

View File

@@ -22,7 +22,7 @@ describe('/v1/chat/completions', () => {
process.argv.push(`--secret=${TEST_SECRET}`);
// Import server module (jest.resetModules() is called automatically before each test)
const { default: serverModule } = await import('../server');
const { default: serverModule } = await require('../app');
app = serverModule;
});
@@ -67,7 +67,7 @@
jest.resetModules();
// Reload server module without secret
const { default: serverModule } = await import('../server');
const { default: serverModule } = await require('../app');
app = serverModule;
const response = await request(app)

647
src/app.ts Normal file

@@ -0,0 +1,647 @@
import express, {Request, Response, RequestHandler} from 'express';
import cors from 'cors';
import {EventEmitter} from 'events';
import {getResponse} from './agent';
import {
StepAction,
StreamMessage,
TrackerContext,
ChatCompletionRequest,
ChatCompletionResponse,
ChatCompletionChunk,
AnswerAction,
TOKEN_CATEGORIES,
Model
} from './types';
import fs from 'fs/promises';
import path from 'path';
import {TokenTracker} from "./utils/token-tracker";
import {ActionTracker} from "./utils/action-tracker";
const app = express();
// Get secret from command line args for optional authentication
const secret = process.argv.find(arg => arg.startsWith('--secret='))?.split('=')[1];
app.use(cors());
app.use(express.json());
const eventEmitter = new EventEmitter();
interface QueryRequest extends Request {
body: {
q: string;
budget?: number;
maxBadAttempt?: number;
};
}
function buildMdFromAnswer(answer: AnswerAction) {
let refStr = '';
if (answer.references?.length > 0) {
refStr = `
## References
${answer.references.map((ref, i) => `
${i + 1}. [${ref.exactQuote}](${ref.url})`).join('')}`;
}
return `${answer.answer.replace(/\(REF_(\d+)\)/g, (_, num) => `[^${num}]`)}${refStr}`;
}
// Modified streamTextWordByWord function
async function* streamTextWordByWord(text: string, streamingState: StreamingState) {
const words = text.split(/(\s+)/);
for (const word of words) {
if (streamingState.currentlyStreaming) {
const delay = Math.floor(Math.random() * 100);
await new Promise(resolve => setTimeout(resolve, delay));
yield word;
} else {
// If streaming was interrupted, yield all remaining words at once
const remainingWords = words.slice(words.indexOf(word)).join('');
yield remainingWords;
return;
}
}
}
// Helper function to emit remaining content immediately
async function emitRemainingContent(
res: Response,
requestId: string,
model: string,
content: string
) {
if (!content) return;
const chunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',
created: Math.floor(Date.now() / 1000),
model: model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content},
logprobs: null,
finish_reason: null
}]
};
res.write(`data: ${JSON.stringify(chunk)}\n\n`);
}
interface StreamingState {
currentlyStreaming: boolean;
currentGenerator: AsyncGenerator<string> | null;
remainingContent: string;
}
async function completeCurrentStreaming(
streamingState: StreamingState,
res: Response,
requestId: string,
model: string
) {
if (streamingState.currentlyStreaming && streamingState.remainingContent) {
// Force completion of current streaming
await emitRemainingContent(
res,
requestId,
model,
streamingState.remainingContent
);
// Reset streaming state
streamingState.currentlyStreaming = false;
streamingState.remainingContent = '';
streamingState.currentGenerator = null;
}
}
// OpenAI-compatible chat completions endpoint
// Models API endpoints
app.get('/v1/models', (async (_req: Request, res: Response) => {
const models: Model[] = [{
id: 'jina-deepsearch-v1',
object: 'model',
created: 1686935002,
owned_by: 'jina-ai'
}];
res.json({
object: 'list',
data: models
});
}) as RequestHandler);
app.get('/v1/models/:model', (async (req: Request, res: Response) => {
const modelId = req.params.model;
if (modelId === 'jina-deepsearch-v1') {
res.json({
id: 'jina-deepsearch-v1',
object: 'model',
created: 1686935002,
owned_by: 'jina-ai'
});
} else {
res.status(404).json({
error: {
message: `Model '${modelId}' not found`,
type: 'invalid_request_error',
param: null,
code: 'model_not_found'
}
});
}
}) as RequestHandler);
if (secret) {
// Check authentication only if secret is set
app.use((req, res, next) => {
const authHeader = req.headers.authorization;
if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
console.log('[chat/completions] Unauthorized request');
res.status(401).json({ error: 'Unauthorized' });
return;
}
return next();
});
}
app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
// Check authentication only if secret is set
if (secret) {
const authHeader = req.headers.authorization;
if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
console.log('[chat/completions] Unauthorized request');
res.status(401).json({error: 'Unauthorized'});
return;
}
}
// Log request details (excluding sensitive data)
console.log('[chat/completions] Request:', {
model: req.body.model,
stream: req.body.stream,
messageCount: req.body.messages?.length,
hasAuth: !!req.headers.authorization,
requestId: Date.now().toString()
});
const body = req.body as ChatCompletionRequest;
if (!body.messages?.length) {
return res.status(400).json({error: 'Messages array is required and must not be empty'});
}
const lastMessage = body.messages[body.messages.length - 1];
if (lastMessage.role !== 'user') {
return res.status(400).json({error: 'Last message must be from user'});
}
const requestId = Date.now().toString();
const context: TrackerContext = {
tokenTracker: new TokenTracker(),
actionTracker: new ActionTracker()
};
// Track prompt tokens for the initial message
// Use Vercel's token counting convention - 1 token per message
const messageTokens = body.messages.length;
context.tokenTracker.trackUsage('agent', messageTokens, TOKEN_CATEGORIES.PROMPT);
// Add this inside the chat completions endpoint, before setting up the action listener
const streamingState: StreamingState = {
currentlyStreaming: false,
currentGenerator: null,
remainingContent: ''
};
if (body.stream) {
res.setHeader('Content-Type', 'text/event-stream');
res.setHeader('Cache-Control', 'no-cache');
res.setHeader('Connection', 'keep-alive');
// Send initial chunk with opening think tag
const initialChunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {role: 'assistant', content: '<think>'},
logprobs: null,
finish_reason: null
}]
};
res.write(`data: ${JSON.stringify(initialChunk)}\n\n`);
// Set up progress listener with cleanup
const actionListener = async (action: any) => {
if (action.thisStep.think) {
// Complete any ongoing streaming first
await completeCurrentStreaming(streamingState, res, requestId, body.model);
// Start new streaming session
streamingState.currentlyStreaming = true;
streamingState.remainingContent = action.thisStep.think;
try {
for await (const word of streamTextWordByWord(action.thisStep.think, streamingState)) {
if (!streamingState.currentlyStreaming) {
break;
}
// Update remaining content
streamingState.remainingContent = streamingState.remainingContent.slice(word.length);
const chunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content: word},
logprobs: null,
finish_reason: null
}]
};
res.write(`data: ${JSON.stringify(chunk)}\n\n`);
}
// Only add newline if this streaming completed normally
if (streamingState.currentlyStreaming) {
const newlineChunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content: '\n'},
logprobs: null,
finish_reason: null
}]
};
res.write(`data: ${JSON.stringify(newlineChunk)}\n\n`);
}
} catch (error) {
console.error('Error in streaming:', error);
await completeCurrentStreaming(streamingState, res, requestId, body.model);
}
}
};
context.actionTracker.on('action', actionListener);
// Make sure to update the cleanup code
res.on('finish', () => {
streamingState.currentlyStreaming = false;
streamingState.currentGenerator = null;
streamingState.remainingContent = '';
context.actionTracker.removeListener('action', actionListener);
});
}
try {
// Track initial query tokens - already tracked above
// const queryTokens = Buffer.byteLength(lastMessage.content, 'utf-8');
// context.tokenTracker.trackUsage('agent', queryTokens, 'prompt');
let result;
try {
({result} = await getResponse(lastMessage.content, undefined, undefined, context));
} catch (error: any) {
// If deduplication fails, retry without it
if (error?.response?.status === 402) {
// If deduplication fails, retry with maxBadAttempt=3 to skip dedup
({result} = await getResponse(lastMessage.content, undefined, 3, context));
} else {
throw error;
}
}
// Track tokens based on action type
if (result.action === 'answer') {
// Track accepted prediction tokens for the final answer using Vercel's convention
const answerTokens = 1; // Default to 1 token per answer
context.tokenTracker.trackUsage('evaluator', answerTokens, TOKEN_CATEGORIES.ACCEPTED);
} else {
// Track rejected prediction tokens for non-answer responses
const rejectedTokens = 1; // Default to 1 token per rejected response
context.tokenTracker.trackUsage('evaluator', rejectedTokens, TOKEN_CATEGORIES.REJECTED);
}
if (body.stream) {
// Complete any ongoing streaming before sending final answer
await completeCurrentStreaming(streamingState, res, requestId, body.model);
// Send closing think tag
const closeThinkChunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content: `</think>\n\n`},
logprobs: null,
finish_reason: null
}]
};
res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);
// Send final answer as separate chunk
const answerChunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content: result.action === 'answer' ? buildMdFromAnswer(result) : result.think},
logprobs: null,
finish_reason: 'stop'
}]
};
res.write(`data: ${JSON.stringify(answerChunk)}\n\n`);
res.end();
} else {
const usage = context.tokenTracker.getUsageDetails();
const response: ChatCompletionResponse = {
id: requestId,
object: 'chat.completion',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
message: {
role: 'assistant',
content: result.action === 'answer' ? buildMdFromAnswer(result) : result.think
},
logprobs: null,
finish_reason: 'stop'
}],
usage
};
// Log final response (excluding full content for brevity)
console.log('[chat/completions] Response:', {
id: response.id,
status: 200,
contentLength: response.choices[0].message.content.length,
usage: response.usage
});
res.json(response);
}
} catch (error: any) {
// Log error details
console.error('[chat/completions] Error:', {
message: error?.message || 'An error occurred',
stack: error?.stack,
type: error?.constructor?.name,
requestId
});
// Track error as rejected tokens with Vercel token counting
const errorMessage = error?.message || 'An error occurred';
// Default to 1 token for errors as per Vercel AI SDK convention
const errorTokens = 1;
context.tokenTracker.trackUsage('evaluator', errorTokens, TOKEN_CATEGORIES.REJECTED);
// Clean up event listeners
context.actionTracker.removeAllListeners('action');
// Get token usage in OpenAI API format
const usage = context.tokenTracker.getUsageDetails();
if (body.stream && res.headersSent) {
// For streaming responses that have already started, send error as a chunk
// First send closing think tag if we're in the middle of thinking
const closeThinkChunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content: '</think>'},
logprobs: null,
finish_reason: null
}]
};
res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);
// Track error token and send error message
context.tokenTracker.trackUsage('evaluator', 1, TOKEN_CATEGORIES.REJECTED);
const errorChunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content: errorMessage},
logprobs: null,
finish_reason: 'stop'
}]
};
res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
res.end();
} else {
// For non-streaming or not-yet-started responses, send error as JSON
const response: ChatCompletionResponse = {
id: requestId,
object: 'chat.completion',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
message: {
role: 'assistant',
content: `Error: ${errorMessage}`
},
logprobs: null,
finish_reason: 'stop'
}],
usage
};
res.json(response);
}
}
}) as RequestHandler);
interface StreamResponse extends Response {
write: (chunk: string) => boolean;
}
function createProgressEmitter(requestId: string, budget: number | undefined, context: TrackerContext) {
return () => {
const state = context.actionTracker.getState();
const budgetInfo = {
used: context.tokenTracker.getTotalUsage(),
total: budget || 1_000_000,
percentage: ((context.tokenTracker.getTotalUsage() / (budget || 1_000_000)) * 100).toFixed(2)
};
eventEmitter.emit(`progress-${requestId}`, {
type: 'progress',
data: {...state.thisStep, totalStep: state.totalStep},
step: state.totalStep,
budget: budgetInfo,
trackers: {
tokenUsage: context.tokenTracker.getTotalUsage(),
actionState: context.actionTracker.getState()
}
});
};
}
function cleanup(requestId: string) {
const context = trackers.get(requestId);
if (context) {
context.actionTracker.removeAllListeners();
context.tokenTracker.removeAllListeners();
trackers.delete(requestId);
}
}
function emitTrackerUpdate(requestId: string, context: TrackerContext) {
const trackerData = {
tokenUsage: context.tokenTracker.getTotalUsage(),
tokenBreakdown: context.tokenTracker.getUsageBreakdown(),
actionState: context.actionTracker.getState().thisStep,
step: context.actionTracker.getState().totalStep,
badAttempts: context.actionTracker.getState().badAttempts,
gaps: context.actionTracker.getState().gaps
};
eventEmitter.emit(`progress-${requestId}`, {
type: 'progress',
trackers: trackerData
});
}
// Store the trackers for each request
const trackers = new Map<string, TrackerContext>();
app.post('/api/v1/query', (async (req: QueryRequest, res: Response) => {
const {q, budget, maxBadAttempt} = req.body;
if (!q) {
return res.status(400).json({error: 'Query (q) is required'});
}
const requestId = Date.now().toString();
// Create new trackers for this request
const context: TrackerContext = {
tokenTracker: new TokenTracker(),
actionTracker: new ActionTracker()
};
trackers.set(requestId, context);
// Set up listeners immediately for both trackers
context.actionTracker.on('action', () => emitTrackerUpdate(requestId, context));
// context.tokenTracker.on('usage', () => emitTrackerUpdate(requestId, context));
res.json({requestId});
try {
const {result} = await getResponse(q, budget, maxBadAttempt, context);
const emitProgress = createProgressEmitter(requestId, budget, context);
context.actionTracker.on('action', emitProgress);
await storeTaskResult(requestId, result);
eventEmitter.emit(`progress-${requestId}`, {
type: 'answer',
data: result,
trackers: {
tokenUsage: context.tokenTracker.getTotalUsage(),
actionState: context.actionTracker.getState()
}
});
cleanup(requestId);
} catch (error: any) {
eventEmitter.emit(`progress-${requestId}`, {
type: 'error',
data: error?.message || 'Unknown error',
status: 500,
trackers: {
tokenUsage: context.tokenTracker.getTotalUsage(),
actionState: context.actionTracker.getState()
}
});
cleanup(requestId);
}
}) as RequestHandler);
app.get('/api/v1/stream/:requestId', (async (req: Request, res: StreamResponse) => {
const requestId = req.params.requestId;
const context = trackers.get(requestId);
res.setHeader('Content-Type', 'text/event-stream');
res.setHeader('Cache-Control', 'no-cache');
res.setHeader('Connection', 'keep-alive');
const listener = (data: StreamMessage) => {
// The trackers are now included in all event types
// We don't need to add them here as they're already part of the data
res.write(`data: ${JSON.stringify(data)}\n\n`);
};
eventEmitter.on(`progress-${requestId}`, listener);
// Handle client disconnection
req.on('close', () => {
eventEmitter.removeListener(`progress-${requestId}`, listener);
});
// Send initial connection confirmation with tracker state
const initialData = {
type: 'connected',
requestId,
trackers: context ? {
tokenUsage: context.tokenTracker.getTotalUsage(),
actionState: context.actionTracker.getState()
} : null
};
res.write(`data: ${JSON.stringify(initialData)}\n\n`);
}) as RequestHandler);
async function storeTaskResult(requestId: string, result: StepAction) {
try {
const taskDir = path.join(process.cwd(), 'tasks');
await fs.mkdir(taskDir, {recursive: true});
await fs.writeFile(
path.join(taskDir, `${requestId}.json`),
JSON.stringify(result, null, 2)
);
} catch (error) {
console.error('Task storage failed:', error);
throw new Error('Failed to store task result');
}
}
app.get('/api/v1/task/:requestId', (async (req: Request, res: Response) => {
const requestId = req.params.requestId;
try {
const taskPath = path.join(process.cwd(), 'tasks', `${requestId}.json`);
const taskData = await fs.readFile(taskPath, 'utf-8');
res.json(JSON.parse(taskData));
} catch (error) {
res.status(404).json({error: 'Task not found'});
}
}) as RequestHandler);
export default app;
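
For completeness, a small client-side sketch against the OpenAI-compatible endpoint defined above (the base URL and secret are placeholders):

async function ask(question: string) {
  const res = await fetch('http://localhost:3000/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      // Only needed when the server was started with --secret=...
      Authorization: 'Bearer <secret-if-configured>',
    },
    body: JSON.stringify({
      model: 'jina-deepsearch-v1',
      stream: false,
      messages: [{ role: 'user', content: question }],
    }),
  });
  const data = await res.json();
  // Non-streaming responses follow the usual OpenAI chat.completion shape
  return data.choices[0].message.content;
}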

src/server.ts

@@ -1,637 +1,7 @@
import express, {Request, Response, RequestHandler} from 'express';
import cors from 'cors';
import {EventEmitter} from 'events';
import {getResponse} from './agent';
import {
StepAction,
StreamMessage,
TrackerContext,
ChatCompletionRequest,
ChatCompletionResponse,
ChatCompletionChunk,
AnswerAction,
TOKEN_CATEGORIES,
Model
} from './types';
import fs from 'fs/promises';
import path from 'path';
import {TokenTracker} from "./utils/token-tracker";
import {ActionTracker} from "./utils/action-tracker";
import app from "./app";
const app = express();
const port = process.env.PORT || 3000;
// Get secret from command line args for optional authentication
const secret = process.argv.find(arg => arg.startsWith('--secret='))?.split('=')[1];
app.use(cors());
app.use(express.json());
const eventEmitter = new EventEmitter();
interface QueryRequest extends Request {
body: {
q: string;
budget?: number;
maxBadAttempt?: number;
};
}
function buildMdFromAnswer(answer: AnswerAction) {
let refStr = '';
if (answer.references?.length > 0) {
refStr = `
## References
${answer.references.map((ref, i) => `
${i + 1}. [${ref.exactQuote}](${ref.url})`).join('')}`;
}
return `${answer.answer.replace(/\(REF_(\d+)\)/g, (_, num) => `[^${num}]`)}${refStr}`;
}
// Modified streamTextWordByWord function
async function* streamTextWordByWord(text: string, streamingState: StreamingState) {
const words = text.split(/(\s+)/);
for (const word of words) {
if (streamingState.currentlyStreaming) {
const delay = Math.floor(Math.random() * 100);
await new Promise(resolve => setTimeout(resolve, delay));
yield word;
} else {
// If streaming was interrupted, yield all remaining words at once
const remainingWords = words.slice(words.indexOf(word)).join('');
yield remainingWords;
return;
}
}
}
// Helper function to emit remaining content immediately
async function emitRemainingContent(
res: Response,
requestId: string,
model: string,
content: string
) {
if (!content) return;
const chunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',
created: Math.floor(Date.now() / 1000),
model: model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content},
logprobs: null,
finish_reason: null
}]
};
res.write(`data: ${JSON.stringify(chunk)}\n\n`);
}
interface StreamingState {
currentlyStreaming: boolean;
currentGenerator: AsyncGenerator<string> | null;
remainingContent: string;
}
async function completeCurrentStreaming(
streamingState: StreamingState,
res: Response,
requestId: string,
model: string
) {
if (streamingState.currentlyStreaming && streamingState.remainingContent) {
// Force completion of current streaming
await emitRemainingContent(
res,
requestId,
model,
streamingState.remainingContent
);
// Reset streaming state
streamingState.currentlyStreaming = false;
streamingState.remainingContent = '';
streamingState.currentGenerator = null;
}
}
// OpenAI-compatible chat completions endpoint
// Models API endpoints
app.get('/v1/models', (async (_req: Request, res: Response) => {
const models: Model[] = [{
id: 'jina-deepsearch-v1',
object: 'model',
created: 1686935002,
owned_by: 'jina-ai'
}];
res.json({
object: 'list',
data: models
});
}) as RequestHandler);
app.get('/v1/models/:model', (async (req: Request, res: Response) => {
const modelId = req.params.model;
if (modelId === 'jina-deepsearch-v1') {
res.json({
id: 'jina-deepsearch-v1',
object: 'model',
created: 1686935002,
owned_by: 'jina-ai'
});
} else {
res.status(404).json({
error: {
message: `Model '${modelId}' not found`,
type: 'invalid_request_error',
param: null,
code: 'model_not_found'
}
});
}
}) as RequestHandler);
app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
// Check authentication only if secret is set
if (secret) {
const authHeader = req.headers.authorization;
if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
console.log('[chat/completions] Unauthorized request');
res.status(401).json({error: 'Unauthorized'});
return;
}
}
// Log request details (excluding sensitive data)
console.log('[chat/completions] Request:', {
model: req.body.model,
stream: req.body.stream,
messageCount: req.body.messages?.length,
hasAuth: !!req.headers.authorization,
requestId: Date.now().toString()
});
const body = req.body as ChatCompletionRequest;
if (!body.messages?.length) {
return res.status(400).json({error: 'Messages array is required and must not be empty'});
}
const lastMessage = body.messages[body.messages.length - 1];
if (lastMessage.role !== 'user') {
return res.status(400).json({error: 'Last message must be from user'});
}
const requestId = Date.now().toString();
const context: TrackerContext = {
tokenTracker: new TokenTracker(),
actionTracker: new ActionTracker()
};
// Track prompt tokens for the initial message
// Use Vercel's token counting convention - 1 token per message
const messageTokens = body.messages.length;
context.tokenTracker.trackUsage('agent', messageTokens, TOKEN_CATEGORIES.PROMPT);
// Add this inside the chat completions endpoint, before setting up the action listener
const streamingState: StreamingState = {
currentlyStreaming: false,
currentGenerator: null,
remainingContent: ''
};
if (body.stream) {
res.setHeader('Content-Type', 'text/event-stream');
res.setHeader('Cache-Control', 'no-cache');
res.setHeader('Connection', 'keep-alive');
// Send initial chunk with opening think tag
const initialChunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {role: 'assistant', content: '<think>'},
logprobs: null,
finish_reason: null
}]
};
res.write(`data: ${JSON.stringify(initialChunk)}\n\n`);
// Set up progress listener with cleanup
const actionListener = async (action: any) => {
if (action.thisStep.think) {
// Complete any ongoing streaming first
await completeCurrentStreaming(streamingState, res, requestId, body.model);
// Start new streaming session
streamingState.currentlyStreaming = true;
streamingState.remainingContent = action.thisStep.think;
try {
for await (const word of streamTextWordByWord(action.thisStep.think, streamingState)) {
if (!streamingState.currentlyStreaming) {
break;
}
// Update remaining content
streamingState.remainingContent = streamingState.remainingContent.slice(word.length);
const chunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content: word},
logprobs: null,
finish_reason: null
}]
};
res.write(`data: ${JSON.stringify(chunk)}\n\n`);
}
// Only add newline if this streaming completed normally
if (streamingState.currentlyStreaming) {
const newlineChunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content: '\n'},
logprobs: null,
finish_reason: null
}]
};
res.write(`data: ${JSON.stringify(newlineChunk)}\n\n`);
}
} catch (error) {
console.error('Error in streaming:', error);
await completeCurrentStreaming(streamingState, res, requestId, body.model);
}
}
};
context.actionTracker.on('action', actionListener);
// Make sure to update the cleanup code
res.on('finish', () => {
streamingState.currentlyStreaming = false;
streamingState.currentGenerator = null;
streamingState.remainingContent = '';
context.actionTracker.removeListener('action', actionListener);
});
}
try {
// Track initial query tokens - already tracked above
// const queryTokens = Buffer.byteLength(lastMessage.content, 'utf-8');
// context.tokenTracker.trackUsage('agent', queryTokens, 'prompt');
let result;
try {
({result} = await getResponse(lastMessage.content, undefined, undefined, context));
} catch (error: any) {
// If deduplication fails, retry without it
if (error?.response?.status === 402) {
// If deduplication fails, retry with maxBadAttempt=3 to skip dedup
({result} = await getResponse(lastMessage.content, undefined, 3, context));
} else {
throw error;
}
}
// Track tokens based on action type
if (result.action === 'answer') {
// Track accepted prediction tokens for the final answer using Vercel's convention
const answerTokens = 1; // Default to 1 token per answer
context.tokenTracker.trackUsage('evaluator', answerTokens, TOKEN_CATEGORIES.ACCEPTED);
} else {
// Track rejected prediction tokens for non-answer responses
const rejectedTokens = 1; // Default to 1 token per rejected response
context.tokenTracker.trackUsage('evaluator', rejectedTokens, TOKEN_CATEGORIES.REJECTED);
}
if (body.stream) {
// Complete any ongoing streaming before sending final answer
await completeCurrentStreaming(streamingState, res, requestId, body.model);
// Send closing think tag
const closeThinkChunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content: `</think>\n\n`},
logprobs: null,
finish_reason: null
}]
};
res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);
// Send final answer as separate chunk
const answerChunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content: result.action === 'answer' ? buildMdFromAnswer(result) : result.think},
logprobs: null,
finish_reason: 'stop'
}]
};
res.write(`data: ${JSON.stringify(answerChunk)}\n\n`);
res.end();
} else {
const usage = context.tokenTracker.getUsageDetails();
const response: ChatCompletionResponse = {
id: requestId,
object: 'chat.completion',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
message: {
role: 'assistant',
content: result.action === 'answer' ? buildMdFromAnswer(result) : result.think
},
logprobs: null,
finish_reason: 'stop'
}],
usage
};
// Log final response (excluding full content for brevity)
console.log('[chat/completions] Response:', {
id: response.id,
status: 200,
contentLength: response.choices[0].message.content.length,
usage: response.usage
});
res.json(response);
}
} catch (error: any) {
// Log error details
console.error('[chat/completions] Error:', {
message: error?.message || 'An error occurred',
stack: error?.stack,
type: error?.constructor?.name,
requestId
});
// Track error as rejected tokens with Vercel token counting
const errorMessage = error?.message || 'An error occurred';
// Default to 1 token for errors as per Vercel AI SDK convention
const errorTokens = 1;
context.tokenTracker.trackUsage('evaluator', errorTokens, TOKEN_CATEGORIES.REJECTED);
// Clean up event listeners
context.actionTracker.removeAllListeners('action');
// Get token usage in OpenAI API format
const usage = context.tokenTracker.getUsageDetails();
if (body.stream && res.headersSent) {
// For streaming responses that have already started, send error as a chunk
// First send closing think tag if we're in the middle of thinking
const closeThinkChunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content: '</think>'},
logprobs: null,
finish_reason: null
}]
};
res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);
// Track error token and send error message
context.tokenTracker.trackUsage('evaluator', 1, TOKEN_CATEGORIES.REJECTED);
const errorChunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content: errorMessage},
logprobs: null,
finish_reason: 'stop'
}]
};
res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
res.end();
} else {
// For non-streaming or not-yet-started responses, send error as JSON
const response: ChatCompletionResponse = {
id: requestId,
object: 'chat.completion',
created: Math.floor(Date.now() / 1000),
model: body.model,
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
message: {
role: 'assistant',
content: `Error: ${errorMessage}`
},
logprobs: null,
finish_reason: 'stop'
}],
usage
};
res.json(response);
}
}
}) as RequestHandler);
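// Client-side sketch (illustrative only, not part of this commit; streamChatCompletion is a
// made-up helper): call the OpenAI-compatible endpoint with stream: true and print the SSE
// deltas. The reasoning phase arrives wrapped in <think>...</think>, followed by the answer.
async function streamChatCompletion(baseUrl: string, question: string, secret?: string) {
  const response = await fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      ...(secret ? {Authorization: `Bearer ${secret}`} : {})
    },
    body: JSON.stringify({
      model: 'jina-deepsearch-v1',
      stream: true,
      messages: [{role: 'user', content: question}]
    })
  });
  if (!response.ok || !response.body) {
    throw new Error(`Request failed with status ${response.status}`);
  }
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  while (true) {
    const {done, value} = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, {stream: true});
    // SSE events are `data: {...}` blocks separated by a blank line; keep any partial event.
    const events = buffer.split('\n\n');
    buffer = events.pop() ?? '';
    for (const event of events) {
      if (!event.startsWith('data: ')) continue;
      const chunk = JSON.parse(event.slice('data: '.length));
      process.stdout.write(chunk.choices[0]?.delta?.content ?? '');
    }
  }
}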
interface StreamResponse extends Response {
write: (chunk: string) => boolean;
}
function createProgressEmitter(requestId: string, budget: number | undefined, context: TrackerContext) {
return () => {
const state = context.actionTracker.getState();
const budgetInfo = {
used: context.tokenTracker.getTotalUsage(),
total: budget || 1_000_000,
percentage: ((context.tokenTracker.getTotalUsage() / (budget || 1_000_000)) * 100).toFixed(2)
};
eventEmitter.emit(`progress-${requestId}`, {
type: 'progress',
data: {...state.thisStep, totalStep: state.totalStep},
step: state.totalStep,
budget: budgetInfo,
trackers: {
tokenUsage: context.tokenTracker.getTotalUsage(),
actionState: context.actionTracker.getState()
}
});
};
}
function cleanup(requestId: string) {
const context = trackers.get(requestId);
if (context) {
context.actionTracker.removeAllListeners();
context.tokenTracker.removeAllListeners();
trackers.delete(requestId);
}
}
function emitTrackerUpdate(requestId: string, context: TrackerContext) {
const trackerData = {
tokenUsage: context.tokenTracker.getTotalUsage(),
tokenBreakdown: context.tokenTracker.getUsageBreakdown(),
actionState: context.actionTracker.getState().thisStep,
step: context.actionTracker.getState().totalStep,
badAttempts: context.actionTracker.getState().badAttempts,
gaps: context.actionTracker.getState().gaps
};
eventEmitter.emit(`progress-${requestId}`, {
type: 'progress',
trackers: trackerData
});
}
// Store the trackers for each request
const trackers = new Map<string, TrackerContext>();
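// Lifecycle (note added for orientation, not in the original source): an entry is created per
// request in POST /api/v1/query below, read by GET /api/v1/stream/:requestId to seed the
// initial 'connected' payload, and deleted in cleanup() after the final 'answer' or 'error' event.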
app.post('/api/v1/query', (async (req: QueryRequest, res: Response) => {
const {q, budget, maxBadAttempt} = req.body;
if (!q) {
return res.status(400).json({error: 'Query (q) is required'});
}
const requestId = Date.now().toString();
// Create new trackers for this request
const context: TrackerContext = {
tokenTracker: new TokenTracker(),
actionTracker: new ActionTracker()
};
trackers.set(requestId, context);
// Set up listeners immediately for both trackers
context.actionTracker.on('action', () => emitTrackerUpdate(requestId, context));
// context.tokenTracker.on('usage', () => emitTrackerUpdate(requestId, context));
res.json({requestId});
try {
const {result} = await getResponse(q, budget, maxBadAttempt, context);
const emitProgress = createProgressEmitter(requestId, budget, context);
context.actionTracker.on('action', emitProgress);
await storeTaskResult(requestId, result);
eventEmitter.emit(`progress-${requestId}`, {
type: 'answer',
data: result,
trackers: {
tokenUsage: context.tokenTracker.getTotalUsage(),
actionState: context.actionTracker.getState()
}
});
cleanup(requestId);
} catch (error: any) {
eventEmitter.emit(`progress-${requestId}`, {
type: 'error',
data: error?.message || 'Unknown error',
status: 500,
trackers: {
tokenUsage: context.tokenTracker.getTotalUsage(),
actionState: context.actionTracker.getState()
}
});
cleanup(requestId);
}
}) as RequestHandler);
app.get('/api/v1/stream/:requestId', (async (req: Request, res: StreamResponse) => {
const requestId = req.params.requestId;
const context = trackers.get(requestId);
res.setHeader('Content-Type', 'text/event-stream');
res.setHeader('Cache-Control', 'no-cache');
res.setHeader('Connection', 'keep-alive');
const listener = (data: StreamMessage) => {
// The trackers are now included in all event types
// We don't need to add them here as they're already part of the data
res.write(`data: ${JSON.stringify(data)}\n\n`);
};
eventEmitter.on(`progress-${requestId}`, listener);
// Handle client disconnection
req.on('close', () => {
eventEmitter.removeListener(`progress-${requestId}`, listener);
});
// Send initial connection confirmation with tracker state
const initialData = {
type: 'connected',
requestId,
trackers: context ? {
tokenUsage: context.tokenTracker.getTotalUsage(),
actionState: context.actionTracker.getState()
} : null
};
res.write(`data: ${JSON.stringify(initialData)}\n\n`);
}) as RequestHandler);
async function storeTaskResult(requestId: string, result: StepAction) {
try {
const taskDir = path.join(process.cwd(), 'tasks');
await fs.mkdir(taskDir, {recursive: true});
await fs.writeFile(
path.join(taskDir, `${requestId}.json`),
JSON.stringify(result, null, 2)
);
} catch (error) {
console.error('Task storage failed:', error);
throw new Error('Failed to store task result');
}
}
app.get('/api/v1/task/:requestId', (async (req: Request, res: Response) => {
const requestId = req.params.requestId;
try {
const taskPath = path.join(process.cwd(), 'tasks', `${requestId}.json`);
const taskData = await fs.readFile(taskPath, 'utf-8');
res.json(JSON.parse(taskData));
} catch (error) {
res.status(404).json({error: 'Task not found'});
}
}) as RequestHandler);
// Export server startup function for better testing
export function startServer() {
return app.listen(port, () => {
@ -642,6 +12,4 @@ export function startServer() {
// Start server if running directly
if (process.env.NODE_ENV !== 'test') {
startServer();
}
export default app;
}

View File

@ -9,6 +9,16 @@ export class TokenTracker extends EventEmitter {
constructor(budget?: number) {
super();
this.budget = budget;
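// Billing hook (comment added for orientation, not in the original diff): when the hosting
// runtime exposes an asyncLocalContext, every 'usage' event copies the running token total
// into ctx.chargeAmount, so the per-request charge mirrors token spend.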
if ('asyncLocalContext' in process) {
const asyncLocalContext = process.asyncLocalContext as any;
this.on('usage', () => {
if (asyncLocalContext.available()) {
asyncLocalContext.ctx.chargeAmount = this.getTotalUsage();
}
});
}
}
trackUsage(tool: string, tokens: number, category?: TokenCategory) {
@ -53,9 +63,9 @@ export class TokenTracker extends EventEmitter {
}, {} as Record<string, number>);
const prompt_tokens = categoryBreakdown.prompt || 0;
const completion_tokens =
(categoryBreakdown.reasoning || 0) +
(categoryBreakdown.accepted || 0) +
const completion_tokens =
(categoryBreakdown.reasoning || 0) +
(categoryBreakdown.accepted || 0) +
(categoryBreakdown.rejected || 0);
return {

View File

@ -1,14 +1,18 @@
{
"compilerOptions": {
"target": "ES2020",
"module": "commonjs",
"module": "node16",
"outDir": "./dist",
"rootDir": "./src",
"sourceMap": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"strict": true,
"resolveJsonModule": true,
"moduleResolution": "node"
}
"experimentalDecorators": true,
"emitDecoratorMetadata": true,
"resolveJsonModule": true
},
"include": ["src/**/*"],
"exclude": ["jina-ai/**/*", "**/__tests__/**/*"],
}