mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-25 22:16:49 +08:00
jina-ai: billing for saas service (#55)
* wip: jina billing * wip * fix: build issues * ci: cd gh action * fix: make ci happy
This commit is contained in:
parent
c4639a2e92
commit
8af35c6640
1
.dockerignore
Normal file
1
.dockerignore
Normal file
@ -0,0 +1 @@
|
||||
node_modules
|
||||
@ -16,5 +16,6 @@ module.exports = {
|
||||
rules: {
|
||||
'no-console': ['error', { allow: ['log', 'error'] }],
|
||||
'@typescript-eslint/no-explicit-any': 'off'
|
||||
}
|
||||
},
|
||||
ignorePatterns: ["jina-ai/**/*"]
|
||||
};
|
||||
|
||||
66
.github/workflows/cd.yml
vendored
Normal file
66
.github/workflows/cd.yml
vendored
Normal file
@ -0,0 +1,66 @@
|
||||
run-name: Build push and deploy (CD)
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- ci-debug
|
||||
tags:
|
||||
- '*'
|
||||
|
||||
jobs:
|
||||
build-and-push-to-gcr:
|
||||
runs-on: ubuntu-latest
|
||||
concurrency:
|
||||
group: ${{ github.ref_type == 'branch' && github.ref }}
|
||||
cancel-in-progress: true
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
- uses: 'google-github-actions/auth@v2'
|
||||
with:
|
||||
credentials_json: '${{ secrets.GCLOUD_SERVICE_ACCOUNT_SECRET_JSON }}'
|
||||
- name: 'Set up Cloud SDK'
|
||||
uses: 'google-github-actions/setup-gcloud@v2'
|
||||
- name: "Docker auth"
|
||||
run: |-
|
||||
gcloud auth configure-docker us-docker.pkg.dev --quiet
|
||||
- name: Set controller release version
|
||||
run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22.12.0
|
||||
cache: npm
|
||||
|
||||
- name: npm install
|
||||
run: npm ci
|
||||
- name: build application
|
||||
run: npm run build
|
||||
- name: Set package version
|
||||
run: npm version --no-git-tag-version ${{ env.RELEASE_VERSION }}
|
||||
if: github.ref_type == 'tag'
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: |
|
||||
us-docker.pkg.dev/research-df067/deepresearch/node-deep-research
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Build and push
|
||||
id: container
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
file: jina-ai/Dockerfile
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
- name: Deploy with Tag
|
||||
run: |
|
||||
gcloud run deploy node-deep-research --image us-docker.pkg.dev/research-df067/deepresearch/node-deep-research@${{steps.container.outputs.imageid}} --tag ${{ env.RELEASE_VERSION }} --region us-central1 --async
|
||||
|
||||
24
.vscode/launch.json
vendored
Normal file
24
.vscode/launch.json
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Attach",
|
||||
"port": 9229,
|
||||
"request": "attach",
|
||||
"skipFiles": [
|
||||
"<node_internals>/**"
|
||||
],
|
||||
"type": "node"
|
||||
},
|
||||
{
|
||||
"name": "Attach by Process ID",
|
||||
"processId": "${command:PickProcess}",
|
||||
"request": "attach",
|
||||
"skipFiles": [
|
||||
"<node_internals>/**"
|
||||
],
|
||||
"type": "node"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
1
jina-ai/.dockerignore
Normal file
1
jina-ai/.dockerignore
Normal file
@ -0,0 +1 @@
|
||||
node_modules
|
||||
50
jina-ai/Dockerfile
Normal file
50
jina-ai/Dockerfile
Normal file
@ -0,0 +1,50 @@
|
||||
# ---- BUILD STAGE ----
|
||||
FROM node:20-slim AS builder
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Copy package.json and package-lock.json
|
||||
COPY ./package*.json ./
|
||||
COPY ./jina-ai/package*.json ./jina-ai/
|
||||
|
||||
# Install dependencies
|
||||
RUN npm ci
|
||||
WORKDIR /app/jina-ai
|
||||
RUN npm ci
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy application code
|
||||
COPY ./src ./src
|
||||
COPY ./config.json ./
|
||||
COPY ./tsconfig.json ./tsconfig.json
|
||||
RUN npm run build
|
||||
|
||||
COPY ./jina-ai/src ./jina-ai/src
|
||||
COPY ./jina-ai/tsconfig.json ./jina-ai/tsconfig.json
|
||||
WORKDIR /app/jina-ai
|
||||
RUN npm run build
|
||||
|
||||
# ---- PRODUCTION STAGE ----
|
||||
FROM node:20 AS production
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
COPY --from=builder /app ./
|
||||
# Copy config.json and built files from builder
|
||||
|
||||
WORKDIR /app/jina-ai
|
||||
|
||||
# Set environment variables (Recommended to set at runtime, avoid hardcoding)
|
||||
ENV GEMINI_API_KEY=${GEMINI_API_KEY}
|
||||
ENV OPENAI_API_KEY=${OPENAI_API_KEY}
|
||||
ENV JINA_API_KEY=${JINA_API_KEY}
|
||||
ENV BRAVE_API_KEY=${BRAVE_API_KEY}
|
||||
|
||||
# Expose the port the app runs on
|
||||
EXPOSE 3000
|
||||
|
||||
# Set startup command
|
||||
CMD ["node", "./dist/server.js"]
|
||||
3980
jina-ai/package-lock.json
generated
Normal file
3980
jina-ai/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
38
jina-ai/package.json
Normal file
38
jina-ai/package.json
Normal file
@ -0,0 +1,38 @@
|
||||
{
|
||||
"name": "@jina-ai/node-deepresearch",
|
||||
"version": "1.0.0",
|
||||
"main": "dist/app.js",
|
||||
"files": [
|
||||
"dist",
|
||||
"README.md",
|
||||
"LICENSE"
|
||||
],
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"dev": "npx ts-node src/agent.ts",
|
||||
"search": "npx ts-node src/test-duck.ts",
|
||||
"rewrite": "npx ts-node src/tools/query-rewriter.ts",
|
||||
"lint": "eslint . --ext .ts",
|
||||
"lint:fix": "eslint . --ext .ts --fix",
|
||||
"serve": "ts-node src/server.ts",
|
||||
"eval": "ts-node src/evals/batch-evals.ts",
|
||||
"test": "jest --testTimeout=30000",
|
||||
"test:watch": "jest --watch"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "Jina AI",
|
||||
"license": "Apache-2.0",
|
||||
"description": "",
|
||||
"dependencies": {
|
||||
"@google-cloud/firestore": "^7.11.0",
|
||||
"civkit": "^0.8.3-15926cb",
|
||||
"dayjs": "^1.11.13",
|
||||
"lodash": "^4.17.21",
|
||||
"reflect-metadata": "^0.2.2",
|
||||
"tsyringe": "^4.8.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/lodash": "^4.17.15",
|
||||
"pino-pretty": "^13.0.0"
|
||||
}
|
||||
}
|
||||
347
jina-ai/src/dto/jina-embeddings-auth.ts
Normal file
347
jina-ai/src/dto/jina-embeddings-auth.ts
Normal file
@ -0,0 +1,347 @@
|
||||
import {
|
||||
Also, AuthenticationFailedError, AuthenticationRequiredError,
|
||||
DownstreamServiceFailureError, RPC_CALL_ENVIRONMENT,
|
||||
ArrayOf, AutoCastable, Prop
|
||||
} from 'civkit/civ-rpc';
|
||||
import { parseJSONText } from 'civkit/vectorize';
|
||||
import { htmlEscape } from 'civkit/escape';
|
||||
import { marshalErrorLike } from 'civkit/lang';
|
||||
|
||||
import type express from 'express';
|
||||
|
||||
import logger from '../lib/logger';
|
||||
import { AsyncLocalContext } from '../lib/async-context';
|
||||
import { InjectProperty } from '../lib/registry';
|
||||
import { JinaEmbeddingsDashboardHTTP } from '../lib/billing';
|
||||
import envConfig from '../lib/env-config';
|
||||
|
||||
import { FirestoreRecord } from '../lib/firestore';
|
||||
import _ from 'lodash';
|
||||
import { RateLimitDesc } from '../rate-limit';
|
||||
|
||||
export class JinaWallet extends AutoCastable {
|
||||
@Prop({
|
||||
default: ''
|
||||
})
|
||||
user_id!: string;
|
||||
|
||||
@Prop({
|
||||
default: 0
|
||||
})
|
||||
trial_balance!: number;
|
||||
|
||||
@Prop()
|
||||
trial_start?: Date;
|
||||
|
||||
@Prop()
|
||||
trial_end?: Date;
|
||||
|
||||
@Prop({
|
||||
default: 0
|
||||
})
|
||||
regular_balance!: number;
|
||||
|
||||
@Prop({
|
||||
default: 0
|
||||
})
|
||||
total_balance!: number;
|
||||
}
|
||||
|
||||
export class JinaEmbeddingsTokenAccount extends FirestoreRecord {
|
||||
static override collectionName = 'embeddingsTokenAccounts';
|
||||
|
||||
override _id!: string;
|
||||
|
||||
@Prop({
|
||||
required: true
|
||||
})
|
||||
user_id!: string;
|
||||
|
||||
@Prop({
|
||||
nullable: true,
|
||||
type: String,
|
||||
})
|
||||
email?: string;
|
||||
|
||||
@Prop({
|
||||
nullable: true,
|
||||
type: String,
|
||||
})
|
||||
full_name?: string;
|
||||
|
||||
@Prop({
|
||||
nullable: true,
|
||||
type: String,
|
||||
})
|
||||
customer_id?: string;
|
||||
|
||||
@Prop({
|
||||
nullable: true,
|
||||
type: String,
|
||||
})
|
||||
avatar_url?: string;
|
||||
|
||||
// Not keeping sensitive info for now
|
||||
// @Prop()
|
||||
// billing_address?: object;
|
||||
|
||||
// @Prop()
|
||||
// payment_method?: object;
|
||||
|
||||
@Prop({
|
||||
required: true
|
||||
})
|
||||
wallet!: JinaWallet;
|
||||
|
||||
@Prop({
|
||||
type: Object
|
||||
})
|
||||
metadata?: { [k: string]: any; };
|
||||
|
||||
@Prop({
|
||||
defaultFactory: () => new Date()
|
||||
})
|
||||
lastSyncedAt!: Date;
|
||||
|
||||
@Prop({
|
||||
dictOf: [ArrayOf(RateLimitDesc)]
|
||||
})
|
||||
customRateLimits?: { [k: string]: RateLimitDesc[]; };
|
||||
|
||||
static patchedFields = [
|
||||
];
|
||||
|
||||
static override from(input: any) {
|
||||
for (const field of this.patchedFields) {
|
||||
if (typeof input[field] === 'string') {
|
||||
input[field] = parseJSONText(input[field]);
|
||||
}
|
||||
}
|
||||
|
||||
return super.from(input) as JinaEmbeddingsTokenAccount;
|
||||
}
|
||||
|
||||
override degradeForFireStore() {
|
||||
const copy: any = {
|
||||
...this,
|
||||
wallet: { ...this.wallet },
|
||||
// Firebase disability
|
||||
customRateLimits: _.mapValues(this.customRateLimits, (v) => v.map((x) => ({ ...x }))),
|
||||
};
|
||||
|
||||
for (const field of (this.constructor as typeof JinaEmbeddingsTokenAccount).patchedFields) {
|
||||
if (typeof copy[field] === 'object') {
|
||||
copy[field] = JSON.stringify(copy[field]) as any;
|
||||
}
|
||||
}
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
[k: string]: any;
|
||||
}
|
||||
|
||||
|
||||
const authDtoLogger = logger.child({ service: 'JinaAuthDTO' });
|
||||
|
||||
export interface FireBaseHTTPCtx {
|
||||
req: express.Request,
|
||||
res: express.Response,
|
||||
}
|
||||
|
||||
const THE_VERY_SAME_JINA_EMBEDDINGS_CLIENT = new JinaEmbeddingsDashboardHTTP(envConfig.JINA_EMBEDDINGS_DASHBOARD_API_KEY);
|
||||
|
||||
@Also({
|
||||
openapi: {
|
||||
operation: {
|
||||
parameters: {
|
||||
'Authorization': {
|
||||
description: htmlEscape`Jina Token for authentication.\n\n` +
|
||||
htmlEscape`- Member of <JinaEmbeddingsAuthDTO>\n\n` +
|
||||
`- Authorization: Bearer {YOUR_JINA_TOKEN}`
|
||||
,
|
||||
in: 'header',
|
||||
schema: {
|
||||
anyOf: [
|
||||
{ type: 'string', format: 'token' }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
export class JinaEmbeddingsAuthDTO extends AutoCastable {
|
||||
uid?: string;
|
||||
bearerToken?: string;
|
||||
user?: JinaEmbeddingsTokenAccount;
|
||||
|
||||
@InjectProperty(AsyncLocalContext)
|
||||
ctxMgr!: AsyncLocalContext;
|
||||
|
||||
jinaEmbeddingsDashboard = THE_VERY_SAME_JINA_EMBEDDINGS_CLIENT;
|
||||
|
||||
static override from(input: any) {
|
||||
const instance = super.from(input) as JinaEmbeddingsAuthDTO;
|
||||
|
||||
const ctx = input[RPC_CALL_ENVIRONMENT];
|
||||
|
||||
const req = (ctx.rawRequest || ctx.req) as express.Request | undefined;
|
||||
|
||||
if (req) {
|
||||
const authorization = req.get('authorization');
|
||||
|
||||
if (authorization) {
|
||||
const authToken = authorization.split(' ')[1] || authorization;
|
||||
instance.bearerToken = authToken;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (!instance.bearerToken && input._token) {
|
||||
instance.bearerToken = input._token;
|
||||
}
|
||||
|
||||
return instance;
|
||||
}
|
||||
|
||||
async getBrief(ignoreCache?: boolean | string) {
|
||||
if (!this.bearerToken) {
|
||||
throw new AuthenticationRequiredError({
|
||||
message: 'Absence of bearer token'
|
||||
});
|
||||
}
|
||||
|
||||
let account;
|
||||
try {
|
||||
account = await JinaEmbeddingsTokenAccount.fromFirestore(this.bearerToken);
|
||||
} catch (err) {
|
||||
// FireStore would not accept any string as input and may throw if not happy with it
|
||||
void 0;
|
||||
}
|
||||
|
||||
|
||||
const age = account?.lastSyncedAt ? Date.now() - account.lastSyncedAt.getTime() : Infinity;
|
||||
|
||||
if (account && !ignoreCache) {
|
||||
if (account && age < 180_000) {
|
||||
this.user = account;
|
||||
this.uid = this.user?.user_id;
|
||||
|
||||
return account;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
const r = await this.jinaEmbeddingsDashboard.validateToken(this.bearerToken);
|
||||
const brief = r.data;
|
||||
const draftAccount = JinaEmbeddingsTokenAccount.from({
|
||||
...account, ...brief, _id: this.bearerToken,
|
||||
lastSyncedAt: new Date()
|
||||
});
|
||||
await JinaEmbeddingsTokenAccount.save(draftAccount.degradeForFireStore(), undefined, { merge: true });
|
||||
|
||||
this.user = draftAccount;
|
||||
this.uid = this.user?.user_id;
|
||||
|
||||
return draftAccount;
|
||||
} catch (err: any) {
|
||||
authDtoLogger.warn(`Failed to get user brief: ${err}`, { err: marshalErrorLike(err) });
|
||||
|
||||
if (err?.status === 401) {
|
||||
throw new AuthenticationFailedError({
|
||||
message: 'Invalid bearer token'
|
||||
});
|
||||
}
|
||||
|
||||
if (account) {
|
||||
this.user = account;
|
||||
this.uid = this.user?.user_id;
|
||||
|
||||
return account;
|
||||
}
|
||||
|
||||
|
||||
throw new DownstreamServiceFailureError(`Failed to authenticate: ${err}`);
|
||||
}
|
||||
}
|
||||
|
||||
async reportUsage(tokenCount: number, mdl: string, endpoint: string = '/encode') {
|
||||
const user = await this.assertUser();
|
||||
const uid = user.user_id;
|
||||
user.wallet.total_balance -= tokenCount;
|
||||
|
||||
return this.jinaEmbeddingsDashboard.reportUsage(this.bearerToken!, {
|
||||
model_name: mdl,
|
||||
api_endpoint: endpoint,
|
||||
consumer: {
|
||||
id: uid,
|
||||
user_id: uid,
|
||||
},
|
||||
usage: {
|
||||
total_tokens: tokenCount
|
||||
},
|
||||
labels: {
|
||||
model_name: mdl
|
||||
}
|
||||
}).then((r) => {
|
||||
JinaEmbeddingsTokenAccount.COLLECTION.doc(this.bearerToken!)
|
||||
.update({ 'wallet.total_balance': JinaEmbeddingsTokenAccount.OPS.increment(-tokenCount) })
|
||||
.catch((err) => {
|
||||
authDtoLogger.warn(`Failed to update cache for ${uid}: ${err}`, { err: marshalErrorLike(err) });
|
||||
});
|
||||
|
||||
return r;
|
||||
}).catch((err) => {
|
||||
user.wallet.total_balance += tokenCount;
|
||||
authDtoLogger.warn(`Failed to report usage for ${uid}: ${err}`, { err: marshalErrorLike(err) });
|
||||
});
|
||||
}
|
||||
|
||||
async solveUID() {
|
||||
if (this.uid) {
|
||||
this.ctxMgr.set('uid', this.uid);
|
||||
|
||||
return this.uid;
|
||||
}
|
||||
|
||||
if (this.bearerToken) {
|
||||
await this.getBrief();
|
||||
this.ctxMgr.set('uid', this.uid);
|
||||
|
||||
return this.uid;
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
async assertUID() {
|
||||
const uid = await this.solveUID();
|
||||
|
||||
if (!uid) {
|
||||
throw new AuthenticationRequiredError('Authentication failed');
|
||||
}
|
||||
|
||||
return uid;
|
||||
}
|
||||
|
||||
async assertUser() {
|
||||
if (this.user) {
|
||||
return this.user;
|
||||
}
|
||||
|
||||
await this.getBrief();
|
||||
|
||||
return this.user!;
|
||||
}
|
||||
|
||||
getRateLimits(...tags: string[]) {
|
||||
const descs = tags.map((x) => this.user?.customRateLimits?.[x] || []).flat().filter((x) => x.isEffective());
|
||||
|
||||
if (descs.length) {
|
||||
return descs;
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
9
jina-ai/src/lib/async-context.ts
Normal file
9
jina-ai/src/lib/async-context.ts
Normal file
@ -0,0 +1,9 @@
|
||||
import { GlobalAsyncContext } from 'civkit/async-context';
|
||||
import { container, singleton } from 'tsyringe';
|
||||
|
||||
@singleton()
|
||||
export class AsyncLocalContext extends GlobalAsyncContext {}
|
||||
|
||||
const instance = container.resolve(AsyncLocalContext);
|
||||
Reflect.set(process, 'asyncLocalContext', instance);
|
||||
export default instance;
|
||||
102
jina-ai/src/lib/billing.ts
Normal file
102
jina-ai/src/lib/billing.ts
Normal file
@ -0,0 +1,102 @@
|
||||
import { HTTPService } from 'civkit';
|
||||
import _ from 'lodash';
|
||||
|
||||
|
||||
export interface JinaWallet {
|
||||
trial_balance: number;
|
||||
trial_start: Date;
|
||||
trial_end: Date;
|
||||
regular_balance: number;
|
||||
total_balance: number;
|
||||
}
|
||||
|
||||
|
||||
export interface JinaUserBrief {
|
||||
user_id: string;
|
||||
email: string | null;
|
||||
full_name: string | null;
|
||||
customer_id: string | null;
|
||||
avatar_url?: string;
|
||||
billing_address: Partial<{
|
||||
address: string;
|
||||
city: string;
|
||||
state: string;
|
||||
country: string;
|
||||
postal_code: string;
|
||||
}>;
|
||||
payment_method: Partial<{
|
||||
brand: string;
|
||||
last4: string;
|
||||
exp_month: number;
|
||||
exp_year: number;
|
||||
}>;
|
||||
wallet: JinaWallet;
|
||||
metadata: {
|
||||
[k: string]: any;
|
||||
};
|
||||
}
|
||||
|
||||
export interface JinaUsageReport {
|
||||
model_name: string;
|
||||
api_endpoint: string;
|
||||
consumer: {
|
||||
user_id: string;
|
||||
customer_plan?: string;
|
||||
[k: string]: any;
|
||||
};
|
||||
usage: {
|
||||
total_tokens: number;
|
||||
};
|
||||
labels: {
|
||||
user_type?: string;
|
||||
model_name?: string;
|
||||
[k: string]: any;
|
||||
};
|
||||
}
|
||||
|
||||
export class JinaEmbeddingsDashboardHTTP extends HTTPService {
|
||||
name = 'JinaEmbeddingsDashboardHTTP';
|
||||
|
||||
constructor(
|
||||
public apiKey: string,
|
||||
public baseUri: string = 'https://embeddings-dashboard-api.jina.ai/api'
|
||||
) {
|
||||
super(baseUri);
|
||||
|
||||
this.baseOptions.timeout = 30_000; // 30 sec
|
||||
}
|
||||
|
||||
async authorization(token: string) {
|
||||
const r = await this.get<JinaUserBrief>('/v1/authorization', {
|
||||
headers: {
|
||||
Authorization: `Bearer ${token}`
|
||||
},
|
||||
responseType: 'json',
|
||||
});
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
async validateToken(token: string) {
|
||||
const r = await this.getWithSearchParams<JinaUserBrief>('/v1/api_key/user', {
|
||||
api_key: token,
|
||||
}, {
|
||||
responseType: 'json',
|
||||
});
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
async reportUsage(token: string, query: JinaUsageReport) {
|
||||
const r = await this.postJson('/v1/usage', query, {
|
||||
headers: {
|
||||
Authorization: `Bearer ${token}`,
|
||||
'x-api-key': this.apiKey,
|
||||
},
|
||||
responseType: 'text',
|
||||
});
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
}
|
||||
59
jina-ai/src/lib/env-config.ts
Normal file
59
jina-ai/src/lib/env-config.ts
Normal file
@ -0,0 +1,59 @@
|
||||
import { container, singleton } from 'tsyringe';
|
||||
|
||||
export const SPECIAL_COMBINED_ENV_KEY = 'ENV_COMBINED';
|
||||
const CONF_ENV = [
|
||||
'OPENAI_API_KEY',
|
||||
|
||||
'ANTHROPIC_API_KEY',
|
||||
|
||||
'REPLICATE_API_KEY',
|
||||
|
||||
'GOOGLE_AI_STUDIO_API_KEY',
|
||||
|
||||
'JINA_EMBEDDINGS_API_KEY',
|
||||
|
||||
'JINA_EMBEDDINGS_DASHBOARD_API_KEY',
|
||||
|
||||
'BRAVE_SEARCH_API_KEY',
|
||||
|
||||
] as const;
|
||||
|
||||
|
||||
@singleton()
|
||||
export class EnvConfig {
|
||||
dynamic!: Record<string, string>;
|
||||
|
||||
combined: Record<string, string> = {};
|
||||
originalEnv: Record<string, string | undefined> = { ...process.env };
|
||||
|
||||
constructor() {
|
||||
if (process.env[SPECIAL_COMBINED_ENV_KEY]) {
|
||||
Object.assign(this.combined, JSON.parse(
|
||||
Buffer.from(process.env[SPECIAL_COMBINED_ENV_KEY]!, 'base64').toString('utf-8')
|
||||
));
|
||||
delete process.env[SPECIAL_COMBINED_ENV_KEY];
|
||||
}
|
||||
|
||||
// Static config
|
||||
for (const x of CONF_ENV) {
|
||||
const s = this.combined[x] || process.env[x] || '';
|
||||
Reflect.set(this, x, s);
|
||||
if (x in process.env) {
|
||||
delete process.env[x];
|
||||
}
|
||||
}
|
||||
|
||||
// Dynamic config
|
||||
this.dynamic = new Proxy({
|
||||
get: (_target: any, prop: string) => {
|
||||
return this.combined[prop] || process.env[prop] || '';
|
||||
}
|
||||
}, {}) as any;
|
||||
}
|
||||
}
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-empty-interface
|
||||
export interface EnvConfig extends Record<typeof CONF_ENV[number], string> { }
|
||||
|
||||
const instance = container.resolve(EnvConfig);
|
||||
export default instance;
|
||||
70
jina-ai/src/lib/errors.ts
Normal file
70
jina-ai/src/lib/errors.ts
Normal file
@ -0,0 +1,70 @@
|
||||
import { ApplicationError, Prop, RPC_TRANSFER_PROTOCOL_META_SYMBOL, StatusCode } from 'civkit';
|
||||
import _ from 'lodash';
|
||||
import dayjs from 'dayjs';
|
||||
import utc from 'dayjs/plugin/utc';
|
||||
|
||||
dayjs.extend(utc);
|
||||
|
||||
@StatusCode(50301)
|
||||
export class ServiceDisabledError extends ApplicationError { }
|
||||
|
||||
@StatusCode(50302)
|
||||
export class ServiceCrashedError extends ApplicationError { }
|
||||
|
||||
@StatusCode(50303)
|
||||
export class ServiceNodeResourceDrainError extends ApplicationError { }
|
||||
|
||||
@StatusCode(40104)
|
||||
export class EmailUnverifiedError extends ApplicationError { }
|
||||
|
||||
@StatusCode(40201)
|
||||
export class InsufficientCreditsError extends ApplicationError { }
|
||||
|
||||
@StatusCode(40202)
|
||||
export class FreeFeatureLimitError extends ApplicationError { }
|
||||
|
||||
@StatusCode(40203)
|
||||
export class InsufficientBalanceError extends ApplicationError { }
|
||||
|
||||
@StatusCode(40903)
|
||||
export class LockConflictError extends ApplicationError { }
|
||||
|
||||
@StatusCode(40904)
|
||||
export class BudgetExceededError extends ApplicationError { }
|
||||
|
||||
@StatusCode(45101)
|
||||
export class HarmfulContentError extends ApplicationError { }
|
||||
|
||||
@StatusCode(45102)
|
||||
export class SecurityCompromiseError extends ApplicationError { }
|
||||
|
||||
@StatusCode(41201)
|
||||
export class BatchSizeTooLargeError extends ApplicationError { }
|
||||
|
||||
|
||||
@StatusCode(42903)
|
||||
export class RateLimitTriggeredError extends ApplicationError {
|
||||
|
||||
@Prop({
|
||||
desc: 'Retry after seconds',
|
||||
})
|
||||
retryAfter?: number;
|
||||
|
||||
@Prop({
|
||||
desc: 'Retry after date',
|
||||
})
|
||||
retryAfterDate?: Date;
|
||||
|
||||
protected override get [RPC_TRANSFER_PROTOCOL_META_SYMBOL]() {
|
||||
const retryAfter = this.retryAfter || this.retryAfterDate;
|
||||
if (!retryAfter) {
|
||||
return super[RPC_TRANSFER_PROTOCOL_META_SYMBOL];
|
||||
}
|
||||
|
||||
return _.merge(_.cloneDeep(super[RPC_TRANSFER_PROTOCOL_META_SYMBOL]), {
|
||||
headers: {
|
||||
'Retry-After': `${retryAfter instanceof Date ? dayjs(retryAfter).utc().format('ddd, DD MMM YYYY HH:mm:ss [GMT]') : retryAfter}`,
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
223
jina-ai/src/lib/firestore.ts
Normal file
223
jina-ai/src/lib/firestore.ts
Normal file
@ -0,0 +1,223 @@
|
||||
import _ from 'lodash';
|
||||
import { AutoCastable, Prop, RPC_MARSHAL } from 'civkit/civ-rpc';
|
||||
import {
|
||||
Firestore, FieldValue, DocumentReference,
|
||||
Query, Timestamp, SetOptions, DocumentSnapshot,
|
||||
} from '@google-cloud/firestore';
|
||||
|
||||
// Firestore doesn't support JavaScript objects with custom prototypes (i.e. objects that were created via the \"new\" operator)
|
||||
function patchFireStoreArrogance(func: Function) {
|
||||
return function (this: unknown) {
|
||||
const origObjectGetPrototype = Object.getPrototypeOf;
|
||||
Object.getPrototypeOf = function (x) {
|
||||
const r = origObjectGetPrototype.call(this, x);
|
||||
if (!r) {
|
||||
return r;
|
||||
}
|
||||
return Object.prototype;
|
||||
};
|
||||
try {
|
||||
return func.call(this, ...arguments);
|
||||
} finally {
|
||||
Object.getPrototypeOf = origObjectGetPrototype;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
Reflect.set(DocumentReference.prototype, 'set', patchFireStoreArrogance(Reflect.get(DocumentReference.prototype, 'set')));
|
||||
Reflect.set(DocumentSnapshot, 'fromObject', patchFireStoreArrogance(Reflect.get(DocumentSnapshot, 'fromObject')));
|
||||
|
||||
function mapValuesDeep(v: any, fn: (i: any) => any): any {
|
||||
if (_.isPlainObject(v)) {
|
||||
return _.mapValues(v, (i) => mapValuesDeep(i, fn));
|
||||
} else if (_.isArray(v)) {
|
||||
return v.map((i) => mapValuesDeep(i, fn));
|
||||
} else {
|
||||
return fn(v);
|
||||
}
|
||||
}
|
||||
|
||||
export type Constructor<T> = { new(...args: any[]): T; };
|
||||
export type Constructed<T> = T extends Partial<infer U> ? U : T extends object ? T : object;
|
||||
|
||||
export function fromFirestore<T extends FirestoreRecord>(
|
||||
this: Constructor<T>, id: string, overrideCollection?: string
|
||||
): Promise<T | undefined>;
|
||||
export async function fromFirestore(
|
||||
this: any, id: string, overrideCollection?: string
|
||||
) {
|
||||
const collection = overrideCollection || this.collectionName;
|
||||
if (!collection) {
|
||||
throw new Error(`Missing collection name to construct ${this.name}`);
|
||||
}
|
||||
|
||||
const ref = this.DB.collection(overrideCollection || this.collectionName).doc(id);
|
||||
|
||||
const ptr = await ref.get();
|
||||
|
||||
if (!ptr.exists) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const doc = this.from(
|
||||
// Fixes non-native firebase types
|
||||
mapValuesDeep(ptr.data(), (i: any) => {
|
||||
if (i instanceof Timestamp) {
|
||||
return i.toDate();
|
||||
}
|
||||
|
||||
return i;
|
||||
})
|
||||
);
|
||||
|
||||
Object.defineProperty(doc, '_ref', { value: ref, enumerable: false });
|
||||
Object.defineProperty(doc, '_id', { value: ptr.id, enumerable: true });
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
export function fromFirestoreQuery<T extends FirestoreRecord>(
|
||||
this: Constructor<T>, query: Query
|
||||
): Promise<T[]>;
|
||||
export async function fromFirestoreQuery(this: any, query: Query) {
|
||||
const ptr = await query.get();
|
||||
|
||||
if (ptr.docs.length) {
|
||||
return ptr.docs.map(doc => {
|
||||
const r = this.from(
|
||||
mapValuesDeep(doc.data(), (i: any) => {
|
||||
if (i instanceof Timestamp) {
|
||||
return i.toDate();
|
||||
}
|
||||
|
||||
return i;
|
||||
})
|
||||
);
|
||||
Object.defineProperty(r, '_ref', { value: doc.ref, enumerable: false });
|
||||
Object.defineProperty(r, '_id', { value: doc.id, enumerable: true });
|
||||
|
||||
return r;
|
||||
});
|
||||
}
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
export function setToFirestore<T extends FirestoreRecord>(
|
||||
this: Constructor<T>, doc: T, overrideCollection?: string, setOptions?: SetOptions
|
||||
): Promise<T>;
|
||||
export async function setToFirestore(
|
||||
this: any, doc: any, overrideCollection?: string, setOptions?: SetOptions
|
||||
) {
|
||||
let ref: DocumentReference<any> = doc._ref;
|
||||
if (!ref) {
|
||||
const collection = overrideCollection || this.collectionName;
|
||||
if (!collection) {
|
||||
throw new Error(`Missing collection name to construct ${this.name}`);
|
||||
}
|
||||
|
||||
const predefinedId = doc._id || undefined;
|
||||
const hdl = this.DB.collection(overrideCollection || this.collectionName);
|
||||
ref = predefinedId ? hdl.doc(predefinedId) : hdl.doc();
|
||||
|
||||
Object.defineProperty(doc, '_ref', { value: ref, enumerable: false });
|
||||
Object.defineProperty(doc, '_id', { value: ref.id, enumerable: true });
|
||||
}
|
||||
|
||||
await ref.set(doc, { merge: true, ...setOptions });
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
export function deleteQueryBatch<T extends FirestoreRecord>(
|
||||
this: Constructor<T>, query: Query
|
||||
): Promise<T>;
|
||||
export async function deleteQueryBatch(this: any, query: Query) {
|
||||
const snapshot = await query.get();
|
||||
|
||||
const batchSize = snapshot.size;
|
||||
if (batchSize === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete documents in a batch
|
||||
const batch = this.DB.batch();
|
||||
snapshot.docs.forEach((doc) => {
|
||||
batch.delete(doc.ref);
|
||||
});
|
||||
await batch.commit();
|
||||
|
||||
process.nextTick(() => {
|
||||
this.deleteQueryBatch(query);
|
||||
});
|
||||
};
|
||||
|
||||
export function fromFirestoreDoc<T extends FirestoreRecord>(
|
||||
this: Constructor<T>, snapshot: DocumentSnapshot,
|
||||
): T | undefined;
|
||||
export function fromFirestoreDoc(
|
||||
this: any, snapshot: DocumentSnapshot,
|
||||
) {
|
||||
const doc = this.from(
|
||||
// Fixes non-native firebase types
|
||||
mapValuesDeep(snapshot.data(), (i: any) => {
|
||||
if (i instanceof Timestamp) {
|
||||
return i.toDate();
|
||||
}
|
||||
|
||||
return i;
|
||||
})
|
||||
);
|
||||
|
||||
Object.defineProperty(doc, '_ref', { value: snapshot.ref, enumerable: false });
|
||||
Object.defineProperty(doc, '_id', { value: snapshot.id, enumerable: true });
|
||||
|
||||
return doc;
|
||||
}
|
||||
const defaultFireStore = new Firestore({
|
||||
projectId: process.env.GCLOUD_PROJECT,
|
||||
});
|
||||
export class FirestoreRecord extends AutoCastable {
|
||||
static collectionName?: string;
|
||||
static OPS = FieldValue;
|
||||
static DB = defaultFireStore;
|
||||
static get COLLECTION() {
|
||||
if (!this.collectionName) {
|
||||
throw new Error('Not implemented');
|
||||
}
|
||||
|
||||
return this.DB.collection(this.collectionName);
|
||||
}
|
||||
|
||||
@Prop()
|
||||
_id?: string;
|
||||
_ref?: DocumentReference<Partial<Omit<this, '_ref' | '_id'>>>;
|
||||
|
||||
static fromFirestore = fromFirestore;
|
||||
static fromFirestoreDoc = fromFirestoreDoc;
|
||||
static fromFirestoreQuery = fromFirestoreQuery;
|
||||
|
||||
static save = setToFirestore;
|
||||
static deleteQueryBatch = deleteQueryBatch;
|
||||
|
||||
[RPC_MARSHAL]() {
|
||||
return {
|
||||
...this,
|
||||
_id: this._id,
|
||||
_ref: this._ref?.path
|
||||
};
|
||||
}
|
||||
|
||||
degradeForFireStore(): this {
|
||||
return JSON.parse(JSON.stringify(this, function (k, v) {
|
||||
if (k === '') {
|
||||
return v;
|
||||
}
|
||||
if (typeof v === 'object' && v && (typeof v.degradeForFireStore === 'function')) {
|
||||
return v.degradeForFireStore();
|
||||
}
|
||||
|
||||
return v;
|
||||
}));
|
||||
}
|
||||
}
|
||||
56
jina-ai/src/lib/logger.ts
Normal file
56
jina-ai/src/lib/logger.ts
Normal file
@ -0,0 +1,56 @@
|
||||
import { AbstractPinoLogger } from 'civkit/pino-logger';
|
||||
import { singleton, container } from 'tsyringe';
|
||||
import { threadId } from 'node:worker_threads';
|
||||
import { getTraceCtx } from 'civkit/async-context';
|
||||
|
||||
|
||||
const levelToSeverityMap: { [k: string]: string | undefined; } = {
|
||||
trace: 'DEFAULT',
|
||||
debug: 'DEBUG',
|
||||
info: 'INFO',
|
||||
warn: 'WARNING',
|
||||
error: 'ERROR',
|
||||
fatal: 'CRITICAL',
|
||||
};
|
||||
|
||||
@singleton()
|
||||
export class GlobalLogger extends AbstractPinoLogger {
|
||||
loggerOptions = {
|
||||
level: 'debug',
|
||||
base: {
|
||||
tid: threadId,
|
||||
}
|
||||
};
|
||||
|
||||
override init(): void {
|
||||
if (process.env['NODE_ENV']?.startsWith('prod')) {
|
||||
super.init(process.stdout);
|
||||
} else {
|
||||
const PinoPretty = require('pino-pretty').PinoPretty;
|
||||
super.init(PinoPretty({
|
||||
singleLine: true,
|
||||
colorize: true,
|
||||
messageFormat(log: any, messageKey: any) {
|
||||
return `${log['tid'] ? `[${log['tid']}]` : ''}[${log['service'] || 'ROOT'}] ${log[messageKey]}`;
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
|
||||
this.emit('ready');
|
||||
}
|
||||
|
||||
override log(...args: any[]) {
|
||||
const [levelObj, ...rest] = args;
|
||||
const severity = levelToSeverityMap[levelObj?.level];
|
||||
const traceCtx = getTraceCtx();
|
||||
const patched: any= { ...levelObj, severity };
|
||||
if (traceCtx?.traceId && process.env['GCLOUD_PROJECT']) {
|
||||
patched['logging.googleapis.com/trace'] = `projects/${process.env['GCLOUD_PROJECT']}/traces/${traceCtx.traceId}`;
|
||||
}
|
||||
return super.log(patched, ...rest);
|
||||
}
|
||||
}
|
||||
|
||||
const instance = container.resolve(GlobalLogger);
|
||||
export default instance;
|
||||
4
jina-ai/src/lib/registry.ts
Normal file
4
jina-ai/src/lib/registry.ts
Normal file
@ -0,0 +1,4 @@
|
||||
import { container } from 'tsyringe';
|
||||
import { propertyInjectorFactory } from 'civkit/property-injector';
|
||||
|
||||
export const InjectProperty = propertyInjectorFactory(container);
|
||||
93
jina-ai/src/patch-express.ts
Normal file
93
jina-ai/src/patch-express.ts
Normal file
@ -0,0 +1,93 @@
|
||||
import { ApplicationError, RPC_CALL_ENVIRONMENT } from "civkit/civ-rpc";
|
||||
import { marshalErrorLike } from "civkit/lang";
|
||||
import { randomUUID } from "crypto";
|
||||
import { once } from "events";
|
||||
import type { NextFunction, Request, Response } from "express";
|
||||
|
||||
import { JinaEmbeddingsAuthDTO } from "./dto/jina-embeddings-auth";
|
||||
import rateLimitControl, { API_CALL_STATUS, RateLimitDesc } from "./rate-limit";
|
||||
import asyncLocalContext from "./lib/async-context";
|
||||
import globalLogger from "./lib/logger";
|
||||
|
||||
globalLogger.serviceReady();
|
||||
const logger = globalLogger.child({ service: 'BillingMiddleware' });
|
||||
|
||||
const appName = 'DEEPRESEARCH';
|
||||
export const jinaAiBillingMiddleware = (req: Request, res: Response, next: NextFunction) => {
|
||||
if (req.path === '/ping') {
|
||||
res.status(200).end('pone');
|
||||
return;
|
||||
}
|
||||
if (req.method !== 'POST' && req.method !== 'GET') {
|
||||
next();
|
||||
return;
|
||||
}
|
||||
asyncLocalContext.run(async () => {
|
||||
const googleTraceId = req.get('x-cloud-trace-context')?.split('/')?.[0];
|
||||
const ctx = asyncLocalContext.ctx;
|
||||
ctx.traceId = req.get('x-request-id') || req.get('request-id') || googleTraceId || randomUUID();
|
||||
ctx.traceT0 = new Date();
|
||||
ctx.ip = req?.ip;
|
||||
|
||||
try {
|
||||
const authDto = JinaEmbeddingsAuthDTO.from({
|
||||
[RPC_CALL_ENVIRONMENT]: { req, res }
|
||||
});
|
||||
|
||||
const user = await authDto.assertUser();
|
||||
await rateLimitControl.serviceReady();
|
||||
const rateLimitPolicy = authDto.getRateLimits(appName) || [
|
||||
parseInt(user.metadata?.speed_level) >= 2 ?
|
||||
RateLimitDesc.from({
|
||||
occurrence: 30,
|
||||
periodSeconds: 60
|
||||
}) :
|
||||
RateLimitDesc.from({
|
||||
occurrence: 10,
|
||||
periodSeconds: 60
|
||||
})
|
||||
];
|
||||
const criterions = rateLimitPolicy.map((c) => rateLimitControl.rateLimitDescToCriterion(c));
|
||||
await Promise.all(criterions.map(([pointInTime, n]) => rateLimitControl.assertUidPeriodicLimit(user._id, pointInTime, n, appName)));
|
||||
|
||||
const apiRoll = rateLimitControl.record({ uid: user._id, tags: [appName] })
|
||||
apiRoll.save().catch((err) => logger.warn(`Failed to save rate limit record`, { err: marshalErrorLike(err) }));
|
||||
|
||||
const pResClose = once(res, 'close');
|
||||
|
||||
next();
|
||||
|
||||
await pResClose;
|
||||
const chargeAmount = ctx.chargeAmount;
|
||||
if (chargeAmount) {
|
||||
authDto.reportUsage(chargeAmount, `reader-${appName}`).catch((err) => {
|
||||
logger.warn(`Unable to report usage for ${user._id}`, { err: marshalErrorLike(err) });
|
||||
});
|
||||
apiRoll.chargeAmount = chargeAmount;
|
||||
}
|
||||
apiRoll.status = res.statusCode === 200 ? API_CALL_STATUS.SUCCESS : API_CALL_STATUS.ERROR;
|
||||
apiRoll.save().catch((err) => logger.warn(`Failed to save rate limit record`, { err: marshalErrorLike(err) }));
|
||||
logger.info(`HTTP ${res.statusCode} for request ${ctx.traceId} after ${Date.now() - ctx.traceT0.valueOf()}ms`, {
|
||||
uid: user._id,
|
||||
chargeAmount,
|
||||
});
|
||||
|
||||
} catch (err: any) {
|
||||
if (!res.headersSent) {
|
||||
if (err instanceof ApplicationError) {
|
||||
res.status(parseInt(err.code as string) || 500).json({ error: err.message });
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
res.status(500).json({ error: 'Internal' });
|
||||
}
|
||||
|
||||
logger.error(`Error in billing middleware`, { err: marshalErrorLike(err) });
|
||||
if (err.stack) {
|
||||
logger.error(err.stack);
|
||||
}
|
||||
}
|
||||
|
||||
});
|
||||
}
|
||||
278
jina-ai/src/rate-limit.ts
Normal file
278
jina-ai/src/rate-limit.ts
Normal file
@ -0,0 +1,278 @@
|
||||
import { AutoCastable, ResourcePolicyDenyError, Also, Prop } from 'civkit/civ-rpc';
|
||||
import { AsyncService } from 'civkit/async-service';
|
||||
import { getTraceId } from 'civkit/async-context';
|
||||
import { singleton, container } from 'tsyringe';
|
||||
|
||||
import { RateLimitTriggeredError } from './lib/errors';
|
||||
import { FirestoreRecord } from './lib/firestore';
|
||||
import { GlobalLogger } from './lib/logger';
|
||||
|
||||
export enum API_CALL_STATUS {
|
||||
SUCCESS = 'success',
|
||||
ERROR = 'error',
|
||||
PENDING = 'pending',
|
||||
}
|
||||
|
||||
@Also({ dictOf: Object })
|
||||
export class APICall extends FirestoreRecord {
|
||||
static override collectionName = 'apiRoll';
|
||||
|
||||
@Prop({
|
||||
required: true,
|
||||
defaultFactory: () => getTraceId()
|
||||
})
|
||||
traceId!: string;
|
||||
|
||||
@Prop()
|
||||
uid?: string;
|
||||
|
||||
@Prop()
|
||||
ip?: string;
|
||||
|
||||
@Prop({
|
||||
arrayOf: String,
|
||||
default: [],
|
||||
})
|
||||
tags!: string[];
|
||||
|
||||
@Prop({
|
||||
required: true,
|
||||
defaultFactory: () => new Date(),
|
||||
})
|
||||
createdAt!: Date;
|
||||
|
||||
@Prop()
|
||||
completedAt?: Date;
|
||||
|
||||
@Prop({
|
||||
required: true,
|
||||
default: API_CALL_STATUS.PENDING,
|
||||
})
|
||||
status!: API_CALL_STATUS;
|
||||
|
||||
@Prop({
|
||||
required: true,
|
||||
defaultFactory: () => new Date(Date.now() + 1000 * 60 * 60 * 24 * 90),
|
||||
})
|
||||
expireAt!: Date;
|
||||
|
||||
[k: string]: any;
|
||||
|
||||
tag(...tags: string[]) {
|
||||
for (const t of tags) {
|
||||
if (!this.tags.includes(t)) {
|
||||
this.tags.push(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
save() {
|
||||
return (this.constructor as typeof APICall).save(this);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
export class RateLimitDesc extends AutoCastable {
|
||||
@Prop({
|
||||
default: 1000
|
||||
})
|
||||
occurrence!: number;
|
||||
|
||||
@Prop({
|
||||
default: 3600
|
||||
})
|
||||
periodSeconds!: number;
|
||||
|
||||
@Prop()
|
||||
notBefore?: Date;
|
||||
|
||||
@Prop()
|
||||
notAfter?: Date;
|
||||
|
||||
isEffective() {
|
||||
const now = new Date();
|
||||
if (this.notBefore && this.notBefore > now) {
|
||||
return false;
|
||||
}
|
||||
if (this.notAfter && this.notAfter < now) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@singleton()
|
||||
export class RateLimitControl extends AsyncService {
|
||||
|
||||
logger = this.globalLogger.child({ service: this.constructor.name });
|
||||
|
||||
constructor(
|
||||
protected globalLogger: GlobalLogger,
|
||||
) {
|
||||
super(...arguments);
|
||||
}
|
||||
|
||||
override async init() {
|
||||
await this.dependencyReady();
|
||||
|
||||
this.emit('ready');
|
||||
}
|
||||
|
||||
async queryByUid(uid: string, pointInTime: Date, ...tags: string[]) {
|
||||
let q = APICall.COLLECTION
|
||||
.orderBy('createdAt', 'asc')
|
||||
.where('createdAt', '>=', pointInTime)
|
||||
.where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
|
||||
.where('uid', '==', uid);
|
||||
if (tags.length) {
|
||||
q = q.where('tags', 'array-contains-any', tags);
|
||||
}
|
||||
|
||||
return APICall.fromFirestoreQuery(q);
|
||||
}
|
||||
|
||||
async queryByIp(ip: string, pointInTime: Date, ...tags: string[]) {
|
||||
let q = APICall.COLLECTION
|
||||
.orderBy('createdAt', 'asc')
|
||||
.where('createdAt', '>=', pointInTime)
|
||||
.where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
|
||||
.where('ip', '==', ip);
|
||||
if (tags.length) {
|
||||
q = q.where('tags', 'array-contains-any', tags);
|
||||
}
|
||||
|
||||
return APICall.fromFirestoreQuery(q);
|
||||
}
|
||||
|
||||
async assertUidPeriodicLimit(uid: string, pointInTime: Date, limit: number, ...tags: string[]) {
|
||||
if (limit <= 0) {
|
||||
throw new ResourcePolicyDenyError(`This UID(${uid}) is not allowed to call this endpoint (rate limit quota is 0).`);
|
||||
}
|
||||
|
||||
let q = APICall.COLLECTION
|
||||
.orderBy('createdAt', 'asc')
|
||||
.where('createdAt', '>=', pointInTime)
|
||||
.where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
|
||||
.where('uid', '==', uid);
|
||||
if (tags.length) {
|
||||
q = q.where('tags', 'array-contains-any', tags);
|
||||
}
|
||||
const count = (await q.count().get()).data().count;
|
||||
|
||||
if (count >= limit) {
|
||||
const r = await APICall.fromFirestoreQuery(q.limit(1));
|
||||
const [r1] = r;
|
||||
|
||||
const dtMs = Math.abs(r1.createdAt?.valueOf() - pointInTime.valueOf());
|
||||
const dtSec = Math.ceil(dtMs / 1000);
|
||||
|
||||
throw RateLimitTriggeredError.from({
|
||||
message: `Per UID rate limit exceeded (${tags.join(',') || 'called'} ${limit} times since ${pointInTime})`,
|
||||
retryAfter: dtSec,
|
||||
});
|
||||
}
|
||||
|
||||
return count + 1;
|
||||
}
|
||||
|
||||
async assertIPPeriodicLimit(ip: string, pointInTime: Date, limit: number, ...tags: string[]) {
|
||||
let q = APICall.COLLECTION
|
||||
.orderBy('createdAt', 'asc')
|
||||
.where('createdAt', '>=', pointInTime)
|
||||
.where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
|
||||
.where('ip', '==', ip);
|
||||
if (tags.length) {
|
||||
q = q.where('tags', 'array-contains-any', tags);
|
||||
}
|
||||
|
||||
const count = (await q.count().get()).data().count;
|
||||
|
||||
if (count >= limit) {
|
||||
const r = await APICall.fromFirestoreQuery(q.limit(1));
|
||||
const [r1] = r;
|
||||
|
||||
const dtMs = Math.abs(r1.createdAt?.valueOf() - pointInTime.valueOf());
|
||||
const dtSec = Math.ceil(dtMs / 1000);
|
||||
|
||||
throw RateLimitTriggeredError.from({
|
||||
message: `Per IP rate limit exceeded (${tags.join(',') || 'called'} ${limit} times since ${pointInTime})`,
|
||||
retryAfter: dtSec,
|
||||
});
|
||||
}
|
||||
|
||||
return count + 1;
|
||||
}
|
||||
|
||||
record(partialRecord: Partial<APICall>) {
|
||||
const record = APICall.from(partialRecord);
|
||||
const newId = APICall.COLLECTION.doc().id;
|
||||
record._id = newId;
|
||||
|
||||
return record;
|
||||
}
|
||||
|
||||
// async simpleRPCUidBasedLimit(rpcReflect: RPCReflection, uid: string, tags: string[] = [],
|
||||
// ...inputCriterion: RateLimitDesc[] | [Date, number][]) {
|
||||
// const criterion = inputCriterion.map((c) => { return Array.isArray(c) ? c : this.rateLimitDescToCriterion(c); });
|
||||
|
||||
// await Promise.all(criterion.map(([pointInTime, n]) =>
|
||||
// this.assertUidPeriodicLimit(uid, pointInTime, n, ...tags)));
|
||||
|
||||
// const r = this.record({
|
||||
// uid,
|
||||
// tags,
|
||||
// });
|
||||
|
||||
// r.save().catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
|
||||
// rpcReflect.then(() => {
|
||||
// r.status = API_CALL_STATUS.SUCCESS;
|
||||
// r.save()
|
||||
// .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
|
||||
// });
|
||||
// rpcReflect.catch((err) => {
|
||||
// r.status = API_CALL_STATUS.ERROR;
|
||||
// r.error = err.toString();
|
||||
// r.save()
|
||||
// .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
|
||||
// });
|
||||
|
||||
// return r;
|
||||
// }
|
||||
|
||||
rateLimitDescToCriterion(rateLimitDesc: RateLimitDesc) {
|
||||
return [new Date(Date.now() - rateLimitDesc.periodSeconds * 1000), rateLimitDesc.occurrence] as [Date, number];
|
||||
}
|
||||
|
||||
// async simpleRpcIPBasedLimit(rpcReflect: RPCReflection, ip: string, tags: string[] = [],
|
||||
// ...inputCriterion: RateLimitDesc[] | [Date, number][]) {
|
||||
// const criterion = inputCriterion.map((c) => { return Array.isArray(c) ? c : this.rateLimitDescToCriterion(c); });
|
||||
// await Promise.all(criterion.map(([pointInTime, n]) =>
|
||||
// this.assertIPPeriodicLimit(ip, pointInTime, n, ...tags)));
|
||||
|
||||
// const r = this.record({
|
||||
// ip,
|
||||
// tags,
|
||||
// });
|
||||
|
||||
// r.save().catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
|
||||
// rpcReflect.then(() => {
|
||||
// r.status = API_CALL_STATUS.SUCCESS;
|
||||
// r.save()
|
||||
// .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
|
||||
// });
|
||||
// rpcReflect.catch((err) => {
|
||||
// r.status = API_CALL_STATUS.ERROR;
|
||||
// r.error = err.toString();
|
||||
// r.save()
|
||||
// .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
|
||||
// });
|
||||
|
||||
// return r;
|
||||
// }
|
||||
}
|
||||
|
||||
const instance = container.resolve(RateLimitControl);
|
||||
|
||||
export default instance;
|
||||
56
jina-ai/src/server.ts
Normal file
56
jina-ai/src/server.ts
Normal file
@ -0,0 +1,56 @@
|
||||
import 'reflect-metadata'
|
||||
import express from 'express';
|
||||
import { jinaAiBillingMiddleware } from "./patch-express";
|
||||
import { Server } from 'http';
|
||||
|
||||
const app = require('../..').default;
|
||||
|
||||
const rootApp = express();
|
||||
rootApp.use(jinaAiBillingMiddleware, app);
|
||||
|
||||
|
||||
const port = process.env.PORT || 3000;
|
||||
|
||||
let server: Server | undefined;
|
||||
// Export server startup function for better testing
|
||||
export function startServer() {
|
||||
return rootApp.listen(port, () => {
|
||||
console.log(`Server running at http://localhost:${port}`);
|
||||
});
|
||||
}
|
||||
|
||||
// Start server if running directly
|
||||
if (process.env.NODE_ENV !== 'test') {
|
||||
server = startServer();
|
||||
}
|
||||
|
||||
process.on('unhandledRejection', (_err) => `Is false alarm`);
|
||||
|
||||
process.on('uncaughtException', (err) => {
|
||||
console.log('Uncaught exception', err);
|
||||
|
||||
// Looks like Firebase runtime does not handle error properly.
|
||||
// Make sure to quit the process.
|
||||
process.nextTick(() => process.exit(1));
|
||||
console.error('Uncaught exception, process quit.');
|
||||
throw err;
|
||||
});
|
||||
|
||||
const sigHandler = (signal: string) => {
|
||||
console.log(`Received ${signal}, exiting...`);
|
||||
if (server && server.listening) {
|
||||
console.log(`Shutting down gracefully...`);
|
||||
console.log(`Waiting for the server to drain and close...`);
|
||||
server.close((err) => {
|
||||
if (err) {
|
||||
console.error('Error while closing server', err);
|
||||
return;
|
||||
}
|
||||
process.exit(0);
|
||||
});
|
||||
server.closeIdleConnections();
|
||||
}
|
||||
|
||||
}
|
||||
process.on('SIGTERM', sigHandler);
|
||||
process.on('SIGINT', sigHandler);
|
||||
17
jina-ai/tsconfig.json
Normal file
17
jina-ai/tsconfig.json
Normal file
@ -0,0 +1,17 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2020",
|
||||
"module": "node16",
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"sourceMap": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"strict": true,
|
||||
"experimentalDecorators": true,
|
||||
"emitDecoratorMetadata": true,
|
||||
"resolveJsonModule": true
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,15 +1,13 @@
|
||||
{
|
||||
"name": "node-deepresearch",
|
||||
"version": "1.0.0",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
"main": "dist/app.js",
|
||||
"files": [
|
||||
"dist",
|
||||
"README.md",
|
||||
"LICENSE"
|
||||
],
|
||||
"scripts": {
|
||||
"prepare": "npm run build",
|
||||
"build": "tsc",
|
||||
"dev": "npx ts-node src/agent.ts",
|
||||
"search": "npx ts-node src/test-duck.ts",
|
||||
@ -17,6 +15,7 @@
|
||||
"lint": "eslint . --ext .ts",
|
||||
"lint:fix": "eslint . --ext .ts --fix",
|
||||
"serve": "ts-node src/server.ts",
|
||||
"start": "ts-node src/server.ts",
|
||||
"eval": "ts-node src/evals/batch-evals.ts",
|
||||
"test": "jest --testTimeout=30000",
|
||||
"test:watch": "jest --watch"
|
||||
|
||||
@ -22,7 +22,7 @@ describe('/v1/chat/completions', () => {
|
||||
process.argv.push(`--secret=${TEST_SECRET}`);
|
||||
|
||||
// Import server module (jest.resetModules() is called automatically before each test)
|
||||
const { default: serverModule } = await import('../server');
|
||||
const { default: serverModule } = await require('../app');
|
||||
app = serverModule;
|
||||
});
|
||||
|
||||
@ -67,7 +67,7 @@ describe('/v1/chat/completions', () => {
|
||||
jest.resetModules();
|
||||
|
||||
// Reload server module without secret
|
||||
const { default: serverModule } = await import('../server');
|
||||
const { default: serverModule } = await require('../app');
|
||||
app = serverModule;
|
||||
|
||||
const response = await request(app)
|
||||
|
||||
647
src/app.ts
Normal file
647
src/app.ts
Normal file
@ -0,0 +1,647 @@
|
||||
import express, {Request, Response, RequestHandler} from 'express';
|
||||
import cors from 'cors';
|
||||
import {EventEmitter} from 'events';
|
||||
import {getResponse} from './agent';
|
||||
import {
|
||||
StepAction,
|
||||
StreamMessage,
|
||||
TrackerContext,
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
ChatCompletionChunk,
|
||||
AnswerAction,
|
||||
TOKEN_CATEGORIES,
|
||||
Model
|
||||
} from './types';
|
||||
import fs from 'fs/promises';
|
||||
import path from 'path';
|
||||
import {TokenTracker} from "./utils/token-tracker";
|
||||
import {ActionTracker} from "./utils/action-tracker";
|
||||
|
||||
const app = express();
|
||||
|
||||
// Get secret from command line args for optional authentication
|
||||
const secret = process.argv.find(arg => arg.startsWith('--secret='))?.split('=')[1];
|
||||
|
||||
app.use(cors());
|
||||
app.use(express.json());
|
||||
|
||||
const eventEmitter = new EventEmitter();
|
||||
|
||||
interface QueryRequest extends Request {
|
||||
body: {
|
||||
q: string;
|
||||
budget?: number;
|
||||
maxBadAttempt?: number;
|
||||
};
|
||||
}
|
||||
|
||||
function buildMdFromAnswer(answer: AnswerAction) {
|
||||
let refStr = '';
|
||||
if (answer.references?.length > 0) {
|
||||
refStr = `
|
||||
|
||||
## References
|
||||
${answer.references.map((ref, i) => `
|
||||
${i + 1}. [${ref.exactQuote}](${ref.url})`).join('')}`;
|
||||
}
|
||||
return `${answer.answer.replace(/\(REF_(\d+)\)/g, (_, num) => `[^${num}]`)}${refStr}`;
|
||||
}
|
||||
|
||||
|
||||
// Modified streamTextWordByWord function
|
||||
async function* streamTextWordByWord(text: string, streamingState: StreamingState) {
|
||||
const words = text.split(/(\s+)/);
|
||||
for (const word of words) {
|
||||
if (streamingState.currentlyStreaming) {
|
||||
const delay = Math.floor(Math.random() * 100);
|
||||
await new Promise(resolve => setTimeout(resolve, delay));
|
||||
yield word;
|
||||
} else {
|
||||
// If streaming was interrupted, yield all remaining words at once
|
||||
const remainingWords = words.slice(words.indexOf(word)).join('');
|
||||
yield remainingWords;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to emit remaining content immediately
|
||||
async function emitRemainingContent(
|
||||
res: Response,
|
||||
requestId: string,
|
||||
model: string,
|
||||
content: string
|
||||
) {
|
||||
if (!content) return;
|
||||
|
||||
const chunk: ChatCompletionChunk = {
|
||||
id: requestId,
|
||||
object: 'chat.completion.chunk',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {content},
|
||||
logprobs: null,
|
||||
finish_reason: null
|
||||
}]
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(chunk)}\n\n`);
|
||||
}
|
||||
|
||||
interface StreamingState {
|
||||
currentlyStreaming: boolean;
|
||||
currentGenerator: AsyncGenerator<string> | null;
|
||||
remainingContent: string;
|
||||
}
|
||||
|
||||
async function completeCurrentStreaming(
|
||||
streamingState: StreamingState,
|
||||
res: Response,
|
||||
requestId: string,
|
||||
model: string
|
||||
) {
|
||||
if (streamingState.currentlyStreaming && streamingState.remainingContent) {
|
||||
// Force completion of current streaming
|
||||
await emitRemainingContent(
|
||||
res,
|
||||
requestId,
|
||||
model,
|
||||
streamingState.remainingContent
|
||||
);
|
||||
// Reset streaming state
|
||||
streamingState.currentlyStreaming = false;
|
||||
streamingState.remainingContent = '';
|
||||
streamingState.currentGenerator = null;
|
||||
}
|
||||
}
|
||||
|
||||
// OpenAI-compatible chat completions endpoint
|
||||
// Models API endpoints
|
||||
app.get('/v1/models', (async (_req: Request, res: Response) => {
|
||||
const models: Model[] = [{
|
||||
id: 'jina-deepsearch-v1',
|
||||
object: 'model',
|
||||
created: 1686935002,
|
||||
owned_by: 'jina-ai'
|
||||
}];
|
||||
|
||||
res.json({
|
||||
object: 'list',
|
||||
data: models
|
||||
});
|
||||
}) as RequestHandler);
|
||||
|
||||
app.get('/v1/models/:model', (async (req: Request, res: Response) => {
|
||||
const modelId = req.params.model;
|
||||
|
||||
if (modelId === 'jina-deepsearch-v1') {
|
||||
res.json({
|
||||
id: 'jina-deepsearch-v1',
|
||||
object: 'model',
|
||||
created: 1686935002,
|
||||
owned_by: 'jina-ai'
|
||||
});
|
||||
} else {
|
||||
res.status(404).json({
|
||||
error: {
|
||||
message: `Model '${modelId}' not found`,
|
||||
type: 'invalid_request_error',
|
||||
param: null,
|
||||
code: 'model_not_found'
|
||||
}
|
||||
});
|
||||
}
|
||||
}) as RequestHandler);
|
||||
|
||||
if (secret) {
|
||||
// Check authentication only if secret is set
|
||||
app.use((req, res, next) => {
|
||||
const authHeader = req.headers.authorization;
|
||||
if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
|
||||
console.log('[chat/completions] Unauthorized request');
|
||||
res.status(401).json({ error: 'Unauthorized' });
|
||||
return;
|
||||
}
|
||||
|
||||
return next();
|
||||
});
|
||||
}
|
||||
|
||||
app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
|
||||
// Check authentication only if secret is set
|
||||
if (secret) {
|
||||
const authHeader = req.headers.authorization;
|
||||
if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
|
||||
console.log('[chat/completions] Unauthorized request');
|
||||
res.status(401).json({error: 'Unauthorized'});
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Log request details (excluding sensitive data)
|
||||
console.log('[chat/completions] Request:', {
|
||||
model: req.body.model,
|
||||
stream: req.body.stream,
|
||||
messageCount: req.body.messages?.length,
|
||||
hasAuth: !!req.headers.authorization,
|
||||
requestId: Date.now().toString()
|
||||
});
|
||||
|
||||
const body = req.body as ChatCompletionRequest;
|
||||
if (!body.messages?.length) {
|
||||
return res.status(400).json({error: 'Messages array is required and must not be empty'});
|
||||
}
|
||||
const lastMessage = body.messages[body.messages.length - 1];
|
||||
if (lastMessage.role !== 'user') {
|
||||
return res.status(400).json({error: 'Last message must be from user'});
|
||||
}
|
||||
|
||||
const requestId = Date.now().toString();
|
||||
const context: TrackerContext = {
|
||||
tokenTracker: new TokenTracker(),
|
||||
actionTracker: new ActionTracker()
|
||||
};
|
||||
|
||||
// Track prompt tokens for the initial message
|
||||
// Use Vercel's token counting convention - 1 token per message
|
||||
const messageTokens = body.messages.length;
|
||||
context.tokenTracker.trackUsage('agent', messageTokens, TOKEN_CATEGORIES.PROMPT);
|
||||
|
||||
// Add this inside the chat completions endpoint, before setting up the action listener
|
||||
const streamingState: StreamingState = {
|
||||
currentlyStreaming: false,
|
||||
currentGenerator: null,
|
||||
remainingContent: ''
|
||||
};
|
||||
|
||||
if (body.stream) {
|
||||
res.setHeader('Content-Type', 'text/event-stream');
|
||||
res.setHeader('Cache-Control', 'no-cache');
|
||||
res.setHeader('Connection', 'keep-alive');
|
||||
|
||||
|
||||
// Send initial chunk with opening think tag
|
||||
const initialChunk: ChatCompletionChunk = {
|
||||
id: requestId,
|
||||
object: 'chat.completion.chunk',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {role: 'assistant', content: '<think>'},
|
||||
logprobs: null,
|
||||
finish_reason: null
|
||||
}]
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(initialChunk)}\n\n`);
|
||||
|
||||
// Set up progress listener with cleanup
|
||||
const actionListener = async (action: any) => {
|
||||
if (action.thisStep.think) {
|
||||
// Complete any ongoing streaming first
|
||||
await completeCurrentStreaming(streamingState, res, requestId, body.model);
|
||||
|
||||
// Start new streaming session
|
||||
streamingState.currentlyStreaming = true;
|
||||
streamingState.remainingContent = action.thisStep.think;
|
||||
|
||||
try {
|
||||
for await (const word of streamTextWordByWord(action.thisStep.think, streamingState)) {
|
||||
if (!streamingState.currentlyStreaming) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Update remaining content
|
||||
streamingState.remainingContent = streamingState.remainingContent.slice(word.length);
|
||||
|
||||
const chunk: ChatCompletionChunk = {
|
||||
id: requestId,
|
||||
object: 'chat.completion.chunk',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {content: word},
|
||||
logprobs: null,
|
||||
finish_reason: null
|
||||
}]
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(chunk)}\n\n`);
|
||||
}
|
||||
|
||||
// Only add newline if this streaming completed normally
|
||||
if (streamingState.currentlyStreaming) {
|
||||
const newlineChunk: ChatCompletionChunk = {
|
||||
id: requestId,
|
||||
object: 'chat.completion.chunk',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {content: '\n'},
|
||||
logprobs: null,
|
||||
finish_reason: null
|
||||
}]
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(newlineChunk)}\n\n`);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error in streaming:', error);
|
||||
await completeCurrentStreaming(streamingState, res, requestId, body.model);
|
||||
}
|
||||
}
|
||||
};
|
||||
context.actionTracker.on('action', actionListener);
|
||||
|
||||
// Make sure to update the cleanup code
|
||||
res.on('finish', () => {
|
||||
streamingState.currentlyStreaming = false;
|
||||
streamingState.currentGenerator = null;
|
||||
streamingState.remainingContent = '';
|
||||
context.actionTracker.removeListener('action', actionListener);
|
||||
});
|
||||
}
|
||||
|
||||
try {
|
||||
// Track initial query tokens - already tracked above
|
||||
// const queryTokens = Buffer.byteLength(lastMessage.content, 'utf-8');
|
||||
// context.tokenTracker.trackUsage('agent', queryTokens, 'prompt');
|
||||
|
||||
let result;
|
||||
try {
|
||||
({result} = await getResponse(lastMessage.content, undefined, undefined, context));
|
||||
} catch (error: any) {
|
||||
// If deduplication fails, retry without it
|
||||
if (error?.response?.status === 402) {
|
||||
// If deduplication fails, retry with maxBadAttempt=3 to skip dedup
|
||||
({result} = await getResponse(lastMessage.content, undefined, 3, context));
|
||||
} else {
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
// Track tokens based on action type
|
||||
if (result.action === 'answer') {
|
||||
// Track accepted prediction tokens for the final answer using Vercel's convention
|
||||
const answerTokens = 1; // Default to 1 token per answer
|
||||
context.tokenTracker.trackUsage('evaluator', answerTokens, TOKEN_CATEGORIES.ACCEPTED);
|
||||
} else {
|
||||
// Track rejected prediction tokens for non-answer responses
|
||||
const rejectedTokens = 1; // Default to 1 token per rejected response
|
||||
context.tokenTracker.trackUsage('evaluator', rejectedTokens, TOKEN_CATEGORIES.REJECTED);
|
||||
}
|
||||
|
||||
if (body.stream) {
|
||||
// Complete any ongoing streaming before sending final answer
|
||||
await completeCurrentStreaming(streamingState, res, requestId, body.model);
|
||||
|
||||
// Send closing think tag
|
||||
const closeThinkChunk: ChatCompletionChunk = {
|
||||
id: requestId,
|
||||
object: 'chat.completion.chunk',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {content: `</think>\n\n`},
|
||||
logprobs: null,
|
||||
finish_reason: null
|
||||
}]
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);
|
||||
|
||||
// Send final answer as separate chunk
|
||||
const answerChunk: ChatCompletionChunk = {
|
||||
id: requestId,
|
||||
object: 'chat.completion.chunk',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {content: result.action === 'answer' ? buildMdFromAnswer(result) : result.think},
|
||||
logprobs: null,
|
||||
finish_reason: 'stop'
|
||||
}]
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(answerChunk)}\n\n`);
|
||||
res.end();
|
||||
} else {
|
||||
const usage = context.tokenTracker.getUsageDetails();
|
||||
const response: ChatCompletionResponse = {
|
||||
id: requestId,
|
||||
object: 'chat.completion',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
message: {
|
||||
role: 'assistant',
|
||||
content: result.action === 'answer' ? buildMdFromAnswer(result) : result.think
|
||||
},
|
||||
logprobs: null,
|
||||
finish_reason: 'stop'
|
||||
}],
|
||||
usage
|
||||
};
|
||||
|
||||
// Log final response (excluding full content for brevity)
|
||||
console.log('[chat/completions] Response:', {
|
||||
id: response.id,
|
||||
status: 200,
|
||||
contentLength: response.choices[0].message.content.length,
|
||||
usage: response.usage
|
||||
});
|
||||
|
||||
res.json(response);
|
||||
}
|
||||
} catch (error: any) {
|
||||
// Log error details
|
||||
console.error('[chat/completions] Error:', {
|
||||
message: error?.message || 'An error occurred',
|
||||
stack: error?.stack,
|
||||
type: error?.constructor?.name,
|
||||
requestId
|
||||
});
|
||||
|
||||
// Track error as rejected tokens with Vercel token counting
|
||||
const errorMessage = error?.message || 'An error occurred';
|
||||
// Default to 1 token for errors as per Vercel AI SDK convention
|
||||
const errorTokens = 1;
|
||||
context.tokenTracker.trackUsage('evaluator', errorTokens, TOKEN_CATEGORIES.REJECTED);
|
||||
|
||||
// Clean up event listeners
|
||||
context.actionTracker.removeAllListeners('action');
|
||||
|
||||
// Get token usage in OpenAI API format
|
||||
const usage = context.tokenTracker.getUsageDetails();
|
||||
|
||||
if (body.stream && res.headersSent) {
|
||||
// For streaming responses that have already started, send error as a chunk
|
||||
// First send closing think tag if we're in the middle of thinking
|
||||
const closeThinkChunk: ChatCompletionChunk = {
|
||||
id: requestId,
|
||||
object: 'chat.completion.chunk',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {content: '</think>'},
|
||||
logprobs: null,
|
||||
finish_reason: null
|
||||
}]
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);
|
||||
|
||||
// Track error token and send error message
|
||||
context.tokenTracker.trackUsage('evaluator', 1, TOKEN_CATEGORIES.REJECTED);
|
||||
const errorChunk: ChatCompletionChunk = {
|
||||
id: requestId,
|
||||
object: 'chat.completion.chunk',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {content: errorMessage},
|
||||
logprobs: null,
|
||||
finish_reason: 'stop'
|
||||
}]
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
|
||||
res.end();
|
||||
} else {
|
||||
// For non-streaming or not-yet-started responses, send error as JSON
|
||||
const response: ChatCompletionResponse = {
|
||||
id: requestId,
|
||||
object: 'chat.completion',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
message: {
|
||||
role: 'assistant',
|
||||
content: `Error: ${errorMessage}`
|
||||
},
|
||||
logprobs: null,
|
||||
finish_reason: 'stop'
|
||||
}],
|
||||
usage
|
||||
};
|
||||
res.json(response);
|
||||
}
|
||||
}
|
||||
}) as RequestHandler);
|
||||
|
||||
interface StreamResponse extends Response {
|
||||
write: (chunk: string) => boolean;
|
||||
}
|
||||
|
||||
function createProgressEmitter(requestId: string, budget: number | undefined, context: TrackerContext) {
|
||||
return () => {
|
||||
const state = context.actionTracker.getState();
|
||||
const budgetInfo = {
|
||||
used: context.tokenTracker.getTotalUsage(),
|
||||
total: budget || 1_000_000,
|
||||
percentage: ((context.tokenTracker.getTotalUsage() / (budget || 1_000_000)) * 100).toFixed(2)
|
||||
};
|
||||
|
||||
eventEmitter.emit(`progress-${requestId}`, {
|
||||
type: 'progress',
|
||||
data: {...state.thisStep, totalStep: state.totalStep},
|
||||
step: state.totalStep,
|
||||
budget: budgetInfo,
|
||||
trackers: {
|
||||
tokenUsage: context.tokenTracker.getTotalUsage(),
|
||||
actionState: context.actionTracker.getState()
|
||||
}
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
function cleanup(requestId: string) {
|
||||
const context = trackers.get(requestId);
|
||||
if (context) {
|
||||
context.actionTracker.removeAllListeners();
|
||||
context.tokenTracker.removeAllListeners();
|
||||
trackers.delete(requestId);
|
||||
}
|
||||
}
|
||||
|
||||
function emitTrackerUpdate(requestId: string, context: TrackerContext) {
|
||||
const trackerData = {
|
||||
tokenUsage: context.tokenTracker.getTotalUsage(),
|
||||
tokenBreakdown: context.tokenTracker.getUsageBreakdown(),
|
||||
actionState: context.actionTracker.getState().thisStep,
|
||||
step: context.actionTracker.getState().totalStep,
|
||||
badAttempts: context.actionTracker.getState().badAttempts,
|
||||
gaps: context.actionTracker.getState().gaps
|
||||
};
|
||||
|
||||
eventEmitter.emit(`progress-${requestId}`, {
|
||||
type: 'progress',
|
||||
trackers: trackerData
|
||||
});
|
||||
}
|
||||
|
||||
// Store the trackers for each request
|
||||
const trackers = new Map<string, TrackerContext>();
|
||||
|
||||
app.post('/api/v1/query', (async (req: QueryRequest, res: Response) => {
|
||||
const {q, budget, maxBadAttempt} = req.body;
|
||||
if (!q) {
|
||||
return res.status(400).json({error: 'Query (q) is required'});
|
||||
}
|
||||
|
||||
const requestId = Date.now().toString();
|
||||
|
||||
// Create new trackers for this request
|
||||
const context: TrackerContext = {
|
||||
tokenTracker: new TokenTracker(),
|
||||
actionTracker: new ActionTracker()
|
||||
};
|
||||
trackers.set(requestId, context);
|
||||
|
||||
// Set up listeners immediately for both trackers
|
||||
context.actionTracker.on('action', () => emitTrackerUpdate(requestId, context));
|
||||
// context.tokenTracker.on('usage', () => emitTrackerUpdate(requestId, context));
|
||||
|
||||
res.json({requestId});
|
||||
|
||||
try {
|
||||
const {result} = await getResponse(q, budget, maxBadAttempt, context);
|
||||
const emitProgress = createProgressEmitter(requestId, budget, context);
|
||||
context.actionTracker.on('action', emitProgress);
|
||||
await storeTaskResult(requestId, result);
|
||||
eventEmitter.emit(`progress-${requestId}`, {
|
||||
type: 'answer',
|
||||
data: result,
|
||||
trackers: {
|
||||
tokenUsage: context.tokenTracker.getTotalUsage(),
|
||||
actionState: context.actionTracker.getState()
|
||||
}
|
||||
});
|
||||
cleanup(requestId);
|
||||
} catch (error: any) {
|
||||
eventEmitter.emit(`progress-${requestId}`, {
|
||||
type: 'error',
|
||||
data: error?.message || 'Unknown error',
|
||||
status: 500,
|
||||
trackers: {
|
||||
tokenUsage: context.tokenTracker.getTotalUsage(),
|
||||
actionState: context.actionTracker.getState()
|
||||
}
|
||||
});
|
||||
cleanup(requestId);
|
||||
}
|
||||
}) as RequestHandler);
|
||||
|
||||
app.get('/api/v1/stream/:requestId', (async (req: Request, res: StreamResponse) => {
|
||||
const requestId = req.params.requestId;
|
||||
const context = trackers.get(requestId);
|
||||
|
||||
res.setHeader('Content-Type', 'text/event-stream');
|
||||
res.setHeader('Cache-Control', 'no-cache');
|
||||
res.setHeader('Connection', 'keep-alive');
|
||||
|
||||
const listener = (data: StreamMessage) => {
|
||||
// The trackers are now included in all event types
|
||||
// We don't need to add them here as they're already part of the data
|
||||
res.write(`data: ${JSON.stringify(data)}\n\n`);
|
||||
};
|
||||
|
||||
eventEmitter.on(`progress-${requestId}`, listener);
|
||||
|
||||
// Handle client disconnection
|
||||
req.on('close', () => {
|
||||
eventEmitter.removeListener(`progress-${requestId}`, listener);
|
||||
});
|
||||
|
||||
// Send initial connection confirmation with tracker state
|
||||
const initialData = {
|
||||
type: 'connected',
|
||||
requestId,
|
||||
trackers: context ? {
|
||||
tokenUsage: context.tokenTracker.getTotalUsage(),
|
||||
actionState: context.actionTracker.getState()
|
||||
} : null
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(initialData)}\n\n`);
|
||||
}) as RequestHandler);
|
||||
|
||||
async function storeTaskResult(requestId: string, result: StepAction) {
|
||||
try {
|
||||
const taskDir = path.join(process.cwd(), 'tasks');
|
||||
await fs.mkdir(taskDir, {recursive: true});
|
||||
await fs.writeFile(
|
||||
path.join(taskDir, `${requestId}.json`),
|
||||
JSON.stringify(result, null, 2)
|
||||
);
|
||||
} catch (error) {
|
||||
console.error('Task storage failed:', error);
|
||||
throw new Error('Failed to store task result');
|
||||
}
|
||||
}
|
||||
|
||||
app.get('/api/v1/task/:requestId', (async (req: Request, res: Response) => {
|
||||
const requestId = req.params.requestId;
|
||||
try {
|
||||
const taskPath = path.join(process.cwd(), 'tasks', `${requestId}.json`);
|
||||
const taskData = await fs.readFile(taskPath, 'utf-8');
|
||||
res.json(JSON.parse(taskData));
|
||||
} catch (error) {
|
||||
res.status(404).json({error: 'Task not found'});
|
||||
}
|
||||
}) as RequestHandler);
|
||||
|
||||
export default app;
|
||||
636
src/server.ts
636
src/server.ts
@ -1,637 +1,7 @@
|
||||
import express, {Request, Response, RequestHandler} from 'express';
|
||||
import cors from 'cors';
|
||||
import {EventEmitter} from 'events';
|
||||
import {getResponse} from './agent';
|
||||
import {
|
||||
StepAction,
|
||||
StreamMessage,
|
||||
TrackerContext,
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
ChatCompletionChunk,
|
||||
AnswerAction,
|
||||
TOKEN_CATEGORIES,
|
||||
Model
|
||||
} from './types';
|
||||
import fs from 'fs/promises';
|
||||
import path from 'path';
|
||||
import {TokenTracker} from "./utils/token-tracker";
|
||||
import {ActionTracker} from "./utils/action-tracker";
|
||||
import app from "./app";
|
||||
|
||||
const app = express();
|
||||
const port = process.env.PORT || 3000;
|
||||
|
||||
// Get secret from command line args for optional authentication
|
||||
const secret = process.argv.find(arg => arg.startsWith('--secret='))?.split('=')[1];
|
||||
|
||||
app.use(cors());
|
||||
app.use(express.json());
|
||||
|
||||
const eventEmitter = new EventEmitter();
|
||||
|
||||
interface QueryRequest extends Request {
|
||||
body: {
|
||||
q: string;
|
||||
budget?: number;
|
||||
maxBadAttempt?: number;
|
||||
};
|
||||
}
|
||||
|
||||
function buildMdFromAnswer(answer: AnswerAction) {
|
||||
let refStr = '';
|
||||
if (answer.references?.length > 0) {
|
||||
refStr = `
|
||||
|
||||
## References
|
||||
${answer.references.map((ref, i) => `
|
||||
${i + 1}. [${ref.exactQuote}](${ref.url})`).join('')}`;
|
||||
}
|
||||
return `${answer.answer.replace(/\(REF_(\d+)\)/g, (_, num) => `[^${num}]`)}${refStr}`;
|
||||
}
|
||||
|
||||
|
||||
// Modified streamTextWordByWord function
|
||||
async function* streamTextWordByWord(text: string, streamingState: StreamingState) {
|
||||
const words = text.split(/(\s+)/);
|
||||
for (const word of words) {
|
||||
if (streamingState.currentlyStreaming) {
|
||||
const delay = Math.floor(Math.random() * 100);
|
||||
await new Promise(resolve => setTimeout(resolve, delay));
|
||||
yield word;
|
||||
} else {
|
||||
// If streaming was interrupted, yield all remaining words at once
|
||||
const remainingWords = words.slice(words.indexOf(word)).join('');
|
||||
yield remainingWords;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to emit remaining content immediately
|
||||
async function emitRemainingContent(
|
||||
res: Response,
|
||||
requestId: string,
|
||||
model: string,
|
||||
content: string
|
||||
) {
|
||||
if (!content) return;
|
||||
|
||||
const chunk: ChatCompletionChunk = {
|
||||
id: requestId,
|
||||
object: 'chat.completion.chunk',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {content},
|
||||
logprobs: null,
|
||||
finish_reason: null
|
||||
}]
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(chunk)}\n\n`);
|
||||
}
|
||||
|
||||
interface StreamingState {
|
||||
currentlyStreaming: boolean;
|
||||
currentGenerator: AsyncGenerator<string> | null;
|
||||
remainingContent: string;
|
||||
}
|
||||
|
||||
async function completeCurrentStreaming(
|
||||
streamingState: StreamingState,
|
||||
res: Response,
|
||||
requestId: string,
|
||||
model: string
|
||||
) {
|
||||
if (streamingState.currentlyStreaming && streamingState.remainingContent) {
|
||||
// Force completion of current streaming
|
||||
await emitRemainingContent(
|
||||
res,
|
||||
requestId,
|
||||
model,
|
||||
streamingState.remainingContent
|
||||
);
|
||||
// Reset streaming state
|
||||
streamingState.currentlyStreaming = false;
|
||||
streamingState.remainingContent = '';
|
||||
streamingState.currentGenerator = null;
|
||||
}
|
||||
}
|
||||
|
||||
// OpenAI-compatible chat completions endpoint
|
||||
// Models API endpoints
|
||||
app.get('/v1/models', (async (_req: Request, res: Response) => {
|
||||
const models: Model[] = [{
|
||||
id: 'jina-deepsearch-v1',
|
||||
object: 'model',
|
||||
created: 1686935002,
|
||||
owned_by: 'jina-ai'
|
||||
}];
|
||||
|
||||
res.json({
|
||||
object: 'list',
|
||||
data: models
|
||||
});
|
||||
}) as RequestHandler);
|
||||
|
||||
app.get('/v1/models/:model', (async (req: Request, res: Response) => {
|
||||
const modelId = req.params.model;
|
||||
|
||||
if (modelId === 'jina-deepsearch-v1') {
|
||||
res.json({
|
||||
id: 'jina-deepsearch-v1',
|
||||
object: 'model',
|
||||
created: 1686935002,
|
||||
owned_by: 'jina-ai'
|
||||
});
|
||||
} else {
|
||||
res.status(404).json({
|
||||
error: {
|
||||
message: `Model '${modelId}' not found`,
|
||||
type: 'invalid_request_error',
|
||||
param: null,
|
||||
code: 'model_not_found'
|
||||
}
|
||||
});
|
||||
}
|
||||
}) as RequestHandler);
|
||||
|
||||
|
||||
app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
|
||||
// Check authentication only if secret is set
|
||||
if (secret) {
|
||||
const authHeader = req.headers.authorization;
|
||||
if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
|
||||
console.log('[chat/completions] Unauthorized request');
|
||||
res.status(401).json({error: 'Unauthorized'});
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Log request details (excluding sensitive data)
|
||||
console.log('[chat/completions] Request:', {
|
||||
model: req.body.model,
|
||||
stream: req.body.stream,
|
||||
messageCount: req.body.messages?.length,
|
||||
hasAuth: !!req.headers.authorization,
|
||||
requestId: Date.now().toString()
|
||||
});
|
||||
|
||||
const body = req.body as ChatCompletionRequest;
|
||||
if (!body.messages?.length) {
|
||||
return res.status(400).json({error: 'Messages array is required and must not be empty'});
|
||||
}
|
||||
const lastMessage = body.messages[body.messages.length - 1];
|
||||
if (lastMessage.role !== 'user') {
|
||||
return res.status(400).json({error: 'Last message must be from user'});
|
||||
}
|
||||
|
||||
const requestId = Date.now().toString();
|
||||
const context: TrackerContext = {
|
||||
tokenTracker: new TokenTracker(),
|
||||
actionTracker: new ActionTracker()
|
||||
};
|
||||
|
||||
// Track prompt tokens for the initial message
|
||||
// Use Vercel's token counting convention - 1 token per message
|
||||
const messageTokens = body.messages.length;
|
||||
context.tokenTracker.trackUsage('agent', messageTokens, TOKEN_CATEGORIES.PROMPT);
|
||||
|
||||
// Add this inside the chat completions endpoint, before setting up the action listener
|
||||
const streamingState: StreamingState = {
|
||||
currentlyStreaming: false,
|
||||
currentGenerator: null,
|
||||
remainingContent: ''
|
||||
};
|
||||
|
||||
if (body.stream) {
|
||||
res.setHeader('Content-Type', 'text/event-stream');
|
||||
res.setHeader('Cache-Control', 'no-cache');
|
||||
res.setHeader('Connection', 'keep-alive');
|
||||
|
||||
|
||||
// Send initial chunk with opening think tag
|
||||
const initialChunk: ChatCompletionChunk = {
|
||||
id: requestId,
|
||||
object: 'chat.completion.chunk',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {role: 'assistant', content: '<think>'},
|
||||
logprobs: null,
|
||||
finish_reason: null
|
||||
}]
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(initialChunk)}\n\n`);
|
||||
|
||||
// Set up progress listener with cleanup
|
||||
const actionListener = async (action: any) => {
|
||||
if (action.thisStep.think) {
|
||||
// Complete any ongoing streaming first
|
||||
await completeCurrentStreaming(streamingState, res, requestId, body.model);
|
||||
|
||||
// Start new streaming session
|
||||
streamingState.currentlyStreaming = true;
|
||||
streamingState.remainingContent = action.thisStep.think;
|
||||
|
||||
try {
|
||||
for await (const word of streamTextWordByWord(action.thisStep.think, streamingState)) {
|
||||
if (!streamingState.currentlyStreaming) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Update remaining content
|
||||
streamingState.remainingContent = streamingState.remainingContent.slice(word.length);
|
||||
|
||||
const chunk: ChatCompletionChunk = {
|
||||
id: requestId,
|
||||
object: 'chat.completion.chunk',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {content: word},
|
||||
logprobs: null,
|
||||
finish_reason: null
|
||||
}]
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(chunk)}\n\n`);
|
||||
}
|
||||
|
||||
// Only add newline if this streaming completed normally
|
||||
if (streamingState.currentlyStreaming) {
|
||||
const newlineChunk: ChatCompletionChunk = {
|
||||
id: requestId,
|
||||
object: 'chat.completion.chunk',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {content: '\n'},
|
||||
logprobs: null,
|
||||
finish_reason: null
|
||||
}]
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(newlineChunk)}\n\n`);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error in streaming:', error);
|
||||
await completeCurrentStreaming(streamingState, res, requestId, body.model);
|
||||
}
|
||||
}
|
||||
};
|
||||
context.actionTracker.on('action', actionListener);
|
||||
|
||||
// Make sure to update the cleanup code
|
||||
res.on('finish', () => {
|
||||
streamingState.currentlyStreaming = false;
|
||||
streamingState.currentGenerator = null;
|
||||
streamingState.remainingContent = '';
|
||||
context.actionTracker.removeListener('action', actionListener);
|
||||
});
|
||||
}
|
||||
|
||||
try {
|
||||
// Track initial query tokens - already tracked above
|
||||
// const queryTokens = Buffer.byteLength(lastMessage.content, 'utf-8');
|
||||
// context.tokenTracker.trackUsage('agent', queryTokens, 'prompt');
|
||||
|
||||
let result;
|
||||
try {
|
||||
({result} = await getResponse(lastMessage.content, undefined, undefined, context));
|
||||
} catch (error: any) {
|
||||
// If deduplication fails, retry without it
|
||||
if (error?.response?.status === 402) {
|
||||
// If deduplication fails, retry with maxBadAttempt=3 to skip dedup
|
||||
({result} = await getResponse(lastMessage.content, undefined, 3, context));
|
||||
} else {
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
// Track tokens based on action type
|
||||
if (result.action === 'answer') {
|
||||
// Track accepted prediction tokens for the final answer using Vercel's convention
|
||||
const answerTokens = 1; // Default to 1 token per answer
|
||||
context.tokenTracker.trackUsage('evaluator', answerTokens, TOKEN_CATEGORIES.ACCEPTED);
|
||||
} else {
|
||||
// Track rejected prediction tokens for non-answer responses
|
||||
const rejectedTokens = 1; // Default to 1 token per rejected response
|
||||
context.tokenTracker.trackUsage('evaluator', rejectedTokens, TOKEN_CATEGORIES.REJECTED);
|
||||
}
|
||||
|
||||
if (body.stream) {
|
||||
// Complete any ongoing streaming before sending final answer
|
||||
await completeCurrentStreaming(streamingState, res, requestId, body.model);
|
||||
|
||||
// Send closing think tag
|
||||
const closeThinkChunk: ChatCompletionChunk = {
|
||||
id: requestId,
|
||||
object: 'chat.completion.chunk',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {content: `</think>\n\n`},
|
||||
logprobs: null,
|
||||
finish_reason: null
|
||||
}]
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);
|
||||
|
||||
// Send final answer as separate chunk
|
||||
const answerChunk: ChatCompletionChunk = {
|
||||
id: requestId,
|
||||
object: 'chat.completion.chunk',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {content: result.action === 'answer' ? buildMdFromAnswer(result) : result.think},
|
||||
logprobs: null,
|
||||
finish_reason: 'stop'
|
||||
}]
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(answerChunk)}\n\n`);
|
||||
res.end();
|
||||
} else {
|
||||
const usage = context.tokenTracker.getUsageDetails();
|
||||
const response: ChatCompletionResponse = {
|
||||
id: requestId,
|
||||
object: 'chat.completion',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
message: {
|
||||
role: 'assistant',
|
||||
content: result.action === 'answer' ? buildMdFromAnswer(result) : result.think
|
||||
},
|
||||
logprobs: null,
|
||||
finish_reason: 'stop'
|
||||
}],
|
||||
usage
|
||||
};
|
||||
|
||||
// Log final response (excluding full content for brevity)
|
||||
console.log('[chat/completions] Response:', {
|
||||
id: response.id,
|
||||
status: 200,
|
||||
contentLength: response.choices[0].message.content.length,
|
||||
usage: response.usage
|
||||
});
|
||||
|
||||
res.json(response);
|
||||
}
|
||||
} catch (error: any) {
|
||||
// Log error details
|
||||
console.error('[chat/completions] Error:', {
|
||||
message: error?.message || 'An error occurred',
|
||||
stack: error?.stack,
|
||||
type: error?.constructor?.name,
|
||||
requestId
|
||||
});
|
||||
|
||||
// Track error as rejected tokens with Vercel token counting
|
||||
const errorMessage = error?.message || 'An error occurred';
|
||||
// Default to 1 token for errors as per Vercel AI SDK convention
|
||||
const errorTokens = 1;
|
||||
context.tokenTracker.trackUsage('evaluator', errorTokens, TOKEN_CATEGORIES.REJECTED);
|
||||
|
||||
// Clean up event listeners
|
||||
context.actionTracker.removeAllListeners('action');
|
||||
|
||||
// Get token usage in OpenAI API format
|
||||
const usage = context.tokenTracker.getUsageDetails();
|
||||
|
||||
if (body.stream && res.headersSent) {
|
||||
// For streaming responses that have already started, send error as a chunk
|
||||
// First send closing think tag if we're in the middle of thinking
|
||||
const closeThinkChunk: ChatCompletionChunk = {
|
||||
id: requestId,
|
||||
object: 'chat.completion.chunk',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {content: '</think>'},
|
||||
logprobs: null,
|
||||
finish_reason: null
|
||||
}]
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);
|
||||
|
||||
// Track error token and send error message
|
||||
context.tokenTracker.trackUsage('evaluator', 1, TOKEN_CATEGORIES.REJECTED);
|
||||
const errorChunk: ChatCompletionChunk = {
|
||||
id: requestId,
|
||||
object: 'chat.completion.chunk',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: {content: errorMessage},
|
||||
logprobs: null,
|
||||
finish_reason: 'stop'
|
||||
}]
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
|
||||
res.end();
|
||||
} else {
|
||||
// For non-streaming or not-yet-started responses, send error as JSON
|
||||
const response: ChatCompletionResponse = {
|
||||
id: requestId,
|
||||
object: 'chat.completion',
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: body.model,
|
||||
system_fingerprint: 'fp_' + requestId,
|
||||
choices: [{
|
||||
index: 0,
|
||||
message: {
|
||||
role: 'assistant',
|
||||
content: `Error: ${errorMessage}`
|
||||
},
|
||||
logprobs: null,
|
||||
finish_reason: 'stop'
|
||||
}],
|
||||
usage
|
||||
};
|
||||
res.json(response);
|
||||
}
|
||||
}
|
||||
}) as RequestHandler);
|
||||
|
||||
interface StreamResponse extends Response {
|
||||
write: (chunk: string) => boolean;
|
||||
}
|
||||
|
||||
function createProgressEmitter(requestId: string, budget: number | undefined, context: TrackerContext) {
|
||||
return () => {
|
||||
const state = context.actionTracker.getState();
|
||||
const budgetInfo = {
|
||||
used: context.tokenTracker.getTotalUsage(),
|
||||
total: budget || 1_000_000,
|
||||
percentage: ((context.tokenTracker.getTotalUsage() / (budget || 1_000_000)) * 100).toFixed(2)
|
||||
};
|
||||
|
||||
eventEmitter.emit(`progress-${requestId}`, {
|
||||
type: 'progress',
|
||||
data: {...state.thisStep, totalStep: state.totalStep},
|
||||
step: state.totalStep,
|
||||
budget: budgetInfo,
|
||||
trackers: {
|
||||
tokenUsage: context.tokenTracker.getTotalUsage(),
|
||||
actionState: context.actionTracker.getState()
|
||||
}
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
function cleanup(requestId: string) {
|
||||
const context = trackers.get(requestId);
|
||||
if (context) {
|
||||
context.actionTracker.removeAllListeners();
|
||||
context.tokenTracker.removeAllListeners();
|
||||
trackers.delete(requestId);
|
||||
}
|
||||
}
|
||||
|
||||
function emitTrackerUpdate(requestId: string, context: TrackerContext) {
|
||||
const trackerData = {
|
||||
tokenUsage: context.tokenTracker.getTotalUsage(),
|
||||
tokenBreakdown: context.tokenTracker.getUsageBreakdown(),
|
||||
actionState: context.actionTracker.getState().thisStep,
|
||||
step: context.actionTracker.getState().totalStep,
|
||||
badAttempts: context.actionTracker.getState().badAttempts,
|
||||
gaps: context.actionTracker.getState().gaps
|
||||
};
|
||||
|
||||
eventEmitter.emit(`progress-${requestId}`, {
|
||||
type: 'progress',
|
||||
trackers: trackerData
|
||||
});
|
||||
}
|
||||
|
||||
// Store the trackers for each request
|
||||
const trackers = new Map<string, TrackerContext>();
|
||||
|
||||
app.post('/api/v1/query', (async (req: QueryRequest, res: Response) => {
|
||||
const {q, budget, maxBadAttempt} = req.body;
|
||||
if (!q) {
|
||||
return res.status(400).json({error: 'Query (q) is required'});
|
||||
}
|
||||
|
||||
const requestId = Date.now().toString();
|
||||
|
||||
// Create new trackers for this request
|
||||
const context: TrackerContext = {
|
||||
tokenTracker: new TokenTracker(),
|
||||
actionTracker: new ActionTracker()
|
||||
};
|
||||
trackers.set(requestId, context);
|
||||
|
||||
// Set up listeners immediately for both trackers
|
||||
context.actionTracker.on('action', () => emitTrackerUpdate(requestId, context));
|
||||
// context.tokenTracker.on('usage', () => emitTrackerUpdate(requestId, context));
|
||||
|
||||
res.json({requestId});
|
||||
|
||||
try {
|
||||
const {result} = await getResponse(q, budget, maxBadAttempt, context);
|
||||
const emitProgress = createProgressEmitter(requestId, budget, context);
|
||||
context.actionTracker.on('action', emitProgress);
|
||||
await storeTaskResult(requestId, result);
|
||||
eventEmitter.emit(`progress-${requestId}`, {
|
||||
type: 'answer',
|
||||
data: result,
|
||||
trackers: {
|
||||
tokenUsage: context.tokenTracker.getTotalUsage(),
|
||||
actionState: context.actionTracker.getState()
|
||||
}
|
||||
});
|
||||
cleanup(requestId);
|
||||
} catch (error: any) {
|
||||
eventEmitter.emit(`progress-${requestId}`, {
|
||||
type: 'error',
|
||||
data: error?.message || 'Unknown error',
|
||||
status: 500,
|
||||
trackers: {
|
||||
tokenUsage: context.tokenTracker.getTotalUsage(),
|
||||
actionState: context.actionTracker.getState()
|
||||
}
|
||||
});
|
||||
cleanup(requestId);
|
||||
}
|
||||
}) as RequestHandler);
|
||||
|
||||
app.get('/api/v1/stream/:requestId', (async (req: Request, res: StreamResponse) => {
|
||||
const requestId = req.params.requestId;
|
||||
const context = trackers.get(requestId);
|
||||
|
||||
res.setHeader('Content-Type', 'text/event-stream');
|
||||
res.setHeader('Cache-Control', 'no-cache');
|
||||
res.setHeader('Connection', 'keep-alive');
|
||||
|
||||
const listener = (data: StreamMessage) => {
|
||||
// The trackers are now included in all event types
|
||||
// We don't need to add them here as they're already part of the data
|
||||
res.write(`data: ${JSON.stringify(data)}\n\n`);
|
||||
};
|
||||
|
||||
eventEmitter.on(`progress-${requestId}`, listener);
|
||||
|
||||
// Handle client disconnection
|
||||
req.on('close', () => {
|
||||
eventEmitter.removeListener(`progress-${requestId}`, listener);
|
||||
});
|
||||
|
||||
// Send initial connection confirmation with tracker state
|
||||
const initialData = {
|
||||
type: 'connected',
|
||||
requestId,
|
||||
trackers: context ? {
|
||||
tokenUsage: context.tokenTracker.getTotalUsage(),
|
||||
actionState: context.actionTracker.getState()
|
||||
} : null
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(initialData)}\n\n`);
|
||||
}) as RequestHandler);
|
||||
|
||||
async function storeTaskResult(requestId: string, result: StepAction) {
|
||||
try {
|
||||
const taskDir = path.join(process.cwd(), 'tasks');
|
||||
await fs.mkdir(taskDir, {recursive: true});
|
||||
await fs.writeFile(
|
||||
path.join(taskDir, `${requestId}.json`),
|
||||
JSON.stringify(result, null, 2)
|
||||
);
|
||||
} catch (error) {
|
||||
console.error('Task storage failed:', error);
|
||||
throw new Error('Failed to store task result');
|
||||
}
|
||||
}
|
||||
|
||||
app.get('/api/v1/task/:requestId', (async (req: Request, res: Response) => {
|
||||
const requestId = req.params.requestId;
|
||||
try {
|
||||
const taskPath = path.join(process.cwd(), 'tasks', `${requestId}.json`);
|
||||
const taskData = await fs.readFile(taskPath, 'utf-8');
|
||||
res.json(JSON.parse(taskData));
|
||||
} catch (error) {
|
||||
res.status(404).json({error: 'Task not found'});
|
||||
}
|
||||
}) as RequestHandler);
|
||||
|
||||
// Export server startup function for better testing
|
||||
export function startServer() {
|
||||
return app.listen(port, () => {
|
||||
@ -642,6 +12,4 @@ export function startServer() {
|
||||
// Start server if running directly
|
||||
if (process.env.NODE_ENV !== 'test') {
|
||||
startServer();
|
||||
}
|
||||
|
||||
export default app;
|
||||
}
|
||||
@ -9,6 +9,16 @@ export class TokenTracker extends EventEmitter {
|
||||
constructor(budget?: number) {
|
||||
super();
|
||||
this.budget = budget;
|
||||
|
||||
if ('asyncLocalContext' in process) {
|
||||
const asyncLocalContext = process.asyncLocalContext as any;
|
||||
this.on('usage', () => {
|
||||
if (asyncLocalContext.available()) {
|
||||
asyncLocalContext.ctx.chargeAmount = this.getTotalUsage();
|
||||
}
|
||||
});
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
trackUsage(tool: string, tokens: number, category?: TokenCategory) {
|
||||
@ -53,9 +63,9 @@ export class TokenTracker extends EventEmitter {
|
||||
}, {} as Record<string, number>);
|
||||
|
||||
const prompt_tokens = categoryBreakdown.prompt || 0;
|
||||
const completion_tokens =
|
||||
(categoryBreakdown.reasoning || 0) +
|
||||
(categoryBreakdown.accepted || 0) +
|
||||
const completion_tokens =
|
||||
(categoryBreakdown.reasoning || 0) +
|
||||
(categoryBreakdown.accepted || 0) +
|
||||
(categoryBreakdown.rejected || 0);
|
||||
|
||||
return {
|
||||
|
||||
@ -1,14 +1,18 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2020",
|
||||
"module": "commonjs",
|
||||
"module": "node16",
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"sourceMap": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"strict": true,
|
||||
"resolveJsonModule": true,
|
||||
"moduleResolution": "node"
|
||||
}
|
||||
"experimentalDecorators": true,
|
||||
"emitDecoratorMetadata": true,
|
||||
"resolveJsonModule": true
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["jina-ai/**/*", "**/__tests__/**/*"],
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user