mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-25 22:16:49 +08:00
jina-ai: billing for saas service (#55)
* wip: jina billing * wip * fix: build issues * ci: cd gh action * fix: make ci happy
This commit is contained in:
parent
c4639a2e92
commit
8af35c6640
1
.dockerignore
Normal file
1
.dockerignore
Normal file
@ -0,0 +1 @@
|
|||||||
|
node_modules
|
||||||
@ -16,5 +16,6 @@ module.exports = {
|
|||||||
rules: {
|
rules: {
|
||||||
'no-console': ['error', { allow: ['log', 'error'] }],
|
'no-console': ['error', { allow: ['log', 'error'] }],
|
||||||
'@typescript-eslint/no-explicit-any': 'off'
|
'@typescript-eslint/no-explicit-any': 'off'
|
||||||
}
|
},
|
||||||
|
ignorePatterns: ["jina-ai/**/*"]
|
||||||
};
|
};
|
||||||
|
|||||||
66
.github/workflows/cd.yml
vendored
Normal file
66
.github/workflows/cd.yml
vendored
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
run-name: Build push and deploy (CD)
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
- ci-debug
|
||||||
|
tags:
|
||||||
|
- '*'
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-and-push-to-gcr:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.ref_type == 'branch' && github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
lfs: true
|
||||||
|
- uses: 'google-github-actions/auth@v2'
|
||||||
|
with:
|
||||||
|
credentials_json: '${{ secrets.GCLOUD_SERVICE_ACCOUNT_SECRET_JSON }}'
|
||||||
|
- name: 'Set up Cloud SDK'
|
||||||
|
uses: 'google-github-actions/setup-gcloud@v2'
|
||||||
|
- name: "Docker auth"
|
||||||
|
run: |-
|
||||||
|
gcloud auth configure-docker us-docker.pkg.dev --quiet
|
||||||
|
- name: Set controller release version
|
||||||
|
run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: 22.12.0
|
||||||
|
cache: npm
|
||||||
|
|
||||||
|
- name: npm install
|
||||||
|
run: npm ci
|
||||||
|
- name: build application
|
||||||
|
run: npm run build
|
||||||
|
- name: Set package version
|
||||||
|
run: npm version --no-git-tag-version ${{ env.RELEASE_VERSION }}
|
||||||
|
if: github.ref_type == 'tag'
|
||||||
|
- name: Docker meta
|
||||||
|
id: meta
|
||||||
|
uses: docker/metadata-action@v5
|
||||||
|
with:
|
||||||
|
images: |
|
||||||
|
us-docker.pkg.dev/research-df067/deepresearch/node-deep-research
|
||||||
|
- name: Set up QEMU
|
||||||
|
uses: docker/setup-qemu-action@v3
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
- name: Build and push
|
||||||
|
id: container
|
||||||
|
uses: docker/build-push-action@v6
|
||||||
|
with:
|
||||||
|
file: jina-ai/Dockerfile
|
||||||
|
push: true
|
||||||
|
tags: ${{ steps.meta.outputs.tags }}
|
||||||
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
|
- name: Deploy with Tag
|
||||||
|
run: |
|
||||||
|
gcloud run deploy node-deep-research --image us-docker.pkg.dev/research-df067/deepresearch/node-deep-research@${{steps.container.outputs.imageid}} --tag ${{ env.RELEASE_VERSION }} --region us-central1 --async
|
||||||
|
|
||||||
24
.vscode/launch.json
vendored
Normal file
24
.vscode/launch.json
vendored
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
{
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Attach",
|
||||||
|
"port": 9229,
|
||||||
|
"request": "attach",
|
||||||
|
"skipFiles": [
|
||||||
|
"<node_internals>/**"
|
||||||
|
],
|
||||||
|
"type": "node"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Attach by Process ID",
|
||||||
|
"processId": "${command:PickProcess}",
|
||||||
|
"request": "attach",
|
||||||
|
"skipFiles": [
|
||||||
|
"<node_internals>/**"
|
||||||
|
],
|
||||||
|
"type": "node"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
1
jina-ai/.dockerignore
Normal file
1
jina-ai/.dockerignore
Normal file
@ -0,0 +1 @@
|
|||||||
|
node_modules
|
||||||
50
jina-ai/Dockerfile
Normal file
50
jina-ai/Dockerfile
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
# ---- BUILD STAGE ----
|
||||||
|
FROM node:20-slim AS builder
|
||||||
|
|
||||||
|
# Set working directory
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy package.json and package-lock.json
|
||||||
|
COPY ./package*.json ./
|
||||||
|
COPY ./jina-ai/package*.json ./jina-ai/
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
RUN npm ci
|
||||||
|
WORKDIR /app/jina-ai
|
||||||
|
RUN npm ci
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy application code
|
||||||
|
COPY ./src ./src
|
||||||
|
COPY ./config.json ./
|
||||||
|
COPY ./tsconfig.json ./tsconfig.json
|
||||||
|
RUN npm run build
|
||||||
|
|
||||||
|
COPY ./jina-ai/src ./jina-ai/src
|
||||||
|
COPY ./jina-ai/tsconfig.json ./jina-ai/tsconfig.json
|
||||||
|
WORKDIR /app/jina-ai
|
||||||
|
RUN npm run build
|
||||||
|
|
||||||
|
# ---- PRODUCTION STAGE ----
|
||||||
|
FROM node:20 AS production
|
||||||
|
|
||||||
|
# Set working directory
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
COPY --from=builder /app ./
|
||||||
|
# Copy config.json and built files from builder
|
||||||
|
|
||||||
|
WORKDIR /app/jina-ai
|
||||||
|
|
||||||
|
# Set environment variables (Recommended to set at runtime, avoid hardcoding)
|
||||||
|
ENV GEMINI_API_KEY=${GEMINI_API_KEY}
|
||||||
|
ENV OPENAI_API_KEY=${OPENAI_API_KEY}
|
||||||
|
ENV JINA_API_KEY=${JINA_API_KEY}
|
||||||
|
ENV BRAVE_API_KEY=${BRAVE_API_KEY}
|
||||||
|
|
||||||
|
# Expose the port the app runs on
|
||||||
|
EXPOSE 3000
|
||||||
|
|
||||||
|
# Set startup command
|
||||||
|
CMD ["node", "./dist/server.js"]
|
||||||
3980
jina-ai/package-lock.json
generated
Normal file
3980
jina-ai/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
38
jina-ai/package.json
Normal file
38
jina-ai/package.json
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
{
|
||||||
|
"name": "@jina-ai/node-deepresearch",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"main": "dist/app.js",
|
||||||
|
"files": [
|
||||||
|
"dist",
|
||||||
|
"README.md",
|
||||||
|
"LICENSE"
|
||||||
|
],
|
||||||
|
"scripts": {
|
||||||
|
"build": "tsc",
|
||||||
|
"dev": "npx ts-node src/agent.ts",
|
||||||
|
"search": "npx ts-node src/test-duck.ts",
|
||||||
|
"rewrite": "npx ts-node src/tools/query-rewriter.ts",
|
||||||
|
"lint": "eslint . --ext .ts",
|
||||||
|
"lint:fix": "eslint . --ext .ts --fix",
|
||||||
|
"serve": "ts-node src/server.ts",
|
||||||
|
"eval": "ts-node src/evals/batch-evals.ts",
|
||||||
|
"test": "jest --testTimeout=30000",
|
||||||
|
"test:watch": "jest --watch"
|
||||||
|
},
|
||||||
|
"keywords": [],
|
||||||
|
"author": "Jina AI",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"description": "",
|
||||||
|
"dependencies": {
|
||||||
|
"@google-cloud/firestore": "^7.11.0",
|
||||||
|
"civkit": "^0.8.3-15926cb",
|
||||||
|
"dayjs": "^1.11.13",
|
||||||
|
"lodash": "^4.17.21",
|
||||||
|
"reflect-metadata": "^0.2.2",
|
||||||
|
"tsyringe": "^4.8.0"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/lodash": "^4.17.15",
|
||||||
|
"pino-pretty": "^13.0.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
347
jina-ai/src/dto/jina-embeddings-auth.ts
Normal file
347
jina-ai/src/dto/jina-embeddings-auth.ts
Normal file
@ -0,0 +1,347 @@
|
|||||||
|
import {
|
||||||
|
Also, AuthenticationFailedError, AuthenticationRequiredError,
|
||||||
|
DownstreamServiceFailureError, RPC_CALL_ENVIRONMENT,
|
||||||
|
ArrayOf, AutoCastable, Prop
|
||||||
|
} from 'civkit/civ-rpc';
|
||||||
|
import { parseJSONText } from 'civkit/vectorize';
|
||||||
|
import { htmlEscape } from 'civkit/escape';
|
||||||
|
import { marshalErrorLike } from 'civkit/lang';
|
||||||
|
|
||||||
|
import type express from 'express';
|
||||||
|
|
||||||
|
import logger from '../lib/logger';
|
||||||
|
import { AsyncLocalContext } from '../lib/async-context';
|
||||||
|
import { InjectProperty } from '../lib/registry';
|
||||||
|
import { JinaEmbeddingsDashboardHTTP } from '../lib/billing';
|
||||||
|
import envConfig from '../lib/env-config';
|
||||||
|
|
||||||
|
import { FirestoreRecord } from '../lib/firestore';
|
||||||
|
import _ from 'lodash';
|
||||||
|
import { RateLimitDesc } from '../rate-limit';
|
||||||
|
|
||||||
|
export class JinaWallet extends AutoCastable {
|
||||||
|
@Prop({
|
||||||
|
default: ''
|
||||||
|
})
|
||||||
|
user_id!: string;
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
default: 0
|
||||||
|
})
|
||||||
|
trial_balance!: number;
|
||||||
|
|
||||||
|
@Prop()
|
||||||
|
trial_start?: Date;
|
||||||
|
|
||||||
|
@Prop()
|
||||||
|
trial_end?: Date;
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
default: 0
|
||||||
|
})
|
||||||
|
regular_balance!: number;
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
default: 0
|
||||||
|
})
|
||||||
|
total_balance!: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export class JinaEmbeddingsTokenAccount extends FirestoreRecord {
|
||||||
|
static override collectionName = 'embeddingsTokenAccounts';
|
||||||
|
|
||||||
|
override _id!: string;
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
required: true
|
||||||
|
})
|
||||||
|
user_id!: string;
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
nullable: true,
|
||||||
|
type: String,
|
||||||
|
})
|
||||||
|
email?: string;
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
nullable: true,
|
||||||
|
type: String,
|
||||||
|
})
|
||||||
|
full_name?: string;
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
nullable: true,
|
||||||
|
type: String,
|
||||||
|
})
|
||||||
|
customer_id?: string;
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
nullable: true,
|
||||||
|
type: String,
|
||||||
|
})
|
||||||
|
avatar_url?: string;
|
||||||
|
|
||||||
|
// Not keeping sensitive info for now
|
||||||
|
// @Prop()
|
||||||
|
// billing_address?: object;
|
||||||
|
|
||||||
|
// @Prop()
|
||||||
|
// payment_method?: object;
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
required: true
|
||||||
|
})
|
||||||
|
wallet!: JinaWallet;
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
type: Object
|
||||||
|
})
|
||||||
|
metadata?: { [k: string]: any; };
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
defaultFactory: () => new Date()
|
||||||
|
})
|
||||||
|
lastSyncedAt!: Date;
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
dictOf: [ArrayOf(RateLimitDesc)]
|
||||||
|
})
|
||||||
|
customRateLimits?: { [k: string]: RateLimitDesc[]; };
|
||||||
|
|
||||||
|
static patchedFields = [
|
||||||
|
];
|
||||||
|
|
||||||
|
static override from(input: any) {
|
||||||
|
for (const field of this.patchedFields) {
|
||||||
|
if (typeof input[field] === 'string') {
|
||||||
|
input[field] = parseJSONText(input[field]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return super.from(input) as JinaEmbeddingsTokenAccount;
|
||||||
|
}
|
||||||
|
|
||||||
|
override degradeForFireStore() {
|
||||||
|
const copy: any = {
|
||||||
|
...this,
|
||||||
|
wallet: { ...this.wallet },
|
||||||
|
// Firebase disability
|
||||||
|
customRateLimits: _.mapValues(this.customRateLimits, (v) => v.map((x) => ({ ...x }))),
|
||||||
|
};
|
||||||
|
|
||||||
|
for (const field of (this.constructor as typeof JinaEmbeddingsTokenAccount).patchedFields) {
|
||||||
|
if (typeof copy[field] === 'object') {
|
||||||
|
copy[field] = JSON.stringify(copy[field]) as any;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return copy;
|
||||||
|
}
|
||||||
|
|
||||||
|
[k: string]: any;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
const authDtoLogger = logger.child({ service: 'JinaAuthDTO' });
|
||||||
|
|
||||||
|
export interface FireBaseHTTPCtx {
|
||||||
|
req: express.Request,
|
||||||
|
res: express.Response,
|
||||||
|
}
|
||||||
|
|
||||||
|
const THE_VERY_SAME_JINA_EMBEDDINGS_CLIENT = new JinaEmbeddingsDashboardHTTP(envConfig.JINA_EMBEDDINGS_DASHBOARD_API_KEY);
|
||||||
|
|
||||||
|
@Also({
|
||||||
|
openapi: {
|
||||||
|
operation: {
|
||||||
|
parameters: {
|
||||||
|
'Authorization': {
|
||||||
|
description: htmlEscape`Jina Token for authentication.\n\n` +
|
||||||
|
htmlEscape`- Member of <JinaEmbeddingsAuthDTO>\n\n` +
|
||||||
|
`- Authorization: Bearer {YOUR_JINA_TOKEN}`
|
||||||
|
,
|
||||||
|
in: 'header',
|
||||||
|
schema: {
|
||||||
|
anyOf: [
|
||||||
|
{ type: 'string', format: 'token' }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
export class JinaEmbeddingsAuthDTO extends AutoCastable {
|
||||||
|
uid?: string;
|
||||||
|
bearerToken?: string;
|
||||||
|
user?: JinaEmbeddingsTokenAccount;
|
||||||
|
|
||||||
|
@InjectProperty(AsyncLocalContext)
|
||||||
|
ctxMgr!: AsyncLocalContext;
|
||||||
|
|
||||||
|
jinaEmbeddingsDashboard = THE_VERY_SAME_JINA_EMBEDDINGS_CLIENT;
|
||||||
|
|
||||||
|
static override from(input: any) {
|
||||||
|
const instance = super.from(input) as JinaEmbeddingsAuthDTO;
|
||||||
|
|
||||||
|
const ctx = input[RPC_CALL_ENVIRONMENT];
|
||||||
|
|
||||||
|
const req = (ctx.rawRequest || ctx.req) as express.Request | undefined;
|
||||||
|
|
||||||
|
if (req) {
|
||||||
|
const authorization = req.get('authorization');
|
||||||
|
|
||||||
|
if (authorization) {
|
||||||
|
const authToken = authorization.split(' ')[1] || authorization;
|
||||||
|
instance.bearerToken = authToken;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!instance.bearerToken && input._token) {
|
||||||
|
instance.bearerToken = input._token;
|
||||||
|
}
|
||||||
|
|
||||||
|
return instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
async getBrief(ignoreCache?: boolean | string) {
|
||||||
|
if (!this.bearerToken) {
|
||||||
|
throw new AuthenticationRequiredError({
|
||||||
|
message: 'Absence of bearer token'
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let account;
|
||||||
|
try {
|
||||||
|
account = await JinaEmbeddingsTokenAccount.fromFirestore(this.bearerToken);
|
||||||
|
} catch (err) {
|
||||||
|
// FireStore would not accept any string as input and may throw if not happy with it
|
||||||
|
void 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
const age = account?.lastSyncedAt ? Date.now() - account.lastSyncedAt.getTime() : Infinity;
|
||||||
|
|
||||||
|
if (account && !ignoreCache) {
|
||||||
|
if (account && age < 180_000) {
|
||||||
|
this.user = account;
|
||||||
|
this.uid = this.user?.user_id;
|
||||||
|
|
||||||
|
return account;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const r = await this.jinaEmbeddingsDashboard.validateToken(this.bearerToken);
|
||||||
|
const brief = r.data;
|
||||||
|
const draftAccount = JinaEmbeddingsTokenAccount.from({
|
||||||
|
...account, ...brief, _id: this.bearerToken,
|
||||||
|
lastSyncedAt: new Date()
|
||||||
|
});
|
||||||
|
await JinaEmbeddingsTokenAccount.save(draftAccount.degradeForFireStore(), undefined, { merge: true });
|
||||||
|
|
||||||
|
this.user = draftAccount;
|
||||||
|
this.uid = this.user?.user_id;
|
||||||
|
|
||||||
|
return draftAccount;
|
||||||
|
} catch (err: any) {
|
||||||
|
authDtoLogger.warn(`Failed to get user brief: ${err}`, { err: marshalErrorLike(err) });
|
||||||
|
|
||||||
|
if (err?.status === 401) {
|
||||||
|
throw new AuthenticationFailedError({
|
||||||
|
message: 'Invalid bearer token'
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (account) {
|
||||||
|
this.user = account;
|
||||||
|
this.uid = this.user?.user_id;
|
||||||
|
|
||||||
|
return account;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
throw new DownstreamServiceFailureError(`Failed to authenticate: ${err}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async reportUsage(tokenCount: number, mdl: string, endpoint: string = '/encode') {
|
||||||
|
const user = await this.assertUser();
|
||||||
|
const uid = user.user_id;
|
||||||
|
user.wallet.total_balance -= tokenCount;
|
||||||
|
|
||||||
|
return this.jinaEmbeddingsDashboard.reportUsage(this.bearerToken!, {
|
||||||
|
model_name: mdl,
|
||||||
|
api_endpoint: endpoint,
|
||||||
|
consumer: {
|
||||||
|
id: uid,
|
||||||
|
user_id: uid,
|
||||||
|
},
|
||||||
|
usage: {
|
||||||
|
total_tokens: tokenCount
|
||||||
|
},
|
||||||
|
labels: {
|
||||||
|
model_name: mdl
|
||||||
|
}
|
||||||
|
}).then((r) => {
|
||||||
|
JinaEmbeddingsTokenAccount.COLLECTION.doc(this.bearerToken!)
|
||||||
|
.update({ 'wallet.total_balance': JinaEmbeddingsTokenAccount.OPS.increment(-tokenCount) })
|
||||||
|
.catch((err) => {
|
||||||
|
authDtoLogger.warn(`Failed to update cache for ${uid}: ${err}`, { err: marshalErrorLike(err) });
|
||||||
|
});
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}).catch((err) => {
|
||||||
|
user.wallet.total_balance += tokenCount;
|
||||||
|
authDtoLogger.warn(`Failed to report usage for ${uid}: ${err}`, { err: marshalErrorLike(err) });
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
async solveUID() {
|
||||||
|
if (this.uid) {
|
||||||
|
this.ctxMgr.set('uid', this.uid);
|
||||||
|
|
||||||
|
return this.uid;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.bearerToken) {
|
||||||
|
await this.getBrief();
|
||||||
|
this.ctxMgr.set('uid', this.uid);
|
||||||
|
|
||||||
|
return this.uid;
|
||||||
|
}
|
||||||
|
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
async assertUID() {
|
||||||
|
const uid = await this.solveUID();
|
||||||
|
|
||||||
|
if (!uid) {
|
||||||
|
throw new AuthenticationRequiredError('Authentication failed');
|
||||||
|
}
|
||||||
|
|
||||||
|
return uid;
|
||||||
|
}
|
||||||
|
|
||||||
|
async assertUser() {
|
||||||
|
if (this.user) {
|
||||||
|
return this.user;
|
||||||
|
}
|
||||||
|
|
||||||
|
await this.getBrief();
|
||||||
|
|
||||||
|
return this.user!;
|
||||||
|
}
|
||||||
|
|
||||||
|
getRateLimits(...tags: string[]) {
|
||||||
|
const descs = tags.map((x) => this.user?.customRateLimits?.[x] || []).flat().filter((x) => x.isEffective());
|
||||||
|
|
||||||
|
if (descs.length) {
|
||||||
|
return descs;
|
||||||
|
}
|
||||||
|
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
}
|
||||||
9
jina-ai/src/lib/async-context.ts
Normal file
9
jina-ai/src/lib/async-context.ts
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
import { GlobalAsyncContext } from 'civkit/async-context';
|
||||||
|
import { container, singleton } from 'tsyringe';
|
||||||
|
|
||||||
|
@singleton()
|
||||||
|
export class AsyncLocalContext extends GlobalAsyncContext {}
|
||||||
|
|
||||||
|
const instance = container.resolve(AsyncLocalContext);
|
||||||
|
Reflect.set(process, 'asyncLocalContext', instance);
|
||||||
|
export default instance;
|
||||||
102
jina-ai/src/lib/billing.ts
Normal file
102
jina-ai/src/lib/billing.ts
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
import { HTTPService } from 'civkit';
|
||||||
|
import _ from 'lodash';
|
||||||
|
|
||||||
|
|
||||||
|
export interface JinaWallet {
|
||||||
|
trial_balance: number;
|
||||||
|
trial_start: Date;
|
||||||
|
trial_end: Date;
|
||||||
|
regular_balance: number;
|
||||||
|
total_balance: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
export interface JinaUserBrief {
|
||||||
|
user_id: string;
|
||||||
|
email: string | null;
|
||||||
|
full_name: string | null;
|
||||||
|
customer_id: string | null;
|
||||||
|
avatar_url?: string;
|
||||||
|
billing_address: Partial<{
|
||||||
|
address: string;
|
||||||
|
city: string;
|
||||||
|
state: string;
|
||||||
|
country: string;
|
||||||
|
postal_code: string;
|
||||||
|
}>;
|
||||||
|
payment_method: Partial<{
|
||||||
|
brand: string;
|
||||||
|
last4: string;
|
||||||
|
exp_month: number;
|
||||||
|
exp_year: number;
|
||||||
|
}>;
|
||||||
|
wallet: JinaWallet;
|
||||||
|
metadata: {
|
||||||
|
[k: string]: any;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface JinaUsageReport {
|
||||||
|
model_name: string;
|
||||||
|
api_endpoint: string;
|
||||||
|
consumer: {
|
||||||
|
user_id: string;
|
||||||
|
customer_plan?: string;
|
||||||
|
[k: string]: any;
|
||||||
|
};
|
||||||
|
usage: {
|
||||||
|
total_tokens: number;
|
||||||
|
};
|
||||||
|
labels: {
|
||||||
|
user_type?: string;
|
||||||
|
model_name?: string;
|
||||||
|
[k: string]: any;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
export class JinaEmbeddingsDashboardHTTP extends HTTPService {
|
||||||
|
name = 'JinaEmbeddingsDashboardHTTP';
|
||||||
|
|
||||||
|
constructor(
|
||||||
|
public apiKey: string,
|
||||||
|
public baseUri: string = 'https://embeddings-dashboard-api.jina.ai/api'
|
||||||
|
) {
|
||||||
|
super(baseUri);
|
||||||
|
|
||||||
|
this.baseOptions.timeout = 30_000; // 30 sec
|
||||||
|
}
|
||||||
|
|
||||||
|
async authorization(token: string) {
|
||||||
|
const r = await this.get<JinaUserBrief>('/v1/authorization', {
|
||||||
|
headers: {
|
||||||
|
Authorization: `Bearer ${token}`
|
||||||
|
},
|
||||||
|
responseType: 'json',
|
||||||
|
});
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
async validateToken(token: string) {
|
||||||
|
const r = await this.getWithSearchParams<JinaUserBrief>('/v1/api_key/user', {
|
||||||
|
api_key: token,
|
||||||
|
}, {
|
||||||
|
responseType: 'json',
|
||||||
|
});
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
async reportUsage(token: string, query: JinaUsageReport) {
|
||||||
|
const r = await this.postJson('/v1/usage', query, {
|
||||||
|
headers: {
|
||||||
|
Authorization: `Bearer ${token}`,
|
||||||
|
'x-api-key': this.apiKey,
|
||||||
|
},
|
||||||
|
responseType: 'text',
|
||||||
|
});
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
59
jina-ai/src/lib/env-config.ts
Normal file
59
jina-ai/src/lib/env-config.ts
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
import { container, singleton } from 'tsyringe';
|
||||||
|
|
||||||
|
export const SPECIAL_COMBINED_ENV_KEY = 'ENV_COMBINED';
|
||||||
|
const CONF_ENV = [
|
||||||
|
'OPENAI_API_KEY',
|
||||||
|
|
||||||
|
'ANTHROPIC_API_KEY',
|
||||||
|
|
||||||
|
'REPLICATE_API_KEY',
|
||||||
|
|
||||||
|
'GOOGLE_AI_STUDIO_API_KEY',
|
||||||
|
|
||||||
|
'JINA_EMBEDDINGS_API_KEY',
|
||||||
|
|
||||||
|
'JINA_EMBEDDINGS_DASHBOARD_API_KEY',
|
||||||
|
|
||||||
|
'BRAVE_SEARCH_API_KEY',
|
||||||
|
|
||||||
|
] as const;
|
||||||
|
|
||||||
|
|
||||||
|
@singleton()
|
||||||
|
export class EnvConfig {
|
||||||
|
dynamic!: Record<string, string>;
|
||||||
|
|
||||||
|
combined: Record<string, string> = {};
|
||||||
|
originalEnv: Record<string, string | undefined> = { ...process.env };
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
if (process.env[SPECIAL_COMBINED_ENV_KEY]) {
|
||||||
|
Object.assign(this.combined, JSON.parse(
|
||||||
|
Buffer.from(process.env[SPECIAL_COMBINED_ENV_KEY]!, 'base64').toString('utf-8')
|
||||||
|
));
|
||||||
|
delete process.env[SPECIAL_COMBINED_ENV_KEY];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Static config
|
||||||
|
for (const x of CONF_ENV) {
|
||||||
|
const s = this.combined[x] || process.env[x] || '';
|
||||||
|
Reflect.set(this, x, s);
|
||||||
|
if (x in process.env) {
|
||||||
|
delete process.env[x];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dynamic config
|
||||||
|
this.dynamic = new Proxy({
|
||||||
|
get: (_target: any, prop: string) => {
|
||||||
|
return this.combined[prop] || process.env[prop] || '';
|
||||||
|
}
|
||||||
|
}, {}) as any;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-empty-interface
|
||||||
|
export interface EnvConfig extends Record<typeof CONF_ENV[number], string> { }
|
||||||
|
|
||||||
|
const instance = container.resolve(EnvConfig);
|
||||||
|
export default instance;
|
||||||
70
jina-ai/src/lib/errors.ts
Normal file
70
jina-ai/src/lib/errors.ts
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
import { ApplicationError, Prop, RPC_TRANSFER_PROTOCOL_META_SYMBOL, StatusCode } from 'civkit';
|
||||||
|
import _ from 'lodash';
|
||||||
|
import dayjs from 'dayjs';
|
||||||
|
import utc from 'dayjs/plugin/utc';
|
||||||
|
|
||||||
|
dayjs.extend(utc);
|
||||||
|
|
||||||
|
@StatusCode(50301)
|
||||||
|
export class ServiceDisabledError extends ApplicationError { }
|
||||||
|
|
||||||
|
@StatusCode(50302)
|
||||||
|
export class ServiceCrashedError extends ApplicationError { }
|
||||||
|
|
||||||
|
@StatusCode(50303)
|
||||||
|
export class ServiceNodeResourceDrainError extends ApplicationError { }
|
||||||
|
|
||||||
|
@StatusCode(40104)
|
||||||
|
export class EmailUnverifiedError extends ApplicationError { }
|
||||||
|
|
||||||
|
@StatusCode(40201)
|
||||||
|
export class InsufficientCreditsError extends ApplicationError { }
|
||||||
|
|
||||||
|
@StatusCode(40202)
|
||||||
|
export class FreeFeatureLimitError extends ApplicationError { }
|
||||||
|
|
||||||
|
@StatusCode(40203)
|
||||||
|
export class InsufficientBalanceError extends ApplicationError { }
|
||||||
|
|
||||||
|
@StatusCode(40903)
|
||||||
|
export class LockConflictError extends ApplicationError { }
|
||||||
|
|
||||||
|
@StatusCode(40904)
|
||||||
|
export class BudgetExceededError extends ApplicationError { }
|
||||||
|
|
||||||
|
@StatusCode(45101)
|
||||||
|
export class HarmfulContentError extends ApplicationError { }
|
||||||
|
|
||||||
|
@StatusCode(45102)
|
||||||
|
export class SecurityCompromiseError extends ApplicationError { }
|
||||||
|
|
||||||
|
@StatusCode(41201)
|
||||||
|
export class BatchSizeTooLargeError extends ApplicationError { }
|
||||||
|
|
||||||
|
|
||||||
|
@StatusCode(42903)
|
||||||
|
export class RateLimitTriggeredError extends ApplicationError {
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
desc: 'Retry after seconds',
|
||||||
|
})
|
||||||
|
retryAfter?: number;
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
desc: 'Retry after date',
|
||||||
|
})
|
||||||
|
retryAfterDate?: Date;
|
||||||
|
|
||||||
|
protected override get [RPC_TRANSFER_PROTOCOL_META_SYMBOL]() {
|
||||||
|
const retryAfter = this.retryAfter || this.retryAfterDate;
|
||||||
|
if (!retryAfter) {
|
||||||
|
return super[RPC_TRANSFER_PROTOCOL_META_SYMBOL];
|
||||||
|
}
|
||||||
|
|
||||||
|
return _.merge(_.cloneDeep(super[RPC_TRANSFER_PROTOCOL_META_SYMBOL]), {
|
||||||
|
headers: {
|
||||||
|
'Retry-After': `${retryAfter instanceof Date ? dayjs(retryAfter).utc().format('ddd, DD MMM YYYY HH:mm:ss [GMT]') : retryAfter}`,
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
223
jina-ai/src/lib/firestore.ts
Normal file
223
jina-ai/src/lib/firestore.ts
Normal file
@ -0,0 +1,223 @@
|
|||||||
|
import _ from 'lodash';
|
||||||
|
import { AutoCastable, Prop, RPC_MARSHAL } from 'civkit/civ-rpc';
|
||||||
|
import {
|
||||||
|
Firestore, FieldValue, DocumentReference,
|
||||||
|
Query, Timestamp, SetOptions, DocumentSnapshot,
|
||||||
|
} from '@google-cloud/firestore';
|
||||||
|
|
||||||
|
// Firestore doesn't support JavaScript objects with custom prototypes (i.e. objects that were created via the \"new\" operator)
|
||||||
|
function patchFireStoreArrogance(func: Function) {
|
||||||
|
return function (this: unknown) {
|
||||||
|
const origObjectGetPrototype = Object.getPrototypeOf;
|
||||||
|
Object.getPrototypeOf = function (x) {
|
||||||
|
const r = origObjectGetPrototype.call(this, x);
|
||||||
|
if (!r) {
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
return Object.prototype;
|
||||||
|
};
|
||||||
|
try {
|
||||||
|
return func.call(this, ...arguments);
|
||||||
|
} finally {
|
||||||
|
Object.getPrototypeOf = origObjectGetPrototype;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
Reflect.set(DocumentReference.prototype, 'set', patchFireStoreArrogance(Reflect.get(DocumentReference.prototype, 'set')));
|
||||||
|
Reflect.set(DocumentSnapshot, 'fromObject', patchFireStoreArrogance(Reflect.get(DocumentSnapshot, 'fromObject')));
|
||||||
|
|
||||||
|
function mapValuesDeep(v: any, fn: (i: any) => any): any {
|
||||||
|
if (_.isPlainObject(v)) {
|
||||||
|
return _.mapValues(v, (i) => mapValuesDeep(i, fn));
|
||||||
|
} else if (_.isArray(v)) {
|
||||||
|
return v.map((i) => mapValuesDeep(i, fn));
|
||||||
|
} else {
|
||||||
|
return fn(v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export type Constructor<T> = { new(...args: any[]): T; };
|
||||||
|
export type Constructed<T> = T extends Partial<infer U> ? U : T extends object ? T : object;
|
||||||
|
|
||||||
|
export function fromFirestore<T extends FirestoreRecord>(
|
||||||
|
this: Constructor<T>, id: string, overrideCollection?: string
|
||||||
|
): Promise<T | undefined>;
|
||||||
|
export async function fromFirestore(
|
||||||
|
this: any, id: string, overrideCollection?: string
|
||||||
|
) {
|
||||||
|
const collection = overrideCollection || this.collectionName;
|
||||||
|
if (!collection) {
|
||||||
|
throw new Error(`Missing collection name to construct ${this.name}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const ref = this.DB.collection(overrideCollection || this.collectionName).doc(id);
|
||||||
|
|
||||||
|
const ptr = await ref.get();
|
||||||
|
|
||||||
|
if (!ptr.exists) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
const doc = this.from(
|
||||||
|
// Fixes non-native firebase types
|
||||||
|
mapValuesDeep(ptr.data(), (i: any) => {
|
||||||
|
if (i instanceof Timestamp) {
|
||||||
|
return i.toDate();
|
||||||
|
}
|
||||||
|
|
||||||
|
return i;
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
Object.defineProperty(doc, '_ref', { value: ref, enumerable: false });
|
||||||
|
Object.defineProperty(doc, '_id', { value: ptr.id, enumerable: true });
|
||||||
|
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function fromFirestoreQuery<T extends FirestoreRecord>(
|
||||||
|
this: Constructor<T>, query: Query
|
||||||
|
): Promise<T[]>;
|
||||||
|
export async function fromFirestoreQuery(this: any, query: Query) {
|
||||||
|
const ptr = await query.get();
|
||||||
|
|
||||||
|
if (ptr.docs.length) {
|
||||||
|
return ptr.docs.map(doc => {
|
||||||
|
const r = this.from(
|
||||||
|
mapValuesDeep(doc.data(), (i: any) => {
|
||||||
|
if (i instanceof Timestamp) {
|
||||||
|
return i.toDate();
|
||||||
|
}
|
||||||
|
|
||||||
|
return i;
|
||||||
|
})
|
||||||
|
);
|
||||||
|
Object.defineProperty(r, '_ref', { value: doc.ref, enumerable: false });
|
||||||
|
Object.defineProperty(r, '_id', { value: doc.id, enumerable: true });
|
||||||
|
|
||||||
|
return r;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
export function setToFirestore<T extends FirestoreRecord>(
|
||||||
|
this: Constructor<T>, doc: T, overrideCollection?: string, setOptions?: SetOptions
|
||||||
|
): Promise<T>;
|
||||||
|
export async function setToFirestore(
|
||||||
|
this: any, doc: any, overrideCollection?: string, setOptions?: SetOptions
|
||||||
|
) {
|
||||||
|
let ref: DocumentReference<any> = doc._ref;
|
||||||
|
if (!ref) {
|
||||||
|
const collection = overrideCollection || this.collectionName;
|
||||||
|
if (!collection) {
|
||||||
|
throw new Error(`Missing collection name to construct ${this.name}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const predefinedId = doc._id || undefined;
|
||||||
|
const hdl = this.DB.collection(overrideCollection || this.collectionName);
|
||||||
|
ref = predefinedId ? hdl.doc(predefinedId) : hdl.doc();
|
||||||
|
|
||||||
|
Object.defineProperty(doc, '_ref', { value: ref, enumerable: false });
|
||||||
|
Object.defineProperty(doc, '_id', { value: ref.id, enumerable: true });
|
||||||
|
}
|
||||||
|
|
||||||
|
await ref.set(doc, { merge: true, ...setOptions });
|
||||||
|
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function deleteQueryBatch<T extends FirestoreRecord>(
|
||||||
|
this: Constructor<T>, query: Query
|
||||||
|
): Promise<T>;
|
||||||
|
export async function deleteQueryBatch(this: any, query: Query) {
|
||||||
|
const snapshot = await query.get();
|
||||||
|
|
||||||
|
const batchSize = snapshot.size;
|
||||||
|
if (batchSize === 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete documents in a batch
|
||||||
|
const batch = this.DB.batch();
|
||||||
|
snapshot.docs.forEach((doc) => {
|
||||||
|
batch.delete(doc.ref);
|
||||||
|
});
|
||||||
|
await batch.commit();
|
||||||
|
|
||||||
|
process.nextTick(() => {
|
||||||
|
this.deleteQueryBatch(query);
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
export function fromFirestoreDoc<T extends FirestoreRecord>(
|
||||||
|
this: Constructor<T>, snapshot: DocumentSnapshot,
|
||||||
|
): T | undefined;
|
||||||
|
export function fromFirestoreDoc(
|
||||||
|
this: any, snapshot: DocumentSnapshot,
|
||||||
|
) {
|
||||||
|
const doc = this.from(
|
||||||
|
// Fixes non-native firebase types
|
||||||
|
mapValuesDeep(snapshot.data(), (i: any) => {
|
||||||
|
if (i instanceof Timestamp) {
|
||||||
|
return i.toDate();
|
||||||
|
}
|
||||||
|
|
||||||
|
return i;
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
Object.defineProperty(doc, '_ref', { value: snapshot.ref, enumerable: false });
|
||||||
|
Object.defineProperty(doc, '_id', { value: snapshot.id, enumerable: true });
|
||||||
|
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
const defaultFireStore = new Firestore({
|
||||||
|
projectId: process.env.GCLOUD_PROJECT,
|
||||||
|
});
|
||||||
|
export class FirestoreRecord extends AutoCastable {
|
||||||
|
static collectionName?: string;
|
||||||
|
static OPS = FieldValue;
|
||||||
|
static DB = defaultFireStore;
|
||||||
|
static get COLLECTION() {
|
||||||
|
if (!this.collectionName) {
|
||||||
|
throw new Error('Not implemented');
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.DB.collection(this.collectionName);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Prop()
|
||||||
|
_id?: string;
|
||||||
|
_ref?: DocumentReference<Partial<Omit<this, '_ref' | '_id'>>>;
|
||||||
|
|
||||||
|
static fromFirestore = fromFirestore;
|
||||||
|
static fromFirestoreDoc = fromFirestoreDoc;
|
||||||
|
static fromFirestoreQuery = fromFirestoreQuery;
|
||||||
|
|
||||||
|
static save = setToFirestore;
|
||||||
|
static deleteQueryBatch = deleteQueryBatch;
|
||||||
|
|
||||||
|
[RPC_MARSHAL]() {
|
||||||
|
return {
|
||||||
|
...this,
|
||||||
|
_id: this._id,
|
||||||
|
_ref: this._ref?.path
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
degradeForFireStore(): this {
|
||||||
|
return JSON.parse(JSON.stringify(this, function (k, v) {
|
||||||
|
if (k === '') {
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
if (typeof v === 'object' && v && (typeof v.degradeForFireStore === 'function')) {
|
||||||
|
return v.degradeForFireStore();
|
||||||
|
}
|
||||||
|
|
||||||
|
return v;
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
56
jina-ai/src/lib/logger.ts
Normal file
56
jina-ai/src/lib/logger.ts
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
import { AbstractPinoLogger } from 'civkit/pino-logger';
|
||||||
|
import { singleton, container } from 'tsyringe';
|
||||||
|
import { threadId } from 'node:worker_threads';
|
||||||
|
import { getTraceCtx } from 'civkit/async-context';
|
||||||
|
|
||||||
|
|
||||||
|
const levelToSeverityMap: { [k: string]: string | undefined; } = {
|
||||||
|
trace: 'DEFAULT',
|
||||||
|
debug: 'DEBUG',
|
||||||
|
info: 'INFO',
|
||||||
|
warn: 'WARNING',
|
||||||
|
error: 'ERROR',
|
||||||
|
fatal: 'CRITICAL',
|
||||||
|
};
|
||||||
|
|
||||||
|
@singleton()
|
||||||
|
export class GlobalLogger extends AbstractPinoLogger {
|
||||||
|
loggerOptions = {
|
||||||
|
level: 'debug',
|
||||||
|
base: {
|
||||||
|
tid: threadId,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
override init(): void {
|
||||||
|
if (process.env['NODE_ENV']?.startsWith('prod')) {
|
||||||
|
super.init(process.stdout);
|
||||||
|
} else {
|
||||||
|
const PinoPretty = require('pino-pretty').PinoPretty;
|
||||||
|
super.init(PinoPretty({
|
||||||
|
singleLine: true,
|
||||||
|
colorize: true,
|
||||||
|
messageFormat(log: any, messageKey: any) {
|
||||||
|
return `${log['tid'] ? `[${log['tid']}]` : ''}[${log['service'] || 'ROOT'}] ${log[messageKey]}`;
|
||||||
|
},
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
this.emit('ready');
|
||||||
|
}
|
||||||
|
|
||||||
|
override log(...args: any[]) {
|
||||||
|
const [levelObj, ...rest] = args;
|
||||||
|
const severity = levelToSeverityMap[levelObj?.level];
|
||||||
|
const traceCtx = getTraceCtx();
|
||||||
|
const patched: any= { ...levelObj, severity };
|
||||||
|
if (traceCtx?.traceId && process.env['GCLOUD_PROJECT']) {
|
||||||
|
patched['logging.googleapis.com/trace'] = `projects/${process.env['GCLOUD_PROJECT']}/traces/${traceCtx.traceId}`;
|
||||||
|
}
|
||||||
|
return super.log(patched, ...rest);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const instance = container.resolve(GlobalLogger);
|
||||||
|
export default instance;
|
||||||
4
jina-ai/src/lib/registry.ts
Normal file
4
jina-ai/src/lib/registry.ts
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
import { container } from 'tsyringe';
|
||||||
|
import { propertyInjectorFactory } from 'civkit/property-injector';
|
||||||
|
|
||||||
|
export const InjectProperty = propertyInjectorFactory(container);
|
||||||
93
jina-ai/src/patch-express.ts
Normal file
93
jina-ai/src/patch-express.ts
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
import { ApplicationError, RPC_CALL_ENVIRONMENT } from "civkit/civ-rpc";
|
||||||
|
import { marshalErrorLike } from "civkit/lang";
|
||||||
|
import { randomUUID } from "crypto";
|
||||||
|
import { once } from "events";
|
||||||
|
import type { NextFunction, Request, Response } from "express";
|
||||||
|
|
||||||
|
import { JinaEmbeddingsAuthDTO } from "./dto/jina-embeddings-auth";
|
||||||
|
import rateLimitControl, { API_CALL_STATUS, RateLimitDesc } from "./rate-limit";
|
||||||
|
import asyncLocalContext from "./lib/async-context";
|
||||||
|
import globalLogger from "./lib/logger";
|
||||||
|
|
||||||
|
globalLogger.serviceReady();
|
||||||
|
const logger = globalLogger.child({ service: 'BillingMiddleware' });
|
||||||
|
|
||||||
|
const appName = 'DEEPRESEARCH';
|
||||||
|
export const jinaAiBillingMiddleware = (req: Request, res: Response, next: NextFunction) => {
|
||||||
|
if (req.path === '/ping') {
|
||||||
|
res.status(200).end('pone');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (req.method !== 'POST' && req.method !== 'GET') {
|
||||||
|
next();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
asyncLocalContext.run(async () => {
|
||||||
|
const googleTraceId = req.get('x-cloud-trace-context')?.split('/')?.[0];
|
||||||
|
const ctx = asyncLocalContext.ctx;
|
||||||
|
ctx.traceId = req.get('x-request-id') || req.get('request-id') || googleTraceId || randomUUID();
|
||||||
|
ctx.traceT0 = new Date();
|
||||||
|
ctx.ip = req?.ip;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const authDto = JinaEmbeddingsAuthDTO.from({
|
||||||
|
[RPC_CALL_ENVIRONMENT]: { req, res }
|
||||||
|
});
|
||||||
|
|
||||||
|
const user = await authDto.assertUser();
|
||||||
|
await rateLimitControl.serviceReady();
|
||||||
|
const rateLimitPolicy = authDto.getRateLimits(appName) || [
|
||||||
|
parseInt(user.metadata?.speed_level) >= 2 ?
|
||||||
|
RateLimitDesc.from({
|
||||||
|
occurrence: 30,
|
||||||
|
periodSeconds: 60
|
||||||
|
}) :
|
||||||
|
RateLimitDesc.from({
|
||||||
|
occurrence: 10,
|
||||||
|
periodSeconds: 60
|
||||||
|
})
|
||||||
|
];
|
||||||
|
const criterions = rateLimitPolicy.map((c) => rateLimitControl.rateLimitDescToCriterion(c));
|
||||||
|
await Promise.all(criterions.map(([pointInTime, n]) => rateLimitControl.assertUidPeriodicLimit(user._id, pointInTime, n, appName)));
|
||||||
|
|
||||||
|
const apiRoll = rateLimitControl.record({ uid: user._id, tags: [appName] })
|
||||||
|
apiRoll.save().catch((err) => logger.warn(`Failed to save rate limit record`, { err: marshalErrorLike(err) }));
|
||||||
|
|
||||||
|
const pResClose = once(res, 'close');
|
||||||
|
|
||||||
|
next();
|
||||||
|
|
||||||
|
await pResClose;
|
||||||
|
const chargeAmount = ctx.chargeAmount;
|
||||||
|
if (chargeAmount) {
|
||||||
|
authDto.reportUsage(chargeAmount, `reader-${appName}`).catch((err) => {
|
||||||
|
logger.warn(`Unable to report usage for ${user._id}`, { err: marshalErrorLike(err) });
|
||||||
|
});
|
||||||
|
apiRoll.chargeAmount = chargeAmount;
|
||||||
|
}
|
||||||
|
apiRoll.status = res.statusCode === 200 ? API_CALL_STATUS.SUCCESS : API_CALL_STATUS.ERROR;
|
||||||
|
apiRoll.save().catch((err) => logger.warn(`Failed to save rate limit record`, { err: marshalErrorLike(err) }));
|
||||||
|
logger.info(`HTTP ${res.statusCode} for request ${ctx.traceId} after ${Date.now() - ctx.traceT0.valueOf()}ms`, {
|
||||||
|
uid: user._id,
|
||||||
|
chargeAmount,
|
||||||
|
});
|
||||||
|
|
||||||
|
} catch (err: any) {
|
||||||
|
if (!res.headersSent) {
|
||||||
|
if (err instanceof ApplicationError) {
|
||||||
|
res.status(parseInt(err.code as string) || 500).json({ error: err.message });
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
res.status(500).json({ error: 'Internal' });
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.error(`Error in billing middleware`, { err: marshalErrorLike(err) });
|
||||||
|
if (err.stack) {
|
||||||
|
logger.error(err.stack);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
});
|
||||||
|
}
|
||||||
278
jina-ai/src/rate-limit.ts
Normal file
278
jina-ai/src/rate-limit.ts
Normal file
@ -0,0 +1,278 @@
|
|||||||
|
import { AutoCastable, ResourcePolicyDenyError, Also, Prop } from 'civkit/civ-rpc';
|
||||||
|
import { AsyncService } from 'civkit/async-service';
|
||||||
|
import { getTraceId } from 'civkit/async-context';
|
||||||
|
import { singleton, container } from 'tsyringe';
|
||||||
|
|
||||||
|
import { RateLimitTriggeredError } from './lib/errors';
|
||||||
|
import { FirestoreRecord } from './lib/firestore';
|
||||||
|
import { GlobalLogger } from './lib/logger';
|
||||||
|
|
||||||
|
export enum API_CALL_STATUS {
|
||||||
|
SUCCESS = 'success',
|
||||||
|
ERROR = 'error',
|
||||||
|
PENDING = 'pending',
|
||||||
|
}
|
||||||
|
|
||||||
|
@Also({ dictOf: Object })
|
||||||
|
export class APICall extends FirestoreRecord {
|
||||||
|
static override collectionName = 'apiRoll';
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
required: true,
|
||||||
|
defaultFactory: () => getTraceId()
|
||||||
|
})
|
||||||
|
traceId!: string;
|
||||||
|
|
||||||
|
@Prop()
|
||||||
|
uid?: string;
|
||||||
|
|
||||||
|
@Prop()
|
||||||
|
ip?: string;
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
arrayOf: String,
|
||||||
|
default: [],
|
||||||
|
})
|
||||||
|
tags!: string[];
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
required: true,
|
||||||
|
defaultFactory: () => new Date(),
|
||||||
|
})
|
||||||
|
createdAt!: Date;
|
||||||
|
|
||||||
|
@Prop()
|
||||||
|
completedAt?: Date;
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
required: true,
|
||||||
|
default: API_CALL_STATUS.PENDING,
|
||||||
|
})
|
||||||
|
status!: API_CALL_STATUS;
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
required: true,
|
||||||
|
defaultFactory: () => new Date(Date.now() + 1000 * 60 * 60 * 24 * 90),
|
||||||
|
})
|
||||||
|
expireAt!: Date;
|
||||||
|
|
||||||
|
[k: string]: any;
|
||||||
|
|
||||||
|
tag(...tags: string[]) {
|
||||||
|
for (const t of tags) {
|
||||||
|
if (!this.tags.includes(t)) {
|
||||||
|
this.tags.push(t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
save() {
|
||||||
|
return (this.constructor as typeof APICall).save(this);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
export class RateLimitDesc extends AutoCastable {
|
||||||
|
@Prop({
|
||||||
|
default: 1000
|
||||||
|
})
|
||||||
|
occurrence!: number;
|
||||||
|
|
||||||
|
@Prop({
|
||||||
|
default: 3600
|
||||||
|
})
|
||||||
|
periodSeconds!: number;
|
||||||
|
|
||||||
|
@Prop()
|
||||||
|
notBefore?: Date;
|
||||||
|
|
||||||
|
@Prop()
|
||||||
|
notAfter?: Date;
|
||||||
|
|
||||||
|
isEffective() {
|
||||||
|
const now = new Date();
|
||||||
|
if (this.notBefore && this.notBefore > now) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (this.notAfter && this.notAfter < now) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@singleton()
|
||||||
|
export class RateLimitControl extends AsyncService {
|
||||||
|
|
||||||
|
logger = this.globalLogger.child({ service: this.constructor.name });
|
||||||
|
|
||||||
|
constructor(
|
||||||
|
protected globalLogger: GlobalLogger,
|
||||||
|
) {
|
||||||
|
super(...arguments);
|
||||||
|
}
|
||||||
|
|
||||||
|
override async init() {
|
||||||
|
await this.dependencyReady();
|
||||||
|
|
||||||
|
this.emit('ready');
|
||||||
|
}
|
||||||
|
|
||||||
|
async queryByUid(uid: string, pointInTime: Date, ...tags: string[]) {
|
||||||
|
let q = APICall.COLLECTION
|
||||||
|
.orderBy('createdAt', 'asc')
|
||||||
|
.where('createdAt', '>=', pointInTime)
|
||||||
|
.where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
|
||||||
|
.where('uid', '==', uid);
|
||||||
|
if (tags.length) {
|
||||||
|
q = q.where('tags', 'array-contains-any', tags);
|
||||||
|
}
|
||||||
|
|
||||||
|
return APICall.fromFirestoreQuery(q);
|
||||||
|
}
|
||||||
|
|
||||||
|
async queryByIp(ip: string, pointInTime: Date, ...tags: string[]) {
|
||||||
|
let q = APICall.COLLECTION
|
||||||
|
.orderBy('createdAt', 'asc')
|
||||||
|
.where('createdAt', '>=', pointInTime)
|
||||||
|
.where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
|
||||||
|
.where('ip', '==', ip);
|
||||||
|
if (tags.length) {
|
||||||
|
q = q.where('tags', 'array-contains-any', tags);
|
||||||
|
}
|
||||||
|
|
||||||
|
return APICall.fromFirestoreQuery(q);
|
||||||
|
}
|
||||||
|
|
||||||
|
async assertUidPeriodicLimit(uid: string, pointInTime: Date, limit: number, ...tags: string[]) {
|
||||||
|
if (limit <= 0) {
|
||||||
|
throw new ResourcePolicyDenyError(`This UID(${uid}) is not allowed to call this endpoint (rate limit quota is 0).`);
|
||||||
|
}
|
||||||
|
|
||||||
|
let q = APICall.COLLECTION
|
||||||
|
.orderBy('createdAt', 'asc')
|
||||||
|
.where('createdAt', '>=', pointInTime)
|
||||||
|
.where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
|
||||||
|
.where('uid', '==', uid);
|
||||||
|
if (tags.length) {
|
||||||
|
q = q.where('tags', 'array-contains-any', tags);
|
||||||
|
}
|
||||||
|
const count = (await q.count().get()).data().count;
|
||||||
|
|
||||||
|
if (count >= limit) {
|
||||||
|
const r = await APICall.fromFirestoreQuery(q.limit(1));
|
||||||
|
const [r1] = r;
|
||||||
|
|
||||||
|
const dtMs = Math.abs(r1.createdAt?.valueOf() - pointInTime.valueOf());
|
||||||
|
const dtSec = Math.ceil(dtMs / 1000);
|
||||||
|
|
||||||
|
throw RateLimitTriggeredError.from({
|
||||||
|
message: `Per UID rate limit exceeded (${tags.join(',') || 'called'} ${limit} times since ${pointInTime})`,
|
||||||
|
retryAfter: dtSec,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return count + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
async assertIPPeriodicLimit(ip: string, pointInTime: Date, limit: number, ...tags: string[]) {
|
||||||
|
let q = APICall.COLLECTION
|
||||||
|
.orderBy('createdAt', 'asc')
|
||||||
|
.where('createdAt', '>=', pointInTime)
|
||||||
|
.where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
|
||||||
|
.where('ip', '==', ip);
|
||||||
|
if (tags.length) {
|
||||||
|
q = q.where('tags', 'array-contains-any', tags);
|
||||||
|
}
|
||||||
|
|
||||||
|
const count = (await q.count().get()).data().count;
|
||||||
|
|
||||||
|
if (count >= limit) {
|
||||||
|
const r = await APICall.fromFirestoreQuery(q.limit(1));
|
||||||
|
const [r1] = r;
|
||||||
|
|
||||||
|
const dtMs = Math.abs(r1.createdAt?.valueOf() - pointInTime.valueOf());
|
||||||
|
const dtSec = Math.ceil(dtMs / 1000);
|
||||||
|
|
||||||
|
throw RateLimitTriggeredError.from({
|
||||||
|
message: `Per IP rate limit exceeded (${tags.join(',') || 'called'} ${limit} times since ${pointInTime})`,
|
||||||
|
retryAfter: dtSec,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return count + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
record(partialRecord: Partial<APICall>) {
|
||||||
|
const record = APICall.from(partialRecord);
|
||||||
|
const newId = APICall.COLLECTION.doc().id;
|
||||||
|
record._id = newId;
|
||||||
|
|
||||||
|
return record;
|
||||||
|
}
|
||||||
|
|
||||||
|
// async simpleRPCUidBasedLimit(rpcReflect: RPCReflection, uid: string, tags: string[] = [],
|
||||||
|
// ...inputCriterion: RateLimitDesc[] | [Date, number][]) {
|
||||||
|
// const criterion = inputCriterion.map((c) => { return Array.isArray(c) ? c : this.rateLimitDescToCriterion(c); });
|
||||||
|
|
||||||
|
// await Promise.all(criterion.map(([pointInTime, n]) =>
|
||||||
|
// this.assertUidPeriodicLimit(uid, pointInTime, n, ...tags)));
|
||||||
|
|
||||||
|
// const r = this.record({
|
||||||
|
// uid,
|
||||||
|
// tags,
|
||||||
|
// });
|
||||||
|
|
||||||
|
// r.save().catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
|
||||||
|
// rpcReflect.then(() => {
|
||||||
|
// r.status = API_CALL_STATUS.SUCCESS;
|
||||||
|
// r.save()
|
||||||
|
// .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
|
||||||
|
// });
|
||||||
|
// rpcReflect.catch((err) => {
|
||||||
|
// r.status = API_CALL_STATUS.ERROR;
|
||||||
|
// r.error = err.toString();
|
||||||
|
// r.save()
|
||||||
|
// .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
|
||||||
|
// });
|
||||||
|
|
||||||
|
// return r;
|
||||||
|
// }
|
||||||
|
|
||||||
|
rateLimitDescToCriterion(rateLimitDesc: RateLimitDesc) {
|
||||||
|
return [new Date(Date.now() - rateLimitDesc.periodSeconds * 1000), rateLimitDesc.occurrence] as [Date, number];
|
||||||
|
}
|
||||||
|
|
||||||
|
// async simpleRpcIPBasedLimit(rpcReflect: RPCReflection, ip: string, tags: string[] = [],
|
||||||
|
// ...inputCriterion: RateLimitDesc[] | [Date, number][]) {
|
||||||
|
// const criterion = inputCriterion.map((c) => { return Array.isArray(c) ? c : this.rateLimitDescToCriterion(c); });
|
||||||
|
// await Promise.all(criterion.map(([pointInTime, n]) =>
|
||||||
|
// this.assertIPPeriodicLimit(ip, pointInTime, n, ...tags)));
|
||||||
|
|
||||||
|
// const r = this.record({
|
||||||
|
// ip,
|
||||||
|
// tags,
|
||||||
|
// });
|
||||||
|
|
||||||
|
// r.save().catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
|
||||||
|
// rpcReflect.then(() => {
|
||||||
|
// r.status = API_CALL_STATUS.SUCCESS;
|
||||||
|
// r.save()
|
||||||
|
// .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
|
||||||
|
// });
|
||||||
|
// rpcReflect.catch((err) => {
|
||||||
|
// r.status = API_CALL_STATUS.ERROR;
|
||||||
|
// r.error = err.toString();
|
||||||
|
// r.save()
|
||||||
|
// .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
|
||||||
|
// });
|
||||||
|
|
||||||
|
// return r;
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
|
||||||
|
const instance = container.resolve(RateLimitControl);
|
||||||
|
|
||||||
|
export default instance;
|
||||||
56
jina-ai/src/server.ts
Normal file
56
jina-ai/src/server.ts
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
import 'reflect-metadata'
|
||||||
|
import express from 'express';
|
||||||
|
import { jinaAiBillingMiddleware } from "./patch-express";
|
||||||
|
import { Server } from 'http';
|
||||||
|
|
||||||
|
const app = require('../..').default;
|
||||||
|
|
||||||
|
const rootApp = express();
|
||||||
|
rootApp.use(jinaAiBillingMiddleware, app);
|
||||||
|
|
||||||
|
|
||||||
|
const port = process.env.PORT || 3000;
|
||||||
|
|
||||||
|
let server: Server | undefined;
|
||||||
|
// Export server startup function for better testing
|
||||||
|
export function startServer() {
|
||||||
|
return rootApp.listen(port, () => {
|
||||||
|
console.log(`Server running at http://localhost:${port}`);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start server if running directly
|
||||||
|
if (process.env.NODE_ENV !== 'test') {
|
||||||
|
server = startServer();
|
||||||
|
}
|
||||||
|
|
||||||
|
process.on('unhandledRejection', (_err) => `Is false alarm`);
|
||||||
|
|
||||||
|
process.on('uncaughtException', (err) => {
|
||||||
|
console.log('Uncaught exception', err);
|
||||||
|
|
||||||
|
// Looks like Firebase runtime does not handle error properly.
|
||||||
|
// Make sure to quit the process.
|
||||||
|
process.nextTick(() => process.exit(1));
|
||||||
|
console.error('Uncaught exception, process quit.');
|
||||||
|
throw err;
|
||||||
|
});
|
||||||
|
|
||||||
|
const sigHandler = (signal: string) => {
|
||||||
|
console.log(`Received ${signal}, exiting...`);
|
||||||
|
if (server && server.listening) {
|
||||||
|
console.log(`Shutting down gracefully...`);
|
||||||
|
console.log(`Waiting for the server to drain and close...`);
|
||||||
|
server.close((err) => {
|
||||||
|
if (err) {
|
||||||
|
console.error('Error while closing server', err);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
process.exit(0);
|
||||||
|
});
|
||||||
|
server.closeIdleConnections();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
process.on('SIGTERM', sigHandler);
|
||||||
|
process.on('SIGINT', sigHandler);
|
||||||
17
jina-ai/tsconfig.json
Normal file
17
jina-ai/tsconfig.json
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ES2020",
|
||||||
|
"module": "node16",
|
||||||
|
"outDir": "./dist",
|
||||||
|
"rootDir": "./src",
|
||||||
|
"sourceMap": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"forceConsistentCasingInFileNames": true,
|
||||||
|
"strict": true,
|
||||||
|
"experimentalDecorators": true,
|
||||||
|
"emitDecoratorMetadata": true,
|
||||||
|
"resolveJsonModule": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@ -1,15 +1,13 @@
|
|||||||
{
|
{
|
||||||
"name": "node-deepresearch",
|
"name": "node-deepresearch",
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"main": "dist/index.js",
|
"main": "dist/app.js",
|
||||||
"types": "dist/index.d.ts",
|
|
||||||
"files": [
|
"files": [
|
||||||
"dist",
|
"dist",
|
||||||
"README.md",
|
"README.md",
|
||||||
"LICENSE"
|
"LICENSE"
|
||||||
],
|
],
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"prepare": "npm run build",
|
|
||||||
"build": "tsc",
|
"build": "tsc",
|
||||||
"dev": "npx ts-node src/agent.ts",
|
"dev": "npx ts-node src/agent.ts",
|
||||||
"search": "npx ts-node src/test-duck.ts",
|
"search": "npx ts-node src/test-duck.ts",
|
||||||
@ -17,6 +15,7 @@
|
|||||||
"lint": "eslint . --ext .ts",
|
"lint": "eslint . --ext .ts",
|
||||||
"lint:fix": "eslint . --ext .ts --fix",
|
"lint:fix": "eslint . --ext .ts --fix",
|
||||||
"serve": "ts-node src/server.ts",
|
"serve": "ts-node src/server.ts",
|
||||||
|
"start": "ts-node src/server.ts",
|
||||||
"eval": "ts-node src/evals/batch-evals.ts",
|
"eval": "ts-node src/evals/batch-evals.ts",
|
||||||
"test": "jest --testTimeout=30000",
|
"test": "jest --testTimeout=30000",
|
||||||
"test:watch": "jest --watch"
|
"test:watch": "jest --watch"
|
||||||
|
|||||||
@ -22,7 +22,7 @@ describe('/v1/chat/completions', () => {
|
|||||||
process.argv.push(`--secret=${TEST_SECRET}`);
|
process.argv.push(`--secret=${TEST_SECRET}`);
|
||||||
|
|
||||||
// Import server module (jest.resetModules() is called automatically before each test)
|
// Import server module (jest.resetModules() is called automatically before each test)
|
||||||
const { default: serverModule } = await import('../server');
|
const { default: serverModule } = await require('../app');
|
||||||
app = serverModule;
|
app = serverModule;
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -67,7 +67,7 @@ describe('/v1/chat/completions', () => {
|
|||||||
jest.resetModules();
|
jest.resetModules();
|
||||||
|
|
||||||
// Reload server module without secret
|
// Reload server module without secret
|
||||||
const { default: serverModule } = await import('../server');
|
const { default: serverModule } = await require('../app');
|
||||||
app = serverModule;
|
app = serverModule;
|
||||||
|
|
||||||
const response = await request(app)
|
const response = await request(app)
|
||||||
|
|||||||
647
src/app.ts
Normal file
647
src/app.ts
Normal file
@ -0,0 +1,647 @@
|
|||||||
|
import express, {Request, Response, RequestHandler} from 'express';
|
||||||
|
import cors from 'cors';
|
||||||
|
import {EventEmitter} from 'events';
|
||||||
|
import {getResponse} from './agent';
|
||||||
|
import {
|
||||||
|
StepAction,
|
||||||
|
StreamMessage,
|
||||||
|
TrackerContext,
|
||||||
|
ChatCompletionRequest,
|
||||||
|
ChatCompletionResponse,
|
||||||
|
ChatCompletionChunk,
|
||||||
|
AnswerAction,
|
||||||
|
TOKEN_CATEGORIES,
|
||||||
|
Model
|
||||||
|
} from './types';
|
||||||
|
import fs from 'fs/promises';
|
||||||
|
import path from 'path';
|
||||||
|
import {TokenTracker} from "./utils/token-tracker";
|
||||||
|
import {ActionTracker} from "./utils/action-tracker";
|
||||||
|
|
||||||
|
const app = express();
|
||||||
|
|
||||||
|
// Get secret from command line args for optional authentication
|
||||||
|
const secret = process.argv.find(arg => arg.startsWith('--secret='))?.split('=')[1];
|
||||||
|
|
||||||
|
app.use(cors());
|
||||||
|
app.use(express.json());
|
||||||
|
|
||||||
|
const eventEmitter = new EventEmitter();
|
||||||
|
|
||||||
|
interface QueryRequest extends Request {
|
||||||
|
body: {
|
||||||
|
q: string;
|
||||||
|
budget?: number;
|
||||||
|
maxBadAttempt?: number;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildMdFromAnswer(answer: AnswerAction) {
|
||||||
|
let refStr = '';
|
||||||
|
if (answer.references?.length > 0) {
|
||||||
|
refStr = `
|
||||||
|
|
||||||
|
## References
|
||||||
|
${answer.references.map((ref, i) => `
|
||||||
|
${i + 1}. [${ref.exactQuote}](${ref.url})`).join('')}`;
|
||||||
|
}
|
||||||
|
return `${answer.answer.replace(/\(REF_(\d+)\)/g, (_, num) => `[^${num}]`)}${refStr}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Modified streamTextWordByWord function
|
||||||
|
async function* streamTextWordByWord(text: string, streamingState: StreamingState) {
|
||||||
|
const words = text.split(/(\s+)/);
|
||||||
|
for (const word of words) {
|
||||||
|
if (streamingState.currentlyStreaming) {
|
||||||
|
const delay = Math.floor(Math.random() * 100);
|
||||||
|
await new Promise(resolve => setTimeout(resolve, delay));
|
||||||
|
yield word;
|
||||||
|
} else {
|
||||||
|
// If streaming was interrupted, yield all remaining words at once
|
||||||
|
const remainingWords = words.slice(words.indexOf(word)).join('');
|
||||||
|
yield remainingWords;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper function to emit remaining content immediately
|
||||||
|
async function emitRemainingContent(
|
||||||
|
res: Response,
|
||||||
|
requestId: string,
|
||||||
|
model: string,
|
||||||
|
content: string
|
||||||
|
) {
|
||||||
|
if (!content) return;
|
||||||
|
|
||||||
|
const chunk: ChatCompletionChunk = {
|
||||||
|
id: requestId,
|
||||||
|
object: 'chat.completion.chunk',
|
||||||
|
created: Math.floor(Date.now() / 1000),
|
||||||
|
model: model,
|
||||||
|
system_fingerprint: 'fp_' + requestId,
|
||||||
|
choices: [{
|
||||||
|
index: 0,
|
||||||
|
delta: {content},
|
||||||
|
logprobs: null,
|
||||||
|
finish_reason: null
|
||||||
|
}]
|
||||||
|
};
|
||||||
|
res.write(`data: ${JSON.stringify(chunk)}\n\n`);
|
||||||
|
}
|
||||||
|
|
||||||
|
interface StreamingState {
|
||||||
|
currentlyStreaming: boolean;
|
||||||
|
currentGenerator: AsyncGenerator<string> | null;
|
||||||
|
remainingContent: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function completeCurrentStreaming(
|
||||||
|
streamingState: StreamingState,
|
||||||
|
res: Response,
|
||||||
|
requestId: string,
|
||||||
|
model: string
|
||||||
|
) {
|
||||||
|
if (streamingState.currentlyStreaming && streamingState.remainingContent) {
|
||||||
|
// Force completion of current streaming
|
||||||
|
await emitRemainingContent(
|
||||||
|
res,
|
||||||
|
requestId,
|
||||||
|
model,
|
||||||
|
streamingState.remainingContent
|
||||||
|
);
|
||||||
|
// Reset streaming state
|
||||||
|
streamingState.currentlyStreaming = false;
|
||||||
|
streamingState.remainingContent = '';
|
||||||
|
streamingState.currentGenerator = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// OpenAI-compatible chat completions endpoint
|
||||||
|
// Models API endpoints
|
||||||
|
app.get('/v1/models', (async (_req: Request, res: Response) => {
|
||||||
|
const models: Model[] = [{
|
||||||
|
id: 'jina-deepsearch-v1',
|
||||||
|
object: 'model',
|
||||||
|
created: 1686935002,
|
||||||
|
owned_by: 'jina-ai'
|
||||||
|
}];
|
||||||
|
|
||||||
|
res.json({
|
||||||
|
object: 'list',
|
||||||
|
data: models
|
||||||
|
});
|
||||||
|
}) as RequestHandler);
|
||||||
|
|
||||||
|
app.get('/v1/models/:model', (async (req: Request, res: Response) => {
|
||||||
|
const modelId = req.params.model;
|
||||||
|
|
||||||
|
if (modelId === 'jina-deepsearch-v1') {
|
||||||
|
res.json({
|
||||||
|
id: 'jina-deepsearch-v1',
|
||||||
|
object: 'model',
|
||||||
|
created: 1686935002,
|
||||||
|
owned_by: 'jina-ai'
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
res.status(404).json({
|
||||||
|
error: {
|
||||||
|
message: `Model '${modelId}' not found`,
|
||||||
|
type: 'invalid_request_error',
|
||||||
|
param: null,
|
||||||
|
code: 'model_not_found'
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}) as RequestHandler);
|
||||||
|
|
||||||
|
if (secret) {
|
||||||
|
// Check authentication only if secret is set
|
||||||
|
app.use((req, res, next) => {
|
||||||
|
const authHeader = req.headers.authorization;
|
||||||
|
if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
|
||||||
|
console.log('[chat/completions] Unauthorized request');
|
||||||
|
res.status(401).json({ error: 'Unauthorized' });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
return next();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
|
||||||
|
// Check authentication only if secret is set
|
||||||
|
if (secret) {
|
||||||
|
const authHeader = req.headers.authorization;
|
||||||
|
if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
|
||||||
|
console.log('[chat/completions] Unauthorized request');
|
||||||
|
res.status(401).json({error: 'Unauthorized'});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Log request details (excluding sensitive data)
|
||||||
|
console.log('[chat/completions] Request:', {
|
||||||
|
model: req.body.model,
|
||||||
|
stream: req.body.stream,
|
||||||
|
messageCount: req.body.messages?.length,
|
||||||
|
hasAuth: !!req.headers.authorization,
|
||||||
|
requestId: Date.now().toString()
|
||||||
|
});
|
||||||
|
|
||||||
|
const body = req.body as ChatCompletionRequest;
|
||||||
|
if (!body.messages?.length) {
|
||||||
|
return res.status(400).json({error: 'Messages array is required and must not be empty'});
|
||||||
|
}
|
||||||
|
const lastMessage = body.messages[body.messages.length - 1];
|
||||||
|
if (lastMessage.role !== 'user') {
|
||||||
|
return res.status(400).json({error: 'Last message must be from user'});
|
||||||
|
}
|
||||||
|
|
||||||
|
const requestId = Date.now().toString();
|
||||||
|
const context: TrackerContext = {
|
||||||
|
tokenTracker: new TokenTracker(),
|
||||||
|
actionTracker: new ActionTracker()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Track prompt tokens for the initial message
|
||||||
|
// Use Vercel's token counting convention - 1 token per message
|
||||||
|
const messageTokens = body.messages.length;
|
||||||
|
context.tokenTracker.trackUsage('agent', messageTokens, TOKEN_CATEGORIES.PROMPT);
|
||||||
|
|
||||||
|
// Add this inside the chat completions endpoint, before setting up the action listener
|
||||||
|
const streamingState: StreamingState = {
|
||||||
|
currentlyStreaming: false,
|
||||||
|
currentGenerator: null,
|
||||||
|
remainingContent: ''
|
||||||
|
};
|
||||||
|
|
||||||
|
if (body.stream) {
|
||||||
|
res.setHeader('Content-Type', 'text/event-stream');
|
||||||
|
res.setHeader('Cache-Control', 'no-cache');
|
||||||
|
res.setHeader('Connection', 'keep-alive');
|
||||||
|
|
||||||
|
|
||||||
|
// Send initial chunk with opening think tag
|
||||||
|
const initialChunk: ChatCompletionChunk = {
|
||||||
|
id: requestId,
|
||||||
|
object: 'chat.completion.chunk',
|
||||||
|
created: Math.floor(Date.now() / 1000),
|
||||||
|
model: body.model,
|
||||||
|
system_fingerprint: 'fp_' + requestId,
|
||||||
|
choices: [{
|
||||||
|
index: 0,
|
||||||
|
delta: {role: 'assistant', content: '<think>'},
|
||||||
|
logprobs: null,
|
||||||
|
finish_reason: null
|
||||||
|
}]
|
||||||
|
};
|
||||||
|
res.write(`data: ${JSON.stringify(initialChunk)}\n\n`);
|
||||||
|
|
||||||
|
// Set up progress listener with cleanup
|
||||||
|
const actionListener = async (action: any) => {
|
||||||
|
if (action.thisStep.think) {
|
||||||
|
// Complete any ongoing streaming first
|
||||||
|
await completeCurrentStreaming(streamingState, res, requestId, body.model);
|
||||||
|
|
||||||
|
// Start new streaming session
|
||||||
|
streamingState.currentlyStreaming = true;
|
||||||
|
streamingState.remainingContent = action.thisStep.think;
|
||||||
|
|
||||||
|
try {
|
||||||
|
for await (const word of streamTextWordByWord(action.thisStep.think, streamingState)) {
|
||||||
|
if (!streamingState.currentlyStreaming) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update remaining content
|
||||||
|
streamingState.remainingContent = streamingState.remainingContent.slice(word.length);
|
||||||
|
|
||||||
|
const chunk: ChatCompletionChunk = {
|
||||||
|
id: requestId,
|
||||||
|
object: 'chat.completion.chunk',
|
||||||
|
created: Math.floor(Date.now() / 1000),
|
||||||
|
model: body.model,
|
||||||
|
system_fingerprint: 'fp_' + requestId,
|
||||||
|
choices: [{
|
||||||
|
index: 0,
|
||||||
|
delta: {content: word},
|
||||||
|
logprobs: null,
|
||||||
|
finish_reason: null
|
||||||
|
}]
|
||||||
|
};
|
||||||
|
res.write(`data: ${JSON.stringify(chunk)}\n\n`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only add newline if this streaming completed normally
|
||||||
|
if (streamingState.currentlyStreaming) {
|
||||||
|
const newlineChunk: ChatCompletionChunk = {
|
||||||
|
id: requestId,
|
||||||
|
object: 'chat.completion.chunk',
|
||||||
|
created: Math.floor(Date.now() / 1000),
|
||||||
|
model: body.model,
|
||||||
|
system_fingerprint: 'fp_' + requestId,
|
||||||
|
choices: [{
|
||||||
|
index: 0,
|
||||||
|
delta: {content: '\n'},
|
||||||
|
logprobs: null,
|
||||||
|
finish_reason: null
|
||||||
|
}]
|
||||||
|
};
|
||||||
|
res.write(`data: ${JSON.stringify(newlineChunk)}\n\n`);
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error in streaming:', error);
|
||||||
|
await completeCurrentStreaming(streamingState, res, requestId, body.model);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
context.actionTracker.on('action', actionListener);
|
||||||
|
|
||||||
|
// Make sure to update the cleanup code
|
||||||
|
res.on('finish', () => {
|
||||||
|
streamingState.currentlyStreaming = false;
|
||||||
|
streamingState.currentGenerator = null;
|
||||||
|
streamingState.remainingContent = '';
|
||||||
|
context.actionTracker.removeListener('action', actionListener);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Track initial query tokens - already tracked above
|
||||||
|
// const queryTokens = Buffer.byteLength(lastMessage.content, 'utf-8');
|
||||||
|
// context.tokenTracker.trackUsage('agent', queryTokens, 'prompt');
|
||||||
|
|
||||||
|
let result;
|
||||||
|
try {
|
||||||
|
({result} = await getResponse(lastMessage.content, undefined, undefined, context));
|
||||||
|
} catch (error: any) {
|
||||||
|
// If deduplication fails, retry without it
|
||||||
|
if (error?.response?.status === 402) {
|
||||||
|
// If deduplication fails, retry with maxBadAttempt=3 to skip dedup
|
||||||
|
({result} = await getResponse(lastMessage.content, undefined, 3, context));
|
||||||
|
} else {
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Track tokens based on action type
|
||||||
|
if (result.action === 'answer') {
|
||||||
|
// Track accepted prediction tokens for the final answer using Vercel's convention
|
||||||
|
const answerTokens = 1; // Default to 1 token per answer
|
||||||
|
context.tokenTracker.trackUsage('evaluator', answerTokens, TOKEN_CATEGORIES.ACCEPTED);
|
||||||
|
} else {
|
||||||
|
// Track rejected prediction tokens for non-answer responses
|
||||||
|
const rejectedTokens = 1; // Default to 1 token per rejected response
|
||||||
|
context.tokenTracker.trackUsage('evaluator', rejectedTokens, TOKEN_CATEGORIES.REJECTED);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (body.stream) {
|
||||||
|
// Complete any ongoing streaming before sending final answer
|
||||||
|
await completeCurrentStreaming(streamingState, res, requestId, body.model);
|
||||||
|
|
||||||
|
// Send closing think tag
|
||||||
|
const closeThinkChunk: ChatCompletionChunk = {
|
||||||
|
id: requestId,
|
||||||
|
object: 'chat.completion.chunk',
|
||||||
|
created: Math.floor(Date.now() / 1000),
|
||||||
|
model: body.model,
|
||||||
|
system_fingerprint: 'fp_' + requestId,
|
||||||
|
choices: [{
|
||||||
|
index: 0,
|
||||||
|
delta: {content: `</think>\n\n`},
|
||||||
|
logprobs: null,
|
||||||
|
finish_reason: null
|
||||||
|
}]
|
||||||
|
};
|
||||||
|
res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);
|
||||||
|
|
||||||
|
// Send final answer as separate chunk
|
||||||
|
const answerChunk: ChatCompletionChunk = {
|
||||||
|
id: requestId,
|
||||||
|
object: 'chat.completion.chunk',
|
||||||
|
created: Math.floor(Date.now() / 1000),
|
||||||
|
model: body.model,
|
||||||
|
system_fingerprint: 'fp_' + requestId,
|
||||||
|
choices: [{
|
||||||
|
index: 0,
|
||||||
|
delta: {content: result.action === 'answer' ? buildMdFromAnswer(result) : result.think},
|
||||||
|
logprobs: null,
|
||||||
|
finish_reason: 'stop'
|
||||||
|
}]
|
||||||
|
};
|
||||||
|
res.write(`data: ${JSON.stringify(answerChunk)}\n\n`);
|
||||||
|
res.end();
|
||||||
|
} else {
|
||||||
|
const usage = context.tokenTracker.getUsageDetails();
|
||||||
|
const response: ChatCompletionResponse = {
|
||||||
|
id: requestId,
|
||||||
|
object: 'chat.completion',
|
||||||
|
created: Math.floor(Date.now() / 1000),
|
||||||
|
model: body.model,
|
||||||
|
system_fingerprint: 'fp_' + requestId,
|
||||||
|
choices: [{
|
||||||
|
index: 0,
|
||||||
|
message: {
|
||||||
|
role: 'assistant',
|
||||||
|
content: result.action === 'answer' ? buildMdFromAnswer(result) : result.think
|
||||||
|
},
|
||||||
|
logprobs: null,
|
||||||
|
finish_reason: 'stop'
|
||||||
|
}],
|
||||||
|
usage
|
||||||
|
};
|
||||||
|
|
||||||
|
// Log final response (excluding full content for brevity)
|
||||||
|
console.log('[chat/completions] Response:', {
|
||||||
|
id: response.id,
|
||||||
|
status: 200,
|
||||||
|
contentLength: response.choices[0].message.content.length,
|
||||||
|
usage: response.usage
|
||||||
|
});
|
||||||
|
|
||||||
|
res.json(response);
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
// Log error details
|
||||||
|
console.error('[chat/completions] Error:', {
|
||||||
|
message: error?.message || 'An error occurred',
|
||||||
|
stack: error?.stack,
|
||||||
|
type: error?.constructor?.name,
|
||||||
|
requestId
|
||||||
|
});
|
||||||
|
|
||||||
|
// Track error as rejected tokens with Vercel token counting
|
||||||
|
const errorMessage = error?.message || 'An error occurred';
|
||||||
|
// Default to 1 token for errors as per Vercel AI SDK convention
|
||||||
|
const errorTokens = 1;
|
||||||
|
context.tokenTracker.trackUsage('evaluator', errorTokens, TOKEN_CATEGORIES.REJECTED);
|
||||||
|
|
||||||
|
// Clean up event listeners
|
||||||
|
context.actionTracker.removeAllListeners('action');
|
||||||
|
|
||||||
|
// Get token usage in OpenAI API format
|
||||||
|
const usage = context.tokenTracker.getUsageDetails();
|
||||||
|
|
||||||
|
if (body.stream && res.headersSent) {
|
||||||
|
// For streaming responses that have already started, send error as a chunk
|
||||||
|
// First send closing think tag if we're in the middle of thinking
|
||||||
|
const closeThinkChunk: ChatCompletionChunk = {
|
||||||
|
id: requestId,
|
||||||
|
object: 'chat.completion.chunk',
|
||||||
|
created: Math.floor(Date.now() / 1000),
|
||||||
|
model: body.model,
|
||||||
|
system_fingerprint: 'fp_' + requestId,
|
||||||
|
choices: [{
|
||||||
|
index: 0,
|
||||||
|
delta: {content: '</think>'},
|
||||||
|
logprobs: null,
|
||||||
|
finish_reason: null
|
||||||
|
}]
|
||||||
|
};
|
||||||
|
res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);
|
||||||
|
|
||||||
|
// Track error token and send error message
|
||||||
|
context.tokenTracker.trackUsage('evaluator', 1, TOKEN_CATEGORIES.REJECTED);
|
||||||
|
const errorChunk: ChatCompletionChunk = {
|
||||||
|
id: requestId,
|
||||||
|
object: 'chat.completion.chunk',
|
||||||
|
created: Math.floor(Date.now() / 1000),
|
||||||
|
model: body.model,
|
||||||
|
system_fingerprint: 'fp_' + requestId,
|
||||||
|
choices: [{
|
||||||
|
index: 0,
|
||||||
|
delta: {content: errorMessage},
|
||||||
|
logprobs: null,
|
||||||
|
finish_reason: 'stop'
|
||||||
|
}]
|
||||||
|
};
|
||||||
|
res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
|
||||||
|
res.end();
|
||||||
|
} else {
|
||||||
|
// For non-streaming or not-yet-started responses, send error as JSON
|
||||||
|
const response: ChatCompletionResponse = {
|
||||||
|
id: requestId,
|
||||||
|
object: 'chat.completion',
|
||||||
|
created: Math.floor(Date.now() / 1000),
|
||||||
|
model: body.model,
|
||||||
|
system_fingerprint: 'fp_' + requestId,
|
||||||
|
choices: [{
|
||||||
|
index: 0,
|
||||||
|
message: {
|
||||||
|
role: 'assistant',
|
||||||
|
content: `Error: ${errorMessage}`
|
||||||
|
},
|
||||||
|
logprobs: null,
|
||||||
|
finish_reason: 'stop'
|
||||||
|
}],
|
||||||
|
usage
|
||||||
|
};
|
||||||
|
res.json(response);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}) as RequestHandler);
|
||||||
|
|
||||||
|
interface StreamResponse extends Response {
|
||||||
|
write: (chunk: string) => boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
function createProgressEmitter(requestId: string, budget: number | undefined, context: TrackerContext) {
|
||||||
|
return () => {
|
||||||
|
const state = context.actionTracker.getState();
|
||||||
|
const budgetInfo = {
|
||||||
|
used: context.tokenTracker.getTotalUsage(),
|
||||||
|
total: budget || 1_000_000,
|
||||||
|
percentage: ((context.tokenTracker.getTotalUsage() / (budget || 1_000_000)) * 100).toFixed(2)
|
||||||
|
};
|
||||||
|
|
||||||
|
eventEmitter.emit(`progress-${requestId}`, {
|
||||||
|
type: 'progress',
|
||||||
|
data: {...state.thisStep, totalStep: state.totalStep},
|
||||||
|
step: state.totalStep,
|
||||||
|
budget: budgetInfo,
|
||||||
|
trackers: {
|
||||||
|
tokenUsage: context.tokenTracker.getTotalUsage(),
|
||||||
|
actionState: context.actionTracker.getState()
|
||||||
|
}
|
||||||
|
});
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function cleanup(requestId: string) {
|
||||||
|
const context = trackers.get(requestId);
|
||||||
|
if (context) {
|
||||||
|
context.actionTracker.removeAllListeners();
|
||||||
|
context.tokenTracker.removeAllListeners();
|
||||||
|
trackers.delete(requestId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function emitTrackerUpdate(requestId: string, context: TrackerContext) {
|
||||||
|
const trackerData = {
|
||||||
|
tokenUsage: context.tokenTracker.getTotalUsage(),
|
||||||
|
tokenBreakdown: context.tokenTracker.getUsageBreakdown(),
|
||||||
|
actionState: context.actionTracker.getState().thisStep,
|
||||||
|
step: context.actionTracker.getState().totalStep,
|
||||||
|
badAttempts: context.actionTracker.getState().badAttempts,
|
||||||
|
gaps: context.actionTracker.getState().gaps
|
||||||
|
};
|
||||||
|
|
||||||
|
eventEmitter.emit(`progress-${requestId}`, {
|
||||||
|
type: 'progress',
|
||||||
|
trackers: trackerData
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store the trackers for each request
|
||||||
|
const trackers = new Map<string, TrackerContext>();
|
||||||
|
|
||||||
|
app.post('/api/v1/query', (async (req: QueryRequest, res: Response) => {
|
||||||
|
const {q, budget, maxBadAttempt} = req.body;
|
||||||
|
if (!q) {
|
||||||
|
return res.status(400).json({error: 'Query (q) is required'});
|
||||||
|
}
|
||||||
|
|
||||||
|
const requestId = Date.now().toString();
|
||||||
|
|
||||||
|
// Create new trackers for this request
|
||||||
|
const context: TrackerContext = {
|
||||||
|
tokenTracker: new TokenTracker(),
|
||||||
|
actionTracker: new ActionTracker()
|
||||||
|
};
|
||||||
|
trackers.set(requestId, context);
|
||||||
|
|
||||||
|
// Set up listeners immediately for both trackers
|
||||||
|
context.actionTracker.on('action', () => emitTrackerUpdate(requestId, context));
|
||||||
|
// context.tokenTracker.on('usage', () => emitTrackerUpdate(requestId, context));
|
||||||
|
|
||||||
|
res.json({requestId});
|
||||||
|
|
||||||
|
try {
|
||||||
|
const {result} = await getResponse(q, budget, maxBadAttempt, context);
|
||||||
|
const emitProgress = createProgressEmitter(requestId, budget, context);
|
||||||
|
context.actionTracker.on('action', emitProgress);
|
||||||
|
await storeTaskResult(requestId, result);
|
||||||
|
eventEmitter.emit(`progress-${requestId}`, {
|
||||||
|
type: 'answer',
|
||||||
|
data: result,
|
||||||
|
trackers: {
|
||||||
|
tokenUsage: context.tokenTracker.getTotalUsage(),
|
||||||
|
actionState: context.actionTracker.getState()
|
||||||
|
}
|
||||||
|
});
|
||||||
|
cleanup(requestId);
|
||||||
|
} catch (error: any) {
|
||||||
|
eventEmitter.emit(`progress-${requestId}`, {
|
||||||
|
type: 'error',
|
||||||
|
data: error?.message || 'Unknown error',
|
||||||
|
status: 500,
|
||||||
|
trackers: {
|
||||||
|
tokenUsage: context.tokenTracker.getTotalUsage(),
|
||||||
|
actionState: context.actionTracker.getState()
|
||||||
|
}
|
||||||
|
});
|
||||||
|
cleanup(requestId);
|
||||||
|
}
|
||||||
|
}) as RequestHandler);
|
||||||
|
|
||||||
|
app.get('/api/v1/stream/:requestId', (async (req: Request, res: StreamResponse) => {
|
||||||
|
const requestId = req.params.requestId;
|
||||||
|
const context = trackers.get(requestId);
|
||||||
|
|
||||||
|
res.setHeader('Content-Type', 'text/event-stream');
|
||||||
|
res.setHeader('Cache-Control', 'no-cache');
|
||||||
|
res.setHeader('Connection', 'keep-alive');
|
||||||
|
|
||||||
|
const listener = (data: StreamMessage) => {
|
||||||
|
// The trackers are now included in all event types
|
||||||
|
// We don't need to add them here as they're already part of the data
|
||||||
|
res.write(`data: ${JSON.stringify(data)}\n\n`);
|
||||||
|
};
|
||||||
|
|
||||||
|
eventEmitter.on(`progress-${requestId}`, listener);
|
||||||
|
|
||||||
|
// Handle client disconnection
|
||||||
|
req.on('close', () => {
|
||||||
|
eventEmitter.removeListener(`progress-${requestId}`, listener);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Send initial connection confirmation with tracker state
|
||||||
|
const initialData = {
|
||||||
|
type: 'connected',
|
||||||
|
requestId,
|
||||||
|
trackers: context ? {
|
||||||
|
tokenUsage: context.tokenTracker.getTotalUsage(),
|
||||||
|
actionState: context.actionTracker.getState()
|
||||||
|
} : null
|
||||||
|
};
|
||||||
|
res.write(`data: ${JSON.stringify(initialData)}\n\n`);
|
||||||
|
}) as RequestHandler);
|
||||||
|
|
||||||
|
async function storeTaskResult(requestId: string, result: StepAction) {
|
||||||
|
try {
|
||||||
|
const taskDir = path.join(process.cwd(), 'tasks');
|
||||||
|
await fs.mkdir(taskDir, {recursive: true});
|
||||||
|
await fs.writeFile(
|
||||||
|
path.join(taskDir, `${requestId}.json`),
|
||||||
|
JSON.stringify(result, null, 2)
|
||||||
|
);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Task storage failed:', error);
|
||||||
|
throw new Error('Failed to store task result');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
app.get('/api/v1/task/:requestId', (async (req: Request, res: Response) => {
|
||||||
|
const requestId = req.params.requestId;
|
||||||
|
try {
|
||||||
|
const taskPath = path.join(process.cwd(), 'tasks', `${requestId}.json`);
|
||||||
|
const taskData = await fs.readFile(taskPath, 'utf-8');
|
||||||
|
res.json(JSON.parse(taskData));
|
||||||
|
} catch (error) {
|
||||||
|
res.status(404).json({error: 'Task not found'});
|
||||||
|
}
|
||||||
|
}) as RequestHandler);
|
||||||
|
|
||||||
|
export default app;
|
||||||
636
src/server.ts
636
src/server.ts
@ -1,637 +1,7 @@
|
|||||||
import express, {Request, Response, RequestHandler} from 'express';
|
import app from "./app";
|
||||||
import cors from 'cors';
|
|
||||||
import {EventEmitter} from 'events';
|
|
||||||
import {getResponse} from './agent';
|
|
||||||
import {
|
|
||||||
StepAction,
|
|
||||||
StreamMessage,
|
|
||||||
TrackerContext,
|
|
||||||
ChatCompletionRequest,
|
|
||||||
ChatCompletionResponse,
|
|
||||||
ChatCompletionChunk,
|
|
||||||
AnswerAction,
|
|
||||||
TOKEN_CATEGORIES,
|
|
||||||
Model
|
|
||||||
} from './types';
|
|
||||||
import fs from 'fs/promises';
|
|
||||||
import path from 'path';
|
|
||||||
import {TokenTracker} from "./utils/token-tracker";
|
|
||||||
import {ActionTracker} from "./utils/action-tracker";
|
|
||||||
|
|
||||||
const app = express();
|
|
||||||
const port = process.env.PORT || 3000;
|
const port = process.env.PORT || 3000;
|
||||||
|
|
||||||
// Get secret from command line args for optional authentication
|
|
||||||
const secret = process.argv.find(arg => arg.startsWith('--secret='))?.split('=')[1];
|
|
||||||
|
|
||||||
app.use(cors());
|
|
||||||
app.use(express.json());
|
|
||||||
|
|
||||||
const eventEmitter = new EventEmitter();
|
|
||||||
|
|
||||||
interface QueryRequest extends Request {
|
|
||||||
body: {
|
|
||||||
q: string;
|
|
||||||
budget?: number;
|
|
||||||
maxBadAttempt?: number;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
function buildMdFromAnswer(answer: AnswerAction) {
|
|
||||||
let refStr = '';
|
|
||||||
if (answer.references?.length > 0) {
|
|
||||||
refStr = `
|
|
||||||
|
|
||||||
## References
|
|
||||||
${answer.references.map((ref, i) => `
|
|
||||||
${i + 1}. [${ref.exactQuote}](${ref.url})`).join('')}`;
|
|
||||||
}
|
|
||||||
return `${answer.answer.replace(/\(REF_(\d+)\)/g, (_, num) => `[^${num}]`)}${refStr}`;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Modified streamTextWordByWord function
|
|
||||||
async function* streamTextWordByWord(text: string, streamingState: StreamingState) {
|
|
||||||
const words = text.split(/(\s+)/);
|
|
||||||
for (const word of words) {
|
|
||||||
if (streamingState.currentlyStreaming) {
|
|
||||||
const delay = Math.floor(Math.random() * 100);
|
|
||||||
await new Promise(resolve => setTimeout(resolve, delay));
|
|
||||||
yield word;
|
|
||||||
} else {
|
|
||||||
// If streaming was interrupted, yield all remaining words at once
|
|
||||||
const remainingWords = words.slice(words.indexOf(word)).join('');
|
|
||||||
yield remainingWords;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper function to emit remaining content immediately
|
|
||||||
async function emitRemainingContent(
|
|
||||||
res: Response,
|
|
||||||
requestId: string,
|
|
||||||
model: string,
|
|
||||||
content: string
|
|
||||||
) {
|
|
||||||
if (!content) return;
|
|
||||||
|
|
||||||
const chunk: ChatCompletionChunk = {
|
|
||||||
id: requestId,
|
|
||||||
object: 'chat.completion.chunk',
|
|
||||||
created: Math.floor(Date.now() / 1000),
|
|
||||||
model: model,
|
|
||||||
system_fingerprint: 'fp_' + requestId,
|
|
||||||
choices: [{
|
|
||||||
index: 0,
|
|
||||||
delta: {content},
|
|
||||||
logprobs: null,
|
|
||||||
finish_reason: null
|
|
||||||
}]
|
|
||||||
};
|
|
||||||
res.write(`data: ${JSON.stringify(chunk)}\n\n`);
|
|
||||||
}
|
|
||||||
|
|
||||||
interface StreamingState {
|
|
||||||
currentlyStreaming: boolean;
|
|
||||||
currentGenerator: AsyncGenerator<string> | null;
|
|
||||||
remainingContent: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function completeCurrentStreaming(
|
|
||||||
streamingState: StreamingState,
|
|
||||||
res: Response,
|
|
||||||
requestId: string,
|
|
||||||
model: string
|
|
||||||
) {
|
|
||||||
if (streamingState.currentlyStreaming && streamingState.remainingContent) {
|
|
||||||
// Force completion of current streaming
|
|
||||||
await emitRemainingContent(
|
|
||||||
res,
|
|
||||||
requestId,
|
|
||||||
model,
|
|
||||||
streamingState.remainingContent
|
|
||||||
);
|
|
||||||
// Reset streaming state
|
|
||||||
streamingState.currentlyStreaming = false;
|
|
||||||
streamingState.remainingContent = '';
|
|
||||||
streamingState.currentGenerator = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// OpenAI-compatible chat completions endpoint
|
|
||||||
// Models API endpoints
|
|
||||||
app.get('/v1/models', (async (_req: Request, res: Response) => {
|
|
||||||
const models: Model[] = [{
|
|
||||||
id: 'jina-deepsearch-v1',
|
|
||||||
object: 'model',
|
|
||||||
created: 1686935002,
|
|
||||||
owned_by: 'jina-ai'
|
|
||||||
}];
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
object: 'list',
|
|
||||||
data: models
|
|
||||||
});
|
|
||||||
}) as RequestHandler);
|
|
||||||
|
|
||||||
app.get('/v1/models/:model', (async (req: Request, res: Response) => {
|
|
||||||
const modelId = req.params.model;
|
|
||||||
|
|
||||||
if (modelId === 'jina-deepsearch-v1') {
|
|
||||||
res.json({
|
|
||||||
id: 'jina-deepsearch-v1',
|
|
||||||
object: 'model',
|
|
||||||
created: 1686935002,
|
|
||||||
owned_by: 'jina-ai'
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
res.status(404).json({
|
|
||||||
error: {
|
|
||||||
message: `Model '${modelId}' not found`,
|
|
||||||
type: 'invalid_request_error',
|
|
||||||
param: null,
|
|
||||||
code: 'model_not_found'
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}) as RequestHandler);
|
|
||||||
|
|
||||||
|
|
||||||
app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
|
|
||||||
// Check authentication only if secret is set
|
|
||||||
if (secret) {
|
|
||||||
const authHeader = req.headers.authorization;
|
|
||||||
if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
|
|
||||||
console.log('[chat/completions] Unauthorized request');
|
|
||||||
res.status(401).json({error: 'Unauthorized'});
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Log request details (excluding sensitive data)
|
|
||||||
console.log('[chat/completions] Request:', {
|
|
||||||
model: req.body.model,
|
|
||||||
stream: req.body.stream,
|
|
||||||
messageCount: req.body.messages?.length,
|
|
||||||
hasAuth: !!req.headers.authorization,
|
|
||||||
requestId: Date.now().toString()
|
|
||||||
});
|
|
||||||
|
|
||||||
const body = req.body as ChatCompletionRequest;
|
|
||||||
if (!body.messages?.length) {
|
|
||||||
return res.status(400).json({error: 'Messages array is required and must not be empty'});
|
|
||||||
}
|
|
||||||
const lastMessage = body.messages[body.messages.length - 1];
|
|
||||||
if (lastMessage.role !== 'user') {
|
|
||||||
return res.status(400).json({error: 'Last message must be from user'});
|
|
||||||
}
|
|
||||||
|
|
||||||
const requestId = Date.now().toString();
|
|
||||||
const context: TrackerContext = {
|
|
||||||
tokenTracker: new TokenTracker(),
|
|
||||||
actionTracker: new ActionTracker()
|
|
||||||
};
|
|
||||||
|
|
||||||
// Track prompt tokens for the initial message
|
|
||||||
// Use Vercel's token counting convention - 1 token per message
|
|
||||||
const messageTokens = body.messages.length;
|
|
||||||
context.tokenTracker.trackUsage('agent', messageTokens, TOKEN_CATEGORIES.PROMPT);
|
|
||||||
|
|
||||||
// Add this inside the chat completions endpoint, before setting up the action listener
|
|
||||||
const streamingState: StreamingState = {
|
|
||||||
currentlyStreaming: false,
|
|
||||||
currentGenerator: null,
|
|
||||||
remainingContent: ''
|
|
||||||
};
|
|
||||||
|
|
||||||
if (body.stream) {
|
|
||||||
res.setHeader('Content-Type', 'text/event-stream');
|
|
||||||
res.setHeader('Cache-Control', 'no-cache');
|
|
||||||
res.setHeader('Connection', 'keep-alive');
|
|
||||||
|
|
||||||
|
|
||||||
// Send initial chunk with opening think tag
|
|
||||||
const initialChunk: ChatCompletionChunk = {
|
|
||||||
id: requestId,
|
|
||||||
object: 'chat.completion.chunk',
|
|
||||||
created: Math.floor(Date.now() / 1000),
|
|
||||||
model: body.model,
|
|
||||||
system_fingerprint: 'fp_' + requestId,
|
|
||||||
choices: [{
|
|
||||||
index: 0,
|
|
||||||
delta: {role: 'assistant', content: '<think>'},
|
|
||||||
logprobs: null,
|
|
||||||
finish_reason: null
|
|
||||||
}]
|
|
||||||
};
|
|
||||||
res.write(`data: ${JSON.stringify(initialChunk)}\n\n`);
|
|
||||||
|
|
||||||
// Set up progress listener with cleanup
|
|
||||||
const actionListener = async (action: any) => {
|
|
||||||
if (action.thisStep.think) {
|
|
||||||
// Complete any ongoing streaming first
|
|
||||||
await completeCurrentStreaming(streamingState, res, requestId, body.model);
|
|
||||||
|
|
||||||
// Start new streaming session
|
|
||||||
streamingState.currentlyStreaming = true;
|
|
||||||
streamingState.remainingContent = action.thisStep.think;
|
|
||||||
|
|
||||||
try {
|
|
||||||
for await (const word of streamTextWordByWord(action.thisStep.think, streamingState)) {
|
|
||||||
if (!streamingState.currentlyStreaming) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update remaining content
|
|
||||||
streamingState.remainingContent = streamingState.remainingContent.slice(word.length);
|
|
||||||
|
|
||||||
const chunk: ChatCompletionChunk = {
|
|
||||||
id: requestId,
|
|
||||||
object: 'chat.completion.chunk',
|
|
||||||
created: Math.floor(Date.now() / 1000),
|
|
||||||
model: body.model,
|
|
||||||
system_fingerprint: 'fp_' + requestId,
|
|
||||||
choices: [{
|
|
||||||
index: 0,
|
|
||||||
delta: {content: word},
|
|
||||||
logprobs: null,
|
|
||||||
finish_reason: null
|
|
||||||
}]
|
|
||||||
};
|
|
||||||
res.write(`data: ${JSON.stringify(chunk)}\n\n`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only add newline if this streaming completed normally
|
|
||||||
if (streamingState.currentlyStreaming) {
|
|
||||||
const newlineChunk: ChatCompletionChunk = {
|
|
||||||
id: requestId,
|
|
||||||
object: 'chat.completion.chunk',
|
|
||||||
created: Math.floor(Date.now() / 1000),
|
|
||||||
model: body.model,
|
|
||||||
system_fingerprint: 'fp_' + requestId,
|
|
||||||
choices: [{
|
|
||||||
index: 0,
|
|
||||||
delta: {content: '\n'},
|
|
||||||
logprobs: null,
|
|
||||||
finish_reason: null
|
|
||||||
}]
|
|
||||||
};
|
|
||||||
res.write(`data: ${JSON.stringify(newlineChunk)}\n\n`);
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
console.error('Error in streaming:', error);
|
|
||||||
await completeCurrentStreaming(streamingState, res, requestId, body.model);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
context.actionTracker.on('action', actionListener);
|
|
||||||
|
|
||||||
// Make sure to update the cleanup code
|
|
||||||
res.on('finish', () => {
|
|
||||||
streamingState.currentlyStreaming = false;
|
|
||||||
streamingState.currentGenerator = null;
|
|
||||||
streamingState.remainingContent = '';
|
|
||||||
context.actionTracker.removeListener('action', actionListener);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Track initial query tokens - already tracked above
|
|
||||||
// const queryTokens = Buffer.byteLength(lastMessage.content, 'utf-8');
|
|
||||||
// context.tokenTracker.trackUsage('agent', queryTokens, 'prompt');
|
|
||||||
|
|
||||||
let result;
|
|
||||||
try {
|
|
||||||
({result} = await getResponse(lastMessage.content, undefined, undefined, context));
|
|
||||||
} catch (error: any) {
|
|
||||||
// If deduplication fails, retry without it
|
|
||||||
if (error?.response?.status === 402) {
|
|
||||||
// If deduplication fails, retry with maxBadAttempt=3 to skip dedup
|
|
||||||
({result} = await getResponse(lastMessage.content, undefined, 3, context));
|
|
||||||
} else {
|
|
||||||
throw error;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Track tokens based on action type
|
|
||||||
if (result.action === 'answer') {
|
|
||||||
// Track accepted prediction tokens for the final answer using Vercel's convention
|
|
||||||
const answerTokens = 1; // Default to 1 token per answer
|
|
||||||
context.tokenTracker.trackUsage('evaluator', answerTokens, TOKEN_CATEGORIES.ACCEPTED);
|
|
||||||
} else {
|
|
||||||
// Track rejected prediction tokens for non-answer responses
|
|
||||||
const rejectedTokens = 1; // Default to 1 token per rejected response
|
|
||||||
context.tokenTracker.trackUsage('evaluator', rejectedTokens, TOKEN_CATEGORIES.REJECTED);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (body.stream) {
|
|
||||||
// Complete any ongoing streaming before sending final answer
|
|
||||||
await completeCurrentStreaming(streamingState, res, requestId, body.model);
|
|
||||||
|
|
||||||
// Send closing think tag
|
|
||||||
const closeThinkChunk: ChatCompletionChunk = {
|
|
||||||
id: requestId,
|
|
||||||
object: 'chat.completion.chunk',
|
|
||||||
created: Math.floor(Date.now() / 1000),
|
|
||||||
model: body.model,
|
|
||||||
system_fingerprint: 'fp_' + requestId,
|
|
||||||
choices: [{
|
|
||||||
index: 0,
|
|
||||||
delta: {content: `</think>\n\n`},
|
|
||||||
logprobs: null,
|
|
||||||
finish_reason: null
|
|
||||||
}]
|
|
||||||
};
|
|
||||||
res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);
|
|
||||||
|
|
||||||
// Send final answer as separate chunk
|
|
||||||
const answerChunk: ChatCompletionChunk = {
|
|
||||||
id: requestId,
|
|
||||||
object: 'chat.completion.chunk',
|
|
||||||
created: Math.floor(Date.now() / 1000),
|
|
||||||
model: body.model,
|
|
||||||
system_fingerprint: 'fp_' + requestId,
|
|
||||||
choices: [{
|
|
||||||
index: 0,
|
|
||||||
delta: {content: result.action === 'answer' ? buildMdFromAnswer(result) : result.think},
|
|
||||||
logprobs: null,
|
|
||||||
finish_reason: 'stop'
|
|
||||||
}]
|
|
||||||
};
|
|
||||||
res.write(`data: ${JSON.stringify(answerChunk)}\n\n`);
|
|
||||||
res.end();
|
|
||||||
} else {
|
|
||||||
const usage = context.tokenTracker.getUsageDetails();
|
|
||||||
const response: ChatCompletionResponse = {
|
|
||||||
id: requestId,
|
|
||||||
object: 'chat.completion',
|
|
||||||
created: Math.floor(Date.now() / 1000),
|
|
||||||
model: body.model,
|
|
||||||
system_fingerprint: 'fp_' + requestId,
|
|
||||||
choices: [{
|
|
||||||
index: 0,
|
|
||||||
message: {
|
|
||||||
role: 'assistant',
|
|
||||||
content: result.action === 'answer' ? buildMdFromAnswer(result) : result.think
|
|
||||||
},
|
|
||||||
logprobs: null,
|
|
||||||
finish_reason: 'stop'
|
|
||||||
}],
|
|
||||||
usage
|
|
||||||
};
|
|
||||||
|
|
||||||
// Log final response (excluding full content for brevity)
|
|
||||||
console.log('[chat/completions] Response:', {
|
|
||||||
id: response.id,
|
|
||||||
status: 200,
|
|
||||||
contentLength: response.choices[0].message.content.length,
|
|
||||||
usage: response.usage
|
|
||||||
});
|
|
||||||
|
|
||||||
res.json(response);
|
|
||||||
}
|
|
||||||
} catch (error: any) {
|
|
||||||
// Log error details
|
|
||||||
console.error('[chat/completions] Error:', {
|
|
||||||
message: error?.message || 'An error occurred',
|
|
||||||
stack: error?.stack,
|
|
||||||
type: error?.constructor?.name,
|
|
||||||
requestId
|
|
||||||
});
|
|
||||||
|
|
||||||
// Track error as rejected tokens with Vercel token counting
|
|
||||||
const errorMessage = error?.message || 'An error occurred';
|
|
||||||
// Default to 1 token for errors as per Vercel AI SDK convention
|
|
||||||
const errorTokens = 1;
|
|
||||||
context.tokenTracker.trackUsage('evaluator', errorTokens, TOKEN_CATEGORIES.REJECTED);
|
|
||||||
|
|
||||||
// Clean up event listeners
|
|
||||||
context.actionTracker.removeAllListeners('action');
|
|
||||||
|
|
||||||
// Get token usage in OpenAI API format
|
|
||||||
const usage = context.tokenTracker.getUsageDetails();
|
|
||||||
|
|
||||||
if (body.stream && res.headersSent) {
|
|
||||||
// For streaming responses that have already started, send error as a chunk
|
|
||||||
// First send closing think tag if we're in the middle of thinking
|
|
||||||
const closeThinkChunk: ChatCompletionChunk = {
|
|
||||||
id: requestId,
|
|
||||||
object: 'chat.completion.chunk',
|
|
||||||
created: Math.floor(Date.now() / 1000),
|
|
||||||
model: body.model,
|
|
||||||
system_fingerprint: 'fp_' + requestId,
|
|
||||||
choices: [{
|
|
||||||
index: 0,
|
|
||||||
delta: {content: '</think>'},
|
|
||||||
logprobs: null,
|
|
||||||
finish_reason: null
|
|
||||||
}]
|
|
||||||
};
|
|
||||||
res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);
|
|
||||||
|
|
||||||
// Track error token and send error message
|
|
||||||
context.tokenTracker.trackUsage('evaluator', 1, TOKEN_CATEGORIES.REJECTED);
|
|
||||||
const errorChunk: ChatCompletionChunk = {
|
|
||||||
id: requestId,
|
|
||||||
object: 'chat.completion.chunk',
|
|
||||||
created: Math.floor(Date.now() / 1000),
|
|
||||||
model: body.model,
|
|
||||||
system_fingerprint: 'fp_' + requestId,
|
|
||||||
choices: [{
|
|
||||||
index: 0,
|
|
||||||
delta: {content: errorMessage},
|
|
||||||
logprobs: null,
|
|
||||||
finish_reason: 'stop'
|
|
||||||
}]
|
|
||||||
};
|
|
||||||
res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
|
|
||||||
res.end();
|
|
||||||
} else {
|
|
||||||
// For non-streaming or not-yet-started responses, send error as JSON
|
|
||||||
const response: ChatCompletionResponse = {
|
|
||||||
id: requestId,
|
|
||||||
object: 'chat.completion',
|
|
||||||
created: Math.floor(Date.now() / 1000),
|
|
||||||
model: body.model,
|
|
||||||
system_fingerprint: 'fp_' + requestId,
|
|
||||||
choices: [{
|
|
||||||
index: 0,
|
|
||||||
message: {
|
|
||||||
role: 'assistant',
|
|
||||||
content: `Error: ${errorMessage}`
|
|
||||||
},
|
|
||||||
logprobs: null,
|
|
||||||
finish_reason: 'stop'
|
|
||||||
}],
|
|
||||||
usage
|
|
||||||
};
|
|
||||||
res.json(response);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}) as RequestHandler);
|
|
||||||
|
|
||||||
interface StreamResponse extends Response {
|
|
||||||
write: (chunk: string) => boolean;
|
|
||||||
}
|
|
||||||
|
|
||||||
function createProgressEmitter(requestId: string, budget: number | undefined, context: TrackerContext) {
|
|
||||||
return () => {
|
|
||||||
const state = context.actionTracker.getState();
|
|
||||||
const budgetInfo = {
|
|
||||||
used: context.tokenTracker.getTotalUsage(),
|
|
||||||
total: budget || 1_000_000,
|
|
||||||
percentage: ((context.tokenTracker.getTotalUsage() / (budget || 1_000_000)) * 100).toFixed(2)
|
|
||||||
};
|
|
||||||
|
|
||||||
eventEmitter.emit(`progress-${requestId}`, {
|
|
||||||
type: 'progress',
|
|
||||||
data: {...state.thisStep, totalStep: state.totalStep},
|
|
||||||
step: state.totalStep,
|
|
||||||
budget: budgetInfo,
|
|
||||||
trackers: {
|
|
||||||
tokenUsage: context.tokenTracker.getTotalUsage(),
|
|
||||||
actionState: context.actionTracker.getState()
|
|
||||||
}
|
|
||||||
});
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
function cleanup(requestId: string) {
|
|
||||||
const context = trackers.get(requestId);
|
|
||||||
if (context) {
|
|
||||||
context.actionTracker.removeAllListeners();
|
|
||||||
context.tokenTracker.removeAllListeners();
|
|
||||||
trackers.delete(requestId);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function emitTrackerUpdate(requestId: string, context: TrackerContext) {
|
|
||||||
const trackerData = {
|
|
||||||
tokenUsage: context.tokenTracker.getTotalUsage(),
|
|
||||||
tokenBreakdown: context.tokenTracker.getUsageBreakdown(),
|
|
||||||
actionState: context.actionTracker.getState().thisStep,
|
|
||||||
step: context.actionTracker.getState().totalStep,
|
|
||||||
badAttempts: context.actionTracker.getState().badAttempts,
|
|
||||||
gaps: context.actionTracker.getState().gaps
|
|
||||||
};
|
|
||||||
|
|
||||||
eventEmitter.emit(`progress-${requestId}`, {
|
|
||||||
type: 'progress',
|
|
||||||
trackers: trackerData
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Store the trackers for each request
|
|
||||||
const trackers = new Map<string, TrackerContext>();
|
|
||||||
|
|
||||||
app.post('/api/v1/query', (async (req: QueryRequest, res: Response) => {
|
|
||||||
const {q, budget, maxBadAttempt} = req.body;
|
|
||||||
if (!q) {
|
|
||||||
return res.status(400).json({error: 'Query (q) is required'});
|
|
||||||
}
|
|
||||||
|
|
||||||
const requestId = Date.now().toString();
|
|
||||||
|
|
||||||
// Create new trackers for this request
|
|
||||||
const context: TrackerContext = {
|
|
||||||
tokenTracker: new TokenTracker(),
|
|
||||||
actionTracker: new ActionTracker()
|
|
||||||
};
|
|
||||||
trackers.set(requestId, context);
|
|
||||||
|
|
||||||
// Set up listeners immediately for both trackers
|
|
||||||
context.actionTracker.on('action', () => emitTrackerUpdate(requestId, context));
|
|
||||||
// context.tokenTracker.on('usage', () => emitTrackerUpdate(requestId, context));
|
|
||||||
|
|
||||||
res.json({requestId});
|
|
||||||
|
|
||||||
try {
|
|
||||||
const {result} = await getResponse(q, budget, maxBadAttempt, context);
|
|
||||||
const emitProgress = createProgressEmitter(requestId, budget, context);
|
|
||||||
context.actionTracker.on('action', emitProgress);
|
|
||||||
await storeTaskResult(requestId, result);
|
|
||||||
eventEmitter.emit(`progress-${requestId}`, {
|
|
||||||
type: 'answer',
|
|
||||||
data: result,
|
|
||||||
trackers: {
|
|
||||||
tokenUsage: context.tokenTracker.getTotalUsage(),
|
|
||||||
actionState: context.actionTracker.getState()
|
|
||||||
}
|
|
||||||
});
|
|
||||||
cleanup(requestId);
|
|
||||||
} catch (error: any) {
|
|
||||||
eventEmitter.emit(`progress-${requestId}`, {
|
|
||||||
type: 'error',
|
|
||||||
data: error?.message || 'Unknown error',
|
|
||||||
status: 500,
|
|
||||||
trackers: {
|
|
||||||
tokenUsage: context.tokenTracker.getTotalUsage(),
|
|
||||||
actionState: context.actionTracker.getState()
|
|
||||||
}
|
|
||||||
});
|
|
||||||
cleanup(requestId);
|
|
||||||
}
|
|
||||||
}) as RequestHandler);
|
|
||||||
|
|
||||||
app.get('/api/v1/stream/:requestId', (async (req: Request, res: StreamResponse) => {
|
|
||||||
const requestId = req.params.requestId;
|
|
||||||
const context = trackers.get(requestId);
|
|
||||||
|
|
||||||
res.setHeader('Content-Type', 'text/event-stream');
|
|
||||||
res.setHeader('Cache-Control', 'no-cache');
|
|
||||||
res.setHeader('Connection', 'keep-alive');
|
|
||||||
|
|
||||||
const listener = (data: StreamMessage) => {
|
|
||||||
// The trackers are now included in all event types
|
|
||||||
// We don't need to add them here as they're already part of the data
|
|
||||||
res.write(`data: ${JSON.stringify(data)}\n\n`);
|
|
||||||
};
|
|
||||||
|
|
||||||
eventEmitter.on(`progress-${requestId}`, listener);
|
|
||||||
|
|
||||||
// Handle client disconnection
|
|
||||||
req.on('close', () => {
|
|
||||||
eventEmitter.removeListener(`progress-${requestId}`, listener);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Send initial connection confirmation with tracker state
|
|
||||||
const initialData = {
|
|
||||||
type: 'connected',
|
|
||||||
requestId,
|
|
||||||
trackers: context ? {
|
|
||||||
tokenUsage: context.tokenTracker.getTotalUsage(),
|
|
||||||
actionState: context.actionTracker.getState()
|
|
||||||
} : null
|
|
||||||
};
|
|
||||||
res.write(`data: ${JSON.stringify(initialData)}\n\n`);
|
|
||||||
}) as RequestHandler);
|
|
||||||
|
|
||||||
async function storeTaskResult(requestId: string, result: StepAction) {
|
|
||||||
try {
|
|
||||||
const taskDir = path.join(process.cwd(), 'tasks');
|
|
||||||
await fs.mkdir(taskDir, {recursive: true});
|
|
||||||
await fs.writeFile(
|
|
||||||
path.join(taskDir, `${requestId}.json`),
|
|
||||||
JSON.stringify(result, null, 2)
|
|
||||||
);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('Task storage failed:', error);
|
|
||||||
throw new Error('Failed to store task result');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
app.get('/api/v1/task/:requestId', (async (req: Request, res: Response) => {
|
|
||||||
const requestId = req.params.requestId;
|
|
||||||
try {
|
|
||||||
const taskPath = path.join(process.cwd(), 'tasks', `${requestId}.json`);
|
|
||||||
const taskData = await fs.readFile(taskPath, 'utf-8');
|
|
||||||
res.json(JSON.parse(taskData));
|
|
||||||
} catch (error) {
|
|
||||||
res.status(404).json({error: 'Task not found'});
|
|
||||||
}
|
|
||||||
}) as RequestHandler);
|
|
||||||
|
|
||||||
// Export server startup function for better testing
|
// Export server startup function for better testing
|
||||||
export function startServer() {
|
export function startServer() {
|
||||||
return app.listen(port, () => {
|
return app.listen(port, () => {
|
||||||
@ -642,6 +12,4 @@ export function startServer() {
|
|||||||
// Start server if running directly
|
// Start server if running directly
|
||||||
if (process.env.NODE_ENV !== 'test') {
|
if (process.env.NODE_ENV !== 'test') {
|
||||||
startServer();
|
startServer();
|
||||||
}
|
}
|
||||||
|
|
||||||
export default app;
|
|
||||||
@ -9,6 +9,16 @@ export class TokenTracker extends EventEmitter {
|
|||||||
constructor(budget?: number) {
|
constructor(budget?: number) {
|
||||||
super();
|
super();
|
||||||
this.budget = budget;
|
this.budget = budget;
|
||||||
|
|
||||||
|
if ('asyncLocalContext' in process) {
|
||||||
|
const asyncLocalContext = process.asyncLocalContext as any;
|
||||||
|
this.on('usage', () => {
|
||||||
|
if (asyncLocalContext.available()) {
|
||||||
|
asyncLocalContext.ctx.chargeAmount = this.getTotalUsage();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
trackUsage(tool: string, tokens: number, category?: TokenCategory) {
|
trackUsage(tool: string, tokens: number, category?: TokenCategory) {
|
||||||
@ -53,9 +63,9 @@ export class TokenTracker extends EventEmitter {
|
|||||||
}, {} as Record<string, number>);
|
}, {} as Record<string, number>);
|
||||||
|
|
||||||
const prompt_tokens = categoryBreakdown.prompt || 0;
|
const prompt_tokens = categoryBreakdown.prompt || 0;
|
||||||
const completion_tokens =
|
const completion_tokens =
|
||||||
(categoryBreakdown.reasoning || 0) +
|
(categoryBreakdown.reasoning || 0) +
|
||||||
(categoryBreakdown.accepted || 0) +
|
(categoryBreakdown.accepted || 0) +
|
||||||
(categoryBreakdown.rejected || 0);
|
(categoryBreakdown.rejected || 0);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|||||||
@ -1,14 +1,18 @@
|
|||||||
{
|
{
|
||||||
"compilerOptions": {
|
"compilerOptions": {
|
||||||
"target": "ES2020",
|
"target": "ES2020",
|
||||||
"module": "commonjs",
|
"module": "node16",
|
||||||
"outDir": "./dist",
|
"outDir": "./dist",
|
||||||
"rootDir": "./src",
|
"rootDir": "./src",
|
||||||
|
"sourceMap": true,
|
||||||
"esModuleInterop": true,
|
"esModuleInterop": true,
|
||||||
"skipLibCheck": true,
|
"skipLibCheck": true,
|
||||||
"forceConsistentCasingInFileNames": true,
|
"forceConsistentCasingInFileNames": true,
|
||||||
"strict": true,
|
"strict": true,
|
||||||
"resolveJsonModule": true,
|
"experimentalDecorators": true,
|
||||||
"moduleResolution": "node"
|
"emitDecoratorMetadata": true,
|
||||||
}
|
"resolveJsonModule": true
|
||||||
|
},
|
||||||
|
"include": ["src/**/*"],
|
||||||
|
"exclude": ["jina-ai/**/*", "**/__tests__/**/*"],
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user