mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2026-03-22 07:29:35 +08:00
feat: add OpenAI-compatible chat completions endpoint (#48)
This commit is contained in:
committed by
GitHub
parent
a9008ae0dd
commit
39579d560e
95
README.md
95
README.md
@@ -107,11 +107,104 @@ export DEFAULT_MODEL_NAME=qwen2.5-7b # your local llm model name
|
|||||||
|
|
||||||
Start the server:
|
Start the server:
|
||||||
```bash
|
```bash
|
||||||
|
# Without authentication
|
||||||
npm run serve
|
npm run serve
|
||||||
|
|
||||||
|
# With authentication (clients must provide this secret as Bearer token)
|
||||||
|
npm run serve --secret=your_secret_token
|
||||||
```
|
```
|
||||||
|
|
||||||
The server will start on http://localhost:3000 with the following endpoints:
|
The server will start on http://localhost:3000 with the following endpoints:
|
||||||
|
|
||||||
|
### POST /v1/chat/completions
|
||||||
|
OpenAI-compatible chat completions endpoint:
|
||||||
|
```bash
|
||||||
|
# Without authentication
|
||||||
|
curl http://localhost:3000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "gpt-4o-mini",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Hello!"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
|
||||||
|
# With authentication (when server is started with --secret)
|
||||||
|
curl http://localhost:3000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer your_secret_token" \
|
||||||
|
-d '{
|
||||||
|
"model": "gpt-4o-mini",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Hello!"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"stream": true
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Response format:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "chatcmpl-123",
|
||||||
|
"object": "chat.completion",
|
||||||
|
"created": 1677652288,
|
||||||
|
"model": "gpt-4o-mini",
|
||||||
|
"system_fingerprint": "fp_44709d6fcb",
|
||||||
|
"choices": [{
|
||||||
|
"index": 0,
|
||||||
|
"message": {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "YOUR FINAL ANSWER"
|
||||||
|
},
|
||||||
|
"logprobs": null,
|
||||||
|
"finish_reason": "stop"
|
||||||
|
}],
|
||||||
|
"usage": {
|
||||||
|
"prompt_tokens": 9,
|
||||||
|
"completion_tokens": 12,
|
||||||
|
"total_tokens": 21,
|
||||||
|
"completion_tokens_details": {
|
||||||
|
"reasoning_tokens": 0,
|
||||||
|
"accepted_prediction_tokens": 0,
|
||||||
|
"rejected_prediction_tokens": 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
For streaming responses (stream: true), the server sends chunks in this format:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "chatcmpl-123",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"created": 1694268190,
|
||||||
|
"model": "gpt-4o-mini",
|
||||||
|
"system_fingerprint": "fp_44709d6fcb",
|
||||||
|
"choices": [{
|
||||||
|
"index": 0,
|
||||||
|
"delta": {
|
||||||
|
"content": "..."
|
||||||
|
},
|
||||||
|
"logprobs": null,
|
||||||
|
"finish_reason": null
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Note: The think content in streaming responses is wrapped in XML tags:
|
||||||
|
```
|
||||||
|
<think>
|
||||||
|
[thinking steps...]
|
||||||
|
</think>
|
||||||
|
[final answer]
|
||||||
|
```
|
||||||
|
|
||||||
### POST /api/v1/query
|
### POST /api/v1/query
|
||||||
Submit a query to be answered:
|
Submit a query to be answered:
|
||||||
```bash
|
```bash
|
||||||
@@ -248,4 +341,4 @@ It should not be surprised that plain `gemini-2.0-flash` has a 0% pass rate, as
|
|||||||
| Average Tokens | 428 | 59,408 |
|
| Average Tokens | 428 | 59,408 |
|
||||||
| Median Tokens | 434 | 16,001 |
|
| Median Tokens | 434 | 16,001 |
|
||||||
| Maximum Tokens | 463 | 347,222 |
|
| Maximum Tokens | 463 | 347,222 |
|
||||||
| Minimum Tokens | 374 | 5,594 |
|
| Minimum Tokens | 374 | 5,594 |
|
||||||
|
|||||||
163
package-lock.json
generated
163
package-lock.json
generated
@@ -11,7 +11,7 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@ai-sdk/google": "^1.0.0",
|
"@ai-sdk/google": "^1.0.0",
|
||||||
"@ai-sdk/openai": "^1.1.9",
|
"@ai-sdk/openai": "^1.1.9",
|
||||||
"ai": "^4.1.21",
|
"ai": "^4.1.26",
|
||||||
"axios": "^1.7.9",
|
"axios": "^1.7.9",
|
||||||
"commander": "^13.1.0",
|
"commander": "^13.1.0",
|
||||||
"cors": "^2.8.5",
|
"cors": "^2.8.5",
|
||||||
@@ -29,10 +29,12 @@
|
|||||||
"@types/jest": "^29.5.14",
|
"@types/jest": "^29.5.14",
|
||||||
"@types/node": "^22.10.10",
|
"@types/node": "^22.10.10",
|
||||||
"@types/node-fetch": "^2.6.12",
|
"@types/node-fetch": "^2.6.12",
|
||||||
|
"@types/supertest": "^6.0.2",
|
||||||
"@typescript-eslint/eslint-plugin": "^7.0.1",
|
"@typescript-eslint/eslint-plugin": "^7.0.1",
|
||||||
"@typescript-eslint/parser": "^7.0.1",
|
"@typescript-eslint/parser": "^7.0.1",
|
||||||
"eslint": "^8.56.0",
|
"eslint": "^8.56.0",
|
||||||
"jest": "^29.7.0",
|
"jest": "^29.7.0",
|
||||||
|
"supertest": "^7.0.0",
|
||||||
"ts-jest": "^29.2.5",
|
"ts-jest": "^29.2.5",
|
||||||
"ts-node": "^10.9.2",
|
"ts-node": "^10.9.2",
|
||||||
"typescript": "^5.7.3"
|
"typescript": "^5.7.3"
|
||||||
@@ -1552,6 +1554,13 @@
|
|||||||
"@types/node": "*"
|
"@types/node": "*"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/cookiejar": {
|
||||||
|
"version": "2.1.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/cookiejar/-/cookiejar-2.1.5.tgz",
|
||||||
|
"integrity": "sha512-he+DHOWReW0nghN24E1WUqM0efK4kI9oTqDm6XmK8ZPe2djZ90BSNdGnIyCLzCPw7/pogPlGbzI2wHGGmi4O/Q==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/@types/cors": {
|
"node_modules/@types/cors": {
|
||||||
"version": "2.8.17",
|
"version": "2.8.17",
|
||||||
"resolved": "https://registry.npmjs.org/@types/cors/-/cors-2.8.17.tgz",
|
"resolved": "https://registry.npmjs.org/@types/cors/-/cors-2.8.17.tgz",
|
||||||
@@ -1649,6 +1658,13 @@
|
|||||||
"pretty-format": "^29.0.0"
|
"pretty-format": "^29.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/methods": {
|
||||||
|
"version": "1.1.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/methods/-/methods-1.1.4.tgz",
|
||||||
|
"integrity": "sha512-ymXWVrDiCxTBE3+RIrrP533E70eA+9qu7zdWoHuOmGujkYtzf4HQF96b8nwHLqhuf4ykX61IGRIB38CC6/sImQ==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/@types/mime": {
|
"node_modules/@types/mime": {
|
||||||
"version": "1.3.5",
|
"version": "1.3.5",
|
||||||
"resolved": "https://registry.npmjs.org/@types/mime/-/mime-1.3.5.tgz",
|
"resolved": "https://registry.npmjs.org/@types/mime/-/mime-1.3.5.tgz",
|
||||||
@@ -1721,6 +1737,30 @@
|
|||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/superagent": {
|
||||||
|
"version": "8.1.9",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/superagent/-/superagent-8.1.9.tgz",
|
||||||
|
"integrity": "sha512-pTVjI73witn+9ILmoJdajHGW2jkSaOzhiFYF1Rd3EQ94kymLqB9PjD9ISg7WaALC7+dCHT0FGe9T2LktLq/3GQ==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@types/cookiejar": "^2.1.5",
|
||||||
|
"@types/methods": "^1.1.4",
|
||||||
|
"@types/node": "*",
|
||||||
|
"form-data": "^4.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@types/supertest": {
|
||||||
|
"version": "6.0.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/supertest/-/supertest-6.0.2.tgz",
|
||||||
|
"integrity": "sha512-137ypx2lk/wTQbW6An6safu9hXmajAifU/s7szAHLN/FeIm5w7yR0Wkl9fdJMRSHwOn4HLAI0DaB2TOORuhPDg==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@types/methods": "^1.1.4",
|
||||||
|
"@types/superagent": "^8.1.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@types/yargs": {
|
"node_modules/@types/yargs": {
|
||||||
"version": "17.0.33",
|
"version": "17.0.33",
|
||||||
"resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.33.tgz",
|
"resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.33.tgz",
|
||||||
@@ -1988,9 +2028,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/ai": {
|
"node_modules/ai": {
|
||||||
"version": "4.1.21",
|
"version": "4.1.26",
|
||||||
"resolved": "https://registry.npmjs.org/ai/-/ai-4.1.21.tgz",
|
"resolved": "https://registry.npmjs.org/ai/-/ai-4.1.26.tgz",
|
||||||
"integrity": "sha512-w1v3T/fisoD1qRFz7CS7nE7mggeaxEpkEvWvVUWRem9lERgwh670OPhMPUSrdzTtCjMkOTrNkaecKoYAwvqM/A==",
|
"integrity": "sha512-Mww6mJbGwmMK0qAKR67WfVK1WyaUjfFlPZ2rhUUmDns3WhI+DVgMM7gLmuo0rA+I5qq69g7YE1OCgUwMRKKjMw==",
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@ai-sdk/provider": "1.0.7",
|
"@ai-sdk/provider": "1.0.7",
|
||||||
@@ -2132,6 +2172,13 @@
|
|||||||
"node": ">=8"
|
"node": ">=8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/asap": {
|
||||||
|
"version": "2.0.6",
|
||||||
|
"resolved": "https://registry.npmjs.org/asap/-/asap-2.0.6.tgz",
|
||||||
|
"integrity": "sha512-BSHWgDSAiKs50o2Re8ppvp3seVHXSRM44cdSsT9FfNEUUZLOGWVCsiWaRPWM1Znn+mqZ1OfVZ3z3DWEzSp7hRA==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/async": {
|
"node_modules/async": {
|
||||||
"version": "3.2.6",
|
"version": "3.2.6",
|
||||||
"resolved": "https://registry.npmjs.org/async/-/async-3.2.6.tgz",
|
"resolved": "https://registry.npmjs.org/async/-/async-3.2.6.tgz",
|
||||||
@@ -2629,6 +2676,16 @@
|
|||||||
"node": ">=18"
|
"node": ">=18"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/component-emitter": {
|
||||||
|
"version": "1.3.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/component-emitter/-/component-emitter-1.3.1.tgz",
|
||||||
|
"integrity": "sha512-T0+barUSQRTUQASh8bx02dl+DhF54GtIDY13Y3m9oWTklKbb3Wv974meRpeZ3lp1JpLVECWWNHC4vaG2XHXouQ==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"funding": {
|
||||||
|
"url": "https://github.com/sponsors/sindresorhus"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/concat-map": {
|
"node_modules/concat-map": {
|
||||||
"version": "0.0.1",
|
"version": "0.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
|
||||||
@@ -2679,6 +2736,13 @@
|
|||||||
"integrity": "sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ==",
|
"integrity": "sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ==",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/cookiejar": {
|
||||||
|
"version": "2.1.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/cookiejar/-/cookiejar-2.1.4.tgz",
|
||||||
|
"integrity": "sha512-LDx6oHrK+PhzLKJU9j5S7/Y3jM/mUHvD/DeI1WQmJn652iPC5Y4TBzC9l+5OMOXlyTTA+SmVUPm0HQUwpD5Jqw==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/cors": {
|
"node_modules/cors": {
|
||||||
"version": "2.8.5",
|
"version": "2.8.5",
|
||||||
"resolved": "https://registry.npmjs.org/cors/-/cors-2.8.5.tgz",
|
"resolved": "https://registry.npmjs.org/cors/-/cors-2.8.5.tgz",
|
||||||
@@ -2842,6 +2906,17 @@
|
|||||||
"node": ">=8"
|
"node": ">=8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/dezalgo": {
|
||||||
|
"version": "1.0.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/dezalgo/-/dezalgo-1.0.4.tgz",
|
||||||
|
"integrity": "sha512-rXSP0bf+5n0Qonsb+SVVfNfIsimO4HEtmnIpPHY8Q1UCzKlQrDMfdobr8nJOOsRgWCyMRqeSBQzmWUMq7zvVig==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "ISC",
|
||||||
|
"dependencies": {
|
||||||
|
"asap": "^2.0.0",
|
||||||
|
"wrappy": "1"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/diff": {
|
"node_modules/diff": {
|
||||||
"version": "4.0.2",
|
"version": "4.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz",
|
||||||
@@ -3429,6 +3504,13 @@
|
|||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/fast-safe-stringify": {
|
||||||
|
"version": "2.1.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/fast-safe-stringify/-/fast-safe-stringify-2.1.1.tgz",
|
||||||
|
"integrity": "sha512-W+KJc2dmILlPplD/H4K9l9LcAHAfPtP6BY84uVLXQ6Evcz9Lcg33Y2z1IVblT6xdY54PXYVHEv+0Wpq8Io6zkA==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/fastq": {
|
"node_modules/fastq": {
|
||||||
"version": "1.19.0",
|
"version": "1.19.0",
|
||||||
"resolved": "https://registry.npmjs.org/fastq/-/fastq-1.19.0.tgz",
|
"resolved": "https://registry.npmjs.org/fastq/-/fastq-1.19.0.tgz",
|
||||||
@@ -3639,6 +3721,21 @@
|
|||||||
"node": ">=12.20.0"
|
"node": ">=12.20.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/formidable": {
|
||||||
|
"version": "3.5.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/formidable/-/formidable-3.5.2.tgz",
|
||||||
|
"integrity": "sha512-Jqc1btCy3QzRbJaICGwKcBfGWuLADRerLzDqi2NwSt/UkXLsHJw2TVResiaoBufHVHy9aSgClOHCeJsSsFLTbg==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"dezalgo": "^1.0.4",
|
||||||
|
"hexoid": "^2.0.0",
|
||||||
|
"once": "^1.4.0"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://ko-fi.com/tunnckoCore/commissions"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/forwarded": {
|
"node_modules/forwarded": {
|
||||||
"version": "0.2.0",
|
"version": "0.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz",
|
||||||
@@ -3924,6 +4021,16 @@
|
|||||||
"node": ">= 0.4"
|
"node": ">= 0.4"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/hexoid": {
|
||||||
|
"version": "2.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/hexoid/-/hexoid-2.0.0.tgz",
|
||||||
|
"integrity": "sha512-qlspKUK7IlSQv2o+5I7yhUd7TxlOG2Vr5LTa3ve2XSNVKAL/n/u/7KLvKmFNimomDIKvZFXWHv0T12mv7rT8Aw==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=8"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/html-entities": {
|
"node_modules/html-entities": {
|
||||||
"version": "2.5.2",
|
"version": "2.5.2",
|
||||||
"resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.5.2.tgz",
|
"resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.5.2.tgz",
|
||||||
@@ -6360,6 +6467,54 @@
|
|||||||
"url": "https://github.com/sponsors/sindresorhus"
|
"url": "https://github.com/sponsors/sindresorhus"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/superagent": {
|
||||||
|
"version": "9.0.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/superagent/-/superagent-9.0.2.tgz",
|
||||||
|
"integrity": "sha512-xuW7dzkUpcJq7QnhOsnNUgtYp3xRwpt2F7abdRYIpCsAt0hhUqia0EdxyXZQQpNmGtsCzYHryaKSV3q3GJnq7w==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"component-emitter": "^1.3.0",
|
||||||
|
"cookiejar": "^2.1.4",
|
||||||
|
"debug": "^4.3.4",
|
||||||
|
"fast-safe-stringify": "^2.1.1",
|
||||||
|
"form-data": "^4.0.0",
|
||||||
|
"formidable": "^3.5.1",
|
||||||
|
"methods": "^1.1.2",
|
||||||
|
"mime": "2.6.0",
|
||||||
|
"qs": "^6.11.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=14.18.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/superagent/node_modules/mime": {
|
||||||
|
"version": "2.6.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/mime/-/mime-2.6.0.tgz",
|
||||||
|
"integrity": "sha512-USPkMeET31rOMiarsBNIHZKLGgvKc/LrjofAnBlOttf5ajRvqiRA8QsenbcooctK6d6Ts6aqZXBA+XbkKthiQg==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"bin": {
|
||||||
|
"mime": "cli.js"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=4.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/supertest": {
|
||||||
|
"version": "7.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/supertest/-/supertest-7.0.0.tgz",
|
||||||
|
"integrity": "sha512-qlsr7fIC0lSddmA3tzojvzubYxvlGtzumcdHgPwbFWMISQwL22MhM2Y3LNt+6w9Yyx7559VW5ab70dgphm8qQA==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"methods": "^1.1.2",
|
||||||
|
"superagent": "^9.0.1"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=14.18.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/supports-color": {
|
"node_modules/supports-color": {
|
||||||
"version": "7.2.0",
|
"version": "7.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz",
|
||||||
|
|||||||
12
package.json
12
package.json
@@ -28,28 +28,30 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@ai-sdk/google": "^1.0.0",
|
"@ai-sdk/google": "^1.0.0",
|
||||||
"@ai-sdk/openai": "^1.1.9",
|
"@ai-sdk/openai": "^1.1.9",
|
||||||
"ai": "^4.1.21",
|
"ai": "^4.1.26",
|
||||||
"axios": "^1.7.9",
|
"axios": "^1.7.9",
|
||||||
"commander": "^13.1.0",
|
"commander": "^13.1.0",
|
||||||
"cors": "^2.8.5",
|
"cors": "^2.8.5",
|
||||||
|
"dotenv": "^16.4.7",
|
||||||
"duck-duck-scrape": "^2.2.7",
|
"duck-duck-scrape": "^2.2.7",
|
||||||
"express": "^4.21.2",
|
"express": "^4.21.2",
|
||||||
"node-fetch": "^3.3.2",
|
"node-fetch": "^3.3.2",
|
||||||
"undici": "^7.3.0",
|
"undici": "^7.3.0",
|
||||||
"zod": "^3.22.4",
|
"zod": "^3.22.4"
|
||||||
"dotenv": "^16.4.7"
|
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
"@types/commander": "^2.12.0",
|
||||||
"@types/cors": "^2.8.17",
|
"@types/cors": "^2.8.17",
|
||||||
"@types/express": "^5.0.0",
|
"@types/express": "^5.0.0",
|
||||||
"@types/node-fetch": "^2.6.12",
|
|
||||||
"@types/commander": "^2.12.0",
|
|
||||||
"@types/jest": "^29.5.14",
|
"@types/jest": "^29.5.14",
|
||||||
"@types/node": "^22.10.10",
|
"@types/node": "^22.10.10",
|
||||||
|
"@types/node-fetch": "^2.6.12",
|
||||||
|
"@types/supertest": "^6.0.2",
|
||||||
"@typescript-eslint/eslint-plugin": "^7.0.1",
|
"@typescript-eslint/eslint-plugin": "^7.0.1",
|
||||||
"@typescript-eslint/parser": "^7.0.1",
|
"@typescript-eslint/parser": "^7.0.1",
|
||||||
"eslint": "^8.56.0",
|
"eslint": "^8.56.0",
|
||||||
"jest": "^29.7.0",
|
"jest": "^29.7.0",
|
||||||
|
"supertest": "^7.0.0",
|
||||||
"ts-jest": "^29.2.5",
|
"ts-jest": "^29.2.5",
|
||||||
"ts-node": "^10.9.2",
|
"ts-node": "^10.9.2",
|
||||||
"typescript": "^5.7.3"
|
"typescript": "^5.7.3"
|
||||||
|
|||||||
353
src/__tests__/server.test.ts
Normal file
353
src/__tests__/server.test.ts
Normal file
@@ -0,0 +1,353 @@
|
|||||||
|
import request from 'supertest';
|
||||||
|
import { EventEmitter } from 'events';
|
||||||
|
import type { Express } from 'express';
|
||||||
|
|
||||||
|
const TEST_SECRET = 'test-secret';
|
||||||
|
let app: Express;
|
||||||
|
|
||||||
|
describe('/v1/chat/completions', () => {
|
||||||
|
jest.setTimeout(120000); // Increase timeout for all tests in this suite
|
||||||
|
|
||||||
|
beforeEach(async () => {
|
||||||
|
// Set NODE_ENV to test to prevent server from auto-starting
|
||||||
|
process.env.NODE_ENV = 'test';
|
||||||
|
|
||||||
|
// Clean up any existing secret
|
||||||
|
const existingSecretIndex = process.argv.findIndex(arg => arg.startsWith('--secret='));
|
||||||
|
if (existingSecretIndex !== -1) {
|
||||||
|
process.argv.splice(existingSecretIndex, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set up test secret and import server module
|
||||||
|
process.argv.push(`--secret=${TEST_SECRET}`);
|
||||||
|
|
||||||
|
// Import server module (jest.resetModules() is called automatically before each test)
|
||||||
|
const { default: serverModule } = await import('../server');
|
||||||
|
app = serverModule;
|
||||||
|
});
|
||||||
|
|
||||||
|
afterEach(async () => {
|
||||||
|
// Clean up any remaining event listeners
|
||||||
|
const emitter = EventEmitter.prototype;
|
||||||
|
emitter.removeAllListeners();
|
||||||
|
emitter.setMaxListeners(emitter.getMaxListeners() + 1);
|
||||||
|
|
||||||
|
// Clean up test secret
|
||||||
|
const secretIndex = process.argv.findIndex(arg => arg.startsWith('--secret='));
|
||||||
|
if (secretIndex !== -1) {
|
||||||
|
process.argv.splice(secretIndex, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for any pending promises to settle
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 500));
|
||||||
|
|
||||||
|
// Reset module cache to ensure clean state
|
||||||
|
jest.resetModules();
|
||||||
|
});
|
||||||
|
it('should require authentication when secret is set', async () => {
|
||||||
|
// Note: secret is already set in beforeEach
|
||||||
|
|
||||||
|
const response = await request(app)
|
||||||
|
.post('/v1/chat/completions')
|
||||||
|
.send({
|
||||||
|
model: 'test-model',
|
||||||
|
messages: [{ role: 'user', content: 'test' }]
|
||||||
|
});
|
||||||
|
expect(response.status).toBe(401);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should allow requests without auth when no secret is set', async () => {
|
||||||
|
// Remove secret for this test
|
||||||
|
const secretIndex = process.argv.findIndex(arg => arg.startsWith('--secret='));
|
||||||
|
if (secretIndex !== -1) {
|
||||||
|
process.argv.splice(secretIndex, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reload server module without secret
|
||||||
|
const { default: serverModule } = await import('../server');
|
||||||
|
app = serverModule;
|
||||||
|
|
||||||
|
const response = await request(app)
|
||||||
|
.post('/v1/chat/completions')
|
||||||
|
.send({
|
||||||
|
model: 'test-model',
|
||||||
|
messages: [{ role: 'user', content: 'test' }]
|
||||||
|
});
|
||||||
|
expect(response.status).toBe(200);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should reject requests without user message', async () => {
|
||||||
|
const response = await request(app)
|
||||||
|
.post('/v1/chat/completions')
|
||||||
|
.set('Authorization', `Bearer ${TEST_SECRET}`)
|
||||||
|
.send({
|
||||||
|
model: 'test-model',
|
||||||
|
messages: [{ role: 'developer', content: 'test' }]
|
||||||
|
});
|
||||||
|
expect(response.status).toBe(400);
|
||||||
|
expect(response.body.error).toBe('Last message must be from user');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle non-streaming request', async () => {
|
||||||
|
const response = await request(app)
|
||||||
|
.post('/v1/chat/completions')
|
||||||
|
.set('Authorization', `Bearer ${TEST_SECRET}`)
|
||||||
|
.send({
|
||||||
|
model: 'test-model',
|
||||||
|
messages: [{ role: 'user', content: 'test' }]
|
||||||
|
});
|
||||||
|
expect(response.status).toBe(200);
|
||||||
|
expect(response.body).toMatchObject({
|
||||||
|
object: 'chat.completion',
|
||||||
|
choices: [{
|
||||||
|
message: {
|
||||||
|
role: 'assistant'
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should track tokens correctly in non-streaming response', async () => {
|
||||||
|
// Create a promise that resolves when token tracking is complete
|
||||||
|
const tokenTrackingPromise = new Promise<void>((resolve) => {
|
||||||
|
const emitter = EventEmitter.prototype;
|
||||||
|
const originalEmit = emitter.emit;
|
||||||
|
|
||||||
|
// Override emit to detect when token tracking is done
|
||||||
|
emitter.emit = function(event: string, ...args: any[]) {
|
||||||
|
if (event === 'usage') {
|
||||||
|
// Wait for next tick to ensure all token tracking is complete
|
||||||
|
process.nextTick(() => {
|
||||||
|
emitter.emit = originalEmit;
|
||||||
|
resolve();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return originalEmit.apply(this, [event, ...args]);
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
const response = await request(app)
|
||||||
|
.post('/v1/chat/completions')
|
||||||
|
.set('Authorization', `Bearer ${TEST_SECRET}`)
|
||||||
|
.send({
|
||||||
|
model: 'test-model',
|
||||||
|
messages: [{ role: 'user', content: 'test' }]
|
||||||
|
});
|
||||||
|
|
||||||
|
// Wait for token tracking to complete
|
||||||
|
await tokenTrackingPromise;
|
||||||
|
|
||||||
|
expect(response.body.usage).toMatchObject({
|
||||||
|
prompt_tokens: expect.any(Number),
|
||||||
|
completion_tokens: expect.any(Number),
|
||||||
|
total_tokens: expect.any(Number),
|
||||||
|
completion_tokens_details: {
|
||||||
|
reasoning_tokens: expect.any(Number),
|
||||||
|
accepted_prediction_tokens: expect.any(Number),
|
||||||
|
rejected_prediction_tokens: expect.any(Number)
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Verify token counts are reasonable
|
||||||
|
expect(response.body.usage.prompt_tokens).toBeGreaterThan(0);
|
||||||
|
expect(response.body.usage.completion_tokens).toBeGreaterThan(0);
|
||||||
|
expect(response.body.usage.total_tokens).toBe(
|
||||||
|
response.body.usage.prompt_tokens + response.body.usage.completion_tokens
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle streaming request and track tokens correctly', async () => {
|
||||||
|
return new Promise<void>((resolve, reject) => {
|
||||||
|
let isDone = false;
|
||||||
|
let totalCompletionTokens = 0;
|
||||||
|
|
||||||
|
const cleanup = () => {
|
||||||
|
clearTimeout(timeoutHandle);
|
||||||
|
isDone = true;
|
||||||
|
resolve();
|
||||||
|
};
|
||||||
|
|
||||||
|
const timeoutHandle = setTimeout(() => {
|
||||||
|
if (!isDone) {
|
||||||
|
cleanup();
|
||||||
|
reject(new Error('Test timed out'));
|
||||||
|
}
|
||||||
|
}, 30000);
|
||||||
|
|
||||||
|
request(app)
|
||||||
|
.post('/v1/chat/completions')
|
||||||
|
.set('Authorization', `Bearer ${TEST_SECRET}`)
|
||||||
|
.send({
|
||||||
|
model: 'test-model',
|
||||||
|
messages: [{ role: 'user', content: 'test' }],
|
||||||
|
stream: true
|
||||||
|
})
|
||||||
|
.buffer(true)
|
||||||
|
.parse((res, callback) => {
|
||||||
|
const response = res as unknown as {
|
||||||
|
on(event: 'data', listener: (chunk: Buffer) => void): void;
|
||||||
|
on(event: 'end', listener: () => void): void;
|
||||||
|
on(event: 'error', listener: (err: Error) => void): void;
|
||||||
|
};
|
||||||
|
let responseData = '';
|
||||||
|
|
||||||
|
response.on('error', (err) => {
|
||||||
|
cleanup();
|
||||||
|
callback(err, null);
|
||||||
|
});
|
||||||
|
|
||||||
|
response.on('data', (chunk) => {
|
||||||
|
responseData += chunk.toString();
|
||||||
|
});
|
||||||
|
|
||||||
|
response.on('end', () => {
|
||||||
|
try {
|
||||||
|
callback(null, responseData);
|
||||||
|
} catch (err) {
|
||||||
|
cleanup();
|
||||||
|
callback(err instanceof Error ? err : new Error(String(err)), null);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
})
|
||||||
|
.end((err, res) => {
|
||||||
|
if (err) return reject(err);
|
||||||
|
|
||||||
|
expect(res.status).toBe(200);
|
||||||
|
expect(res.headers['content-type']).toBe('text/event-stream');
|
||||||
|
|
||||||
|
// Verify stream format and content
|
||||||
|
if (isDone) return; // Prevent multiple resolves
|
||||||
|
|
||||||
|
const responseText = res.body as string;
|
||||||
|
const chunks = responseText
|
||||||
|
.split('\n\n')
|
||||||
|
.filter((line: string) => line.startsWith('data: '))
|
||||||
|
.map((line: string) => JSON.parse(line.replace('data: ', '')));
|
||||||
|
|
||||||
|
// Process all chunks
|
||||||
|
expect(chunks.length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
// Verify initial chunk format
|
||||||
|
expect(chunks[0]).toMatchObject({
|
||||||
|
id: expect.any(String),
|
||||||
|
object: 'chat.completion.chunk',
|
||||||
|
choices: [{
|
||||||
|
index: 0,
|
||||||
|
delta: { role: 'assistant' },
|
||||||
|
logprobs: null,
|
||||||
|
finish_reason: null
|
||||||
|
}]
|
||||||
|
});
|
||||||
|
|
||||||
|
// Verify content chunks have content
|
||||||
|
chunks.slice(1).forEach(chunk => {
|
||||||
|
const content = chunk.choices[0].delta.content;
|
||||||
|
if (content && content.trim()) {
|
||||||
|
totalCompletionTokens += 1; // Count 1 token per chunk as per Vercel convention
|
||||||
|
}
|
||||||
|
expect(chunk).toMatchObject({
|
||||||
|
object: 'chat.completion.chunk',
|
||||||
|
choices: [{
|
||||||
|
delta: expect.objectContaining({
|
||||||
|
content: expect.any(String)
|
||||||
|
})
|
||||||
|
}]
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// Verify final chunk format if present
|
||||||
|
const lastChunk = chunks[chunks.length - 1];
|
||||||
|
if (lastChunk?.choices?.[0]?.finish_reason === 'stop') {
|
||||||
|
expect(lastChunk).toMatchObject({
|
||||||
|
object: 'chat.completion.chunk',
|
||||||
|
choices: [{
|
||||||
|
delta: {},
|
||||||
|
finish_reason: 'stop'
|
||||||
|
}]
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify we tracked some completion tokens
|
||||||
|
expect(totalCompletionTokens).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
// Clean up and resolve
|
||||||
|
if (!isDone) {
|
||||||
|
cleanup();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should track tokens correctly in error response', async () => {
|
||||||
|
const response = await request(app)
|
||||||
|
.post('/v1/chat/completions')
|
||||||
|
.set('Authorization', `Bearer ${TEST_SECRET}`)
|
||||||
|
.send({
|
||||||
|
model: 'test-model',
|
||||||
|
messages: [] // Invalid messages array
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(response.status).toBe(400);
|
||||||
|
expect(response.body).toHaveProperty('error');
|
||||||
|
expect(response.body.error).toBe('Messages array is required and must not be empty');
|
||||||
|
|
||||||
|
// Make another request to verify token tracking after error
|
||||||
|
const validResponse = await request(app)
|
||||||
|
.post('/v1/chat/completions')
|
||||||
|
.set('Authorization', `Bearer ${TEST_SECRET}`)
|
||||||
|
.send({
|
||||||
|
model: 'test-model',
|
||||||
|
messages: [{ role: 'user', content: 'test' }]
|
||||||
|
});
|
||||||
|
|
||||||
|
// Verify token tracking still works after error
|
||||||
|
expect(validResponse.body.usage).toMatchObject({
|
||||||
|
prompt_tokens: expect.any(Number),
|
||||||
|
completion_tokens: expect.any(Number),
|
||||||
|
total_tokens: expect.any(Number),
|
||||||
|
completion_tokens_details: {
|
||||||
|
reasoning_tokens: expect.any(Number),
|
||||||
|
accepted_prediction_tokens: expect.any(Number),
|
||||||
|
rejected_prediction_tokens: expect.any(Number)
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Verify token counts are reasonable
|
||||||
|
expect(validResponse.body.usage.prompt_tokens).toBeGreaterThan(0);
|
||||||
|
expect(validResponse.body.usage.completion_tokens).toBeGreaterThan(0);
|
||||||
|
expect(validResponse.body.usage.total_tokens).toBe(
|
||||||
|
validResponse.body.usage.prompt_tokens + validResponse.body.usage.completion_tokens
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Checks that a non-streaming completion reports `usage` with every OpenAI-style counter
// present, and that the reported totals are internally consistent.
it('should provide token usage in Vercel AI SDK format', async () => {
    const payload = {
        model: 'test-model',
        messages: [{ role: 'user', content: 'test' }]
    };

    const { status, body } = await request(app)
        .post('/v1/chat/completions')
        .set('Authorization', `Bearer ${TEST_SECRET}`)
        .send(payload);

    expect(status).toBe(200);
    const { usage } = body;

    // Every counter, including the completion breakdown, must be a number.
    const anyNumber = expect.any(Number);
    expect(usage).toMatchObject({
        prompt_tokens: anyNumber,
        completion_tokens: anyNumber,
        total_tokens: anyNumber,
        completion_tokens_details: {
            reasoning_tokens: anyNumber,
            accepted_prediction_tokens: anyNumber,
            rejected_prediction_tokens: anyNumber
        }
    });

    // Prompt and completion counts must be non-zero and add up to the reported total.
    const {
        prompt_tokens: promptTokens,
        completion_tokens: completionTokens,
        total_tokens: totalTokens
    } = usage;
    expect(promptTokens).toBeGreaterThan(0);
    expect(completionTokens).toBeGreaterThan(0);
    expect(totalTokens).toBe(promptTokens + completionTokens);
});
|
||||||
|
});
|
||||||
@@ -7,8 +7,6 @@ import {GEMINI_API_KEY} from '../config';
|
|||||||
import {z} from 'zod';
|
import {z} from 'zod';
|
||||||
import {AnswerAction, TrackerContext} from "../types";
|
import {AnswerAction, TrackerContext} from "../types";
|
||||||
import {createGoogleGenerativeAI} from "@ai-sdk/google";
|
import {createGoogleGenerativeAI} from "@ai-sdk/google";
|
||||||
import {TokenTracker} from "../utils/token-tracker";
|
|
||||||
import {ActionTracker} from "../utils/action-tracker";
|
|
||||||
|
|
||||||
const execAsync = promisify(exec);
|
const execAsync = promisify(exec);
|
||||||
|
|
||||||
@@ -184,27 +182,6 @@ async function batchEvaluate(inputFile: string): Promise<void> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getResponseStreamingAgent(query: string) {
|
|
||||||
const res = await fetch("http://localhost:3000/chat", {
|
|
||||||
method: "POST",
|
|
||||||
headers: {"Content-Type": "application/json"},
|
|
||||||
body: JSON.stringify({query})
|
|
||||||
})
|
|
||||||
const text = await res.text()
|
|
||||||
return {
|
|
||||||
result: {
|
|
||||||
think: '',
|
|
||||||
action: 'answer',
|
|
||||||
answer: text.split("RESPONSE_START")[1].split("RESPONSE_END")[0].trim(),
|
|
||||||
references: []
|
|
||||||
},
|
|
||||||
context: {
|
|
||||||
tokenTracker: new TokenTracker(),
|
|
||||||
actionTracker: new ActionTracker()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculate and print statistics
|
// Calculate and print statistics
|
||||||
const stats = calculateStats(results, modelName);
|
const stats = calculateStats(results, modelName);
|
||||||
printStats(stats);
|
printStats(stats);
|
||||||
@@ -229,4 +206,4 @@ if (require.main === module) {
|
|||||||
batchEvaluate(inputFile).catch(console.error);
|
batchEvaluate(inputFile).catch(console.error);
|
||||||
}
|
}
|
||||||
|
|
||||||
export {batchEvaluate};
|
export {batchEvaluate};
|
||||||
|
|||||||
300
src/server.ts
300
src/server.ts
@@ -2,7 +2,16 @@ import express, {Request, Response, RequestHandler} from 'express';
|
|||||||
import cors from 'cors';
|
import cors from 'cors';
|
||||||
import {EventEmitter} from 'events';
|
import {EventEmitter} from 'events';
|
||||||
import {getResponse} from './agent';
|
import {getResponse} from './agent';
|
||||||
import {StepAction, StreamMessage, TrackerContext} from './types';
|
import {
|
||||||
|
StepAction,
|
||||||
|
StreamMessage,
|
||||||
|
TrackerContext,
|
||||||
|
ChatCompletionRequest,
|
||||||
|
ChatCompletionResponse,
|
||||||
|
ChatCompletionChunk,
|
||||||
|
AnswerAction,
|
||||||
|
TOKEN_CATEGORIES
|
||||||
|
} from './types';
|
||||||
import fs from 'fs/promises';
|
import fs from 'fs/promises';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
import {TokenTracker} from "./utils/token-tracker";
|
import {TokenTracker} from "./utils/token-tracker";
|
||||||
@@ -11,6 +20,9 @@ import {ActionTracker} from "./utils/action-tracker";
|
|||||||
const app = express();
|
const app = express();
|
||||||
const port = process.env.PORT || 3000;
|
const port = process.env.PORT || 3000;
|
||||||
|
|
||||||
|
// Optional shared secret for bearer-token authentication, read from the first
// `--secret=<value>` command-line argument.
const SECRET_FLAG = '--secret=';

/**
 * Extracts the value of the `--secret=` flag from an argument list.
 *
 * Everything after the flag prefix is the secret, so values that themselves contain '='
 * (for example base64 padding) are returned intact instead of being cut at the next '='.
 *
 * @param argv - argument list to scan; defaults to the process arguments
 * @returns the secret, or undefined when the flag is absent or its value is empty
 */
function readSecretArg(argv = process.argv) {
    const flag = argv.find(arg => arg.startsWith(SECRET_FLAG));
    // An empty value is mapped to undefined: it is falsy either way, so it never enables auth.
    return flag?.slice(SECRET_FLAG.length) || undefined;
}

const secret = readSecretArg();
|
||||||
|
|
||||||
app.use(cors());
|
app.use(cors());
|
||||||
app.use(express.json());
|
app.use(express.json());
|
||||||
|
|
||||||
@@ -24,6 +36,278 @@ interface QueryRequest extends Request {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * POST /v1/chat/completions — OpenAI-compatible chat completions endpoint.
 *
 * Request body: a ChatCompletionRequest. The content of the last message, which must have
 * role "user", is passed to getResponse() as the query.
 *
 * Responses:
 *  - 401 when a secret is configured and the bearer token does not match it;
 *  - 400 when `messages` is missing/empty or the last message is not from the user;
 *  - with `stream: true`, server-sent events carrying ChatCompletionChunk objects: an
 *    opening "<think>", one chunk per agent step that has a thought, a closing "</think>",
 *    then a final chunk with the answer and finish_reason "stop";
 *  - otherwise a single ChatCompletionResponse whose `usage` comes from this request's
 *    TokenTracker.
 *
 * Failures inside getResponse() are reported in-band: as a final chunk on a stream that has
 * already started, or as a ChatCompletionResponse whose content is "Error: <message>".
 *
 * Token counts recorded here are nominal (one per message, per step, per answer), not
 * tokenizer counts.
 */
app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
    // Generated once so the id in the logs is the id the client receives.
    const requestId = Date.now().toString();

    // Bearer authentication is enforced only when a secret was configured.
    if (secret) {
        const bearerPrefix = 'Bearer ';
        const authHeader = req.headers.authorization;
        // Compare everything after the prefix so the token must match the secret exactly.
        if (!authHeader || !authHeader.startsWith(bearerPrefix) || authHeader.slice(bearerPrefix.length) !== secret) {
            console.log('[chat/completions] Unauthorized request');
            res.status(401).json({ error: 'Unauthorized' });
            return;
        }
    }

    // Log request metadata only; message contents and the token itself are not logged.
    console.log('[chat/completions] Request:', {
        model: req.body.model,
        stream: req.body.stream,
        messageCount: req.body.messages?.length,
        hasAuth: !!req.headers.authorization,
        requestId
    });

    const body = req.body as ChatCompletionRequest;
    if (!body.messages?.length) {
        return res.status(400).json({ error: 'Messages array is required and must not be empty' });
    }
    const lastMessage = body.messages[body.messages.length - 1];
    // Optional chaining: a null entry must produce a 400, not an exception outside the try below.
    if (lastMessage?.role !== 'user') {
        return res.status(400).json({ error: 'Last message must be from user' });
    }

    const context: TrackerContext = {
        tokenTracker: new TokenTracker(),
        actionTracker: new ActionTracker()
    };

    // Prompt usage is recorded as one token per incoming message.
    context.tokenTracker.trackUsage('agent', body.messages.length, TOKEN_CATEGORIES.PROMPT);

    // Builds one streaming chunk; every chunk of a request shares id, model and fingerprint.
    const buildChunk = (
        delta: ChatCompletionChunk['choices'][number]['delta'],
        finishReason: null | 'stop' = null
    ): ChatCompletionChunk => ({
        id: requestId,
        object: 'chat.completion.chunk',
        created: Math.floor(Date.now() / 1000),
        model: body.model,
        system_fingerprint: `fp_${requestId}`,
        choices: [{
            index: 0,
            delta,
            logprobs: null,
            finish_reason: finishReason
        }]
    });

    // Writes one chunk as a server-sent event.
    const writeChunk = (chunk: ChatCompletionChunk) => res.write(`data: ${JSON.stringify(chunk)}\n\n`);

    // Builds the non-streaming body; usage is read at call time so it includes late tracking.
    const buildCompletion = (content: string): ChatCompletionResponse => ({
        id: requestId,
        object: 'chat.completion',
        created: Math.floor(Date.now() / 1000),
        model: body.model,
        system_fingerprint: `fp_${requestId}`,
        choices: [{
            index: 0,
            message: {
                role: 'assistant',
                content
            },
            logprobs: null,
            finish_reason: 'stop'
        }],
        usage: context.tokenTracker.getUsageDetails()
    });

    if (body.stream) {
        res.setHeader('Content-Type', 'text/event-stream');
        res.setHeader('Cache-Control', 'no-cache');
        res.setHeader('Connection', 'keep-alive');

        // Open the single reasoning block; it is closed once getResponse() settles.
        writeChunk(buildChunk({ role: 'assistant', content: '<think>' }));

        const actionListener = (action: any) => {
            // Each agent step counts as one reasoning token.
            context.tokenTracker.trackUsage('evaluator', 1, TOKEN_CATEGORIES.REASONING);

            // Nothing to send for steps without a thought, or once the response has ended.
            if (!action.think || res.writableEnded) {
                return;
            }

            // The thought goes inside the already-open <think> block. Wrapping it in its own
            // tags would nest them and leave the final </think> unmatched. The newline keeps
            // consecutive thoughts from running together.
            const chunk = buildChunk({ content: `${action.think}\n` });
            console.log('[chat/completions] Sending chunk:', {
                id: chunk.id,
                content: chunk.choices[0].delta.content,
                finish_reason: chunk.choices[0].finish_reason
            });
            writeChunk(chunk);
        };
        context.actionTracker.on('action', actionListener);

        // 'finish' covers a normally completed response; 'close' also covers a client that
        // disconnects mid-stream, in which case 'finish' never fires.
        const detachListener = () => {
            context.actionTracker.removeListener('action', actionListener);
        };
        res.on('finish', detachListener);
        res.on('close', detachListener);
    }

    try {
        let result;
        try {
            ({ result } = await getResponse(lastMessage.content, undefined, undefined, context));
        } catch (error: any) {
            if (error?.response?.status !== 402) {
                throw error;
            }
            // A 402 (attributed by the original author to the deduplication call) gets one
            // retry with the third getResponse() argument set to 3.
            ({ result } = await getResponse(lastMessage.content, undefined, 3, context));
        }

        // A final answer counts as one accepted token, any other final action as one rejected.
        const outcome = result.action === 'answer' ? TOKEN_CATEGORIES.ACCEPTED : TOKEN_CATEGORIES.REJECTED;
        context.tokenTracker.trackUsage('evaluator', 1, outcome);

        const content = result.action === 'answer' ? (result as AnswerAction).answer : result.think;

        if (body.stream) {
            // Close the reasoning block, then send the answer as the terminating chunk.
            writeChunk(buildChunk({ content: '</think>' }));
            writeChunk(buildChunk({ content }, 'stop'));
            res.end();
        } else {
            const response = buildCompletion(content);

            // Log a summary only; the full content is omitted for brevity.
            console.log('[chat/completions] Response:', {
                id: response.id,
                status: 200,
                contentLength: response.choices[0].message.content.length,
                usage: response.usage
            });

            res.json(response);
        }
    } catch (error: any) {
        const errorMessage = error?.message || 'An error occurred';
        console.error('[chat/completions] Error:', {
            message: errorMessage,
            stack: error?.stack,
            type: error?.constructor?.name,
            requestId
        });

        // A failed request counts as exactly one rejected token.
        context.tokenTracker.trackUsage('evaluator', 1, TOKEN_CATEGORIES.REJECTED);

        // Stop forwarding agent steps before the error is written.
        context.actionTracker.removeAllListeners('action');

        if (body.stream && res.headersSent) {
            // The stream has started: close the reasoning block and end with the error text.
            writeChunk(buildChunk({ content: '</think>' }));
            writeChunk(buildChunk({ content: errorMessage }, 'stop'));
            res.end();
        } else {
            // Nothing sent yet: report the error as a regular completion body, with usage.
            res.json(buildCompletion(`Error: ${errorMessage}`));
        }
    }
}) as RequestHandler);
|
||||||
|
|
||||||
interface StreamResponse extends Response {
|
interface StreamResponse extends Response {
|
||||||
write: (chunk: string) => boolean;
|
write: (chunk: string) => boolean;
|
||||||
}
|
}
|
||||||
@@ -185,8 +469,16 @@ app.get('/api/v1/task/:requestId', (async (req: Request, res: Response) => {
|
|||||||
}
|
}
|
||||||
}) as RequestHandler);
|
}) as RequestHandler);
|
||||||
|
|
||||||
app.listen(port, () => {
|
// Export server startup function for better testing
|
||||||
console.log(`Server running at http://localhost:${port}`);
|
export function startServer() {
|
||||||
});
|
return app.listen(port, () => {
|
||||||
|
console.log(`Server running at http://localhost:${port}`);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start server if running directly
|
||||||
|
if (process.env.NODE_ENV !== 'test') {
|
||||||
|
startServer();
|
||||||
|
}
|
||||||
|
|
||||||
export default app;
|
export default app;
|
||||||
|
|||||||
@@ -1,13 +1,25 @@
|
|||||||
import axios from 'axios';
|
import axios, { AxiosError } from 'axios';
|
||||||
import { TokenTracker } from "../utils/token-tracker";
|
import { TokenTracker } from "../utils/token-tracker";
|
||||||
import {JINA_API_KEY} from "../config";
|
import {JINA_API_KEY} from "../config";
|
||||||
|
|
||||||
const JINA_API_URL = 'https://api.jina.ai/v1/embeddings';
|
const JINA_API_URL = 'https://api.jina.ai/v1/embeddings';
|
||||||
const SIMILARITY_THRESHOLD = 0.93; // Adjustable threshold for cosine similarity
|
const SIMILARITY_THRESHOLD = 0.93; // Adjustable threshold for cosine similarity
|
||||||
|
|
||||||
|
// Fixed parameters for Jina embeddings requests, kept in one place so the request
// builder does not hard-code them. `as const` keeps the values as literal types.
const JINA_API_CONFIG = {
    // Embedding model identifier.
    MODEL: 'jina-embeddings-v3',
    // Task name passed to the API.
    TASK: 'text-matching',
    // Requested embedding vector size.
    DIMENSIONS: 1024,
    // Element type of the returned vectors.
    EMBEDDING_TYPE: 'float',
    // Late chunking is turned off.
    LATE_CHUNKING: false
} as const;
|
||||||
|
|
||||||
// Types for Jina API
|
// Types for Jina API
|
||||||
interface JinaEmbeddingRequest {
|
interface JinaEmbeddingRequest {
|
||||||
model: string;
|
model: string;
|
||||||
|
task: string;
|
||||||
|
late_chunking: boolean;
|
||||||
|
dimensions: number;
|
||||||
|
embedding_type: string;
|
||||||
input: string[];
|
input: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -41,7 +53,11 @@ async function getEmbeddings(queries: string[]): Promise<{ embeddings: number[][
|
|||||||
}
|
}
|
||||||
|
|
||||||
const request: JinaEmbeddingRequest = {
|
const request: JinaEmbeddingRequest = {
|
||||||
model: 'jina-embeddings-v3',
|
model: JINA_API_CONFIG.MODEL,
|
||||||
|
task: JINA_API_CONFIG.TASK,
|
||||||
|
late_chunking: JINA_API_CONFIG.LATE_CHUNKING,
|
||||||
|
dimensions: JINA_API_CONFIG.DIMENSIONS,
|
||||||
|
embedding_type: JINA_API_CONFIG.EMBEDDING_TYPE,
|
||||||
input: queries
|
input: queries
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -57,6 +73,15 @@ async function getEmbeddings(queries: string[]): Promise<{ embeddings: number[][
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Validate response format
|
||||||
|
if (!response.data.data || response.data.data.length !== queries.length) {
|
||||||
|
console.error('Invalid response from Jina API:', response.data);
|
||||||
|
return {
|
||||||
|
embeddings: [],
|
||||||
|
tokens: 0
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
// Sort embeddings by index to maintain original order
|
// Sort embeddings by index to maintain original order
|
||||||
const embeddings = response.data.data
|
const embeddings = response.data.data
|
||||||
.sort((a, b) => a.index - b.index)
|
.sort((a, b) => a.index - b.index)
|
||||||
@@ -68,6 +93,12 @@ async function getEmbeddings(queries: string[]): Promise<{ embeddings: number[][
|
|||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error getting embeddings from Jina:', error);
|
console.error('Error getting embeddings from Jina:', error);
|
||||||
|
if (error instanceof AxiosError && error.response?.status === 402) {
|
||||||
|
return {
|
||||||
|
embeddings: [],
|
||||||
|
tokens: 0
|
||||||
|
};
|
||||||
|
}
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -91,6 +122,15 @@ export async function dedupQueries(
|
|||||||
const allQueries = [...newQueries, ...existingQueries];
|
const allQueries = [...newQueries, ...existingQueries];
|
||||||
const { embeddings: allEmbeddings, tokens } = await getEmbeddings(allQueries);
|
const { embeddings: allEmbeddings, tokens } = await getEmbeddings(allQueries);
|
||||||
|
|
||||||
|
// If embeddings is empty (due to 402 error), return all new queries
|
||||||
|
if (!allEmbeddings.length) {
|
||||||
|
console.log('Dedup (no embeddings):', newQueries);
|
||||||
|
return {
|
||||||
|
unique_queries: newQueries,
|
||||||
|
tokens: 0
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
// Split embeddings back into new and existing
|
// Split embeddings back into new and existing
|
||||||
const newEmbeddings = allEmbeddings.slice(0, newQueries.length);
|
const newEmbeddings = allEmbeddings.slice(0, newQueries.length);
|
||||||
const existingEmbeddings = allEmbeddings.slice(newQueries.length);
|
const existingEmbeddings = allEmbeddings.slice(newQueries.length);
|
||||||
|
|||||||
69
src/types.ts
69
src/types.ts
@@ -31,9 +31,24 @@ export type VisitAction = BaseAction & {
|
|||||||
export type StepAction = SearchAction | AnswerAction | ReflectAction | VisitAction;
|
export type StepAction = SearchAction | AnswerAction | ReflectAction | VisitAction;
|
||||||
|
|
||||||
// Response Types
|
// Response Types
|
||||||
|
/**
 * Category labels that can be attached to a tracked token usage entry.
 * The string values are the keys used when usage is summed per category.
 */
export const TOKEN_CATEGORIES = {
    /** Tokens attributed to the incoming prompt. */
    PROMPT: 'prompt',
    /** Tokens attributed to intermediate reasoning. */
    REASONING: 'reasoning',
    /** Tokens attributed to an accepted final result. */
    ACCEPTED: 'accepted',
    /** Tokens attributed to a rejected result. */
    REJECTED: 'rejected'
} as const;

/** Union of the category label values: 'prompt' | 'reasoning' | 'accepted' | 'rejected'. */
export type TokenCategory = (typeof TOKEN_CATEGORIES)[keyof typeof TOKEN_CATEGORIES];
|
||||||
|
|
||||||
|
// Following Vercel AI SDK's token counting interface
|
||||||
export interface TokenUsage {
|
export interface TokenUsage {
|
||||||
tool: string;
|
tool: string;
|
||||||
tokens: number;
|
tokens: number;
|
||||||
|
category?: TokenCategory;
|
||||||
|
// Following Vercel AI SDK's token counting interface
|
||||||
|
prompt_tokens?: number;
|
||||||
|
completion_tokens?: number;
|
||||||
|
total_tokens?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface SearchResponse {
|
export interface SearchResponse {
|
||||||
@@ -144,6 +159,60 @@ export interface StreamMessage {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// OpenAI API Types
|
||||||
|
/** Shape of a chat completion request body (OpenAI-style). */
export interface ChatCompletionRequest {
    /** Model name supplied by the client. */
    model: string;
    /** Conversation messages in order; each has a role and text content. */
    messages: {
        role: string;
        content: string;
    }[];
    /** Optional flag asking for a streamed response. */
    stream?: boolean;
}
|
||||||
|
|
||||||
|
/** Shape of a complete (non-streamed) chat completion response (OpenAI-style). */
export interface ChatCompletionResponse {
    /** Identifier of this completion. */
    id: string;
    /** Object type tag; always 'chat.completion'. */
    object: 'chat.completion';
    /** Creation time as a number. */
    created: number;
    /** Model name. */
    model: string;
    system_fingerprint: string;
    /** Completion choices; each carries one assistant message. */
    choices: {
        index: number;
        message: {
            role: 'assistant';
            content: string;
        };
        logprobs: null;
        finish_reason: 'stop';
    }[];
    /** Token accounting for the request. */
    usage: {
        prompt_tokens: number;
        completion_tokens: number;
        total_tokens: number;
        /** Optional breakdown of the completion tokens. */
        completion_tokens_details?: {
            reasoning_tokens: number;
            accepted_prediction_tokens: number;
            rejected_prediction_tokens: number;
        };
    };
}
|
||||||
|
|
||||||
|
/** Shape of one chunk of a streamed chat completion (OpenAI-style). */
export interface ChatCompletionChunk {
    /** Identifier of the completion this chunk belongs to. */
    id: string;
    /** Object type tag; always 'chat.completion.chunk'. */
    object: 'chat.completion.chunk';
    /** Creation time as a number. */
    created: number;
    /** Model name. */
    model: string;
    system_fingerprint: string;
    /** Incremental choices; each carries a delta rather than a full message. */
    choices: {
        index: number;
        /** Incremental update: an optional role and/or a piece of content. */
        delta: {
            role?: 'assistant';
            content?: string;
        };
        logprobs: null;
        /** null on intermediate chunks, 'stop' on the last one. */
        finish_reason: null | 'stop';
    }[];
}
|
||||||
|
|
||||||
// Tracker Types
|
// Tracker Types
|
||||||
import { TokenTracker } from './utils/token-tracker';
|
import { TokenTracker } from './utils/token-tracker';
|
||||||
import { ActionTracker } from './utils/action-tracker';
|
import { ActionTracker } from './utils/action-tracker';
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import { EventEmitter } from 'events';
|
import { EventEmitter } from 'events';
|
||||||
|
|
||||||
import { TokenUsage } from '../types';
|
import { TokenUsage, TokenCategory } from '../types';
|
||||||
|
|
||||||
export class TokenTracker extends EventEmitter {
|
export class TokenTracker extends EventEmitter {
|
||||||
private usages: TokenUsage[] = [];
|
private usages: TokenUsage[] = [];
|
||||||
@@ -11,15 +11,16 @@ export class TokenTracker extends EventEmitter {
|
|||||||
this.budget = budget;
|
this.budget = budget;
|
||||||
}
|
}
|
||||||
|
|
||||||
trackUsage(tool: string, tokens: number) {
|
trackUsage(tool: string, tokens: number, category?: TokenCategory) {
|
||||||
const currentTotal = this.getTotalUsage();
|
const currentTotal = this.getTotalUsage();
|
||||||
if (this.budget && currentTotal + tokens > this.budget) {
|
if (this.budget && currentTotal + tokens > this.budget) {
|
||||||
console.error(`Token budget exceeded: ${currentTotal + tokens} > ${this.budget}`);
|
console.error(`Token budget exceeded: ${currentTotal + tokens} > ${this.budget}`);
|
||||||
}
|
}
|
||||||
// Only track usage if we're within budget
|
// Only track usage if we're within budget
|
||||||
if (!this.budget || currentTotal + tokens <= this.budget) {
|
if (!this.budget || currentTotal + tokens <= this.budget) {
|
||||||
this.usages.push({ tool, tokens });
|
const usage = { tool, tokens, category };
|
||||||
this.emit('usage', { tool, tokens });
|
this.usages.push(usage);
|
||||||
|
this.emit('usage', usage);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -34,6 +35,41 @@ export class TokenTracker extends EventEmitter {
|
|||||||
}, {} as Record<string, number>);
|
}, {} as Record<string, number>);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Summarises the tracked usage in the OpenAI `usage` shape.
 *
 * Only entries recorded with a category are counted; entries without one are ignored.
 * `completion_tokens` is the sum of the reasoning, accepted and rejected categories, and
 * `total_tokens` is `prompt_tokens + completion_tokens`.
 *
 * @returns the per-category totals, with zero for any category that has no entries
 */
getUsageDetails(): {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
    completion_tokens_details?: {
        reasoning_tokens: number;
        accepted_prediction_tokens: number;
        rejected_prediction_tokens: number;
    };
} {
    // Running total per category, starting every known category at zero.
    const totals = { prompt: 0, reasoning: 0, accepted: 0, rejected: 0 };
    for (const usage of this.usages) {
        if (usage.category) {
            totals[usage.category] += usage.tokens;
        }
    }

    const completionTokens = totals.reasoning + totals.accepted + totals.rejected;

    return {
        prompt_tokens: totals.prompt,
        completion_tokens: completionTokens,
        total_tokens: totals.prompt + completionTokens,
        completion_tokens_details: {
            reasoning_tokens: totals.reasoning,
            accepted_prediction_tokens: totals.accepted,
            rejected_prediction_tokens: totals.rejected
        }
    };
}
|
||||||
|
|
||||||
printSummary() {
|
printSummary() {
|
||||||
const breakdown = this.getUsageBreakdown();
|
const breakdown = this.getUsageBreakdown();
|
||||||
console.log('Token Usage Summary:', {
|
console.log('Token Usage Summary:', {
|
||||||
|
|||||||
Reference in New Issue
Block a user