feat: add json schema support

This commit is contained in:
Han Xiao 2025-03-03 10:51:03 +08:00
parent 61b28da719
commit 7c035f59c5
5 changed files with 81 additions and 17 deletions

View File

@ -1,6 +1,6 @@
# DeepResearch
[Official UI](https://search.jina.ai/) | [UI Code](https://github.com/jina-ai/deepsearch-ui) | [Official API](https://jina.ai/deepsearch) | [Evaluation](#evaluation)
[Official UI](https://search.jina.ai/) | [UI Code](https://github.com/jina-ai/deepsearch-ui) | [Official API](https://jina.ai/deepsearch) | [Blog](https://jina.ai/news/a-practical-guide-to-implementing-deepsearch-deepresearch)
Keep searching, reading webpages, reasoning until an answer is found (or the token budget is exceeded). Useful for deeply investigating a query.

27
package-lock.json generated
View File

@ -10,8 +10,9 @@
"license": "Apache-2.0",
"dependencies": {
"@ai-sdk/google": "^1.0.0",
"@ai-sdk/google-vertex": "*",
"@ai-sdk/openai": "^1.1.9",
"@dmitryrechkin/json-schema-to-zod": "^1.0.0",
"add": "^2.0.6",
"ai": "^4.1.26",
"axios": "^1.7.9",
"commander": "^13.1.0",
@ -19,6 +20,7 @@
"dotenv": "^16.4.7",
"duck-duck-scrape": "^2.2.7",
"express": "^4.21.2",
"json-schema-to-zod": "^2.6.0",
"node-fetch": "^3.3.2",
"undici": "^7.3.0",
"zod": "^3.22.4",
@ -763,6 +765,14 @@
"node": ">=12"
}
},
"node_modules/@dmitryrechkin/json-schema-to-zod": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/@dmitryrechkin/json-schema-to-zod/-/json-schema-to-zod-1.0.0.tgz",
"integrity": "sha512-avV26RC8CRzhnL6AvQsURlkd071SXlcPURxiYFsRLpsMoDDXBBGJDIsNQTvYmevq31WHYdwGCKGgQKC0YIjDGg==",
"dependencies": {
"zod": "^3.23.8"
}
},
"node_modules/@eslint-community/eslint-utils": {
"version": "4.4.1",
"resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.4.1.tgz",
@ -2069,6 +2079,12 @@
"node": ">=0.4.0"
}
},
"node_modules/add": {
"version": "2.0.6",
"resolved": "https://registry.npmjs.org/add/-/add-2.0.6.tgz",
"integrity": "sha512-j5QzrmsokwWWp6kUcJQySpbG+xfOBqqKnup3OIk1pz+kB/80SLorZ9V8zHFLO92Lcd+hbvq8bT+zOGoPkmBV0Q==",
"license": "MIT"
},
"node_modules/agent-base": {
"version": "7.1.3",
"resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.3.tgz",
@ -5258,6 +5274,15 @@
"integrity": "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==",
"license": "(AFL-2.1 OR BSD-3-Clause)"
},
"node_modules/json-schema-to-zod": {
"version": "2.6.0",
"resolved": "https://registry.npmjs.org/json-schema-to-zod/-/json-schema-to-zod-2.6.0.tgz",
"integrity": "sha512-6sFZqOzHZeON8g2ZW5HJ114Hb/FffNCjWh8dgulJaKFkUqKCEWZAzF4+g07SQpfBZF7HXemwedtdLypZzmnVpQ==",
"license": "ISC",
"bin": {
"json-schema-to-zod": "dist/cjs/cli.js"
}
},
"node_modules/json-schema-traverse": {
"version": "0.4.1",
"resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz",

View File

@ -11,6 +11,8 @@ import {
} from './types';
import {TokenTracker} from "./utils/token-tracker";
import {ActionTracker} from "./utils/action-tracker";
import {ObjectGeneratorSafe} from "./utils/safe-generator";
import {jsonSchema} from "ai"; // or another converter library
const app = express();
@ -189,7 +191,7 @@ async function emitRemainingContent(
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content},
delta: {content, type: "think"},
logprobs: null,
finish_reason: null
}],
@ -335,7 +337,7 @@ async function processQueue(streamingState: StreamingState, res: Response, reque
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content: word},
delta: {content: word, type: "think"},
logprobs: null,
finish_reason: null
}]
@ -404,6 +406,17 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
maxBadAttempts = body.max_attempts;
}
let responseSchema = undefined;
if (body.response_format?.json_schema) {
// Wrap the client-supplied JSON schema in an AI SDK Schema via jsonSchema(); no Zod conversion happens here
try {
responseSchema = jsonSchema(body.response_format.json_schema);
console.log(responseSchema)
} catch (error: any) {
return res.status(400).json({error: `Invalid JSON schema: ${error.message}`});
}
}
const requestId = Date.now().toString();
const created = Math.floor(Date.now() / 1000);
const context: TrackerContext = {
@ -436,7 +449,7 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {role: 'assistant', content: '<think>'},
delta: {role: 'assistant', content: '<think>', type: "text"},
logprobs: null,
finish_reason: null
}]
@ -476,12 +489,31 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
visitedURLs: visitedURLs,
readURLs: readURLs
} = await getResponse(undefined, tokenBudget, maxBadAttempts, context, body.messages)
let finalAnswer = (finalStep as AnswerAction).mdAnswer;
if (responseSchema) {
try {
console.log('hello2')
const generator = new ObjectGeneratorSafe(context?.tokenTracker);
const result = await generator.generateObject({
model: 'agent',
schema: responseSchema,
prompt: finalAnswer,
system: "Extract the structured data from the text according to the JSON schema.",
});
// Use the generated object as the response content
finalAnswer = JSON.stringify(result.object, null, 2);
console.log('Generated object:', finalAnswer)
} catch (error) {
console.error('Error processing response with schema:', error);
}
}
const usage = context.tokenTracker.getTotalUsageSnakeCase();
if (body.stream) {
// Complete any ongoing streaming before sending final answer
await completeCurrentStreaming(streamingState, res, requestId, created, body.model);
const finalAnswer = (finalStep as AnswerAction).mdAnswer;
// Send closing think tag
const closeThinkChunk: ChatCompletionChunk = {
id: requestId,
@ -491,7 +523,7 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content: `</think>\n\n${finalAnswer}`},
delta: {content: `</think>\n\n`, type: "think"},
logprobs: null,
finish_reason: null
}]
@ -507,7 +539,7 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content: ''},
delta: {content: finalAnswer, type: "text"},
logprobs: null,
finish_reason: 'stop'
}],
@ -529,7 +561,7 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
index: 0,
message: {
role: 'assistant',
content: finalStep.action === 'answer' ? (finalStep.mdAnswer || '') : finalStep.think
content: finalStep.action === 'answer' ? (finalAnswer || '') : finalStep.think
},
logprobs: null,
finish_reason: 'stop'
@ -580,7 +612,7 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content: '</think>'},
delta: {content: '</think>', type: "think"},
logprobs: null,
finish_reason: null
}],
@ -597,7 +629,7 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
system_fingerprint: 'fp_' + requestId,
choices: [{
index: 0,
delta: {content: errorMessage},
delta: {content: errorMessage, type: "error"},
logprobs: null,
finish_reason: 'stop'
}],

View File

@ -185,15 +185,21 @@ export interface Model {
export type PromptPair = { system: string, user: string };
/**
 * Output-format selector carried on a chat completion request.
 *
 * - `json_schema`: optional schema payload; deliberately left untyped (`any`).
 * - `type`: which format kind is requested.
 */
export type ResponseFormat = {
  json_schema?: any;
  type: 'json_object' | 'json_schema';
};
export interface ChatCompletionRequest {
model: string;
messages: Array<CoreUserMessage | CoreAssistantMessage>;
stream?: boolean;
reasoning_effort?: 'low' | 'medium' | 'high' | null;
max_completion_tokens?: number | null;
reasoning_effort?: 'low' | 'medium' | 'high';
max_completion_tokens?: number;
budget_tokens?: number | null;
max_attempts?: number | null;
budget_tokens?: number;
max_attempts?: number;
response_format?: ResponseFormat;
}
export interface ChatCompletionResponse {
@ -231,6 +237,7 @@ export interface ChatCompletionChunk {
delta: {
role?: 'assistant';
content?: string;
type?: 'text' | 'think' | 'json' | 'error';
};
logprobs: null;
finish_reason: null | 'stop';

View File

@ -1,5 +1,5 @@
import { z } from 'zod';
import {generateObject, LanguageModelUsage, NoObjectGeneratedError} from "ai";
import {generateObject, LanguageModelUsage, NoObjectGeneratedError, Schema} from "ai";
import {TokenTracker} from "./token-tracker";
import {getModel, ToolName, getToolConfig} from "../config";
@ -10,7 +10,7 @@ interface GenerateObjectResult<T> {
interface GenerateOptions<T> {
model: ToolName;
schema: z.ZodType<T>;
schema: z.ZodType<T> | Schema<T>;
prompt?: string;
system?:string;
messages?: any;