mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
fix: improve json parsing resilience and disable gemini thinking
- add jsonrepair fallback for truncated LLM output
- disable gemini built-in thinking mode (thinkingBudget: 0)
- increase token limits for errorAnalyzer, queryRewriter, serpCluster
- switch production default to gemini-2.5-flash-lite
- fix normalizeHostName to handle wildcard patterns
This commit is contained in:
parent
579fd95fff
commit
d44cec6524
13
config.json
13
config.json
@ -38,14 +38,19 @@
|
|||||||
},
|
},
|
||||||
"evaluator": {
|
"evaluator": {
|
||||||
"temperature": 0.6,
|
"temperature": 0.6,
|
||||||
"maxTokens": 200
|
"maxTokens": 1000
|
||||||
|
},
|
||||||
|
"errorAnalyzer": {
|
||||||
|
"maxTokens": 4000
|
||||||
},
|
},
|
||||||
"errorAnalyzer": {},
|
|
||||||
"queryRewriter": {
|
"queryRewriter": {
|
||||||
"temperature": 0.1
|
"temperature": 0.1,
|
||||||
|
"maxTokens": 4000
|
||||||
},
|
},
|
||||||
"researchPlanner": {},
|
"researchPlanner": {},
|
||||||
"serpCluster": {},
|
"serpCluster": {
|
||||||
|
"maxTokens": 4000
|
||||||
|
},
|
||||||
"agent": {
|
"agent": {
|
||||||
"temperature": 0.7
|
"temperature": 0.7
|
||||||
},
|
},
|
||||||
|
|||||||
@ -34,31 +34,35 @@
|
|||||||
"models": {
|
"models": {
|
||||||
"gemini": {
|
"gemini": {
|
||||||
"default": {
|
"default": {
|
||||||
"model": "gemini-2.5-flash",
|
"model": "gemini-2.5-flash-lite",
|
||||||
"temperature": 0.6,
|
"temperature": 0.6,
|
||||||
"maxTokens": 8000
|
"maxTokens": 8000
|
||||||
},
|
},
|
||||||
"tools": {
|
"tools": {
|
||||||
"coder": {
|
"coder": {
|
||||||
"maxTokens": 2000,
|
"maxTokens": 2000
|
||||||
"model": "gemini-2.5-flash-lite"
|
|
||||||
},
|
},
|
||||||
"researchPlanner": {},
|
"researchPlanner": {},
|
||||||
"evaluator": {
|
"evaluator": {
|
||||||
"maxTokens": 2000
|
"maxTokens": 2000
|
||||||
},
|
},
|
||||||
"serpCluster": {},
|
"serpCluster": {
|
||||||
|
"maxTokens": 4000
|
||||||
|
},
|
||||||
"errorAnalyzer": {
|
"errorAnalyzer": {
|
||||||
"maxTokens": 1000
|
"maxTokens": 4000
|
||||||
},
|
},
|
||||||
"queryRewriter": {
|
"queryRewriter": {
|
||||||
"maxTokens": 2000
|
"maxTokens": 4000
|
||||||
|
},
|
||||||
|
"agent": {
|
||||||
|
"model": "gemini-2.5-flash"
|
||||||
|
},
|
||||||
|
"agentBeastMode": {
|
||||||
|
"model": "gemini-2.5-flash"
|
||||||
},
|
},
|
||||||
"agent": {},
|
|
||||||
"agentBeastMode": {},
|
|
||||||
"fallback": {
|
"fallback": {
|
||||||
"maxTokens": 8000,
|
"maxTokens": 8000
|
||||||
"model": "gemini-2.5-flash-lite"
|
|
||||||
},
|
},
|
||||||
"finalizer": {},
|
"finalizer": {},
|
||||||
"reducer": {
|
"reducer": {
|
||||||
|
|||||||
10
package-lock.json
generated
10
package-lock.json
generated
@ -22,6 +22,7 @@
|
|||||||
"express-validator": "^7.2.1",
|
"express-validator": "^7.2.1",
|
||||||
"hjson": "^3.2.2",
|
"hjson": "^3.2.2",
|
||||||
"jsdom": "^26.0.0",
|
"jsdom": "^26.0.0",
|
||||||
|
"jsonrepair": "^3.13.1",
|
||||||
"node-fetch": "^3.3.2",
|
"node-fetch": "^3.3.2",
|
||||||
"sharp": "^0.34.2",
|
"sharp": "^0.34.2",
|
||||||
"undici": "^7.3.0",
|
"undici": "^7.3.0",
|
||||||
@ -6230,6 +6231,15 @@
|
|||||||
"url": "https://github.com/chalk/chalk?sponsor=1"
|
"url": "https://github.com/chalk/chalk?sponsor=1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/jsonrepair": {
|
||||||
|
"version": "3.13.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/jsonrepair/-/jsonrepair-3.13.1.tgz",
|
||||||
|
"integrity": "sha512-WJeiE0jGfxYmtLwBTEk8+y/mYcaleyLXWaqp5bJu0/ZTSeG0KQq/wWQ8pmnkKenEdN6pdnn6QtcoSUkbqDHWNw==",
|
||||||
|
"license": "ISC",
|
||||||
|
"bin": {
|
||||||
|
"jsonrepair": "bin/cli.js"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/jwa": {
|
"node_modules/jwa": {
|
||||||
"version": "2.0.0",
|
"version": "2.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.0.tgz",
|
||||||
|
|||||||
@ -40,6 +40,7 @@
|
|||||||
"express-validator": "^7.2.1",
|
"express-validator": "^7.2.1",
|
||||||
"hjson": "^3.2.2",
|
"hjson": "^3.2.2",
|
||||||
"jsdom": "^26.0.0",
|
"jsdom": "^26.0.0",
|
||||||
|
"jsonrepair": "^3.13.1",
|
||||||
"node-fetch": "^3.3.2",
|
"node-fetch": "^3.3.2",
|
||||||
"sharp": "^0.34.2",
|
"sharp": "^0.34.2",
|
||||||
"undici": "^7.3.0",
|
"undici": "^7.3.0",
|
||||||
|
|||||||
@ -8,9 +8,15 @@ import {
|
|||||||
} from "ai";
|
} from "ai";
|
||||||
import { TokenTracker } from "./token-tracker";
|
import { TokenTracker } from "./token-tracker";
|
||||||
import { getModel, ToolName, getToolConfig } from "../config";
|
import { getModel, ToolName, getToolConfig } from "../config";
|
||||||
import Hjson from 'hjson'; // Import Hjson library
|
import Hjson from 'hjson';
|
||||||
import { logError, logDebug, logWarning } from '../logging';
|
import { logError, logDebug, logWarning } from '../logging';
|
||||||
|
|
||||||
|
// Dynamic import for ESM module
|
||||||
|
const getJsonRepair = async () => {
|
||||||
|
const { jsonrepair } = await import('jsonrepair');
|
||||||
|
return jsonrepair;
|
||||||
|
};
|
||||||
|
|
||||||
interface GenerateObjectResult<T> {
|
interface GenerateObjectResult<T> {
|
||||||
object: T;
|
object: T;
|
||||||
usage: LanguageModelUsage;
|
usage: LanguageModelUsage;
|
||||||
@ -154,6 +160,13 @@ export class ObjectGeneratorSafe {
|
|||||||
messages,
|
messages,
|
||||||
maxTokens: getToolConfig(model).maxTokens,
|
maxTokens: getToolConfig(model).maxTokens,
|
||||||
temperature: getToolConfig(model).temperature,
|
temperature: getToolConfig(model).temperature,
|
||||||
|
providerOptions: {
|
||||||
|
google: {
|
||||||
|
thinkingConfig: {
|
||||||
|
thinkingBudget: 0 // Disable Gemini's built-in thinking to avoid conflict with our schema's think field
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
this.tokenTracker.trackUsage(model, result.usage);
|
this.tokenTracker.trackUsage(model, result.usage);
|
||||||
@ -200,6 +213,13 @@ export class ObjectGeneratorSafe {
|
|||||||
schema: distilledSchema,
|
schema: distilledSchema,
|
||||||
prompt: `Following the given JSON schema, extract the field from below: \n\n ${failedOutput}`,
|
prompt: `Following the given JSON schema, extract the field from below: \n\n ${failedOutput}`,
|
||||||
temperature: getToolConfig('fallback').temperature,
|
temperature: getToolConfig('fallback').temperature,
|
||||||
|
providerOptions: {
|
||||||
|
google: {
|
||||||
|
thinkingConfig: {
|
||||||
|
thinkingBudget: 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
this.tokenTracker.trackUsage('fallback', fallbackResult.usage); // Track against fallback model
|
this.tokenTracker.trackUsage('fallback', fallbackResult.usage); // Track against fallback model
|
||||||
@ -224,29 +244,43 @@ export class ObjectGeneratorSafe {
|
|||||||
private async handleGenerateObjectError<T>(error: unknown): Promise<GenerateObjectResult<T>> {
|
private async handleGenerateObjectError<T>(error: unknown): Promise<GenerateObjectResult<T>> {
|
||||||
if (NoObjectGeneratedError.isInstance(error)) {
|
if (NoObjectGeneratedError.isInstance(error)) {
|
||||||
logWarning('Object not generated according to schema, fallback to manual parsing', { error });
|
logWarning('Object not generated according to schema, fallback to manual parsing', { error });
|
||||||
|
const rawText = (error as any).text;
|
||||||
|
|
||||||
|
// 1. First try standard JSON parsing
|
||||||
try {
|
try {
|
||||||
// First try standard JSON parsing
|
const partialResponse = JSON.parse(rawText);
|
||||||
const partialResponse = JSON.parse((error as any).text);
|
|
||||||
logDebug('JSON parse success!');
|
logDebug('JSON parse success!');
|
||||||
return {
|
return {
|
||||||
object: partialResponse as T,
|
object: partialResponse as T,
|
||||||
usage: (error as any).usage
|
usage: (error as any).usage
|
||||||
};
|
};
|
||||||
} catch (parseError) {
|
} catch (jsonError) {
|
||||||
// Use Hjson to parse the error response for more lenient parsing
|
// 2. Try jsonrepair to fix truncated/malformed JSON
|
||||||
try {
|
try {
|
||||||
const hjsonResponse = Hjson.parse((error as any).text);
|
const jsonrepair = await getJsonRepair();
|
||||||
|
const repairedJson = jsonrepair(rawText);
|
||||||
|
const repairedResponse = JSON.parse(repairedJson);
|
||||||
|
logDebug('jsonrepair parse success!');
|
||||||
|
return {
|
||||||
|
object: repairedResponse as T,
|
||||||
|
usage: (error as any).usage
|
||||||
|
};
|
||||||
|
} catch (repairError) {
|
||||||
|
// 3. Try Hjson for lenient parsing (trailing commas, comments, etc.)
|
||||||
|
try {
|
||||||
|
const hjsonResponse = Hjson.parse(rawText);
|
||||||
logDebug('Hjson parse success!');
|
logDebug('Hjson parse success!');
|
||||||
return {
|
return {
|
||||||
object: hjsonResponse as T,
|
object: hjsonResponse as T,
|
||||||
usage: (error as any).usage
|
usage: (error as any).usage
|
||||||
};
|
};
|
||||||
} catch (hjsonError) {
|
} catch (hjsonError) {
|
||||||
logError('Both JSON and Hjson parsing failed:', { error: hjsonError });
|
logError('All JSON parsing attempts failed (JSON, jsonrepair, Hjson):', { error: hjsonError });
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -194,6 +194,18 @@ const extractUrlParts = (urlStr: string) => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
export const normalizeHostName = (hostStr: string) => {
|
export const normalizeHostName = (hostStr: string) => {
|
||||||
|
// Handle wildcard patterns like *.medium.com
|
||||||
|
if (hostStr.startsWith('*.')) {
|
||||||
|
hostStr = hostStr.slice(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If it doesn't look like a URL (no protocol), just clean up the hostname directly
|
||||||
|
if (!hostStr.includes('://')) {
|
||||||
|
const cleaned = hostStr.startsWith('www.') ? hostStr.slice(4) : hostStr;
|
||||||
|
return cleaned.toLowerCase();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to parse as URL
|
||||||
const extract = extractUrlParts(hostStr);
|
const extract = extractUrlParts(hostStr);
|
||||||
const host = extract.hostname;
|
const host = extract.hostname;
|
||||||
if (!host) {
|
if (!host) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user