mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2026-03-22 07:29:35 +08:00
feat: add serpCluster integration and schema
This commit is contained in:
@@ -45,6 +45,7 @@
|
|||||||
"temperature": 0.1
|
"temperature": 0.1
|
||||||
},
|
},
|
||||||
"researchPlanner": {},
|
"researchPlanner": {},
|
||||||
|
"serpCluster": {},
|
||||||
"agent": {
|
"agent": {
|
||||||
"temperature": 0.7
|
"temperature": 0.7
|
||||||
},
|
},
|
||||||
@@ -79,6 +80,7 @@
|
|||||||
"queryRewriter": {
|
"queryRewriter": {
|
||||||
"temperature": 0.1
|
"temperature": 0.1
|
||||||
},
|
},
|
||||||
|
"serpCluster": {},
|
||||||
"agent": {
|
"agent": {
|
||||||
"temperature": 0.7
|
"temperature": 0.7
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -47,6 +47,7 @@
|
|||||||
"evaluator": {
|
"evaluator": {
|
||||||
"maxTokens": 2000
|
"maxTokens": 2000
|
||||||
},
|
},
|
||||||
|
"serpCluster": {},
|
||||||
"errorAnalyzer": {
|
"errorAnalyzer": {
|
||||||
"maxTokens": 1000
|
"maxTokens": 1000
|
||||||
},
|
},
|
||||||
@@ -60,7 +61,9 @@
|
|||||||
"model": "gemini-2.0-flash-lite"
|
"model": "gemini-2.0-flash-lite"
|
||||||
},
|
},
|
||||||
"finalizer": {},
|
"finalizer": {},
|
||||||
"reducer": {"maxTokens": 16000}
|
"reducer": {
|
||||||
|
"maxTokens": 16000
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"openai": {
|
"openai": {
|
||||||
@@ -79,6 +82,7 @@
|
|||||||
"queryRewriter": {
|
"queryRewriter": {
|
||||||
"temperature": 0.1
|
"temperature": 0.1
|
||||||
},
|
},
|
||||||
|
"serpCluster": {},
|
||||||
"agent": {
|
"agent": {
|
||||||
"temperature": 0.7
|
"temperature": 0.7
|
||||||
},
|
},
|
||||||
@@ -89,7 +93,9 @@
|
|||||||
"temperature": 0
|
"temperature": 0
|
||||||
},
|
},
|
||||||
"finalizer": {},
|
"finalizer": {},
|
||||||
"reducer": {"maxTokens": 16000}
|
"reducer": {
|
||||||
|
"maxTokens": 16000
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
30
src/agent.ts
30
src/agent.ts
@@ -48,6 +48,7 @@ import { researchPlan } from './tools/research-planner';
|
|||||||
import { reduceAnswers } from './tools/reducer';
|
import { reduceAnswers } from './tools/reducer';
|
||||||
import { AxiosError } from 'axios';
|
import { AxiosError } from 'axios';
|
||||||
import { dedupImagesWithEmbeddings } from './utils/image-tools';
|
import { dedupImagesWithEmbeddings } from './utils/image-tools';
|
||||||
|
import { serpCluster } from './tools/serp-cluster';
|
||||||
|
|
||||||
async function wait(seconds: number) {
|
async function wait(seconds: number) {
|
||||||
logDebug(`Waiting ${seconds}s...`);
|
logDebug(`Waiting ${seconds}s...`);
|
||||||
@@ -361,12 +362,27 @@ async function executeSearchQueries(
|
|||||||
|
|
||||||
searchedQueries.push(query.q)
|
searchedQueries.push(query.q)
|
||||||
|
|
||||||
newKnowledge.push({
|
try {
|
||||||
question: `What do Internet say about "${oldQuery}"?`,
|
const clusters = await serpCluster(minResults, context, SchemaGen);
|
||||||
answer: removeHTMLtags(minResults.map(r => r.description).join('; ')),
|
clusters.forEach(c => {
|
||||||
type: 'side-info',
|
newKnowledge.push({
|
||||||
updated: query.tbs ? formatDateRange(query) : undefined
|
question: c.question,
|
||||||
});
|
answer: c.insight,
|
||||||
|
references: c.urls,
|
||||||
|
type: 'url',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
} catch (error) {
|
||||||
|
logWarning('serpCluster failed:', { error });
|
||||||
|
newKnowledge.push({
|
||||||
|
question: `What do Internet say about "${oldQuery}"?`,
|
||||||
|
answer: removeHTMLtags(minResults.map(r => r.description).join('; ')),
|
||||||
|
type: 'side-info',
|
||||||
|
updated: query.tbs ? formatDateRange(query) : undefined
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
if (searchedQueries.length === 0) {
|
if (searchedQueries.length === 0) {
|
||||||
if (onlyHostnames && onlyHostnames.length > 0) {
|
if (onlyHostnames && onlyHostnames.length > 0) {
|
||||||
@@ -405,7 +421,7 @@ export async function getResponse(question?: string,
|
|||||||
searchLanguageCode?: string,
|
searchLanguageCode?: string,
|
||||||
searchProvider?: string,
|
searchProvider?: string,
|
||||||
withImages: boolean = false,
|
withImages: boolean = false,
|
||||||
teamSize: number = 2
|
teamSize: number = 1
|
||||||
): Promise<{ result: StepAction; context: TrackerContext; visitedURLs: string[], readURLs: string[], allURLs: string[], imageReferences?: ImageReference[] }> {
|
): Promise<{ result: StepAction; context: TrackerContext; visitedURLs: string[], readURLs: string[], allURLs: string[], imageReferences?: ImageReference[] }> {
|
||||||
|
|
||||||
let step = 0;
|
let step = 0;
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import { TokenTracker } from "../utils/token-tracker";
|
|||||||
import { JinaSearchResponse, SERPQuery } from '../types';
|
import { JinaSearchResponse, SERPQuery } from '../types';
|
||||||
import { JINA_API_KEY } from "../config";
|
import { JINA_API_KEY } from "../config";
|
||||||
import axiosClient from '../utils/axios-client';
|
import axiosClient from '../utils/axios-client';
|
||||||
import { logInfo, logError, logDebug, logWarning } from '../logging';
|
import { logError, logDebug } from '../logging';
|
||||||
|
|
||||||
export async function search(
|
export async function search(
|
||||||
query: SERPQuery,
|
query: SERPQuery,
|
||||||
|
|||||||
40
src/tools/serp-cluster.ts
Normal file
40
src/tools/serp-cluster.ts
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
import { PromptPair, TrackerContext } from '../types';
|
||||||
|
import { ObjectGeneratorSafe } from "../utils/safe-generator";
|
||||||
|
import { Schemas } from "../utils/schemas";
|
||||||
|
import { logInfo, logError } from '../logging';
|
||||||
|
import { SearchSnippet } from '../types';
|
||||||
|
|
||||||
|
/**
 * Builds the system/user prompt pair for the SERP clustering call.
 *
 * The system message sets the model's role (grouping search results into
 * clusters and responding in JSON). The user message is the result list
 * serialized with `JSON.stringify`, wrapped in leading/trailing newlines.
 *
 * @param results - Search result snippets to be clustered.
 * @returns An object with the `system` and `user` prompt strings.
 */
function getPrompt(results: SearchSnippet[]): PromptPair {
  const serializedResults = JSON.stringify(results);

  const system = `
You are a search engine result analyzer. You look at the SERP API response and group them into meaningful cluster.

Each cluster should contain a summary of the content, key data and insights, the corresponding URLs and search advice. Respond in JSON format.
`;

  const user = `
${serializedResults}
`;

  return { system, user };
}
|
||||||
|
// Passed as `model` to the generator and used as the label for log entries.
const TOOL_NAME = 'serpCluster';

/**
 * Groups search result snippets into clusters via a structured model call.
 *
 * The generated object's `think` field is forwarded to the action tracker and
 * its `clusters` field is logged and returned.
 *
 * @param results - Search result snippets to cluster.
 * @param trackers - Supplies `tokenTracker` for the generator and
 *   `actionTracker` for recording the model's reasoning.
 * @param schemaGen - Provides the response schema via `getSerpClusterSchema()`.
 * @returns The `clusters` array of the generated object, or an empty array
 *   when there are no results to cluster.
 * @throws Re-throws any error raised during generation, after logging it.
 */
export async function serpCluster(results: SearchSnippet[], trackers: TrackerContext, schemaGen: Schemas): Promise<any[]> {
  // Nothing to cluster: skip the model call entirely.
  if (!results || results.length === 0) {
    return [];
  }

  try {
    const generator = new ObjectGeneratorSafe(trackers.tokenTracker);
    const prompt = getPrompt(results);
    const result = await generator.generateObject({
      model: TOOL_NAME,
      schema: schemaGen.getSerpClusterSchema(),
      system: prompt.system,
      prompt: prompt.user,
    });

    // `trackers` is already dereferenced unconditionally above, so optional
    // chaining here would only suggest a nullability that cannot occur.
    trackers.actionTracker.trackThink(result.object.think);

    const clusters = result.object.clusters;
    logInfo(TOOL_NAME, { clusters });
    return clusters;
  } catch (error) {
    // Log with the tool label, then let the caller decide how to recover.
    logError(TOOL_NAME, { error });
    throw error;
  }
}
|
||||||
@@ -6,6 +6,7 @@ import { logDebug } from '../logging';
|
|||||||
export const MAX_URLS_PER_STEP = 5
|
export const MAX_URLS_PER_STEP = 5
|
||||||
export const MAX_QUERIES_PER_STEP = 5
|
export const MAX_QUERIES_PER_STEP = 5
|
||||||
export const MAX_REFLECT_PER_STEP = 2
|
export const MAX_REFLECT_PER_STEP = 2
|
||||||
|
export const MAX_CLUSTERS = 5
|
||||||
|
|
||||||
function getLanguagePrompt(question: string): PromptPair {
|
function getLanguagePrompt(question: string): PromptPair {
|
||||||
return {
|
return {
|
||||||
@@ -173,6 +174,20 @@ export class Schemas {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the zod schema for the SERP clustering response.
 *
 * The object has a `think` string (at most 500 characters, with the language
 * prompt appended to its description) and a `clusters` array of at most
 * `MAX_CLUSTERS` entries, each with a `question`, an `insight` and a list of
 * `urls`.
 *
 * @returns The zod object schema describing the expected model output.
 */
getSerpClusterSchema(): z.ZodObject<any> {
  return z.object({
    think: z.string().describe(`Explain why you cluster the search results like this. ${this.getLanguagePrompt()}`).max(500),
    clusters: z.array(
      z.object({
        question: z.string().describe('What question this cluster answers.').max(100),
        insight: z.string().describe('Summary and list key numbers, data and insights that worth to be highlighted. End with an actionable advice such as "Visit these URLs if you want to understand [what...]". Do not use "This cluster..."').max(200),
        // NOTE(review): `.max(100)` is chained on the string, so it limits the
        // length of each URL to 100 characters, not the number of URLs.
        // Confirm this is intended.
        urls: z.array(z.string().describe('URLs in this cluster.').max(100))
      }))
      .max(MAX_CLUSTERS)
      // The description previously carried leftover single quotes inside the
      // template literal, which were sent verbatim as part of the text.
      .describe(`The optimal clustering of search engine results, orthogonal to each other. Maximum ${MAX_CLUSTERS} clusters allowed.`)
  });
}
|
||||||
|
|
||||||
getQueryRewriterSchema(): z.ZodObject<any> {
|
getQueryRewriterSchema(): z.ZodObject<any> {
|
||||||
return z.object({
|
return z.object({
|
||||||
think: z.string().describe(`Explain why you choose those search queries. ${this.getLanguagePrompt()}`).max(500),
|
think: z.string().describe(`Explain why you choose those search queries. ${this.getLanguagePrompt()}`).max(500),
|
||||||
|
|||||||
Reference in New Issue
Block a user