mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
feat: add serpCluster integration and schema
This commit is contained in:
parent
96aca78d6a
commit
44dae8efb3
@ -45,6 +45,7 @@
|
||||
"temperature": 0.1
|
||||
},
|
||||
"researchPlanner": {},
|
||||
"serpCluster": {},
|
||||
"agent": {
|
||||
"temperature": 0.7
|
||||
},
|
||||
@ -79,6 +80,7 @@
|
||||
"queryRewriter": {
|
||||
"temperature": 0.1
|
||||
},
|
||||
"serpCluster": {},
|
||||
"agent": {
|
||||
"temperature": 0.7
|
||||
},
|
||||
|
||||
@ -47,6 +47,7 @@
|
||||
"evaluator": {
|
||||
"maxTokens": 2000
|
||||
},
|
||||
"serpCluster": {},
|
||||
"errorAnalyzer": {
|
||||
"maxTokens": 1000
|
||||
},
|
||||
@ -60,7 +61,9 @@
|
||||
"model": "gemini-2.0-flash-lite"
|
||||
},
|
||||
"finalizer": {},
|
||||
"reducer": {"maxTokens": 16000}
|
||||
"reducer": {
|
||||
"maxTokens": 16000
|
||||
}
|
||||
}
|
||||
},
|
||||
"openai": {
|
||||
@ -79,6 +82,7 @@
|
||||
"queryRewriter": {
|
||||
"temperature": 0.1
|
||||
},
|
||||
"serpCluster": {},
|
||||
"agent": {
|
||||
"temperature": 0.7
|
||||
},
|
||||
@ -89,7 +93,9 @@
|
||||
"temperature": 0
|
||||
},
|
||||
"finalizer": {},
|
||||
"reducer": {"maxTokens": 16000}
|
||||
"reducer": {
|
||||
"maxTokens": 16000
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
18
src/agent.ts
18
src/agent.ts
@ -48,6 +48,7 @@ import { researchPlan } from './tools/research-planner';
|
||||
import { reduceAnswers } from './tools/reducer';
|
||||
import { AxiosError } from 'axios';
|
||||
import { dedupImagesWithEmbeddings } from './utils/image-tools';
|
||||
import { serpCluster } from './tools/serp-cluster';
|
||||
|
||||
async function wait(seconds: number) {
|
||||
logDebug(`Waiting ${seconds}s...`);
|
||||
@ -361,6 +362,18 @@ async function executeSearchQueries(
|
||||
|
||||
searchedQueries.push(query.q)
|
||||
|
||||
try {
|
||||
const clusters = await serpCluster(minResults, context, SchemaGen);
|
||||
clusters.forEach(c => {
|
||||
newKnowledge.push({
|
||||
question: c.question,
|
||||
answer: c.insight,
|
||||
references: c.urls,
|
||||
type: 'url',
|
||||
});
|
||||
});
|
||||
} catch (error) {
|
||||
logWarning('serpCluster failed:', { error });
|
||||
newKnowledge.push({
|
||||
question: `What do Internet say about "${oldQuery}"?`,
|
||||
answer: removeHTMLtags(minResults.map(r => r.description).join('; ')),
|
||||
@ -368,6 +381,9 @@ async function executeSearchQueries(
|
||||
updated: query.tbs ? formatDateRange(query) : undefined
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
if (searchedQueries.length === 0) {
|
||||
if (onlyHostnames && onlyHostnames.length > 0) {
|
||||
logWarning(`No results found for queries: ${uniqQOnly.join(', ')} on hostnames: ${onlyHostnames.join(', ')}`);
|
||||
@ -405,7 +421,7 @@ export async function getResponse(question?: string,
|
||||
searchLanguageCode?: string,
|
||||
searchProvider?: string,
|
||||
withImages: boolean = false,
|
||||
teamSize: number = 2
|
||||
teamSize: number = 1
|
||||
): Promise<{ result: StepAction; context: TrackerContext; visitedURLs: string[], readURLs: string[], allURLs: string[], imageReferences?: ImageReference[] }> {
|
||||
|
||||
let step = 0;
|
||||
|
||||
@ -2,7 +2,7 @@ import { TokenTracker } from "../utils/token-tracker";
|
||||
import { JinaSearchResponse, SERPQuery } from '../types';
|
||||
import { JINA_API_KEY } from "../config";
|
||||
import axiosClient from '../utils/axios-client';
|
||||
import { logInfo, logError, logDebug, logWarning } from '../logging';
|
||||
import { logError, logDebug } from '../logging';
|
||||
|
||||
export async function search(
|
||||
query: SERPQuery,
|
||||
|
||||
40
src/tools/serp-cluster.ts
Normal file
40
src/tools/serp-cluster.ts
Normal file
@ -0,0 +1,40 @@
|
||||
import { PromptPair, TrackerContext } from '../types';
|
||||
import { ObjectGeneratorSafe } from "../utils/safe-generator";
|
||||
import { Schemas } from "../utils/schemas";
|
||||
import { logInfo, logError } from '../logging';
|
||||
import { SearchSnippet } from '../types';
|
||||
|
||||
/**
 * Builds the system/user prompt pair used to cluster search results.
 *
 * @param results - Search snippets to analyse; they are embedded verbatim
 *   (via `JSON.stringify`) into the user message.
 * @returns A prompt pair whose `system` part is a fixed instruction and whose
 *   `user` part is the serialized `results`.
 */
function getPrompt(results: SearchSnippet[]): PromptPair {
  const serializedResults = JSON.stringify(results);

  const system = `
You are a search engine result analyzer. You look at the SERP API response and group them into meaningful cluster.

Each cluster should contain a summary of the content, key data and insights, the corresponding URLs and search advice. Respond in JSON format.
`;

  const user = `
${serializedResults}
`;

  return { system, user };
}
|
||||
// Name passed as the `model` option and used as the log label below.
// NOTE(review): presumably matches the "serpCluster" tool entry in the JSON config — confirm.
const TOOL_NAME = 'serpCluster';

/**
 * Asks the object generator to group search results into clusters.
 *
 * @param results - Search snippets to cluster.
 * @param trackers - Tracker context; its `tokenTracker` is handed to the
 *   generator and its `actionTracker` records the returned `think` text.
 * @param schemaGen - Schema provider; `getSerpClusterSchema()` supplies the output schema.
 * @returns The `clusters` array from the generated object.
 * @throws Re-throws any error raised while generating, after logging it.
 */
export async function serpCluster(results: SearchSnippet[], trackers: TrackerContext, schemaGen: Schemas): Promise<any[]> {
  try {
    const objectGenerator = new ObjectGeneratorSafe(trackers.tokenTracker);
    const { system, user } = getPrompt(results);

    const { object: output } = await objectGenerator.generateObject({
      model: TOOL_NAME,
      schema: schemaGen.getSerpClusterSchema(),
      system,
      prompt: user,
    });

    // The `?.` is redundant: `trackers.tokenTracker` was already dereferenced above.
    trackers?.actionTracker.trackThink(output.think);

    const { clusters } = output;
    logInfo(TOOL_NAME, { clusters });
    return clusters;
  } catch (err) {
    // Log for diagnostics, then let the caller decide how to fall back.
    logError(TOOL_NAME, { error: err });
    throw err;
  }
}
|
||||
@ -6,6 +6,7 @@ import { logDebug } from '../logging';
|
||||
export const MAX_URLS_PER_STEP = 5
|
||||
export const MAX_QUERIES_PER_STEP = 5
|
||||
export const MAX_REFLECT_PER_STEP = 2
|
||||
export const MAX_CLUSTERS = 5
|
||||
|
||||
function getLanguagePrompt(question: string): PromptPair {
|
||||
return {
|
||||
@ -173,6 +174,20 @@ export class Schemas {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Builds the zod schema for the SERP clustering output.
 *
 * The object has a `think` string (max 500 chars) and a `clusters` array of
 * at most `MAX_CLUSTERS` items, each with `question`, `insight` and `urls`.
 *
 * @returns The zod object schema described above.
 */
getSerpClusterSchema(): z.ZodObject<any> {
  const thinkField = z.string()
    .describe(`Explain why you cluster the search results like this. ${this.getLanguagePrompt()}`)
    .max(500);

  const clusterItem = z.object({
    question: z.string().describe('What question this cluster answers.').max(100),
    insight: z.string().describe('Summary and list key numbers, data and insights that worth to be highlighted. End with an actionable advice such as "Visit these URLs if you want to understand [what...]". Do not use "This cluster..."').max(200),
    // NOTE(review): `.max(100)` is chained on the string, so it caps each URL at
    // 100 characters rather than the number of URLs — confirm this is intended.
    urls: z.array(z.string().describe('URLs in this cluster.').max(100))
  });

  // NOTE(review): the description text below includes literal single quotes
  // inside the template literal — confirm whether they are intentional.
  const clustersField = z.array(clusterItem)
    .max(MAX_CLUSTERS)
    .describe(`'The optimal clustering of search engine results, orthogonal to each other. Maximum ${MAX_CLUSTERS} clusters allowed.'`);

  return z.object({
    think: thinkField,
    clusters: clustersField
  });
}
|
||||
|
||||
getQueryRewriterSchema(): z.ZodObject<any> {
|
||||
return z.object({
|
||||
think: z.string().describe(`Explain why you choose those search queries. ${this.getLanguagePrompt()}`).max(500),
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user