mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2026-03-22 07:29:35 +08:00
refactor: query rewriter
This commit is contained in:
12
src/agent.ts
12
src/agent.ts
@@ -33,7 +33,7 @@ import {
|
||||
countUrlParts,
|
||||
removeBFromA,
|
||||
normalizeUrl, sampleMultinomial,
|
||||
weightedURLToString, getLastModified
|
||||
weightedURLToString, getLastModified, keepKPerHostname
|
||||
} from "./utils/url-tools";
|
||||
import {
|
||||
buildMdFromAnswer,
|
||||
@@ -350,13 +350,12 @@ export async function getResponse(question?: string,
|
||||
if (allURLs && Object.keys(allURLs).length > 0) {
|
||||
// rerank urls
|
||||
weightedURLs = rankURLs(
|
||||
removeBFromA(allURLs,
|
||||
[
|
||||
...visitedURLs,
|
||||
...weightedURLs.map(r => r.url).slice(Math.floor(weightedURLs.length * 0.5))]),
|
||||
removeBFromA(allURLs, visitedURLs),
|
||||
{
|
||||
question: currentQuestion
|
||||
}, context);
|
||||
// improve diversity by keeping only the top 2 URLs of each hostname
|
||||
weightedURLs = keepKPerHostname(weightedURLs, 2);
|
||||
console.log('Weighted URLs:', weightedURLs.length);
|
||||
}
|
||||
|
||||
@@ -856,14 +855,13 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
|
||||
await storeContext(system, schema, {allContext, allKeywords, allQuestions, allKnowledge, weightedURLs}, totalStep);
|
||||
|
||||
// return at most 300 URLs
|
||||
const returnedURLs = Object.keys(allURLs).slice(0, 300);
|
||||
const returnedURLs = weightedURLs.slice(0, 300).map(r => r.url);
|
||||
return {
|
||||
result: thisStep,
|
||||
context,
|
||||
visitedURLs: returnedURLs,
|
||||
readURLs: visitedURLs,
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
async function storeContext(prompt: string, schema: any, memory: {
|
||||
|
||||
@@ -362,3 +362,23 @@ export async function getLastModified(url: string): Promise<string | undefined>
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Limits how many results are kept for each hostname.
 *
 * Walks `results` in order and keeps a result only while fewer than `k`
 * results sharing its hostname have already been kept, so the output
 * preserves the relative order of the input. The input array is only read.
 *
 * @param results - Snippets to filter; each one's `url` is handed to
 *   `extractUrlParts` and the returned `hostname` is used as the grouping key.
 * @param k - Maximum number of results to keep per hostname.
 * @returns A new array containing at most `k` results per hostname.
 */
export const keepKPerHostname = (results: BoostedSearchSnippet[], k: number): BoostedSearchSnippet[] => {
  // A Map (rather than a plain object) is used as the counter so that
  // hostnames such as "constructor" or "__proto__" cannot collide with
  // members inherited from Object.prototype and get silently dropped.
  const keptPerHostname = new Map<string, number>();
  const filteredResults: BoostedSearchSnippet[] = [];

  for (const result of results) {
    const hostname = extractUrlParts(result.url).hostname;
    const alreadyKept = keptPerHostname.get(hostname) ?? 0;

    if (alreadyKept < k) {
      filteredResults.push(result);
      keptPerHostname.set(hostname, alreadyKept + 1);
    }
  }

  return filteredResults;
};
|
||||
Reference in New Issue
Block a user