mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
feat: add hostnames bw filter
This commit is contained in:
parent
b27eced6f8
commit
4ca7804e58
@ -354,6 +354,8 @@ export async function getResponse(question?: string,
|
||||
messages?: Array<CoreMessage>,
|
||||
numReturnedURLs: number = 100,
|
||||
noDirectAnswer: boolean = false,
|
||||
boostHostnames: string[] = [],
|
||||
badHostnames: string[] = [],
|
||||
): Promise<{ result: StepAction; context: TrackerContext; visitedURLs: string[], readURLs: string[], allURLs: string[] }> {
|
||||
|
||||
let step = 0;
|
||||
@ -446,9 +448,10 @@ export async function getResponse(question?: string,
|
||||
if (allURLs && Object.keys(allURLs).length > 0) {
|
||||
// rerank urls
|
||||
weightedURLs = rankURLs(
|
||||
filterURLs(allURLs, visitedURLs),
|
||||
filterURLs(allURLs, visitedURLs, badHostnames),
|
||||
{
|
||||
question: currentQuestion
|
||||
question: currentQuestion,
|
||||
boostHostnames
|
||||
}, context);
|
||||
// improve diversity by keep top 2 urls of each hostname
|
||||
weightedURLs = keepKPerHostname(weightedURLs, 2);
|
||||
|
||||
11
src/app.ts
11
src/app.ts
@ -548,7 +548,16 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
|
||||
visitedURLs,
|
||||
readURLs,
|
||||
allURLs
|
||||
} = await getResponse(undefined, tokenBudget, maxBadAttempts, context, body.messages, body.max_returned_urls, body.no_direct_answer)
|
||||
} = await getResponse(undefined,
|
||||
tokenBudget,
|
||||
maxBadAttempts,
|
||||
context,
|
||||
body.messages,
|
||||
body.max_returned_urls,
|
||||
body.no_direct_answer,
|
||||
body.boostHostnames,
|
||||
body.badHostnames,
|
||||
)
|
||||
let finalAnswer = (finalStep as AnswerAction).mdAnswer;
|
||||
|
||||
const annotations = (finalStep as AnswerAction).references?.map(ref => ({
|
||||
|
||||
@ -217,6 +217,9 @@ export interface ChatCompletionRequest {
|
||||
response_format?: ResponseFormat;
|
||||
no_direct_answer?: boolean;
|
||||
max_returned_urls?: number;
|
||||
|
||||
boostHostnames?: string[];
|
||||
badHostnames?: string[];
|
||||
}
|
||||
|
||||
export interface URLAnnotation {
|
||||
|
||||
@ -140,9 +140,9 @@ export function normalizeUrl(urlString: string, debug = false, options = {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Selects the search snippets that are still eligible for ranking.
 *
 * An entry of `allURLs` is dropped when its key (the URL) is already listed in
 * `visitedURLs`, or when the hostname reported by `extractUrlParts` for that
 * URL is listed in `badHostnames`. Both comparisons are exact string matches.
 *
 * @param allURLs - Candidate snippets keyed by URL.
 * @param visitedURLs - URLs to exclude.
 * @param badHostnames - Hostnames to exclude. Optional: defaults to an empty
 *   list (no hostname filtering) so two-argument callers keep working.
 * @returns The snippets of the remaining entries, in `Object.entries` order.
 */
export function filterURLs(
  allURLs: Record<string, SearchSnippet>,
  visitedURLs: string[],
  badHostnames: string[] = [],
): SearchSnippet[] {
  // Build the lookup sets once instead of re-scanning both arrays for every URL.
  const visited = new Set(visitedURLs);
  const blockedHosts = new Set(badHostnames);

  return Object.entries(allURLs)
    // The visited check runs first, so the hostname is only extracted for unvisited URLs.
    .filter(([url]) => !visited.has(url) && !blockedHosts.has(extractUrlParts(url).hostname))
    .map(([, snippet]) => snippet);
}
|
||||
|
||||
@ -205,6 +205,7 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers:
|
||||
minBoost = 0, // Minimum boost score
|
||||
maxBoost = 5, // Maximum boost score cap
|
||||
question = '', // Optional question for Jina reranking
|
||||
boostHostnames = [], // Optional hostnames to boost
|
||||
} = options;
|
||||
|
||||
// Count URL parts first
|
||||
@ -235,7 +236,7 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers:
|
||||
|
||||
// Hostname boost (normalized by total URLs)
|
||||
const hostnameFreq = normalizeCount(hostnameCount[hostname] || 0, totalUrls);
|
||||
const hostnameBoost = hostnameFreq * hostnameBoostFactor;
|
||||
const hostnameBoost = hostnameFreq * hostnameBoostFactor * (boostHostnames.includes(hostname) ? 2 : 1);
|
||||
|
||||
// Path boost (consider all path prefixes with decay for longer paths)
|
||||
let pathBoost = 0;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user