mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
feat: only hostnames
This commit is contained in:
parent
45bcbf958e
commit
347beda0c2
16
src/agent.ts
16
src/agent.ts
@ -14,10 +14,9 @@ import {
|
||||
StepAction,
|
||||
AnswerAction,
|
||||
KnowledgeItem,
|
||||
SearchResult,
|
||||
EvaluationType,
|
||||
BoostedSearchSnippet,
|
||||
SearchSnippet, EvaluationResponse, Reference, SERPQuery, RepeatEvaluationType
|
||||
SearchSnippet, EvaluationResponse, Reference, SERPQuery, RepeatEvaluationType, UnNormalizedSearchSnippet
|
||||
} from "./types";
|
||||
import {TrackerContext} from "./types";
|
||||
import {search} from "./tools/jina-search";
|
||||
@ -254,7 +253,7 @@ async function updateReferences(thisStep: AnswerAction, allURLs: Record<string,
|
||||
.replace(/\s+/g, ' '),
|
||||
title: allURLs[normalizedUrl]?.title || '',
|
||||
url: normalizedUrl,
|
||||
dateTime: ref?.dateTime || ''
|
||||
dateTime: ref?.dateTime || allURLs[normalizedUrl]?.date || '',
|
||||
};
|
||||
})
|
||||
.filter(Boolean) as Reference[]; // Add type assertion here
|
||||
@ -284,7 +283,7 @@ async function executeSearchQueries(
|
||||
context.actionTracker.trackThink('search_for', SchemaGen.languageCode, {keywords: uniqQOnly.join(', ')});
|
||||
let utilityScore = 0;
|
||||
for (const query of keywordsQueries) {
|
||||
let results: SearchResult[] = [];
|
||||
let results: UnNormalizedSearchSnippet[] = [];
|
||||
const oldQuery = query.q;
|
||||
if (onlyHostnames && onlyHostnames.length > 0) {
|
||||
query.q = `${query.q} site:${onlyHostnames.join(' OR site:')}`;
|
||||
@ -321,15 +320,16 @@ async function executeSearchQueries(
|
||||
|
||||
const minResults: SearchSnippet[] = results
|
||||
.map(r => {
|
||||
const url = normalizeUrl('url' in r ? r.url : r.link);
|
||||
const url = normalizeUrl('url' in r ? r.url! : r.link!);
|
||||
if (!url) return null; // Skip invalid URLs
|
||||
|
||||
return {
|
||||
title: r.title,
|
||||
url,
|
||||
description: 'description' in r ? r.description : r.snippet,
|
||||
weight: 1
|
||||
};
|
||||
weight: 1,
|
||||
date: r.date,
|
||||
} as SearchSnippet;
|
||||
})
|
||||
.filter(Boolean) as SearchSnippet[]; // Filter out null entries and assert type
|
||||
|
||||
@ -798,7 +798,7 @@ You decided to think out of the box or cut from a completely different angle.
|
||||
.map(url => normalizeUrl(url))
|
||||
.filter(url => url && !visitedURLs.includes(url)) as string[];
|
||||
|
||||
thisStep.URLTargets = [...new Set([...thisStep.URLTargets, ...weightedURLs.map(r => r.url)])].slice(0, MAX_URLS_PER_STEP);
|
||||
thisStep.URLTargets = [...new Set([...thisStep.URLTargets, ...weightedURLs.map(r => r.url!)])].slice(0, MAX_URLS_PER_STEP);
|
||||
|
||||
const uniqueURLs = thisStep.URLTargets;
|
||||
console.log(uniqueURLs)
|
||||
|
||||
@ -215,7 +215,7 @@ function getTokenBudgetAndMaxAttempts(
|
||||
if (maxCompletionTokens !== null) {
|
||||
return {
|
||||
tokenBudget: maxCompletionTokens,
|
||||
maxBadAttempts: 2 // Default to medium setting for max attempts
|
||||
maxBadAttempts: 1 // Default to medium setting for max attempts
|
||||
};
|
||||
}
|
||||
|
||||
@ -226,7 +226,7 @@ function getTokenBudgetAndMaxAttempts(
|
||||
return {tokenBudget: 1000000, maxBadAttempts: 2};
|
||||
case 'medium':
|
||||
default:
|
||||
return {tokenBudget: 500000, maxBadAttempts: 2};
|
||||
return {tokenBudget: 500000, maxBadAttempts: 1};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
17
src/types.ts
17
src/types.ts
@ -179,11 +179,20 @@ export type ErrorAnalysisResponse = {
|
||||
};
|
||||
|
||||
|
||||
export type SearchResult =
|
||||
| SearchSnippet
|
||||
| { title: string; link: string; snippet: string; weight?: number };
|
||||
export type UnNormalizedSearchSnippet = {
|
||||
title: string;
|
||||
url?: string;
|
||||
description?: string;
|
||||
link?: string;
|
||||
snippet?: string;
|
||||
weight?: number,
|
||||
date?: string
|
||||
};
|
||||
|
||||
export type SearchSnippet = { title: string; url: string; description: string; weight?: number }
|
||||
export type SearchSnippet = UnNormalizedSearchSnippet& {
|
||||
url: string;
|
||||
description: string;
|
||||
};
|
||||
|
||||
export type BoostedSearchSnippet = SearchSnippet & {
|
||||
freqBoost: number;
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import {BoostedSearchSnippet, KnowledgeItem, SearchResult, SearchSnippet, TrackerContext, VisitAction} from "../types";
|
||||
import {BoostedSearchSnippet, KnowledgeItem, SearchSnippet, TrackerContext, VisitAction} from "../types";
|
||||
import {getI18nText, smartMergeStrings} from "./text-tools";
|
||||
import {rerankDocuments} from "../tools/jina-rerank";
|
||||
import {readUrl} from "../tools/read";
|
||||
@ -182,7 +182,7 @@ const extractUrlParts = (urlStr: string) => {
|
||||
};
|
||||
|
||||
// Function to count occurrences of hostnames and paths
|
||||
export const countUrlParts = (urlItems: SearchResult[]) => {
|
||||
export const countUrlParts = (urlItems: SearchSnippet[]) => {
|
||||
const hostnameCount: Record<string, number> = {};
|
||||
const pathPrefixCount: Record<string, number> = {};
|
||||
let totalUrls = 0;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user