mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
feat: only hostnames
This commit is contained in:
parent
2e8ee47536
commit
7d07078ec5
52
src/agent.ts
52
src/agent.ts
@ -272,7 +272,8 @@ async function executeSearchQueries(
|
|||||||
keywordsQueries: any[],
|
keywordsQueries: any[],
|
||||||
context: TrackerContext,
|
context: TrackerContext,
|
||||||
allURLs: Record<string, SearchSnippet>,
|
allURLs: Record<string, SearchSnippet>,
|
||||||
SchemaGen: any
|
SchemaGen: Schemas,
|
||||||
|
onlyHostnames?: string[]
|
||||||
): Promise<{
|
): Promise<{
|
||||||
newKnowledge: KnowledgeItem[],
|
newKnowledge: KnowledgeItem[],
|
||||||
searchedQueries: string[]
|
searchedQueries: string[]
|
||||||
@ -285,6 +286,9 @@ async function executeSearchQueries(
|
|||||||
for (const query of keywordsQueries) {
|
for (const query of keywordsQueries) {
|
||||||
let results: SearchResult[] = [];
|
let results: SearchResult[] = [];
|
||||||
const oldQuery = query.q;
|
const oldQuery = query.q;
|
||||||
|
if (onlyHostnames && onlyHostnames.length > 0) {
|
||||||
|
query.q = `${query.q} site:${onlyHostnames.join(' OR site:')}`;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
console.log('Search query:', query);
|
console.log('Search query:', query);
|
||||||
@ -342,10 +346,16 @@ async function executeSearchQueries(
|
|||||||
updated: query.tbs ? formatDateRange(query) : undefined
|
updated: query.tbs ? formatDateRange(query) : undefined
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
if (searchedQueries.length === 0) {
|
||||||
console.log(`Utility/Queries: ${utilityScore}/${searchedQueries.length}`);
|
if (onlyHostnames && onlyHostnames.length > 0) {
|
||||||
if (searchedQueries.length > MAX_QUERIES_PER_STEP) {
|
console.log(`No results found for queries: ${uniqQOnly.join(', ')} on hostnames: ${onlyHostnames.join(', ')}`);
|
||||||
console.log(`So many queries??? ${searchedQueries.map(q => `"${q}"`).join(', ')}`)
|
context.actionTracker.trackThink('hostnames_no_results', SchemaGen.languageCode, {hostnames: onlyHostnames.join(', ')});
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
console.log(`Utility/Queries: ${utilityScore}/${searchedQueries.length}`);
|
||||||
|
if (searchedQueries.length > MAX_QUERIES_PER_STEP) {
|
||||||
|
console.log(`So many queries??? ${searchedQueries.map(q => `"${q}"`).join(', ')}`)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
newKnowledge,
|
newKnowledge,
|
||||||
@ -366,6 +376,7 @@ export async function getResponse(question?: string,
|
|||||||
noDirectAnswer: boolean = false,
|
noDirectAnswer: boolean = false,
|
||||||
boostHostnames: string[] = [],
|
boostHostnames: string[] = [],
|
||||||
badHostnames: string[] = [],
|
badHostnames: string[] = [],
|
||||||
|
onlyHostnames: string[] = []
|
||||||
): Promise<{ result: StepAction; context: TrackerContext; visitedURLs: string[], readURLs: string[], allURLs: string[] }> {
|
): Promise<{ result: StepAction; context: TrackerContext; visitedURLs: string[], readURLs: string[], allURLs: string[] }> {
|
||||||
|
|
||||||
let step = 0;
|
let step = 0;
|
||||||
@ -457,12 +468,11 @@ export async function getResponse(question?: string,
|
|||||||
allowReflect = false;
|
allowReflect = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// update all urls with buildURLMap
|
|
||||||
// allowRead = allowRead && (Object.keys(allURLs).length > 0);
|
|
||||||
if (allURLs && Object.keys(allURLs).length > 0) {
|
if (allURLs && Object.keys(allURLs).length > 0) {
|
||||||
// rerank urls
|
// rerank urls
|
||||||
weightedURLs = rankURLs(
|
weightedURLs = rankURLs(
|
||||||
filterURLs(allURLs, visitedURLs, badHostnames),
|
filterURLs(allURLs, visitedURLs, badHostnames, onlyHostnames),
|
||||||
{
|
{
|
||||||
question: currentQuestion,
|
question: currentQuestion,
|
||||||
boostHostnames
|
boostHostnames
|
||||||
@ -471,6 +481,7 @@ export async function getResponse(question?: string,
|
|||||||
weightedURLs = keepKPerHostname(weightedURLs, 2);
|
weightedURLs = keepKPerHostname(weightedURLs, 2);
|
||||||
console.log('Weighted URLs:', weightedURLs.length);
|
console.log('Weighted URLs:', weightedURLs.length);
|
||||||
}
|
}
|
||||||
|
allowRead = allowRead && (weightedURLs.length > 0);
|
||||||
|
|
||||||
allowSearch = allowSearch && (weightedURLs.length < 200); // disable search when too many urls already
|
allowSearch = allowSearch && (weightedURLs.length < 200); // disable search when too many urls already
|
||||||
|
|
||||||
@ -743,25 +754,28 @@ But then you realized you have asked them before. You decided to to think out of
|
|||||||
keywordsQueries,
|
keywordsQueries,
|
||||||
context,
|
context,
|
||||||
allURLs,
|
allURLs,
|
||||||
SchemaGen
|
SchemaGen,
|
||||||
|
onlyHostnames
|
||||||
);
|
);
|
||||||
|
|
||||||
allKeywords.push(...searchedQueries);
|
if (searchedQueries.length > 0) {
|
||||||
allKnowledge.push(...newKnowledge);
|
anyResult = true;
|
||||||
|
allKeywords.push(...searchedQueries);
|
||||||
|
allKnowledge.push(...newKnowledge);
|
||||||
|
|
||||||
diaryContext.push(`
|
diaryContext.push(`
|
||||||
At step ${step}, you took the **search** action and look for external information for the question: "${currentQuestion}".
|
At step ${step}, you took the **search** action and look for external information for the question: "${currentQuestion}".
|
||||||
In particular, you tried to search for the following keywords: "${keywordsQueries.map(q => q.q).join(', ')}".
|
In particular, you tried to search for the following keywords: "${keywordsQueries.map(q => q.q).join(', ')}".
|
||||||
You found quite some information and add them to your URL list and **visit** them later when needed.
|
You found quite some information and add them to your URL list and **visit** them later when needed.
|
||||||
`);
|
`);
|
||||||
|
|
||||||
updateContext({
|
updateContext({
|
||||||
totalStep,
|
totalStep,
|
||||||
question: currentQuestion,
|
question: currentQuestion,
|
||||||
...thisStep,
|
...thisStep,
|
||||||
result: result
|
result: result
|
||||||
});
|
});
|
||||||
anyResult = true;
|
}
|
||||||
}
|
}
|
||||||
if (!anyResult || !keywordsQueries?.length) {
|
if (!anyResult || !keywordsQueries?.length) {
|
||||||
diaryContext.push(`
|
diaryContext.push(`
|
||||||
|
|||||||
@ -557,6 +557,7 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
|
|||||||
body.no_direct_answer,
|
body.no_direct_answer,
|
||||||
body.boost_hostnames,
|
body.boost_hostnames,
|
||||||
body.bad_hostnames,
|
body.bad_hostnames,
|
||||||
|
body.only_hostnames,
|
||||||
)
|
)
|
||||||
let finalAnswer = (finalStep as AnswerAction).mdAnswer;
|
let finalAnswer = (finalStep as AnswerAction).mdAnswer;
|
||||||
|
|
||||||
|
|||||||
@ -91,7 +91,7 @@ queries: [
|
|||||||
"q": "二手宝马价格趋势"
|
"q": "二手宝马价格趋势"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"q": "二手宝马vs奔驰vs丰田 性价比"
|
"q": "二手宝马vs奔驰vs奥迪 性价比"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"tbs": "qdr:m",
|
"tbs": "qdr:m",
|
||||||
|
|||||||
@ -224,6 +224,7 @@ export interface ChatCompletionRequest {
|
|||||||
|
|
||||||
boost_hostnames?: string[];
|
boost_hostnames?: string[];
|
||||||
bad_hostnames?: string[];
|
bad_hostnames?: string[];
|
||||||
|
only_hostnames?: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface URLAnnotation {
|
export interface URLAnnotation {
|
||||||
|
|||||||
@ -6,7 +6,8 @@
|
|||||||
"read_for_verify": "Let me fetch the source content to verify the answer.",
|
"read_for_verify": "Let me fetch the source content to verify the answer.",
|
||||||
"late_chunk": "Content of ${url} is too long, let me cherry-pick the relevant parts.",
|
"late_chunk": "Content of ${url} is too long, let me cherry-pick the relevant parts.",
|
||||||
"final_answer": "Let me finalize the answer.",
|
"final_answer": "Let me finalize the answer.",
|
||||||
"blocked_content": "Hmm...the content of ${url} doesn't look right, I might be blocked."
|
"blocked_content": "Hmm...the content of ${url} doesn't look right, I might be blocked.",
|
||||||
|
"hostnames_no_results": "Can't find any results from ${hostnames}."
|
||||||
},
|
},
|
||||||
"zh-CN": {
|
"zh-CN": {
|
||||||
"eval_first": "等等,让我先自己评估一下答案。",
|
"eval_first": "等等,让我先自己评估一下答案。",
|
||||||
@ -15,7 +16,8 @@
|
|||||||
"read_for_verify": "让我读取源网页内容来验证答案。",
|
"read_for_verify": "让我读取源网页内容来验证答案。",
|
||||||
"late_chunk": "网页 ${url} 内容太长,我正在筛选精华部分。",
|
"late_chunk": "网页 ${url} 内容太长,我正在筛选精华部分。",
|
||||||
"final_answer": "我来整理一下答案。",
|
"final_answer": "我来整理一下答案。",
|
||||||
"blocked_content": "额…这个 ${url} 的内容不太对啊,我是不是被屏蔽了啊。"
|
"blocked_content": "额…这个 ${url} 的内容不太对啊,我是不是被屏蔽了啊。",
|
||||||
|
"hostnames_no_results": "额… ${hostnames} 找不到什么结果啊。"
|
||||||
},
|
},
|
||||||
"zh-TW": {
|
"zh-TW": {
|
||||||
"eval_first": "等等,讓我先評估一下答案。",
|
"eval_first": "等等,讓我先評估一下答案。",
|
||||||
@ -24,7 +26,8 @@
|
|||||||
"read_for_verify": "讓我獲取源內容來驗證答案。",
|
"read_for_verify": "讓我獲取源內容來驗證答案。",
|
||||||
"late_chunk": "網頁 ${url} 內容太長,我正在挑選相關部分。",
|
"late_chunk": "網頁 ${url} 內容太長,我正在挑選相關部分。",
|
||||||
"final_answer": "我來整理一下答案。",
|
"final_answer": "我來整理一下答案。",
|
||||||
"blocked_content": "咦...奇怪了,${url} 好像把我擋在門外了。有够麻烦!"
|
"blocked_content": "咦...奇怪了,${url} 好像把我擋在門外了。有够麻烦!",
|
||||||
|
"hostnames_no_results": "咦... ${hostnames} 找不到什么结果。"
|
||||||
},
|
},
|
||||||
"ja": {
|
"ja": {
|
||||||
"eval_first": "ちょっと待って、まず答えを評価します。",
|
"eval_first": "ちょっと待って、まず答えを評価します。",
|
||||||
@ -33,7 +36,8 @@
|
|||||||
"read_for_verify": "答えを確認するために、ソースコンテンツを取得します。",
|
"read_for_verify": "答えを確認するために、ソースコンテンツを取得します。",
|
||||||
"late_chunk": "${url} のコンテンツが長すぎるため、関連部分を選択します。",
|
"late_chunk": "${url} のコンテンツが長すぎるため、関連部分を選択します。",
|
||||||
"final_answer": "答えをまとめます。",
|
"final_answer": "答えをまとめます。",
|
||||||
"blocked_content": "あれ?${url}にアクセスできないみたいです。壁にぶつかってしまいました。申し訳ありません。"
|
"blocked_content": "あれ?${url}にアクセスできないみたいです。壁にぶつかってしまいました。申し訳ありません。",
|
||||||
|
"hostnames_no_results": "${hostnames} から結果が見つかりません。"
|
||||||
},
|
},
|
||||||
"ko": {
|
"ko": {
|
||||||
"eval_first": "잠시만요, 먼저 답변을 평가해 보겠습니다.",
|
"eval_first": "잠시만요, 먼저 답변을 평가해 보겠습니다.",
|
||||||
@ -42,7 +46,8 @@
|
|||||||
"read_for_verify": "답변을 확인하기 위해 소스 콘텐츠를 가져오겠습니다.",
|
"read_for_verify": "답변을 확인하기 위해 소스 콘텐츠를 가져오겠습니다.",
|
||||||
"late_chunk": "${url} 의 콘텐츠가 너무 길어, 관련 부분을 선택하겠습니다.",
|
"late_chunk": "${url} 의 콘텐츠가 너무 길어, 관련 부분을 선택하겠습니다.",
|
||||||
"final_answer": "답변을 마무리하겠습니다.",
|
"final_answer": "답변을 마무리하겠습니다.",
|
||||||
"blocked_content": "어라? ${url}에서 문전박대를 당했네요. 참 황당하네요!"
|
"blocked_content": "어라? ${url}에서 문전박대를 당했네요. 참 황당하네요!",
|
||||||
|
"hostnames_no_results": "${hostnames} 에서 결과를 찾을 수 없습니다."
|
||||||
},
|
},
|
||||||
"fr": {
|
"fr": {
|
||||||
"eval_first": "Un instant, je vais d'abord évaluer la réponse.",
|
"eval_first": "Un instant, je vais d'abord évaluer la réponse.",
|
||||||
@ -51,7 +56,8 @@
|
|||||||
"read_for_verify": "Je vais récupérer le contenu source pour vérifier la réponse.",
|
"read_for_verify": "Je vais récupérer le contenu source pour vérifier la réponse.",
|
||||||
"late_chunk": "Le contenu de ${url} est trop long, je vais sélectionner les parties pertinentes.",
|
"late_chunk": "Le contenu de ${url} est trop long, je vais sélectionner les parties pertinentes.",
|
||||||
"final_answer": "Je vais finaliser la réponse.",
|
"final_answer": "Je vais finaliser la réponse.",
|
||||||
"blocked_content": "Zut alors ! ${url} me met à la porte. C'est la galère !"
|
"blocked_content": "Zut alors ! ${url} me met à la porte. C'est la galère !",
|
||||||
|
"hostnames_no_results": "Aucun résultat trouvé sur ${hostnames}."
|
||||||
},
|
},
|
||||||
"de": {
|
"de": {
|
||||||
"eval_first": "Einen Moment, ich werde die Antwort zuerst evaluieren.",
|
"eval_first": "Einen Moment, ich werde die Antwort zuerst evaluieren.",
|
||||||
@ -60,7 +66,8 @@
|
|||||||
"read_for_verify": "Ich werde den Quellinhalt abrufen, um die Antwort zu überprüfen.",
|
"read_for_verify": "Ich werde den Quellinhalt abrufen, um die Antwort zu überprüfen.",
|
||||||
"late_chunk": "Der Inhalt von ${url} ist zu lang, ich werde die relevanten Teile auswählen.",
|
"late_chunk": "Der Inhalt von ${url} ist zu lang, ich werde die relevanten Teile auswählen.",
|
||||||
"final_answer": "Ich werde die Antwort abschließen.",
|
"final_answer": "Ich werde die Antwort abschließen.",
|
||||||
"blocked_content": "Mist! ${url} lässt mich nicht rein."
|
"blocked_content": "Mist! ${url} lässt mich nicht rein.",
|
||||||
|
"hostnames_no_results": "Keine Ergebnisse von ${hostnames} gefunden."
|
||||||
},
|
},
|
||||||
"es": {
|
"es": {
|
||||||
"eval_first": "Un momento, voy a evaluar la respuesta primero.",
|
"eval_first": "Un momento, voy a evaluar la respuesta primero.",
|
||||||
@ -69,7 +76,8 @@
|
|||||||
"read_for_verify": "Voy a obtener el contenido fuente para verificar la respuesta.",
|
"read_for_verify": "Voy a obtener el contenido fuente para verificar la respuesta.",
|
||||||
"late_chunk": "El contenido de ${url} es demasiado largo, voy a seleccionar las partes relevantes.",
|
"late_chunk": "El contenido de ${url} es demasiado largo, voy a seleccionar las partes relevantes.",
|
||||||
"final_answer": "Voy a finalizar la respuesta.",
|
"final_answer": "Voy a finalizar la respuesta.",
|
||||||
"blocked_content": "¡Oh no! Estoy bloqueado por ${url}, ¡no es genial!"
|
"blocked_content": "¡Oh no! Estoy bloqueado por ${url}, ¡no es genial!",
|
||||||
|
"hostnames_no_results": "No se encontraron resultados de ${hostnames}."
|
||||||
},
|
},
|
||||||
"it": {
|
"it": {
|
||||||
"eval_first": "Un attimo, valuterò prima la risposta.",
|
"eval_first": "Un attimo, valuterò prima la risposta.",
|
||||||
@ -77,7 +85,9 @@
|
|||||||
"read_for": "Leggerò ${urls} per raccogliere ulteriori informazioni.",
|
"read_for": "Leggerò ${urls} per raccogliere ulteriori informazioni.",
|
||||||
"read_for_verify": "Recupererò il contenuto sorgente per verificare la risposta.",
|
"read_for_verify": "Recupererò il contenuto sorgente per verificare la risposta.",
|
||||||
"late_chunk": "Il contenuto di ${url} è troppo lungo, selezionerò le parti rilevanti.",
|
"late_chunk": "Il contenuto di ${url} è troppo lungo, selezionerò le parti rilevanti.",
|
||||||
"final_answer": "Finalizzerò la risposta."
|
"final_answer": "Finalizzerò la risposta.",
|
||||||
|
"blocked_content": "Mannaggia! Sono bloccato da ${url}, non è bello!",
|
||||||
|
"hostnames_no_results": "Nessun risultato trovato da ${hostnames}."
|
||||||
},
|
},
|
||||||
"pt": {
|
"pt": {
|
||||||
"eval_first": "Um momento, vou avaliar a resposta primeiro.",
|
"eval_first": "Um momento, vou avaliar a resposta primeiro.",
|
||||||
@ -86,7 +96,8 @@
|
|||||||
"read_for_verify": "Vou buscar o conteúdo da fonte para verificar a resposta.",
|
"read_for_verify": "Vou buscar o conteúdo da fonte para verificar a resposta.",
|
||||||
"late_chunk": "O conteúdo de ${url} é muito longo, vou selecionar as partes relevantes.",
|
"late_chunk": "O conteúdo de ${url} é muito longo, vou selecionar as partes relevantes.",
|
||||||
"final_answer": "Vou finalizar a resposta.",
|
"final_answer": "Vou finalizar a resposta.",
|
||||||
"blocked_content": "Ah não! Estou bloqueado por ${url}, não é legal!"
|
"blocked_content": "Ah não! Estou bloqueado por ${url}, não é legal!",
|
||||||
|
"hostnames_no_results": "Nenhum resultado encontrado em ${hostnames}."
|
||||||
},
|
},
|
||||||
"ru": {
|
"ru": {
|
||||||
"eval_first": "Подождите, я сначала оценю ответ.",
|
"eval_first": "Подождите, я сначала оценю ответ.",
|
||||||
@ -95,7 +106,8 @@
|
|||||||
"read_for_verify": "Дайте мне получить исходный контент для проверки ответа.",
|
"read_for_verify": "Дайте мне получить исходный контент для проверки ответа.",
|
||||||
"late_chunk": "Содержимое ${url} слишком длинное, я выберу только значимые части.",
|
"late_chunk": "Содержимое ${url} слишком длинное, я выберу только значимые части.",
|
||||||
"final_answer": "Дайте мне завершить ответ.",
|
"final_answer": "Дайте мне завершить ответ.",
|
||||||
"blocked_content": "Ой! Меня заблокировал ${url}, не круто!"
|
"blocked_content": "Ой! Меня заблокировал ${url}, не круто!",
|
||||||
|
"hostnames_no_results": "Ничего не найдено на ${hostnames}."
|
||||||
},
|
},
|
||||||
"ar": {
|
"ar": {
|
||||||
"eval_first": "لكن انتظر، دعني أقوم بتقييم الإجابة أولاً.",
|
"eval_first": "لكن انتظر، دعني أقوم بتقييم الإجابة أولاً.",
|
||||||
@ -103,7 +115,8 @@
|
|||||||
"read_for": "دعني أقرأ ${urls} لجمع المزيد من المعلومات.",
|
"read_for": "دعني أقرأ ${urls} لجمع المزيد من المعلومات.",
|
||||||
"read_for_verify": "دعني أحضر محتوى المصدر للتحقق من الإجابة.",
|
"read_for_verify": "دعني أحضر محتوى المصدر للتحقق من الإجابة.",
|
||||||
"late_chunk": "محتوى ${url} طويل جدًا، سأختار الأجزاء ذات الصلة.",
|
"late_chunk": "محتوى ${url} طويل جدًا، سأختار الأجزاء ذات الصلة.",
|
||||||
"blocked_content": "أوه لا! أنا محظور من ${url}، ليس جيدًا!"
|
"blocked_content": "أوه لا! أنا محظور من ${url}، ليس جيدًا!",
|
||||||
|
"hostnames_no_results": "لا يمكن العثور على أي نتائج من ${hostnames}."
|
||||||
},
|
},
|
||||||
"nl": {
|
"nl": {
|
||||||
"eval_first": "Een moment, ik zal het antwoord eerst evalueren.",
|
"eval_first": "Een moment, ik zal het antwoord eerst evalueren.",
|
||||||
@ -112,7 +125,8 @@
|
|||||||
"read_for_verify": "Ik zal de broninhoud ophalen om het antwoord te verifiëren.",
|
"read_for_verify": "Ik zal de broninhoud ophalen om het antwoord te verifiëren.",
|
||||||
"late_chunk": "De inhoud van ${url} is te lang, ik zal de relevante delen selecteren.",
|
"late_chunk": "De inhoud van ${url} is te lang, ik zal de relevante delen selecteren.",
|
||||||
"final_answer": "Ik zal het antwoord afronden.",
|
"final_answer": "Ik zal het antwoord afronden.",
|
||||||
"blocked_content": "Verdorie! Ik word geblokkeerd door ${url}."
|
"blocked_content": "Verdorie! Ik word geblokkeerd door ${url}.",
|
||||||
|
"hostnames_no_results": "Geen resultaten gevonden van ${hostnames}."
|
||||||
},
|
},
|
||||||
"zh": {
|
"zh": {
|
||||||
"eval_first": "等等,让我先评估一下答案。",
|
"eval_first": "等等,让我先评估一下答案。",
|
||||||
@ -121,6 +135,7 @@
|
|||||||
"read_for_verify": "让我获取源内容来验证答案。",
|
"read_for_verify": "让我获取源内容来验证答案。",
|
||||||
"late_chunk": "网页 ${url} 内容太长,我正在筛选精华部分。",
|
"late_chunk": "网页 ${url} 内容太长,我正在筛选精华部分。",
|
||||||
"final_answer": "我来整理一下答案。",
|
"final_answer": "我来整理一下答案。",
|
||||||
"blocked_content": "额…这个内容不太对啊,我感觉被 ${url} 屏蔽了。"
|
"blocked_content": "额…这个内容不太对啊,我感觉被 ${url} 屏蔽了。",
|
||||||
|
"hostnames_no_results": "额… ${hostnames} 找不到什么结果啊。"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -160,9 +160,9 @@ export function normalizeUrl(urlString: string, debug = false, options = {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export function filterURLs(allURLs: Record<string, SearchSnippet>, visitedURLs: string[], badHostnames: string[]): SearchSnippet[] {
|
export function filterURLs(allURLs: Record<string, SearchSnippet>, visitedURLs: string[], badHostnames: string[], onlyHostnames: string[]): SearchSnippet[] {
|
||||||
return Object.entries(allURLs)
|
return Object.entries(allURLs)
|
||||||
.filter(([url,]) => !visitedURLs.includes(url) && !badHostnames.includes(extractUrlParts(url).hostname))
|
.filter(([url,]) => !visitedURLs.includes(url) && !badHostnames.includes(extractUrlParts(url).hostname) && (onlyHostnames.length === 0 || onlyHostnames.includes(extractUrlParts(url).hostname)))
|
||||||
.map(([, result]) => result);
|
.map(([, result]) => result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user