feat: only hostnames

This commit is contained in:
Han Xiao 2025-03-24 11:27:27 +08:00
parent 45bcbf958e
commit 347beda0c2
4 changed files with 25 additions and 16 deletions

View File

@ -14,10 +14,9 @@ import {
StepAction,
AnswerAction,
KnowledgeItem,
SearchResult,
EvaluationType,
BoostedSearchSnippet,
SearchSnippet, EvaluationResponse, Reference, SERPQuery, RepeatEvaluationType
SearchSnippet, EvaluationResponse, Reference, SERPQuery, RepeatEvaluationType, UnNormalizedSearchSnippet
} from "./types";
import {TrackerContext} from "./types";
import {search} from "./tools/jina-search";
@ -254,7 +253,7 @@ async function updateReferences(thisStep: AnswerAction, allURLs: Record<string,
.replace(/\s+/g, ' '),
title: allURLs[normalizedUrl]?.title || '',
url: normalizedUrl,
dateTime: ref?.dateTime || ''
dateTime: ref?.dateTime || allURLs[normalizedUrl]?.date || '',
};
})
.filter(Boolean) as Reference[]; // Add type assertion here
@ -284,7 +283,7 @@ async function executeSearchQueries(
context.actionTracker.trackThink('search_for', SchemaGen.languageCode, {keywords: uniqQOnly.join(', ')});
let utilityScore = 0;
for (const query of keywordsQueries) {
let results: SearchResult[] = [];
let results: UnNormalizedSearchSnippet[] = [];
const oldQuery = query.q;
if (onlyHostnames && onlyHostnames.length > 0) {
query.q = `${query.q} site:${onlyHostnames.join(' OR site:')}`;
@ -321,15 +320,16 @@ async function executeSearchQueries(
const minResults: SearchSnippet[] = results
.map(r => {
const url = normalizeUrl('url' in r ? r.url : r.link);
const url = normalizeUrl('url' in r ? r.url! : r.link!);
if (!url) return null; // Skip invalid URLs
return {
title: r.title,
url,
description: 'description' in r ? r.description : r.snippet,
weight: 1
};
weight: 1,
date: r.date,
} as SearchSnippet;
})
.filter(Boolean) as SearchSnippet[]; // Filter out null entries and assert type
@ -798,7 +798,7 @@ You decided to think out of the box or cut from a completely different angle.
.map(url => normalizeUrl(url))
.filter(url => url && !visitedURLs.includes(url)) as string[];
thisStep.URLTargets = [...new Set([...thisStep.URLTargets, ...weightedURLs.map(r => r.url)])].slice(0, MAX_URLS_PER_STEP);
thisStep.URLTargets = [...new Set([...thisStep.URLTargets, ...weightedURLs.map(r => r.url!)])].slice(0, MAX_URLS_PER_STEP);
const uniqueURLs = thisStep.URLTargets;
console.log(uniqueURLs)

View File

@ -215,7 +215,7 @@ function getTokenBudgetAndMaxAttempts(
if (maxCompletionTokens !== null) {
return {
tokenBudget: maxCompletionTokens,
maxBadAttempts: 2 // Default to medium setting for max attempts
maxBadAttempts: 1 // Default to medium setting for max attempts
};
}
@ -226,7 +226,7 @@ function getTokenBudgetAndMaxAttempts(
return {tokenBudget: 1000000, maxBadAttempts: 2};
case 'medium':
default:
return {tokenBudget: 500000, maxBadAttempts: 2};
return {tokenBudget: 500000, maxBadAttempts: 1};
}
}

View File

@ -179,11 +179,20 @@ export type ErrorAnalysisResponse = {
};
/**
 * A search hit in one of two shapes: a `SearchSnippet`, or an object that
 * carries its address under `link` and its text under `snippet` (with an
 * optional numeric `weight`).
 *
 * NOTE(review): this text is a rendered diff without +/- markers; the hunk
 * line counts suggest this alias is the pre-change definition — confirm
 * against the real file before relying on it.
 */
export type SearchResult =
| SearchSnippet
| { title: string; link: string; snippet: string; weight?: number };
/**
 * A raw search hit before it has been normalized.
 *
 * Only `title` is mandatory. The address may be supplied under `url` or
 * under `link`, and the summary text under `description` or under
 * `snippet`; the normalizing code reads the first of each pair when present
 * and falls back to the second.
 */
export type UnNormalizedSearchSnippet = {
  /** Title of the hit. */
  title: string;

  /** Address of the hit, in either of its two spellings. */
  url?: string;
  link?: string;

  /** Summary text of the hit, in either of its two spellings. */
  description?: string;
  snippet?: string;

  /** Optional numeric weight. */
  weight?: number;
  /** Optional date string; its format is not specified by this type. */
  date?: string;
};
/**
 * A search hit with a required `title`, `url` and `description`, plus an
 * optional numeric `weight`.
 *
 * NOTE(review): a second `SearchSnippet` alias follows immediately below;
 * this text is a rendered diff without +/- markers, so this one-line form is
 * presumably the pre-change definition — confirm against the real file.
 */
export type SearchSnippet = { title: string; url: string; description: string; weight?: number }
/**
 * A normalized search hit: every field of `UnNormalizedSearchSnippet`, with
 * `url` and `description` made mandatory.
 */
export type SearchSnippet = UnNormalizedSearchSnippet &
  Required<Pick<UnNormalizedSearchSnippet, 'url' | 'description'>>;
export type BoostedSearchSnippet = SearchSnippet & {
freqBoost: number;

View File

@ -1,4 +1,4 @@
import {BoostedSearchSnippet, KnowledgeItem, SearchResult, SearchSnippet, TrackerContext, VisitAction} from "../types";
import {BoostedSearchSnippet, KnowledgeItem, SearchSnippet, TrackerContext, VisitAction} from "../types";
import {getI18nText, smartMergeStrings} from "./text-tools";
import {rerankDocuments} from "../tools/jina-rerank";
import {readUrl} from "../tools/read";
@ -182,7 +182,7 @@ const extractUrlParts = (urlStr: string) => {
};
// Function to count occurrences of hostnames and paths
export const countUrlParts = (urlItems: SearchResult[]) => {
export const countUrlParts = (urlItems: SearchSnippet[]) => {
const hostnameCount: Record<string, number> = {};
const pathPrefixCount: Record<string, number> = {};
let totalUrls = 0;