Add an optional onlyHostnames filter when building references

This commit is contained in:
Sha Zhou 2025-04-28 18:20:30 +08:00
parent d0165d419e
commit 93ff448532
2 changed files with 6 additions and 2 deletions

View File

@ -995,7 +995,8 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
SchemaGen,
80,
maxRef,
minRelScore
minRelScore,
onlyHostnames
);
answerStep.answer = answer;

View File

@ -3,6 +3,7 @@ import {Reference, TrackerContext, WebContent} from "../types";
import {Schemas} from "../utils/schemas";
import {cosineSimilarity, jaccardRank} from "./cosine";
import {getEmbeddings} from "./embeddings";
import {normalizeHostName} from '../utils/url-tools';
export async function buildReferences(
answer: string,
@ -11,7 +12,8 @@ export async function buildReferences(
schema: Schemas,
minChunkLength: number = 80,
maxRef: number = 10,
minRelScore: number = 0.7
minRelScore: number = 0.7,
onlyHostnames: string[] = []
): Promise<{ answer: string, references: Array<Reference> }> {
console.log(`[buildReferences] Starting with maxRef=${maxRef}, minChunkLength=${minChunkLength}, minRelScore=${minRelScore}`);
console.log(`[buildReferences] Answer length: ${answer.length} chars, Web content sources: ${Object.keys(webContents).length}`);
@ -30,6 +32,7 @@ export async function buildReferences(
let chunkIndex = 0;
for (const [url, content] of Object.entries(webContents)) {
if (!content.chunks || content.chunks.length === 0) continue;
if (onlyHostnames.length > 0 && !onlyHostnames.includes(normalizeHostName(url))) continue;
for (let i = 0; i < content.chunks.length; i++) {
const chunk = content.chunks[i];