diff --git a/src/agent.ts b/src/agent.ts index a39e38d..bb21e95 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -995,7 +995,8 @@ But unfortunately, you failed to solve the issue. You need to think out of the b SchemaGen, 80, maxRef, - minRelScore + minRelScore, + onlyHostnames ); answerStep.answer = answer; diff --git a/src/tools/build-ref.ts b/src/tools/build-ref.ts index a99ad0e..f1acdd6 100644 --- a/src/tools/build-ref.ts +++ b/src/tools/build-ref.ts @@ -3,6 +3,7 @@ import {Reference, TrackerContext, WebContent} from "../types"; import {Schemas} from "../utils/schemas"; import {cosineSimilarity, jaccardRank} from "./cosine"; import {getEmbeddings} from "./embeddings"; +import {normalizeHostName} from '../utils/url-tools'; export async function buildReferences( answer: string, @@ -11,7 +12,8 @@ export async function buildReferences( schema: Schemas, minChunkLength: number = 80, maxRef: number = 10, - minRelScore: number = 0.7 + minRelScore: number = 0.7, + onlyHostnames: string[] = [] ): Promise<{ answer: string, references: Array }> { console.log(`[buildReferences] Starting with maxRef=${maxRef}, minChunkLength=${minChunkLength}, minRelScore=${minRelScore}`); console.log(`[buildReferences] Answer length: ${answer.length} chars, Web content sources: ${Object.keys(webContents).length}`); @@ -30,6 +32,7 @@ export async function buildReferences( let chunkIndex = 0; for (const [url, content] of Object.entries(webContents)) { if (!content.chunks || content.chunks.length === 0) continue; + if (onlyHostnames.length > 0 && !onlyHostnames.includes(normalizeHostName(url))) continue; for (let i = 0; i < content.chunks.length; i++) { const chunk = content.chunks[i];