diff --git a/src/agent.ts b/src/agent.ts index a367c8a..ff68b02 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -48,7 +48,7 @@ import { logInfo, logError, logDebug, logWarning } from './logging'; import { researchPlan } from './tools/research-planner'; import { reduceAnswers } from './tools/reducer'; import { AxiosError } from 'axios'; -import { dedupImagesWithEmbeddings } from './utils/image-tools'; +import { dedupImagesWithEmbeddings, filterImages } from './utils/image-tools'; import { serpCluster } from './tools/serp-cluster'; async function wait(seconds: number) { @@ -845,7 +845,7 @@ But then you realized you have asked them before. You decided to to think out of answer: subproblemResponses.map(r => (r.result as AnswerAction).answer).join('\n\n'), mdAnswer: subproblemResponses.map(r => (r.result as AnswerAction).mdAnswer).join('\n\n'), references: subproblemResponses.map(r => (r.result as AnswerAction).references).flat(), - imageReferences: subproblemResponses.map(r => (r.result as AnswerAction).imageReferences).flat(), + imageReferences: subproblemResponses.map(r => (r.result as AnswerAction).imageReferences).filter(Boolean).flat(), isFinal: true, isAggregated: true } as AnswerAction; @@ -1124,10 +1124,12 @@ But unfortunately, you failed to solve the issue. 
You need to think out of the b answerStep.answer = candidateAnswers.join('\n\n'); // await reduceAnswers(candidateAnswers, context, SchemaGen); answerStep.mdAnswer = repairMarkdownFootnotesOuter(buildMdFromAnswer(answerStep)); if (withImages && answerStep.imageReferences?.length) { - logDebug('[agent] all image references:', { count: answerStep.imageReferences?.length }); - const dedupImages = dedupImagesWithEmbeddings(answerStep.imageReferences as ImageObject[], []); - logDebug('[agent] deduped images:', { count: dedupImages.length }); - answerStep.imageReferences = answerStep.imageReferences?.filter(i => i?.url && dedupImages.some(d => d?.url === i.url)) || []; + const sortedImages = answerStep.imageReferences.sort((a, b) => (b.relevanceScore ?? 0) - (a.relevanceScore ?? 0)); + logDebug('[agent] all sorted image references:', { count: sortedImages?.length }); + const dedupImages = dedupImagesWithEmbeddings(sortedImages as ImageObject[], []); + const filteredImages = filterImages(sortedImages, dedupImages); + logDebug('[agent] filtered images:', { count: filteredImages.length }); + answerStep.imageReferences = filteredImages.slice(0, 10); // limit to 10 images } } diff --git a/src/tools/build-ref.ts b/src/tools/build-ref.ts index 5425358..57bdcc3 100644 --- a/src/tools/build-ref.ts +++ b/src/tools/build-ref.ts @@ -522,7 +522,7 @@ export async function buildImageReferences( const filteredMatches = []; for (const match of allMatches) { - if (match.relevanceScore < minRelScore) continue; + // if (match.relevanceScore < minRelScore) continue; if (!usedImages.has(match.imageIndex) && !usedAnswerChunks.has(match.answerChunkIndex)) { filteredMatches.push(match); diff --git a/src/utils/image-tools.ts b/src/utils/image-tools.ts index b15b76c..0b4e763 100644 --- a/src/utils/image-tools.ts +++ b/src/utils/image-tools.ts @@ -1,6 +1,6 @@ import { getEmbeddings } from '../tools/embeddings'; import { TokenTracker } from './token-tracker'; -import { ImageObject } from '../types'; 
+import { ImageObject, ImageReference } from '../types';
 import { cosineSimilarity } from '../tools/cosine';
 import { logInfo, logError, logDebug, logWarning } from '../logging';
 import sharp from 'sharp';
@@ -190,4 +190,41 @@ export const dedupImagesWithEmbeddings = (
     // Return all new images if there is an error
     return newImages;
   }
+}
+
+/**
+ * Maps deduplicated images back to the image references they originated from.
+ *
+ * References are indexed by `url` (the first reference wins when a URL repeats)
+ * and the result follows the order of `dedupedImages`. Deduplicated images whose
+ * URL matches no reference are dropped.
+ *
+ * @param imageReferences - Candidate references; entries without a `url` are ignored.
+ * @param dedupedImages - Images that survived deduplication.
+ * @returns The references matching `dedupedImages`. Returns `[]` when no references
+ *   are given, and `imageReferences` unchanged when `dedupedImages` is empty.
+ */
+export const filterImages = (imageReferences: ImageReference[], dedupedImages: ImageObject[]): ImageReference[] => {
+  if (!imageReferences || imageReferences.length === 0) {
+    logInfo('No image references provided for filtering');
+    return [];
+  }
+
+  if (!dedupedImages || dedupedImages.length === 0) {
+    logInfo('No deduplicated images provided for filtering');
+    return imageReferences;
+  }
+
+  // Typed map: lookups yield `ImageReference | undefined` rather than `any`.
+  const urlMap = new Map<string, ImageReference>();
+  for (const img of imageReferences) {
+    if (img?.url && !urlMap.has(img.url)) {
+      urlMap.set(img.url, img);
+    }
+  }
+
+  // Guard `img?.url` like the loop above so a null entry cannot throw here.
+  return dedupedImages
+    .map(img => (img?.url ? urlMap.get(img.url) : undefined))
+    .filter((ref): ref is ImageReference => ref !== undefined);
+}
\ No newline at end of file