feat: gather images to response (#98)

* feat: add image tools

* rank images

* add image dedup

* wip

* wip

* remove rank functions

* fix

* add embeddings to image

* move image object to agent

* build image references

* update

* add with_images param

* update dimensions for image tools

* dedup images

* save images to cloud storage

* remove extra log

* fix

* remove test data

* fix
Sha Zhou 2025-06-10 11:55:46 +08:00 committed by GitHub
parent 77c96c07fa
commit a768755783
11 changed files with 1433 additions and 75 deletions
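
Taken together, the changes let a caller opt in to image gathering with a single `with_images` flag on the chat-completions request and read the related image URLs back off the response next to the existing URL fields. A minimal non-streaming client sketch; the host, port, model id, and the absence of auth headers are assumptions for illustration, only `with_images` and the response fields come from this commit:

const res = await fetch('http://localhost:3000/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'jina-deepsearch-v1',                      // assumed model id
    messages: [{ role: 'user', content: 'What is jina-clip-v2?' }],
    stream: false,
    with_images: true,                                // new flag added by this commit
  }),
});
const body = await res.json();
console.log(body.relatedImages);                      // image URLs matched to the answer
console.log(body.visitedURLs?.length, body.numURLs);  // existing fields are unchanged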

package-lock.json (generated): 921 changes
File diff suppressed because it is too large

View File

@ -28,6 +28,8 @@
"dependencies": {
"@ai-sdk/google": "^1.0.0",
"@ai-sdk/openai": "^1.1.9",
"@google-cloud/storage": "^7.16.0",
"@napi-rs/canvas": "^0.1.68",
"@types/jsdom": "^21.1.7",
"ai": "^4.1.26",
"axios": "^1.7.9",

View File

@ -16,7 +16,9 @@ import {
KnowledgeItem,
EvaluationType,
BoostedSearchSnippet,
SearchSnippet, EvaluationResponse, Reference, SERPQuery, RepeatEvaluationType, UnNormalizedSearchSnippet, WebContent
SearchSnippet, EvaluationResponse, Reference, SERPQuery, RepeatEvaluationType, UnNormalizedSearchSnippet, WebContent,
ImageObject,
ImageReference
} from "./types";
import { TrackerContext } from "./types";
import { search } from "./tools/jina-search";
@ -41,7 +43,7 @@ import {
import { MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas } from "./utils/schemas";
import { formatDateBasedOnType, formatDateRange } from "./utils/date-tools";
import { reviseAnswer } from "./tools/md-fixer";
import { buildReferences } from "./tools/build-ref";
import { buildImageReferences, buildReferences } from "./tools/build-ref";
async function sleep(ms: number) {
const seconds = Math.ceil(ms / 1000);
@ -391,8 +393,9 @@ export async function getResponse(question?: string,
minRelScore: number = 0.85,
languageCode: string | undefined = undefined,
searchLanguageCode?: string,
searchProvider?: string
): Promise<{ result: StepAction; context: TrackerContext; visitedURLs: string[], readURLs: string[], allURLs: string[] }> {
searchProvider?: string,
with_images: boolean = false
): Promise<{ result: StepAction; context: TrackerContext; visitedURLs: string[], readURLs: string[], allURLs: string[], allImages?: string[], relatedImages?: string[] }> {
let step = 0;
let totalStep = 0;
@ -451,6 +454,7 @@ export async function getResponse(question?: string,
const allWebContents: Record<string, WebContent> = {};
const visitedURLs: string[] = [];
const badURLs: string[] = [];
const imageObjects: ImageObject[] = [];
const evaluationMetrics: Record<string, RepeatEvaluationType[]> = {};
// reserve the 10% final budget for the beast mode
const regularBudget = tokenBudget * 0.85;
@ -859,9 +863,11 @@ You decided to think out of the box or cut from a completely different angle.
allURLs,
visitedURLs,
badURLs,
imageObjects,
SchemaGen,
currentQuestion,
allWebContents
allWebContents,
with_images
);
diaryContext.push(success
@ -1017,7 +1023,16 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
answerStep.mdAnswer = buildMdFromAnswer(answerStep);
}
console.log(thisStep)
let imageReferences: ImageReference[] = [];
if (imageObjects.length && with_images) {
try {
imageReferences = await buildImageReferences(answerStep.answer, imageObjects, context, SchemaGen);
console.log('Image references built:', imageReferences);
} catch (error) {
console.error('Error building image references:', error);
imageReferences = [];
}
}
// max return 300 urls
const returnedURLs = weightedURLs.slice(0, numReturnedURLs).map(r => r.url);
@ -1026,7 +1041,9 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
context,
visitedURLs: returnedURLs,
readURLs: visitedURLs.filter(url => !badURLs.includes(url)),
allURLs: weightedURLs.map(r => r.url)
allURLs: weightedURLs.map(r => r.url),
allImages: with_images ? imageObjects.map(i => i.url) : undefined,
relatedImages: with_images ? imageReferences.map(i => i.url) : undefined,
};
}
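
Restated outside the diff, the promise returned by getResponse now resolves to the shape below; the two optional arrays are only populated when the new trailing with_images parameter is true (allImages mirrors the collected imageObjects, relatedImages mirrors the built image references). A type-only sketch, assuming the same ./types module:

import { StepAction, TrackerContext } from './types';

type GetResponseResult = {
  result: StepAction;
  context: TrackerContext;
  visitedURLs: string[];     // up to numReturnedURLs weighted URLs
  readURLs: string[];        // visited URLs that were not marked bad
  allURLs: string[];
  allImages?: string[];      // every image URL collected while reading pages
  relatedImages?: string[];  // subset whose embeddings matched chunks of the final answer
};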

View File

@ -7,7 +7,7 @@ import {
ChatCompletionResponse,
ChatCompletionChunk,
AnswerAction,
Model, StepAction, VisitAction
Model, StepAction, VisitAction,
} from './types';
import { TokenTracker } from "./utils/token-tracker";
import { ActionTracker } from "./utils/action-tracker";
@ -522,7 +522,7 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
// Add content to queue for both thinking steps and final answer
if (step.action === 'visit') {
// emit every url in the visit action in url field
((step as VisitAction).URLTargets as string[]).forEach((url) => {
((step as VisitAction).URLTargets as string[])?.forEach((url) => {
const chunk: ChatCompletionChunk = {
id: requestId,
object: 'chat.completion.chunk',
@ -568,7 +568,9 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
result: finalStep,
visitedURLs,
readURLs,
allURLs
allURLs,
allImages,
relatedImages,
} = await getResponse(undefined,
tokenBudget,
maxBadAttempts,
@ -583,7 +585,8 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
body.min_annotation_relevance,
body.language_code,
body.search_language_code,
body.search_provider
body.search_provider,
body.with_images
)
let finalAnswer = (finalStep as AnswerAction).mdAnswer;
@ -656,7 +659,8 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
usage,
visitedURLs,
readURLs,
numURLs: allURLs.length
numURLs: allURLs.length,
relatedImages
};
res.write(`data: ${JSON.stringify(finalChunk)}\n\n`);
res.end();
@ -682,7 +686,8 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
usage,
visitedURLs,
readURLs,
numURLs: allURLs.length
numURLs: allURLs.length,
relatedImages,
};
// Log final response (excluding full content for brevity)
@ -693,7 +698,9 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
usage: response.usage,
visitedURLs: response.visitedURLs,
readURLs: response.readURLs,
numURLs: allURLs.length
numURLs: allURLs.length,
allImages: allImages?.length,
relatedImages: relatedImages?.length,
});
res.json(response);
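
On the streaming path, the new image data only appears in the final chunk, the one written just before res.end(). A rough client-side sketch for picking relatedImages out of the SSE stream; the host and request body details are assumptions, and the [DONE] guard is purely defensive:

const resp = await fetch('http://localhost:3000/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    messages: [{ role: 'user', content: 'recent work on multimodal retrieval' }],
    stream: true,
    with_images: true,
  }),
});
const reader = resp.body!.getReader();
const decoder = new TextDecoder();
let buffer = '';
for (;;) {
  const { done, value } = await reader.read();
  if (done) break;
  buffer += decoder.decode(value, { stream: true });
  const events = buffer.split('\n\n');
  buffer = events.pop() ?? '';                      // keep any trailing partial event
  for (const evt of events) {
    if (!evt.startsWith('data: ')) continue;
    const payload = evt.slice('data: '.length).trim();
    if (payload === '[DONE]') continue;             // defensive; not required by this server
    const chunk = JSON.parse(payload);
    if (chunk.relatedImages) {
      console.log('related images:', chunk.relatedImages);
    }
  }
}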

View File

@ -1,8 +1,9 @@
import {segmentText} from './segment';
import {Reference, TrackerContext, WebContent} from "../types";
import {ImageObject, ImageReference, Reference, TrackerContext, WebContent} from "../types";
import {Schemas} from "../utils/schemas";
import {cosineSimilarity, jaccardRank} from "./cosine";
import {getEmbeddings} from "./embeddings";
import { dedupImagesWithEmbeddings } from '../utils/image-tools';
import {normalizeHostName} from '../utils/url-tools';
export async function buildReferences(
@ -366,4 +367,184 @@ function buildFinalResult(
answer: modifiedAnswer,
references
};
}
export async function buildImageReferences(
answer: string,
imageObjects: ImageObject[],
context: TrackerContext,
schema: Schemas,
minChunkLength: number = 80,
maxRef: number = 10,
minRelScore: number = 0.35
): Promise<Array<ImageReference>> {
console.log(`[buildImageReferences] Starting with maxRef=${maxRef}, minChunkLength=${minChunkLength}, minRelScore=${minRelScore}`);
console.log(`[buildImageReferences] Answer length: ${answer.length} chars, Image sources: ${imageObjects.length}`);
// Step 1: Chunk the answer
console.log(`[buildImageReferences] Step 1: Chunking answer text`);
const {chunks: answerChunks, chunk_positions: answerChunkPositions} = await segmentText(answer, context);
console.log(`[buildImageReferences] Answer segmented into ${answerChunks.length} chunks`);
// Step 2: Prepare image content
console.log(`[buildImageReferences] Step 2: Preparing image content`);
const dedupedImages = dedupImagesWithEmbeddings(imageObjects, []);
const allImageEmbeddings: number[][] = dedupedImages.map(img => img.embedding[0]); // Extract embedding
const imageToSourceMap: any = {};
const validImageIndices = new Set<number>();
dedupedImages.forEach((img, index) => {
imageToSourceMap[index] = {
url: img.url,
altText: img.alt,
embedding: img.embedding[0] // Store extracted embedding
};
validImageIndices.add(index);
});
console.log(`[buildImageReferences] Collected ${allImageEmbeddings.length} image embeddings`);
if (allImageEmbeddings.length === 0) {
console.log(`[buildImageReferences] No image data available, returning empty array`);
return [];
}
// Step 3: Filter answer chunks by minimum length
console.log(`[buildImageReferences] Step 3: Filtering answer chunks by minimum length`);
const validAnswerChunks: string[] = [];
const validAnswerChunkIndices: number[] = [];
const validAnswerChunkPositions: [number, number][] = [];
context.actionTracker.trackThink('cross_reference', schema.languageCode);
for (let i = 0; i < answerChunks.length; i++) {
const answerChunk = answerChunks[i];
const answerChunkPosition = answerChunkPositions[i];
if (!answerChunk.trim() || answerChunk.length < minChunkLength) continue;
validAnswerChunks.push(answerChunk);
validAnswerChunkIndices.push(i);
validAnswerChunkPositions.push(answerChunkPosition);
}
console.log(`[buildImageReferences] Found ${validAnswerChunks.length}/${answerChunks.length} valid answer chunks above minimum length`);
if (validAnswerChunks.length === 0) {
console.log(`[buildImageReferences] No valid answer chunks, returning empty array`);
return [];
}
// Step 4: Get embeddings for answer chunks
console.log(`[buildImageReferences] Step 4: Getting embeddings for answer chunks`);
const answerEmbeddings: number[][] = [];
try {
// const embeddingsResult = await getEmbeddings(validAnswerChunks, context.tokenTracker, embeddingOptions); // No embeddingOptions needed here
// answerEmbeddings.push(...embeddingsResult.embeddings);
const embeddingsResult = await getEmbeddings(validAnswerChunks, context.tokenTracker, {
dimensions: 1024,
model: 'jina-clip-v2',
});
answerEmbeddings.push(...embeddingsResult.embeddings);
console.log(`[buildImageReferences] Got embeddings for ${answerEmbeddings.length} answer chunks`);
// Step 5: Compute pairwise cosine similarity
console.log(`[buildImageReferences] Step 5: Computing pairwise cosine similarity between answer and image embeddings`);
const allMatches = [];
for (let i = 0; i < validAnswerChunks.length; i++) {
const answerChunkIndex = validAnswerChunkIndices[i];
const answerChunk = validAnswerChunks[i];
const answerChunkPosition = validAnswerChunkPositions[i];
const answerEmbedding = answerEmbeddings[i];
const matchesForChunk = [];
for (const imageIndex of validImageIndices) {
const imageEmbedding = allImageEmbeddings[imageIndex];
if (imageEmbedding) {
const score = cosineSimilarity(answerEmbedding, imageEmbedding);
matchesForChunk.push({
imageIndex,
relevanceScore: score
});
}
}
matchesForChunk.sort((a, b) => b.relevanceScore - a.relevanceScore);
for (const match of matchesForChunk) {
allMatches.push({
imageIndex: match.imageIndex,
answerChunkIndex: answerChunkIndex,
relevanceScore: match.relevanceScore,
answerChunk: answerChunk,
answerChunkPosition: answerChunkPosition
});
}
console.log(`[buildImageReferences] Processed answer chunk ${i + 1}/${validAnswerChunks.length}, top score: ${matchesForChunk[0]?.relevanceScore.toFixed(4)}`);
}
// Log statistics about relevance scores
if (allMatches.length > 0) {
const relevanceScores = allMatches.map(match => match.relevanceScore);
const minRelevance = Math.min(...relevanceScores);
const maxRelevance = Math.max(...relevanceScores);
const sumRelevance = relevanceScores.reduce((sum, score) => sum + score, 0);
const meanRelevance = sumRelevance / relevanceScores.length;
console.log('Reference relevance statistics:', {
min: minRelevance.toFixed(4),
max: maxRelevance.toFixed(4),
mean: meanRelevance.toFixed(4),
count: relevanceScores.length
});
}
// Step 6: Sort all matches by relevance
allMatches.sort((a, b) => b.relevanceScore - a.relevanceScore);
console.log(`[buildImageReferences] Step 6: Sorted ${allMatches.length} potential matches by relevance score`);
// Step 7: Filter matches
console.log(`[buildImageReferences] Step 7: Filtering matches to ensure uniqueness and threshold (min: ${minRelScore})`);
const usedImages = new Set();
const usedAnswerChunks = new Set();
const filteredMatches = [];
for (const match of allMatches) {
if (match.relevanceScore < minRelScore) continue;
if (!usedImages.has(match.imageIndex) && !usedAnswerChunks.has(match.answerChunkIndex)) {
filteredMatches.push(match);
usedImages.add(match.imageIndex);
usedAnswerChunks.add(match.answerChunkIndex);
if (filteredMatches.length >= maxRef) break;
}
}
console.log(`[buildImageReferences] Selected ${filteredMatches.length}/${allMatches.length} references after filtering`);
const references: ImageReference[] = filteredMatches.map((match) => {
const source = imageToSourceMap[match.imageIndex];
return {
url: source.url,
relevanceScore: match.relevanceScore,
answerChunk: match.answerChunk,
answerChunkPosition: match.answerChunkPosition
};
});
return references;
} catch (error) {
console.error('Embedding failed', error);
return [];
}
}
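
The selection in Steps 6 and 7 above is a greedy one-to-one assignment: score every (answer chunk, image) pair with cosine similarity, sort globally, then keep the best pairs while never reusing an image or a chunk, stopping at maxRef and ignoring anything under minRelScore. The same step, isolated as a small standalone helper (a simplification of the code above, not a drop-in replacement):

type CandidateMatch = { imageIndex: number; answerChunkIndex: number; relevanceScore: number };

function selectImageMatches(all: CandidateMatch[], maxRef = 10, minRelScore = 0.35): CandidateMatch[] {
  const usedImages = new Set<number>();
  const usedChunks = new Set<number>();
  const picked: CandidateMatch[] = [];
  // Highest score first; once sorted, everything below the threshold can be skipped with a break.
  for (const m of [...all].sort((a, b) => b.relevanceScore - a.relevanceScore)) {
    if (m.relevanceScore < minRelScore) break;
    if (usedImages.has(m.imageIndex) || usedChunks.has(m.answerChunkIndex)) continue;
    picked.push(m);
    usedImages.add(m.imageIndex);
    usedChunks.add(m.answerChunkIndex);
    if (picked.length >= maxRef) break;
  }
  return picked;
}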

View File

@ -8,13 +8,14 @@ const MAX_RETRIES = 3; // Maximum number of retries for missing embeddings
// Modified to support different embedding tasks and dimensions
export async function getEmbeddings(
texts: string[],
texts: string[] | Record<string, string>[],
tokenTracker?: any,
options: {
task?: "text-matching" | "retrieval.passage" | "retrieval.query",
dimensions?: number,
late_chunking?: boolean,
embedding_type?: string
embedding_type?: string,
model?: string,
} = {}
): Promise<{ embeddings: number[][], tokens: number }> {
console.log(`[embeddings] Getting embeddings for ${texts.length} texts`);
@ -66,12 +67,13 @@ export async function getEmbeddings(
// Helper function to get embeddings for a batch with retry logic for missing indices
async function getBatchEmbeddingsWithRetry(
batchTexts: string[],
batchTexts: string[] | Record<string, string>[],
options: {
task?: "text-matching" | "retrieval.passage" | "retrieval.query",
dimensions?: number,
late_chunking?: boolean,
embedding_type?: string
embedding_type?: string,
model?: string,
},
currentBatch: number,
batchCount: number
@ -89,12 +91,15 @@ async function getBatchEmbeddingsWithRetry(
while (textsToProcess.length > 0 && retryCount < MAX_RETRIES) {
const request: JinaEmbeddingRequest = {
model: "jina-embeddings-v3",
task: options.task || "text-matching",
input: textsToProcess,
truncate: true,
model: options.model || "jina-embeddings-v3",
input: textsToProcess as any,
};
if (request.model === "jina-embeddings-v3") {
request.task = options.task || "text-matching";
request.truncate = true;
}
// Add optional parameters if provided
if (options.dimensions) request.dimensions = options.dimensions;
if (options.late_chunking) request.late_chunking = options.late_chunking;
@ -110,7 +115,7 @@ async function getBatchEmbeddingsWithRetry(
"Authorization": `Bearer ${JINA_API_KEY}`
}
}
);
);
if (!response.data.data) {
console.error('No data returned from Jina API');
@ -118,7 +123,7 @@ async function getBatchEmbeddingsWithRetry(
// On last retry, create placeholder embeddings
const dimensionSize = options.dimensions || 1024;
const placeholderEmbeddings = textsToProcess.map(text => {
console.error(`Failed to get embedding after all retries: [${text.substring(0, 50)}...]`);
console.error(`Failed to get embedding after all retries: [${truncateInputString(text)}...]`);
return new Array(dimensionSize).fill(0);
});
@ -140,7 +145,7 @@ async function getBatchEmbeddingsWithRetry(
// Process successful embeddings
const successfulEmbeddings: number[][] = [];
const remainingTexts: string[] = [];
const remainingTexts: (string | Record<string, string>)[] = [];
const newIndexMap = new Map<number, number>();
for (let idx = 0; idx < textsToProcess.length; idx++) {
@ -160,7 +165,7 @@ async function getBatchEmbeddingsWithRetry(
const newIndex = remainingTexts.length;
newIndexMap.set(newIndex, indexMap.get(idx)!);
remainingTexts.push(textsToProcess[idx]);
console.log(`Missing embedding for index ${idx}, will retry: [${textsToProcess[idx].substring(0, 50)}...]`);
console.log(`Missing embedding for index ${idx}, will retry: [${truncateInputString(textsToProcess[idx])}...]`);
}
}
@ -190,7 +195,7 @@ async function getBatchEmbeddingsWithRetry(
const dimensionSize = options.dimensions || 1024;
for (let idx = 0; idx < textsToProcess.length; idx++) {
const originalIndex = indexMap.get(idx)!;
console.error(`Failed to get embedding after all retries for index ${originalIndex}: [${textsToProcess[idx].substring(0, 50)}...]`);
console.error(`Failed to get embedding after all retries for index ${originalIndex}: [${truncateInputString(textsToProcess[idx])}...]`);
while (batchEmbeddings.length <= originalIndex) {
batchEmbeddings.push([]);
@ -228,3 +233,11 @@ async function getBatchEmbeddingsWithRetry(
return { batchEmbeddings, batchTokens };
}
function truncateInputString(input: string | Record<string, string>): string {
if (typeof input === 'string') {
return input.slice(0, 50);
} else {
return Object.values(input)[0].slice(0, 50);
}
}
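
After this change getEmbeddings accepts either plain strings or single-key records such as { image: <base64> }, and only attaches task/truncate when the model is the default jina-embeddings-v3; buildImageReferences relies on jina-clip-v2 placing text and image inputs in the same vector space. A hedged usage sketch (paths assume it sits next to embeddings.ts; the base64 payload is a placeholder):

import { getEmbeddings } from './embeddings';

// Plain text with the default model (jina-embeddings-v3): task and truncate are sent.
const { embeddings: passageEmb } = await getEmbeddings(
  ['some passage to index'], undefined, { task: 'retrieval.passage', dimensions: 1024 });

// Text chunks and images routed through jina-clip-v2 land in one shared space,
// which is what the image-to-answer matching depends on.
const { embeddings: chunkEmb } = await getEmbeddings(
  ['first answer chunk'], undefined, { dimensions: 1024, model: 'jina-clip-v2' });
const { embeddings: imageEmb } = await getEmbeddings(
  [{ image: '<base64-encoded-png>' }], undefined, { dimensions: 1024, model: 'jina-clip-v2' }); // placeholder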

View File

@ -78,4 +78,4 @@ export async function dedupQueries(
unique_queries: newQueries,
};
}
}
}

View File

@ -6,7 +6,8 @@ import axiosClient from "../utils/axios-client";
export async function readUrl(
url: string,
withAllLinks?: boolean,
tracker?: TokenTracker
tracker?: TokenTracker,
withAllImages?: boolean
): Promise<{ response: ReadResponse }> {
if (!url.trim()) {
throw new Error('URL cannot be empty');
@ -20,7 +21,6 @@ export async function readUrl(
'Accept': 'application/json',
'Authorization': `Bearer ${JINA_API_KEY}`,
'Content-Type': 'application/json',
'X-Retain-Images': 'none',
'X-Md-Link-Style': 'discarded',
};
@ -28,6 +28,12 @@ export async function readUrl(
headers['X-With-Links-Summary'] = 'all';
}
if (withAllImages) {
headers['X-With-Images-Summary'] = 'true';
} else {
headers['X-Retain-Images'] = 'none';
}
try {
// Use axios which handles encoding properly
const { data } = await axiosClient.post<ReadResponse>(
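
readUrl now flips a single header pair: X-With-Images-Summary: true when images are requested, otherwise the previous X-Retain-Images: none. The images come back on data.images as an alt-text-to-URL map (see the ReadResponse change further down). A small caller sketch, assuming it lives next to read.ts and that no token tracker is needed:

import { readUrl } from './read';

// withAllLinks = true, tracker omitted, withAllImages = true
const { response } = await readUrl('https://example.com/article', true, undefined, true);
for (const [alt, imageUrl] of Object.entries(response.data?.images ?? {})) {
  console.log(alt || '(no alt text)', '->', imageUrl);
}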

View File

@ -27,6 +27,14 @@ export type Reference = {
answerChunkPosition?: number[];
}
export type ImageReference = {
url: string;
dateTime?: string;
relevanceScore?: number;
answerChunk?: string;
answerChunkPosition?: number[];
}
export type AnswerAction = BaseAction & {
action: "answer";
answer: string;
@ -53,6 +61,7 @@ export type ReflectAction = BaseAction & {
export type VisitAction = BaseAction & {
action: "visit";
URLTargets: number[] | string[];
image?: ImageObject;
};
export type CodingAction = BaseAction & {
@ -155,6 +164,7 @@ export interface ReadResponse {
content: string;
usage: { tokens: number; };
links: Array<[string, string]>; // [anchor, url]
images: Record<string, string>; // { image: url }
};
name?: string;
message?: string;
@ -259,6 +269,8 @@ export interface ChatCompletionRequest {
max_annotations?: number;
min_annotation_relevance?: number;
with_images?: boolean;
language_code?: string;
search_language_code?: string;
search_provider?: string;
@ -294,6 +306,8 @@ export interface ChatCompletionResponse {
visitedURLs?: string[];
readURLs?: string[];
numURLs?: number;
allImages?: string[];
relatedImages?: string[];
}
export interface ChatCompletionChunk {
@ -318,6 +332,8 @@ export interface ChatCompletionChunk {
visitedURLs?: string[];
readURLs?: string[];
numURLs?: number;
allImages?: string[];
relatedImages?: string[];
}
// Tracker Types
@ -336,11 +352,11 @@ export interface TrackerContext {
// Interface definitions for Jina API
export interface JinaEmbeddingRequest {
model: string;
task: string;
task?: string;
late_chunking?: boolean;
dimensions?: number;
embedding_type?: string;
input: string[];
input: string[] | Record<string, string>[];
truncate?: boolean;
}
@ -356,4 +372,10 @@ export interface JinaEmbeddingResponse {
index: number;
embedding: number[];
}>;
}
export type ImageObject = {
url: string;
alt?: string;
embedding: number[][];
}
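
For orientation, ImageObject is the raw collected image (URL, optional alt text, and one or more embedding rows, of which embedding[0] is the one compared), while ImageReference is what buildImageReferences emits once an answer chunk has been matched to it. Illustrative literals only; every value below is made up:

import { ImageObject, ImageReference } from './types';

const collected: ImageObject = {
  url: 'https://storage.googleapis.com/some-bucket/readImages/abc.png', // made-up URL
  alt: 'system architecture diagram',
  embedding: [[0.012, -0.034 /* ... 1024 dims in practice ... */]],
};

const matched: ImageReference = {
  url: collected.url,
  relevanceScore: 0.41,                    // must be >= minRelScore (0.35) to be kept
  answerChunk: 'The pipeline first segments the answer into chunks ...',
  answerChunkPosition: [120, 240],
};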

src/utils/image-tools.ts (new file, 233 lines)
View File

@ -0,0 +1,233 @@
import canvas from '@napi-rs/canvas';
import { getEmbeddings } from '../tools/embeddings';
import { TokenTracker } from './token-tracker';
import { ImageObject } from '../types';
import { cosineSimilarity } from '../tools/cosine';
export type { Canvas, Image } from '@napi-rs/canvas';
import { Storage } from '@google-cloud/storage';
import { randomUUID } from 'crypto';
export const downloadFile = async (uri: string) => {
const resp = await fetch(uri);
if (!(resp.ok && resp.body)) {
throw new Error(`Unexpected response ${resp.statusText}`);
}
const contentLength = parseInt(resp.headers.get('content-length') || '0');
if (contentLength > 1024 * 1024 * 100) {
throw new Error('File too large');
}
const buff = await resp.arrayBuffer();
return { buff, contentType: resp.headers.get('content-type') };
};
const _loadImage = async (input: string | Buffer) => {
let buff;
let contentType;
if (typeof input === 'string') {
if (input.startsWith('data:')) {
const firstComma = input.indexOf(',');
const header = input.slice(0, firstComma);
const data = input.slice(firstComma + 1);
const encoding = header.split(';')[1];
contentType = header.split(';')[0].split(':')[1];
if (encoding?.startsWith('base64')) {
buff = Buffer.from(data, 'base64');
} else {
buff = Buffer.from(decodeURIComponent(data), 'utf-8');
}
}
if (input.startsWith('http')) {
if (input.endsWith('.svg')) {
throw new Error('Unsupported image type');
}
const r = await downloadFile(input);
buff = Buffer.from(r.buff);
contentType = r.contentType;
}
}
if (!buff) {
throw new Error('Invalid input');
}
const img = await canvas.loadImage(buff);
Reflect.set(img, 'contentType', contentType);
return {
img,
buff,
contentType,
};
}
export const loadImage = async (uri: string | Buffer) => {
try {
const theImage = await _loadImage(uri);
return theImage;
} catch (err: any) {
if (err?.message?.includes('Unsupported image type') || err?.message?.includes('unsupported')) {
throw new Error(`Unknown image format for ${uri.slice(0, 128)}`);
}
throw err;
}
}
export const fitImageToSquareBox = (image: canvas.Image | canvas.Canvas, size: number = 1024) => {
if (image.width <= size && image.height <= size) {
const canvasInstance = canvas.createCanvas(image.width, image.height);
const ctx = canvasInstance.getContext('2d');
ctx.drawImage(image, 0, 0, image.width, image.height, 0, 0, canvasInstance.width, canvasInstance.height);
return canvasInstance;
}
const aspectRatio = image.width / image.height;
const resizedWidth = Math.round(aspectRatio > 1 ? size : size * aspectRatio);
const resizedHeight = Math.round(aspectRatio > 1 ? size / aspectRatio : size);
const canvasInstance = canvas.createCanvas(resizedWidth, resizedHeight);
const ctx = canvasInstance.getContext('2d');
ctx.drawImage(image, 0, 0, image.width, image.height, 0, 0, resizedWidth, resizedHeight);
return canvasInstance;
}
export const canvasToDataUrl = (canvas: canvas.Canvas, mimeType?: 'image/png' | 'image/jpeg') => {
return canvas.toDataURLAsync((mimeType || 'image/png') as 'image/png');
}
export const canvasToBuffer = (canvas: canvas.Canvas, mimeType?: 'image/png' | 'image/jpeg') => {
return canvas.toBuffer((mimeType || 'image/png') as 'image/png');
}
export const processImage = async (url: string, tracker: TokenTracker): Promise<ImageObject | undefined> => {
try {
const { img, buff, contentType } = await loadImage(url);
if (!img) {
return;
}
// Check if the image is smaller than 256x256
if (img.width < 256 || img.height < 256) {
return;
}
const newUrl = await saveImageToFirebase(buff, contentType);
const squareCanvas = fitImageToSquareBox(img, 512);
const base64Data = (await canvasToDataUrl(squareCanvas)).split(',')[1];
const {embeddings} = await getEmbeddings([{ image: base64Data }], tracker, {
dimensions: 1024,
model: 'jina-clip-v2',
});
return {
url: newUrl ?? url,
embedding: embeddings,
};
} catch (error) {
return;
}
}
export const dedupImagesWithEmbeddings = (
newImages: ImageObject[], // New images with embeddings
existingImages: ImageObject[], // Existing images with embeddings
similarityThreshold: number = 0.86, // Default similarity threshold
): ImageObject[] => {
try {
// Quick return for single new image with no existing images
if (newImages.length === 1 && existingImages.length === 0) {
return newImages;
}
const uniqueImages: ImageObject[] = [];
const usedIndices = new Set<number>();
// Compare each new image against existing images and already accepted images
for (let i = 0; i < newImages.length; i++) {
let isUnique = true;
// Check against existing images
for (let j = 0; j < existingImages.length; j++) {
const similarity = cosineSimilarity(
newImages[i].embedding[0], // Use the first embedding for comparison
existingImages[j].embedding[0]
);
if (similarity >= similarityThreshold) {
isUnique = false;
break;
}
}
// Check against already accepted images
if (isUnique) {
for (const usedIndex of usedIndices) {
const similarity = cosineSimilarity(
newImages[i].embedding[0], // Use the first embedding for comparison
newImages[usedIndex].embedding[0]
);
if (similarity >= similarityThreshold) {
isUnique = false;
break;
}
}
}
// Add to unique images if passed all checks
if (isUnique) {
uniqueImages.push(newImages[i]);
usedIndices.add(i);
}
}
return uniqueImages;
} catch (error) {
console.error('Error in image deduplication analysis:', error);
// Return all new images if there is an error
return newImages;
}
}
export const saveImageToFirebase = async (
buffer: Buffer,
mimeType?: string | null,
): Promise<string | undefined> => {
if (!process.env.GCLOUD_PROJECT) {
console.error('GCLOUD_PROJECT environment variable is not set');
return;
}
const firebaseDefaultBucket = new Storage().bucket(`${process.env.GCLOUD_PROJECT}.appspot.com`);
try {
let extension = 'png';
const finalMimeType = mimeType || 'image/png';
if (!finalMimeType.startsWith('image/')) {
return;
} else {
extension = finalMimeType?.split('/')[1] || 'png';
}
const fileName = `readImages/${randomUUID()}.${extension}`;
const file = firebaseDefaultBucket.file(fileName);
await file.save(buffer, {
contentType: finalMimeType,
public: true,
});
return file.publicUrl();
} catch (error) {
console.error('Error saving image to Firebase Storage:', error);
return;
}
};
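
End to end, processImage downloads an image, skips anything under 256x256, uploads the original bytes to the project's default Firebase bucket, fits it into a 512px box, and embeds the resulting base64 PNG with jina-clip-v2; dedupImagesWithEmbeddings then drops near-duplicates whose first embeddings exceed a 0.86 cosine similarity. A minimal usage sketch, assuming it sits in src/utils next to image-tools.ts, with placeholder URLs, GCLOUD_PROJECT set, and TokenTracker's default constructor being acceptable:

import { processImage, dedupImagesWithEmbeddings } from './image-tools';
import { TokenTracker } from './token-tracker';
import { ImageObject } from '../types';

const tracker = new TokenTracker();                // assumption: default constructor
const candidateUrls = [
  'https://example.com/figure-1.png',              // placeholder URLs
  'https://example.com/figure-1-copy.png',
];

const collected: ImageObject[] = [];
for (const url of candidateUrls) {
  const img = await processImage(url, tracker);    // undefined for tiny, broken, or unsupported images
  if (img) collected.push(img);
}

// Near-duplicates against an empty "existing" set; only the new images are compared to each other.
const unique = dedupImagesWithEmbeddings(collected, []);
console.log(`kept ${unique.length} of ${collected.length} images`);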

View File

@ -1,12 +1,13 @@
import { BoostedSearchSnippet, KnowledgeItem, SearchSnippet, TrackerContext, VisitAction, WebContent } from "../types";
import { getI18nText, smartMergeStrings } from "./text-tools";
import { rerankDocuments } from "../tools/jina-rerank";
import { readUrl } from "../tools/read";
import { Schemas } from "./schemas";
import { cherryPick } from "../tools/jina-latechunk";
import { formatDateBasedOnType } from "./date-tools";
import { classifyText } from "../tools/jina-classify-spam";
import { segmentText } from "../tools/segment";
import {BoostedSearchSnippet, ImageObject, KnowledgeItem, SearchSnippet, TrackerContext, VisitAction, WebContent} from "../types";
import {getI18nText, smartMergeStrings} from "./text-tools";
import {rerankDocuments} from "../tools/jina-rerank";
import {readUrl} from "../tools/read";
import {Schemas} from "./schemas";
import {cherryPick} from "../tools/jina-latechunk";
import {formatDateBasedOnType} from "./date-tools";
import {classifyText} from "../tools/jina-classify-spam";
import { processImage } from "./image-tools";
import {segmentText} from "../tools/segment";
import axiosClient from "./axios-client";
export function normalizeUrl(urlString: string, debug = false, options = {
@ -460,9 +461,11 @@ export async function processURLs(
allURLs: Record<string, SearchSnippet>,
visitedURLs: string[],
badURLs: string[],
imageObjects: ImageObject[],
schemaGen: Schemas,
question: string,
webContents: Record<string, WebContent>
webContents: Record<string, WebContent>,
withImages: boolean = false,
): Promise<{ urlResults: any[], success: boolean }> {
// Skip if no URLs to process
if (urls.length === 0) {
@ -491,8 +494,8 @@ export async function processURLs(
// Store normalized URL for consistent reference
url = normalizedUrl;
const { response } = await readUrl(url, true, context.tokenTracker);
const { data } = response;
const {response} = await readUrl(url, true, context.tokenTracker, withImages);
const {data} = response;
const guessedTime = await getLastModified(url);
if (guessedTime) {
console.log('Guessed time for', url, guessedTime);
@ -554,7 +557,18 @@ export async function processURLs(
}
});
return { url, result: response };
// Process images (await each one so the embeddings are collected before returning)
if (withImages && data.images) {
for (const [, imageUrl] of Object.entries(data.images)) {
const imageObject = await processImage(imageUrl, context.tokenTracker);
if (imageObject && !imageObjects.find(i => i.url === imageObject.url)) {
imageObjects.push(imageObject);
}
}
}
return {url, result: response};
} catch (error: any) {
console.error('Error reading URL:', url, error);
badURLs.push(url);