node-DeepResearch/src/utils/image-tools.ts
2025-06-13 13:44:59 +08:00

193 lines
5.6 KiB
TypeScript

import { getEmbeddings } from '../tools/embeddings';
import { TokenTracker } from './token-tracker';
import { ImageObject } from '../types';
import { cosineSimilarity } from '../tools/cosine';
import { logInfo, logError, logDebug, logWarning } from '../logging';
import sharp from 'sharp';
/**
 * Downloads `uri` and returns its raw bytes together with the declared content type.
 *
 * Every header-based check runs before the body is read, so a response that is
 * not an image (or that declares an oversized body) is rejected without being
 * buffered in memory.
 *
 * @param uri - URL passed to `fetch`.
 * @returns The response body as an `ArrayBuffer` and the `content-type` header value.
 * @throws Error when the response is not OK or has no body, when the declared or
 *   actual size exceeds 100 MiB, or when the content type is not `image/*`.
 */
export const downloadFile = async (uri: string) => {
  const maxBytes = 100 * 1024 * 1024;
  const resp = await fetch(uri);

  // Rejects the download and releases the unread body so it does not linger.
  const fail = async (message: string): Promise<never> => {
    await resp.body?.cancel().catch(() => undefined);
    throw new Error(message);
  };

  if (!(resp.ok && resp.body)) {
    return fail(`Unexpected response ${resp.statusText}`);
  }

  // A missing or unparsable header yields 0 or NaN, neither of which trips the limit.
  const contentLength = Number.parseInt(resp.headers.get('content-length') ?? '0', 10);
  if (contentLength > maxBytes) {
    return fail('File too large');
  }

  const contentType = resp.headers.get('content-type');
  if (!contentType || !contentType.startsWith('image/')) {
    return fail(`Invalid content type ${contentType}, expected image/*`);
  }

  const buff = await resp.arrayBuffer();
  // The content-length header may be absent or wrong, so re-check the real size.
  if (buff.byteLength > maxBytes) {
    throw new Error('File too large');
  }
  return { buff, contentType };
};
/**
 * Resolves an image reference to its raw bytes and content type.
 *
 * Accepted inputs:
 * - a `Buffer`, returned as-is with an empty content type (none is known);
 * - a `data:` URI, base64 or percent-encoded, whose media type becomes the content type;
 * - an `http://` / `https://` URL, fetched through `downloadFile`.
 *
 * @param input - Image bytes, a `data:` URI, or an HTTP(S) URL.
 * @returns The image bytes and the content type (`''` when unknown).
 * @throws Error `'Invalid input'` when the input is none of the above or is a
 *   `data:` URI without a comma, `'Unsupported image type'` for URLs whose path
 *   ends in `.svg`, and `'Image too large'` when the bytes exceed 20 MiB.
 *   Errors raised by `downloadFile` propagate unchanged.
 */
const loadImage = async (input: string | Buffer) => {
  const maxBytes = 20 * 1024 * 1024;
  let buff: Buffer | undefined;
  let contentType = '';

  if (Buffer.isBuffer(input)) {
    buff = input;
  } else if (typeof input === 'string') {
    if (input.startsWith('data:')) {
      const firstComma = input.indexOf(',');
      if (firstComma === -1) {
        // Without the separator there is no way to tell header from payload.
        throw new Error('Invalid input');
      }
      // Header shape: data:[<mediatype>][;<param>=<value>]*[;base64]
      const [mediaType = '', ...params] = input.slice('data:'.length, firstComma).split(';');
      const data = input.slice(firstComma + 1);
      contentType = mediaType;
      // The base64 flag may follow other parameters such as charset, so look at all of them.
      const isBase64 = params.some((param) => param.trim().toLowerCase() === 'base64');
      buff = isBase64
        ? Buffer.from(data, 'base64')
        : Buffer.from(decodeURIComponent(data), 'utf-8');
    } else if (/^https?:\/\//i.test(input)) {
      // Inspect the path only, so "icon.SVG?v=2" is caught before any download.
      if (new URL(input).pathname.toLowerCase().endsWith('.svg')) {
        throw new Error('Unsupported image type');
      }
      const r = await downloadFile(input);
      buff = Buffer.from(r.buff);
      contentType = r.contentType;
    }
  }

  if (!buff) {
    throw new Error('Invalid input');
  }
  if (buff.length > maxBytes) {
    throw new Error('Image too large');
  }
  return {
    buff,
    contentType,
  };
};
/**
 * Image subtypes (the part after `image/` in a content type) accepted by
 * `fitImageToSquareBox`. The accepted subtype is also passed to sharp as the
 * output format name.
 */
const ImageTypes = ['png', 'jpeg', 'jpg', 'webp', 'avif', 'tiff', 'gif', 'svg', 'bmp', 'heif', 'jxl', 'jp2', 'ppm', 'raw', 'exr', 'fits', 'rad'];

/**
 * Shrinks an image so that it fits inside a `size` x `size` box, keeping its
 * aspect ratio and its original format, and returns the result base64-encoded.
 * Images that already fit are re-encoded at their current dimensions.
 *
 * @param imageBuffer - Encoded image bytes.
 * @param contentType - Content type of the image, e.g. `image/png`. Parameters
 *   (`; charset=...`) and letter case are ignored when extracting the subtype.
 * @param size - Edge length of the bounding box in pixels (default 1024).
 * @returns The re-encoded image as a base64 string.
 * @throws Error when the buffer is empty, when either dimension is unknown or
 *   below 256 pixels, or when the subtype is not listed in `ImageTypes`.
 *   Errors raised by sharp propagate unchanged.
 */
export const fitImageToSquareBox = async (imageBuffer: Buffer, contentType: string, size: number = 1024) => {
  if (!imageBuffer || imageBuffer.length === 0) {
    throw new Error('Invalid image buffer');
  }

  const metadata = await sharp(imageBuffer).metadata();
  if (!metadata.width || !metadata.height || metadata.width < 256 || metadata.height < 256) {
    throw new Error('Image must be at least 256x256 pixels');
  }

  // "image/PNG; charset=binary" -> "png"
  const [mediaType = ''] = contentType.split(';');
  const imageType = mediaType.split('/')[1]?.trim().toLowerCase();
  if (!imageType || !ImageTypes.includes(imageType)) {
    throw new Error(`Unsupported image type: ${imageType}`);
  }

  let width = metadata.width;
  let height = metadata.height;
  if (width > size || height > size) {
    // Pin the longer edge to the box and scale the other one proportionally.
    const aspectRatio = width / height;
    if (aspectRatio > 1) {
      width = size;
      height = Math.round(size / aspectRatio);
    } else {
      height = size;
      width = Math.round(size * aspectRatio);
    }
  }

  const resizedImageBuffer = await sharp(imageBuffer)
    .resize(width, height, {
      fit: 'inside',
      withoutEnlargement: true,
    })
    // The subtype is validated against ImageTypes above; sharp's format typing is narrower than string.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    .toFormat(imageType as any)
    .toBuffer();
  return resizedImageBuffer.toString('base64');
};
/**
 * Loads the image at `url`, scales it to fit a 256 px box and requests its
 * embedding (`jina-clip-v2`, 512 dimensions).
 *
 * This is best-effort: any failure while loading, resizing or embedding is
 * logged at debug level and reported to the caller as `undefined` rather than
 * thrown.
 *
 * @param url - HTTP(S) URL or `data:` URI of the image.
 * @param tracker - Token tracker forwarded to `getEmbeddings`.
 * @returns The image URL paired with its embeddings, or `undefined` on failure.
 */
export const processImage = async (url: string, tracker: TokenTracker): Promise<ImageObject | undefined> => {
  try {
    const { buff, contentType } = await loadImage(url);
    const base64Data = await fitImageToSquareBox(buff, contentType, 256);
    const { embeddings } = await getEmbeddings([{ image: base64Data }], tracker, {
      dimensions: 512,
      model: 'jina-clip-v2',
    });
    return {
      url,
      embedding: embeddings,
    };
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    // data: URIs can be very long; keep the log line bounded.
    const shownUrl = url.length > 200 ? `${url.slice(0, 200)}...` : url;
    logDebug(`Skipping image ${shownUrl}: ${reason}`, { error });
    return undefined;
  }
};
/**
 * Keeps only those new images that are not near-duplicates of an existing
 * image or of a new image accepted earlier in the same call.
 *
 * Two images count as duplicates when the cosine similarity of their first
 * embedding vectors is greater than or equal to `similarityThreshold`.
 *
 * @param newImages - Candidate images, each carrying embeddings.
 * @param existingImages - Images already known, each carrying embeddings.
 * @param similarityThreshold - Similarity at or above which a candidate is dropped (default 0.86).
 * @returns The accepted candidates in input order. If the comparison throws,
 *   the error is logged and `newImages` is returned unfiltered.
 */
export const dedupImagesWithEmbeddings = (
  newImages: ImageObject[],
  existingImages: ImageObject[],
  similarityThreshold: number = 0.86,
): ImageObject[] => {
  try {
    if (newImages.length === 0) {
      logWarning('No new images provided for deduplication');
      return [];
    }

    // Nothing to compare against: a lone candidate is trivially unique.
    if (existingImages.length === 0 && newImages.length === 1) {
      return newImages;
    }

    // Reports whether any image in `pool` is at least as similar to `candidate` as the threshold.
    const hasNearDuplicate = (candidate: ImageObject, pool: ImageObject[]): boolean => {
      for (const other of pool) {
        const score = cosineSimilarity(candidate.embedding[0], other.embedding[0]);
        if (score >= similarityThreshold) {
          return true;
        }
      }
      return false;
    };

    const accepted: ImageObject[] = [];
    for (const candidate of newImages) {
      // Known images are checked first, then the candidates accepted so far.
      const duplicated = hasNearDuplicate(candidate, existingImages) || hasNearDuplicate(candidate, accepted);
      if (!duplicated) {
        accepted.push(candidate);
      }
    }
    return accepted;
  } catch (error) {
    logError('Error in image deduplication analysis:', { error });
    // Fall back to the unfiltered input so no image is lost on failure.
    return newImages;
  }
};