mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-25 22:16:49 +08:00
feat: gather images to response (#98)
* feat: add image tools * rank images * add image dedup * wip * wip * remove rank functions * fix * add embeddings to image * move image object to agent * build image references * update * add with_images param * update dimensions for image tools * dudup images * save images to cloud storage * remove extra log * fix * remove test data * fix
This commit is contained in:
parent
77c96c07fa
commit
a768755783
921
package-lock.json
generated
921
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -28,6 +28,8 @@
|
||||
"dependencies": {
|
||||
"@ai-sdk/google": "^1.0.0",
|
||||
"@ai-sdk/openai": "^1.1.9",
|
||||
"@google-cloud/storage": "^7.16.0",
|
||||
"@napi-rs/canvas": "^0.1.68",
|
||||
"@types/jsdom": "^21.1.7",
|
||||
"ai": "^4.1.26",
|
||||
"axios": "^1.7.9",
|
||||
|
||||
31
src/agent.ts
31
src/agent.ts
@ -16,7 +16,9 @@ import {
|
||||
KnowledgeItem,
|
||||
EvaluationType,
|
||||
BoostedSearchSnippet,
|
||||
SearchSnippet, EvaluationResponse, Reference, SERPQuery, RepeatEvaluationType, UnNormalizedSearchSnippet, WebContent
|
||||
SearchSnippet, EvaluationResponse, Reference, SERPQuery, RepeatEvaluationType, UnNormalizedSearchSnippet, WebContent,
|
||||
ImageObject,
|
||||
ImageReference
|
||||
} from "./types";
|
||||
import { TrackerContext } from "./types";
|
||||
import { search } from "./tools/jina-search";
|
||||
@ -41,7 +43,7 @@ import {
|
||||
import { MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas } from "./utils/schemas";
|
||||
import { formatDateBasedOnType, formatDateRange } from "./utils/date-tools";
|
||||
import { reviseAnswer } from "./tools/md-fixer";
|
||||
import { buildReferences } from "./tools/build-ref";
|
||||
import { buildImageReferences, buildReferences } from "./tools/build-ref";
|
||||
|
||||
async function sleep(ms: number) {
|
||||
const seconds = Math.ceil(ms / 1000);
|
||||
@ -391,8 +393,9 @@ export async function getResponse(question?: string,
|
||||
minRelScore: number = 0.85,
|
||||
languageCode: string | undefined = undefined,
|
||||
searchLanguageCode?: string,
|
||||
searchProvider?: string
|
||||
): Promise<{ result: StepAction; context: TrackerContext; visitedURLs: string[], readURLs: string[], allURLs: string[] }> {
|
||||
searchProvider?: string,
|
||||
with_images: boolean = false
|
||||
): Promise<{ result: StepAction; context: TrackerContext; visitedURLs: string[], readURLs: string[], allURLs: string[], allImages?: string[], relatedImages?: string[] }> {
|
||||
|
||||
let step = 0;
|
||||
let totalStep = 0;
|
||||
@ -451,6 +454,7 @@ export async function getResponse(question?: string,
|
||||
const allWebContents: Record<string, WebContent> = {};
|
||||
const visitedURLs: string[] = [];
|
||||
const badURLs: string[] = [];
|
||||
const imageObjects: ImageObject[] = [];
|
||||
const evaluationMetrics: Record<string, RepeatEvaluationType[]> = {};
|
||||
// reserve the final 15% of the token budget for beast mode
|
||||
const regularBudget = tokenBudget * 0.85;
|
||||
@ -859,9 +863,11 @@ You decided to think out of the box or cut from a completely different angle.
|
||||
allURLs,
|
||||
visitedURLs,
|
||||
badURLs,
|
||||
imageObjects,
|
||||
SchemaGen,
|
||||
currentQuestion,
|
||||
allWebContents
|
||||
allWebContents,
|
||||
with_images
|
||||
);
|
||||
|
||||
diaryContext.push(success
|
||||
@ -1017,7 +1023,16 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
|
||||
answerStep.mdAnswer = buildMdFromAnswer(answerStep);
|
||||
}
|
||||
|
||||
console.log(thisStep)
|
||||
let imageReferences: ImageReference[] = [];
|
||||
if(imageObjects.length && with_images) {
|
||||
try {
|
||||
imageReferences = await buildImageReferences(answerStep.answer, imageObjects, context, SchemaGen);
|
||||
console.log('Image references built:', imageReferences);
|
||||
} catch (error) {
|
||||
console.error('Error building image references:', error);
|
||||
imageReferences = [];
|
||||
}
|
||||
}
|
||||
|
||||
// max return 300 urls
|
||||
const returnedURLs = weightedURLs.slice(0, numReturnedURLs).map(r => r.url);
|
||||
@ -1026,7 +1041,9 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
|
||||
context,
|
||||
visitedURLs: returnedURLs,
|
||||
readURLs: visitedURLs.filter(url => !badURLs.includes(url)),
|
||||
allURLs: weightedURLs.map(r => r.url)
|
||||
allURLs: weightedURLs.map(r => r.url),
|
||||
allImages: with_images ? imageObjects.map(i => i.url) : undefined,
|
||||
relatedImages: with_images ? imageReferences.map(i => i.url) : undefined,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
21
src/app.ts
21
src/app.ts
@ -7,7 +7,7 @@ import {
|
||||
ChatCompletionResponse,
|
||||
ChatCompletionChunk,
|
||||
AnswerAction,
|
||||
Model, StepAction, VisitAction
|
||||
Model, StepAction, VisitAction,
|
||||
} from './types';
|
||||
import { TokenTracker } from "./utils/token-tracker";
|
||||
import { ActionTracker } from "./utils/action-tracker";
|
||||
@ -522,7 +522,7 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
|
||||
// Add content to queue for both thinking steps and final answer
|
||||
if (step.action === 'visit') {
|
||||
// emit every url in the visit action in url field
|
||||
((step as VisitAction).URLTargets as string[]).forEach((url) => {
|
||||
((step as VisitAction).URLTargets as string[])?.forEach((url) => {
|
||||
const chunk: ChatCompletionChunk = {
|
||||
id: requestId,
|
||||
object: 'chat.completion.chunk',
|
||||
@ -568,7 +568,9 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
|
||||
result: finalStep,
|
||||
visitedURLs,
|
||||
readURLs,
|
||||
allURLs
|
||||
allURLs,
|
||||
allImages,
|
||||
relatedImages,
|
||||
} = await getResponse(undefined,
|
||||
tokenBudget,
|
||||
maxBadAttempts,
|
||||
@ -583,7 +585,8 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
|
||||
body.min_annotation_relevance,
|
||||
body.language_code,
|
||||
body.search_language_code,
|
||||
body.search_provider
|
||||
body.search_provider,
|
||||
body.with_images
|
||||
)
|
||||
let finalAnswer = (finalStep as AnswerAction).mdAnswer;
|
||||
|
||||
@ -656,7 +659,8 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
|
||||
usage,
|
||||
visitedURLs,
|
||||
readURLs,
|
||||
numURLs: allURLs.length
|
||||
numURLs: allURLs.length,
|
||||
relatedImages
|
||||
};
|
||||
res.write(`data: ${JSON.stringify(finalChunk)}\n\n`);
|
||||
res.end();
|
||||
@ -682,7 +686,8 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
|
||||
usage,
|
||||
visitedURLs,
|
||||
readURLs,
|
||||
numURLs: allURLs.length
|
||||
numURLs: allURLs.length,
|
||||
relatedImages,
|
||||
};
|
||||
|
||||
// Log final response (excluding full content for brevity)
|
||||
@ -693,7 +698,9 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
|
||||
usage: response.usage,
|
||||
visitedURLs: response.visitedURLs,
|
||||
readURLs: response.readURLs,
|
||||
numURLs: allURLs.length
|
||||
numURLs: allURLs.length,
|
||||
allImages: allImages?.length,
|
||||
relatedImages: relatedImages?.length,
|
||||
});
|
||||
|
||||
res.json(response);
|
||||
|
||||
@ -1,8 +1,9 @@
|
||||
import {segmentText} from './segment';
|
||||
import {Reference, TrackerContext, WebContent} from "../types";
|
||||
import {ImageObject, ImageReference, Reference, TrackerContext, WebContent} from "../types";
|
||||
import {Schemas} from "../utils/schemas";
|
||||
import {cosineSimilarity, jaccardRank} from "./cosine";
|
||||
import {getEmbeddings} from "./embeddings";
|
||||
import { dedupImagesWithEmbeddings } from '../utils/image-tools';
|
||||
import {normalizeHostName} from '../utils/url-tools';
|
||||
|
||||
export async function buildReferences(
|
||||
@ -366,4 +367,184 @@ function buildFinalResult(
|
||||
answer: modifiedAnswer,
|
||||
references
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Selects the images that best match passages of the final answer.
 *
 * The answer is split into chunks with `segmentText`; blank chunks and chunks shorter than
 * `minChunkLength` are ignored. The remaining chunks are embedded with `jina-clip-v2`
 * (1024 dimensions) and compared by cosine similarity against the first embedding of every
 * de-duplicated image. Matches are then picked greedily by descending score so that each
 * image and each answer chunk is used at most once.
 *
 * @param answer - Answer text to find images for.
 * @param imageObjects - Candidate images; only `embedding[0]` of each image is compared.
 * @param context - Tracker context handed to `segmentText` / `getEmbeddings`; also receives the think event.
 * @param schema - Supplies the language code for the tracked think event.
 * @param minChunkLength - Minimum chunk length (in characters) for a chunk to be considered.
 * @param maxRef - Maximum number of image references to return.
 * @param minRelScore - Minimum cosine similarity a match needs to be kept.
 * @returns The selected references; an empty array when there are no usable images or chunks,
 *          or when anything in the embedding/matching stage throws.
 */
export async function buildImageReferences(
  answer: string,
  imageObjects: ImageObject[],
  context: TrackerContext,
  schema: Schemas,
  minChunkLength: number = 80,
  maxRef: number = 10,
  minRelScore: number = 0.35
): Promise<Array<ImageReference>> {
  console.log(`[buildImageReferences] Starting with maxRef=${maxRef}, minChunkLength=${minChunkLength}, minRelScore=${minRelScore}`);
  console.log(`[buildImageReferences] Answer length: ${answer.length} chars, Image sources: ${imageObjects.length}`);

  // Step 1: Chunk the answer
  console.log(`[buildImageReferences] Step 1: Chunking answer text`);
  const {chunks: answerChunks, chunk_positions: answerChunkPositions} = await segmentText(answer, context);
  console.log(`[buildImageReferences] Answer segmented into ${answerChunks.length} chunks`);

  // Step 2: Prepare image content
  console.log(`[buildImageReferences] Step 2: Preparing image content`);
  const dedupedImages = dedupImagesWithEmbeddings(imageObjects, []);
  // Only the first embedding of each image takes part in the comparison.
  const allImageEmbeddings: number[][] = dedupedImages.map(img => img.embedding[0]);

  console.log(`[buildImageReferences] Collected ${allImageEmbeddings.length} image embeddings`);

  if (allImageEmbeddings.length === 0) {
    console.log(`[buildImageReferences] No image data available, returning empty array`);
    return [];
  }

  // Step 3: Filter answer chunks by minimum length
  console.log(`[buildImageReferences] Step 3: Filtering answer chunks by minimum length`);
  const validAnswerChunks: string[] = [];
  const validAnswerChunkIndices: number[] = [];
  const validAnswerChunkPositions: [number, number][] = [];

  context.actionTracker.trackThink('cross_reference', schema.languageCode);

  for (let i = 0; i < answerChunks.length; i++) {
    const answerChunk = answerChunks[i];

    if (!answerChunk.trim() || answerChunk.length < minChunkLength) continue;

    validAnswerChunks.push(answerChunk);
    validAnswerChunkIndices.push(i);
    validAnswerChunkPositions.push(answerChunkPositions[i]);
  }

  console.log(`[buildImageReferences] Found ${validAnswerChunks.length}/${answerChunks.length} valid answer chunks above minimum length`);

  if (validAnswerChunks.length === 0) {
    console.log(`[buildImageReferences] No valid answer chunks, returning empty array`);
    return [];
  }

  // Step 4: Get embeddings for answer chunks
  console.log(`[buildImageReferences] Step 4: Getting embeddings for answer chunks`);
  const answerEmbeddings: number[][] = [];

  try {
    const embeddingsResult = await getEmbeddings(validAnswerChunks, context.tokenTracker, {
      dimensions: 1024,
      model: 'jina-clip-v2',
    });
    answerEmbeddings.push(...embeddingsResult.embeddings);

    console.log(`[buildImageReferences] Got embeddings for ${answerEmbeddings.length} answer chunks`);

    // Step 5: Compute pairwise cosine similarity
    console.log(`[buildImageReferences] Step 5: Computing pairwise cosine similarity between answer and image embeddings`);
    const allMatches: Array<{
      imageIndex: number;
      answerChunkIndex: number;
      relevanceScore: number;
      answerChunk: string;
      answerChunkPosition: [number, number];
    }> = [];

    for (let i = 0; i < validAnswerChunks.length; i++) {
      // `i` indexes the *filtered* arrays. The position must come from
      // validAnswerChunkPositions: indexing the unfiltered answerChunkPositions with `i`
      // returns the position of a different chunk as soon as any chunk was filtered out.
      const answerChunkIndex = validAnswerChunkIndices[i];
      const answerChunk = validAnswerChunks[i];
      const answerChunkPosition = validAnswerChunkPositions[i];
      const answerEmbedding = answerEmbeddings[i];

      const matchesForChunk: Array<{ imageIndex: number; relevanceScore: number }> = [];

      for (let imageIndex = 0; imageIndex < allImageEmbeddings.length; imageIndex++) {
        const imageEmbedding = allImageEmbeddings[imageIndex];

        // Images without a first embedding are skipped.
        if (imageEmbedding) {
          matchesForChunk.push({
            imageIndex,
            relevanceScore: cosineSimilarity(answerEmbedding, imageEmbedding)
          });
        }
      }

      matchesForChunk.sort((a, b) => b.relevanceScore - a.relevanceScore);

      for (const match of matchesForChunk) {
        allMatches.push({
          imageIndex: match.imageIndex,
          answerChunkIndex,
          relevanceScore: match.relevanceScore,
          answerChunk,
          answerChunkPosition
        });
      }

      console.log(`[buildImageReferences] Processed answer chunk ${i + 1}/${validAnswerChunks.length}, top score: ${matchesForChunk[0]?.relevanceScore.toFixed(4)}`);
    }

    // Log statistics about relevance scores. Computed in a single pass rather than with
    // Math.min(...scores): spreading a chunks-by-images sized array into the argument list
    // can exceed the engine's argument limit.
    if (allMatches.length > 0) {
      let minRelevance = Infinity;
      let maxRelevance = -Infinity;
      let sumRelevance = 0;
      for (const {relevanceScore} of allMatches) {
        if (relevanceScore < minRelevance) minRelevance = relevanceScore;
        if (relevanceScore > maxRelevance) maxRelevance = relevanceScore;
        sumRelevance += relevanceScore;
      }
      const meanRelevance = sumRelevance / allMatches.length;

      console.log('Reference relevance statistics:', {
        min: minRelevance.toFixed(4),
        max: maxRelevance.toFixed(4),
        mean: meanRelevance.toFixed(4),
        count: allMatches.length
      });
    }

    // Step 6: Sort all matches by relevance
    allMatches.sort((a, b) => b.relevanceScore - a.relevanceScore);
    console.log(`[buildImageReferences] Step 6: Sorted ${allMatches.length} potential matches by relevance score`);

    // Step 7: Filter matches
    console.log(`[buildImageReferences] Step 7: Filtering matches to ensure uniqueness and threshold (min: ${minRelScore})`);
    const usedImages = new Set<number>();
    const usedAnswerChunks = new Set<number>();
    const filteredMatches: typeof allMatches = [];

    for (const match of allMatches) {
      if (match.relevanceScore < minRelScore) continue;

      // Greedy assignment: an image or an answer chunk may appear in one reference only.
      if (!usedImages.has(match.imageIndex) && !usedAnswerChunks.has(match.answerChunkIndex)) {
        filteredMatches.push(match);
        usedImages.add(match.imageIndex);
        usedAnswerChunks.add(match.answerChunkIndex);

        if (filteredMatches.length >= maxRef) break;
      }
    }

    console.log(`[buildImageReferences] Selected ${filteredMatches.length}/${allMatches.length} references after filtering`);

    const references: ImageReference[] = filteredMatches.map((match) => ({
      url: dedupedImages[match.imageIndex].url,
      relevanceScore: match.relevanceScore,
      answerChunk: match.answerChunk,
      answerChunkPosition: match.answerChunkPosition
    }));

    return references;

  } catch (error) {
    console.error('Embedding failed', error);
    return [];
  }
}
|
||||
@ -8,13 +8,14 @@ const MAX_RETRIES = 3; // Maximum number of retries for missing embeddings
|
||||
|
||||
// Modified to support different embedding tasks and dimensions
|
||||
export async function getEmbeddings(
|
||||
texts: string[],
|
||||
texts: string[] | Record<string, string>[],
|
||||
tokenTracker?: any,
|
||||
options: {
|
||||
task?: "text-matching" | "retrieval.passage" | "retrieval.query",
|
||||
dimensions?: number,
|
||||
late_chunking?: boolean,
|
||||
embedding_type?: string
|
||||
embedding_type?: string,
|
||||
model?: string,
|
||||
} = {}
|
||||
): Promise<{ embeddings: number[][], tokens: number }> {
|
||||
console.log(`[embeddings] Getting embeddings for ${texts.length} texts`);
|
||||
@ -66,12 +67,13 @@ export async function getEmbeddings(
|
||||
|
||||
// Helper function to get embeddings for a batch with retry logic for missing indices
|
||||
async function getBatchEmbeddingsWithRetry(
|
||||
batchTexts: string[],
|
||||
batchTexts: string[] | Record<string, string>[],
|
||||
options: {
|
||||
task?: "text-matching" | "retrieval.passage" | "retrieval.query",
|
||||
dimensions?: number,
|
||||
late_chunking?: boolean,
|
||||
embedding_type?: string
|
||||
embedding_type?: string,
|
||||
model?: string,
|
||||
},
|
||||
currentBatch: number,
|
||||
batchCount: number
|
||||
@ -89,12 +91,15 @@ async function getBatchEmbeddingsWithRetry(
|
||||
|
||||
while (textsToProcess.length > 0 && retryCount < MAX_RETRIES) {
|
||||
const request: JinaEmbeddingRequest = {
|
||||
model: "jina-embeddings-v3",
|
||||
task: options.task || "text-matching",
|
||||
input: textsToProcess,
|
||||
truncate: true,
|
||||
model: options.model || "jina-embeddings-v3",
|
||||
input: textsToProcess as any,
|
||||
};
|
||||
|
||||
if (request.model === "jina-embeddings-v3") {
|
||||
request.task = options.task || "text-matching";
|
||||
request.truncate = true;
|
||||
}
|
||||
|
||||
// Add optional parameters if provided
|
||||
if (options.dimensions) request.dimensions = options.dimensions;
|
||||
if (options.late_chunking) request.late_chunking = options.late_chunking;
|
||||
@ -110,7 +115,7 @@ async function getBatchEmbeddingsWithRetry(
|
||||
"Authorization": `Bearer ${JINA_API_KEY}`
|
||||
}
|
||||
}
|
||||
);
|
||||
);
|
||||
|
||||
if (!response.data.data) {
|
||||
console.error('No data returned from Jina API');
|
||||
@ -118,7 +123,7 @@ async function getBatchEmbeddingsWithRetry(
|
||||
// On last retry, create placeholder embeddings
|
||||
const dimensionSize = options.dimensions || 1024;
|
||||
const placeholderEmbeddings = textsToProcess.map(text => {
|
||||
console.error(`Failed to get embedding after all retries: [${text.substring(0, 50)}...]`);
|
||||
console.error(`Failed to get embedding after all retries: [${truncateInputString(text)}...]`);
|
||||
return new Array(dimensionSize).fill(0);
|
||||
});
|
||||
|
||||
@ -140,7 +145,7 @@ async function getBatchEmbeddingsWithRetry(
|
||||
|
||||
// Process successful embeddings
|
||||
const successfulEmbeddings: number[][] = [];
|
||||
const remainingTexts: string[] = [];
|
||||
const remainingTexts: (string | Record<string, string>)[] = [];
|
||||
const newIndexMap = new Map<number, number>();
|
||||
|
||||
for (let idx = 0; idx < textsToProcess.length; idx++) {
|
||||
@ -160,7 +165,7 @@ async function getBatchEmbeddingsWithRetry(
|
||||
const newIndex = remainingTexts.length;
|
||||
newIndexMap.set(newIndex, indexMap.get(idx)!);
|
||||
remainingTexts.push(textsToProcess[idx]);
|
||||
console.log(`Missing embedding for index ${idx}, will retry: [${textsToProcess[idx].substring(0, 50)}...]`);
|
||||
console.log(`Missing embedding for index ${idx}, will retry: [${truncateInputString(textsToProcess[idx])}...]`);
|
||||
}
|
||||
}
|
||||
|
||||
@ -190,7 +195,7 @@ async function getBatchEmbeddingsWithRetry(
|
||||
const dimensionSize = options.dimensions || 1024;
|
||||
for (let idx = 0; idx < textsToProcess.length; idx++) {
|
||||
const originalIndex = indexMap.get(idx)!;
|
||||
console.error(`Failed to get embedding after all retries for index ${originalIndex}: [${textsToProcess[idx].substring(0, 50)}...]`);
|
||||
console.error(`Failed to get embedding after all retries for index ${originalIndex}: [${truncateInputString(textsToProcess[idx])}...]`);
|
||||
|
||||
while (batchEmbeddings.length <= originalIndex) {
|
||||
batchEmbeddings.push([]);
|
||||
@ -228,3 +233,11 @@ async function getBatchEmbeddingsWithRetry(
|
||||
|
||||
return { batchEmbeddings, batchTokens };
|
||||
}
|
||||
|
||||
/**
 * Produces a short preview (at most 50 characters) of an embedding input for log messages.
 *
 * @param input - Either a plain text input or a record input such as `{ image: <base64> }`;
 *                for records the first value is previewed.
 * @returns The first 50 characters of the text, or an empty string when a record has no
 *          string value to preview (so logging on an error path never throws itself).
 */
function truncateInputString(input: string | Record<string, string>): string {
  const text: unknown = typeof input === 'string' ? input : Object.values(input)[0];
  return typeof text === 'string' ? text.slice(0, 50) : '';
}
|
||||
@ -78,4 +78,4 @@ export async function dedupQueries(
|
||||
unique_queries: newQueries,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -6,7 +6,8 @@ import axiosClient from "../utils/axios-client";
|
||||
export async function readUrl(
|
||||
url: string,
|
||||
withAllLinks?: boolean,
|
||||
tracker?: TokenTracker
|
||||
tracker?: TokenTracker,
|
||||
withAllImages?: boolean
|
||||
): Promise<{ response: ReadResponse }> {
|
||||
if (!url.trim()) {
|
||||
throw new Error('URL cannot be empty');
|
||||
@ -20,7 +21,6 @@ export async function readUrl(
|
||||
'Accept': 'application/json',
|
||||
'Authorization': `Bearer ${JINA_API_KEY}`,
|
||||
'Content-Type': 'application/json',
|
||||
'X-Retain-Images': 'none',
|
||||
'X-Md-Link-Style': 'discarded',
|
||||
};
|
||||
|
||||
@ -28,6 +28,12 @@ export async function readUrl(
|
||||
headers['X-With-Links-Summary'] = 'all';
|
||||
}
|
||||
|
||||
if (withAllImages) {
|
||||
headers['X-With-Images-Summary'] = 'true'
|
||||
} else {
|
||||
headers['X-Retain-Images'] = 'none'
|
||||
}
|
||||
|
||||
try {
|
||||
// Use axios which handles encoding properly
|
||||
const { data } = await axiosClient.post<ReadResponse>(
|
||||
|
||||
26
src/types.ts
26
src/types.ts
@ -27,6 +27,14 @@ export type Reference = {
|
||||
answerChunkPosition?: number[];
|
||||
}
|
||||
|
||||
export type ImageReference = {
|
||||
url: string;
|
||||
dateTime?: string;
|
||||
relevanceScore?: number;
|
||||
answerChunk?: string;
|
||||
answerChunkPosition?: number[];
|
||||
}
|
||||
|
||||
export type AnswerAction = BaseAction & {
|
||||
action: "answer";
|
||||
answer: string;
|
||||
@ -53,6 +61,7 @@ export type ReflectAction = BaseAction & {
|
||||
export type VisitAction = BaseAction & {
|
||||
action: "visit";
|
||||
URLTargets: number[] | string[];
|
||||
image?: ImageObject;
|
||||
};
|
||||
|
||||
export type CodingAction = BaseAction & {
|
||||
@ -155,6 +164,7 @@ export interface ReadResponse {
|
||||
content: string;
|
||||
usage: { tokens: number; };
|
||||
links: Array<[string, string]>; // [anchor, url]
|
||||
images: Record<string, string>; // { image: url }
|
||||
};
|
||||
name?: string;
|
||||
message?: string;
|
||||
@ -259,6 +269,8 @@ export interface ChatCompletionRequest {
|
||||
|
||||
max_annotations?: number;
|
||||
min_annotation_relevance?: number;
|
||||
|
||||
with_images?: boolean;
|
||||
language_code?: string;
|
||||
search_language_code?: string;
|
||||
search_provider?: string;
|
||||
@ -294,6 +306,8 @@ export interface ChatCompletionResponse {
|
||||
visitedURLs?: string[];
|
||||
readURLs?: string[];
|
||||
numURLs?: number;
|
||||
allImages?: string[];
|
||||
relatedImages?: string[];
|
||||
}
|
||||
|
||||
export interface ChatCompletionChunk {
|
||||
@ -318,6 +332,8 @@ export interface ChatCompletionChunk {
|
||||
visitedURLs?: string[];
|
||||
readURLs?: string[];
|
||||
numURLs?: number;
|
||||
allImages?: string[];
|
||||
relatedImages?: string[];
|
||||
}
|
||||
|
||||
// Tracker Types
|
||||
@ -336,11 +352,11 @@ export interface TrackerContext {
|
||||
// Interface definitions for Jina API
|
||||
export interface JinaEmbeddingRequest {
|
||||
model: string;
|
||||
task: string;
|
||||
task?: string;
|
||||
late_chunking?: boolean;
|
||||
dimensions?: number;
|
||||
embedding_type?: string;
|
||||
input: string[];
|
||||
input: string[] | Record<string, string>[];
|
||||
truncate?: boolean;
|
||||
}
|
||||
|
||||
@ -356,4 +372,10 @@ export interface JinaEmbeddingResponse {
|
||||
index: number;
|
||||
embedding: number[];
|
||||
}>;
|
||||
}
|
||||
|
||||
export type ImageObject = {
|
||||
url: string;
|
||||
alt?: string;
|
||||
embedding: number[][];
|
||||
}
|
||||
233
src/utils/image-tools.ts
Normal file
233
src/utils/image-tools.ts
Normal file
@ -0,0 +1,233 @@
|
||||
import canvas from '@napi-rs/canvas';
|
||||
import { getEmbeddings } from '../tools/embeddings';
|
||||
import { TokenTracker } from './token-tracker';
|
||||
import { ImageObject } from '../types';
|
||||
import { cosineSimilarity } from '../tools/cosine';
|
||||
export type { Canvas, Image } from '@napi-rs/canvas';
|
||||
import { Storage } from '@google-cloud/storage';
|
||||
import { randomUUID } from 'crypto';
|
||||
|
||||
/**
 * Downloads a resource into memory.
 *
 * @param uri - Address passed to `fetch`.
 * @returns The body as an `ArrayBuffer` plus the value of the `content-type` header
 *          (`null` when the header is absent).
 * @throws Error `Unexpected response …` when the response is not OK or has no body.
 * @throws Error `File too large` when the declared or the actual size exceeds 100 MiB.
 */
export const downloadFile = async (uri: string) => {
  const maxBytes = 1024 * 1024 * 100;

  const resp = await fetch(uri);
  if (!(resp.ok && resp.body)) {
    throw new Error(`Unexpected response ${resp.statusText}`);
  }

  const contentLength = parseInt(resp.headers.get('content-length') || '0', 10);
  if (contentLength > maxBytes) {
    // Release the unread body instead of leaving it pending.
    void resp.body.cancel().catch(() => undefined);
    throw new Error('File too large');
  }

  const buff = await resp.arrayBuffer();
  // The content-length header may be missing (parsed as 0 above) or wrong, so the
  // limit is checked again against what was actually received.
  if (buff.byteLength > maxBytes) {
    throw new Error('File too large');
  }

  return { buff, contentType: resp.headers.get('content-type') };
};
|
||||
|
||||
/**
 * Decodes an image from a `data:` URL, an `http…` URL, or a raw buffer.
 *
 * @param input - `data:` URL (base64 or URI-encoded payload), a URL starting with `http`
 *                (downloaded via `downloadFile`), or the encoded image bytes themselves.
 * @returns The decoded canvas image, the encoded bytes, and the content type when it is
 *          known (taken from the data URL header or the HTTP `content-type` header).
 * @throws Error `Unsupported image type` for URLs whose path ends in `.svg`.
 * @throws Error `Invalid input` when no image bytes could be obtained from `input`.
 */
const _loadImage = async (input: string | Buffer) => {
  let buff: Buffer | undefined;
  let contentType: string | null | undefined;

  if (typeof input !== 'string') {
    // A Buffer already holds the encoded image; previously this case fell through
    // to 'Invalid input' even though the signature accepts it.
    buff = input;
  } else if (input.startsWith('data:')) {
    const firstComma = input.indexOf(',');
    if (firstComma === -1) {
      // Without a comma there is no payload to decode.
      throw new Error('Invalid input');
    }
    const header = input.slice(0, firstComma);
    const data = input.slice(firstComma + 1);
    const [mediaType, ...params] = header.split(';');
    contentType = mediaType.split(':')[1];
    // `base64` is not necessarily the first parameter (e.g. `;charset=utf-8;base64`).
    const isBase64 = params.some(param => param.trim().startsWith('base64'));
    buff = isBase64
      ? Buffer.from(data, 'base64')
      : Buffer.from(decodeURIComponent(data), 'utf-8');
  } else if (input.startsWith('http')) {
    // Look at the URL path only, so `.svg?v=2` or `.SVG` are rejected as well.
    let pathname = input;
    try {
      pathname = new URL(input).pathname;
    } catch {
      // Not parseable as a URL: fall back to checking the raw string.
    }
    if (pathname.toLowerCase().endsWith('.svg')) {
      throw new Error('Unsupported image type');
    }
    const r = await downloadFile(input);
    buff = Buffer.from(r.buff);
    contentType = r.contentType;
  }

  if (!buff) {
    throw new Error('Invalid input');
  }

  const img = await canvas.loadImage(buff);
  // Attach the content type to the image object itself as well.
  Reflect.set(img, 'contentType', contentType);

  return {
    img,
    buff,
    contentType,
  };
}
|
||||
|
||||
/**
 * Loads an image via `_loadImage`, normalising "unsupported format" failures.
 *
 * @param uri - Image location (`data:` / `http…` URL) or the encoded image bytes.
 * @returns Whatever `_loadImage` resolves with.
 * @throws Error `Unknown image format for …` when the underlying error message mentions an
 *         unsupported type; any other error is rethrown unchanged.
 */
export const loadImage = async (uri: string | Buffer) => {
  try {
    return await _loadImage(uri);
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    if (message.includes('Unsupported image type') || message.includes('unsupported')) {
      // Never interpolate raw buffer bytes into the message; describe the buffer instead.
      const label = typeof uri === 'string' ? uri.slice(0, 128) : `<buffer of ${uri.length} bytes>`;
      throw new Error(`Unknown image format for ${label}`);
    }
    throw err;
  }
}
|
||||
|
||||
/**
 * Draws an image onto a new canvas that fits inside a `size` x `size` box.
 *
 * Images that already fit are copied at their original size; larger images are scaled
 * down with their aspect ratio preserved so that the longer side equals `size`.
 *
 * @param image - Source image or canvas.
 * @param size - Edge length of the bounding box in pixels (default 1024).
 * @returns A new canvas containing the (possibly downscaled) image.
 */
export const fitImageToSquareBox = (image: canvas.Image | canvas.Canvas, size: number = 1024) => {
  let targetWidth = image.width;
  let targetHeight = image.height;

  if (image.width > size || image.height > size) {
    const aspectRatio = image.width / image.height;
    // Clamp to 1px: for extreme aspect ratios the shorter side would otherwise round
    // to 0 and yield a degenerate canvas.
    targetWidth = Math.max(1, Math.round(aspectRatio > 1 ? size : size * aspectRatio));
    targetHeight = Math.max(1, Math.round(aspectRatio > 1 ? size / aspectRatio : size));
  }

  const canvasInstance = canvas.createCanvas(targetWidth, targetHeight);
  const ctx = canvasInstance.getContext('2d');
  ctx.drawImage(image, 0, 0, image.width, image.height, 0, 0, targetWidth, targetHeight);

  return canvasInstance;
}
|
||||
|
||||
|
||||
/**
 * Serialises a canvas to a data URL.
 *
 * @param canvas - Canvas to encode.
 * @param mimeType - Output format; PNG when omitted.
 * @returns The promise returned by `toDataURLAsync`.
 */
export const canvasToDataUrl = (canvas: canvas.Canvas, mimeType?: 'image/png' | 'image/jpeg') => {
  const format = mimeType || 'image/png';
  // The cast selects the PNG overload of toDataURLAsync; the runtime value is unchanged.
  return canvas.toDataURLAsync(format as 'image/png');
}
|
||||
|
||||
/**
 * Encodes a canvas into an image buffer.
 *
 * @param canvas - Canvas to encode.
 * @param mimeType - Output format; PNG when omitted.
 * @returns The result of `toBuffer` for the chosen format.
 */
export const canvasToBuffer = (canvas: canvas.Canvas, mimeType?: 'image/png' | 'image/jpeg') => {
  const format = mimeType || 'image/png';
  // The cast selects the PNG overload of toBuffer; the runtime value is unchanged.
  return canvas.toBuffer(format as 'image/png');
}
|
||||
|
||||
/**
 * Turns an image URL into an `ImageObject` with an embedding.
 *
 * The image is loaded, rejected when either side is below 256px, downscaled to fit a
 * 512px box, and embedded with `jina-clip-v2` (1024 dimensions). Only after an embedding
 * was obtained is the original image stored via `saveImageToFirebase`; when storing yields
 * no URL the original `url` is kept.
 *
 * @param url - Image location accepted by `loadImage`.
 * @param tracker - Token tracker forwarded to `getEmbeddings`.
 * @returns The image object, or `undefined` when the image is too small, has no embedding,
 *          or any step fails (failures are logged, never thrown).
 */
export const processImage = async (url: string, tracker: TokenTracker): Promise<ImageObject | undefined> => {
  try {
    const { img, buff, contentType } = await loadImage(url);
    if (!img) {
      return;
    }

    // Skip images smaller than 256px on either side.
    if (img.width < 256 || img.height < 256) {
      return;
    }

    const thumbnail = fitImageToSquareBox(img, 512);
    const base64Data = (await canvasToDataUrl(thumbnail)).split(',')[1];

    const {embeddings} = await getEmbeddings([{ image: base64Data }], tracker, {
      dimensions: 1024,
      model: 'jina-clip-v2',
    });

    // Consumers read `embedding[0]`; an image without it is unusable.
    if (!embeddings[0]?.length) {
      return;
    }

    // Upload last so that a failed embedding does not leave an orphaned stored file.
    const newUrl = await saveImageToFirebase(buff, contentType);

    return {
      url: newUrl ?? url,
      embedding: embeddings,
    };

  } catch (error) {
    // Best-effort: a single broken image must not fail the caller, but leave a trace.
    console.error('Error processing image:', url.slice(0, 128), error);
    return;
  }
}
|
||||
|
||||
/**
 * Filters out new images that are near-duplicates of known or already accepted images.
 *
 * Two images count as duplicates when the cosine similarity of their first embeddings is
 * at least `similarityThreshold`. Candidates are checked in order, first against
 * `existingImages`, then against the candidates accepted so far.
 *
 * @param newImages - Candidate images with embeddings.
 * @param existingImages - Images to compare the candidates against.
 * @param similarityThreshold - Similarity at or above which an image is a duplicate.
 * @returns The accepted candidates in their original order. `newImages` itself is returned
 *          for a single candidate with nothing to compare against, and when comparison throws.
 */
export const dedupImagesWithEmbeddings = (
  newImages: ImageObject[],
  existingImages: ImageObject[],
  similarityThreshold: number = 0.86,
): ImageObject[] => {
  try {
    // Nothing to compare a lone candidate with.
    if (newImages.length === 1 && existingImages.length === 0) {
      return newImages;
    }

    const accepted: ImageObject[] = [];

    for (const candidate of newImages) {
      const duplicates = (other: ImageObject): boolean =>
        cosineSimilarity(candidate.embedding[0], other.embedding[0]) >= similarityThreshold;

      // Known images are checked before the candidates accepted in this run.
      if (existingImages.some(duplicates) || accepted.some(duplicates)) {
        continue;
      }

      accepted.push(candidate);
    }

    return accepted;
  } catch (error) {
    console.error('Error in image deduplication analysis:', error);

    // Fall back to keeping every candidate.
    return newImages;
  }
}
|
||||
|
||||
/**
 * Stores an image publicly in the project's default bucket (`<GCLOUD_PROJECT>.appspot.com`)
 * under `readImages/<uuid>.<extension>`.
 *
 * @param buffer - Encoded image bytes.
 * @param mimeType - Content type of the image; `image/png` is assumed when missing.
 * @returns The public URL of the stored file, or `undefined` when `GCLOUD_PROJECT` is not
 *          set, the content type is not an image type, or saving fails (errors are logged).
 */
export const saveImageToFirebase = async (
  buffer: Buffer,
  mimeType?: string | null,
): Promise<string | undefined> => {
  if (!process.env.GCLOUD_PROJECT) {
    console.error('GCLOUD_PROJECT environment variable is not set');
    return;
  }

  try {
    const finalMimeType = mimeType || 'image/png';
    const normalizedMimeType = finalMimeType.trim().toLowerCase();

    if (!normalizedMimeType.startsWith('image/')) {
      return;
    }

    // Use only the bare subtype as extension: drop parameters (`; charset=…`) and
    // structured-syntax suffixes (`+xml`), and fall back to png for anything unusual.
    const subtype = normalizedMimeType.slice('image/'.length).split(/[;+]/)[0].trim();
    const extension = /^[a-z0-9.-]+$/.test(subtype) ? subtype : 'png';

    const fileName = `readImages/${randomUUID()}.${extension}`;

    // Created inside the try block so that client construction errors are logged as well.
    const firebaseDefaultBucket = new Storage().bucket(`${process.env.GCLOUD_PROJECT}.appspot.com`);
    const file = firebaseDefaultBucket.file(fileName);

    await file.save(buffer, {
      contentType: finalMimeType,
      public: true,
    });

    return file.publicUrl();
  } catch (error) {
    console.error('Error saving image to Firebase Storage:', error);
    return;
  }
};
|
||||
@ -1,12 +1,13 @@
|
||||
import { BoostedSearchSnippet, KnowledgeItem, SearchSnippet, TrackerContext, VisitAction, WebContent } from "../types";
|
||||
import { getI18nText, smartMergeStrings } from "./text-tools";
|
||||
import { rerankDocuments } from "../tools/jina-rerank";
|
||||
import { readUrl } from "../tools/read";
|
||||
import { Schemas } from "./schemas";
|
||||
import { cherryPick } from "../tools/jina-latechunk";
|
||||
import { formatDateBasedOnType } from "./date-tools";
|
||||
import { classifyText } from "../tools/jina-classify-spam";
|
||||
import { segmentText } from "../tools/segment";
|
||||
import {BoostedSearchSnippet, ImageObject, KnowledgeItem, SearchSnippet, TrackerContext, VisitAction, WebContent} from "../types";
|
||||
import {getI18nText, smartMergeStrings} from "./text-tools";
|
||||
import {rerankDocuments} from "../tools/jina-rerank";
|
||||
import {readUrl} from "../tools/read";
|
||||
import {Schemas} from "./schemas";
|
||||
import {cherryPick} from "../tools/jina-latechunk";
|
||||
import {formatDateBasedOnType} from "./date-tools";
|
||||
import {classifyText} from "../tools/jina-classify-spam";
|
||||
import { processImage } from "./image-tools";
|
||||
import {segmentText} from "../tools/segment";
|
||||
import axiosClient from "./axios-client";
|
||||
|
||||
export function normalizeUrl(urlString: string, debug = false, options = {
|
||||
@ -460,9 +461,11 @@ export async function processURLs(
|
||||
allURLs: Record<string, SearchSnippet>,
|
||||
visitedURLs: string[],
|
||||
badURLs: string[],
|
||||
imageObjects: ImageObject[],
|
||||
schemaGen: Schemas,
|
||||
question: string,
|
||||
webContents: Record<string, WebContent>
|
||||
webContents: Record<string, WebContent>,
|
||||
withImages: boolean = false,
|
||||
): Promise<{ urlResults: any[], success: boolean }> {
|
||||
// Skip if no URLs to process
|
||||
if (urls.length === 0) {
|
||||
@ -491,8 +494,8 @@ export async function processURLs(
|
||||
// Store normalized URL for consistent reference
|
||||
url = normalizedUrl;
|
||||
|
||||
const { response } = await readUrl(url, true, context.tokenTracker);
|
||||
const { data } = response;
|
||||
const {response} = await readUrl(url, true, context.tokenTracker, withImages);
|
||||
const {data} = response;
|
||||
const guessedTime = await getLastModified(url);
|
||||
if (guessedTime) {
|
||||
console.log('Guessed time for', url, guessedTime);
|
||||
@ -554,7 +557,18 @@ export async function processURLs(
|
||||
}
|
||||
});
|
||||
|
||||
return { url, result: response };
|
||||
// Process images
|
||||
if (withImages && data.images) {
|
||||
const imageEntries = Object.entries(data.images || {});
|
||||
imageEntries.forEach(async ([alt, url]) => {
|
||||
const imageObject = await processImage(url, context.tokenTracker);
|
||||
if (imageObject && !imageObjects.find(i => i.url === imageObject.url)) {
|
||||
imageObjects.push(imageObject);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
return {url, result: response};
|
||||
} catch (error: any) {
|
||||
console.error('Error reading URL:', url, error);
|
||||
badURLs.push(url);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user