mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2026-03-22 07:29:35 +08:00
fix: logger
This commit is contained in:
@@ -38,7 +38,7 @@ export async function getEmbeddings(
|
|||||||
for (let i = 0; i < texts.length; i += BATCH_SIZE) {
|
for (let i = 0; i < texts.length; i += BATCH_SIZE) {
|
||||||
const batchTexts = texts.slice(i, i + BATCH_SIZE);
|
const batchTexts = texts.slice(i, i + BATCH_SIZE);
|
||||||
const currentBatch = Math.floor(i / BATCH_SIZE) + 1;
|
const currentBatch = Math.floor(i / BATCH_SIZE) + 1;
|
||||||
logDebug(`[embeddings] Processing batch ${currentBatch}/${batchCount} (${batchTexts.length} texts)`);
|
logDebug(`Embedding batch ${currentBatch}/${batchCount} (${batchTexts.length} texts)`);
|
||||||
|
|
||||||
// Get embeddings for the batch with retry logic for missing indices
|
// Get embeddings for the batch with retry logic for missing indices
|
||||||
const { batchEmbeddings, batchTokens } = await getBatchEmbeddingsWithRetry(
|
const { batchEmbeddings, batchTokens } = await getBatchEmbeddingsWithRetry(
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
import { TokenTracker } from "../utils/token-tracker";
|
import {TokenTracker} from "../utils/token-tracker";
|
||||||
import { cosineSimilarity } from "./cosine";
|
import {cosineSimilarity} from "./cosine";
|
||||||
import { getEmbeddings } from "./embeddings";
|
import {getEmbeddings} from "./embeddings";
|
||||||
import { logInfo, logError, logDebug, logWarning } from '../logging';
|
|
||||||
|
|
||||||
const SIMILARITY_THRESHOLD = 0.86; // Adjustable threshold for cosine similarity
|
const SIMILARITY_THRESHOLD = 0.86; // Adjustable threshold for cosine similarity
|
||||||
|
|
||||||
@@ -21,7 +20,7 @@ export async function dedupQueries(
|
|||||||
|
|
||||||
// Get embeddings for all queries in one batch
|
// Get embeddings for all queries in one batch
|
||||||
const allQueries = [...newQueries, ...existingQueries];
|
const allQueries = [...newQueries, ...existingQueries];
|
||||||
const { embeddings: allEmbeddings } = await getEmbeddings(allQueries, tracker);
|
const {embeddings: allEmbeddings} = await getEmbeddings(allQueries, tracker);
|
||||||
|
|
||||||
// If embeddings is empty (due to 402 error), return all new queries
|
// If embeddings is empty (due to 402 error), return all new queries
|
||||||
if (!allEmbeddings.length) {
|
if (!allEmbeddings.length) {
|
||||||
@@ -67,12 +66,12 @@ export async function dedupQueries(
|
|||||||
usedIndices.add(i);
|
usedIndices.add(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
logInfo('Unique queries:', { queries: uniqueQueries });
|
console.log('Dedup:', uniqueQueries);
|
||||||
return {
|
return {
|
||||||
unique_queries: uniqueQueries,
|
unique_queries: uniqueQueries,
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logError('Deduplication error:', { error });
|
console.error('Error in deduplication analysis:', error);
|
||||||
|
|
||||||
// return all new queries if there is an error
|
// return all new queries if there is an error
|
||||||
return {
|
return {
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ export async function cherryPick(question: string, longContext: string, options:
|
|||||||
|
|
||||||
if (longContext.length < snippetLength * 2) {
|
if (longContext.length < snippetLength * 2) {
|
||||||
// If the context is shorter than the snippet length, return the whole context
|
// If the context is shorter than the snippet length, return the whole context
|
||||||
logInfo('content is too short, dont bother');
|
logDebug('content is too short, dont bother');
|
||||||
return longContext;
|
return longContext;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -24,7 +24,7 @@ export async function cherryPick(question: string, longContext: string, options:
|
|||||||
chunks.push(longContext.substring(i, Math.min(i + chunkSize, longContext.length)));
|
chunks.push(longContext.substring(i, Math.min(i + chunkSize, longContext.length)));
|
||||||
}
|
}
|
||||||
|
|
||||||
logInfo('late chunking enabled! num chunks:', { count: chunks.length });
|
logDebug(`late chunking enabled! num chunks: ${chunks.length}`);
|
||||||
|
|
||||||
trackers.actionTracker.trackThink('late_chunk', schemaGen.languageCode, { url });
|
trackers.actionTracker.trackThink('late_chunk', schemaGen.languageCode, { url });
|
||||||
|
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ export async function rerankDocuments(
|
|||||||
batches.push(documents.slice(i, i + batchSize));
|
batches.push(documents.slice(i, i + batchSize));
|
||||||
}
|
}
|
||||||
|
|
||||||
logInfo(`Processing ${documents.length} documents in ${batches.length} batches`);
|
logDebug(`Reranking ${documents.length} documents in ${batches.length} batches`);
|
||||||
|
|
||||||
// Process all batches in parallel
|
// Process all batches in parallel
|
||||||
const batchResults = await Promise.all(
|
const batchResults = await Promise.all(
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ export async function search(
|
|||||||
throw new Error('Invalid response format');
|
throw new Error('Invalid response format');
|
||||||
}
|
}
|
||||||
|
|
||||||
logInfo('Search results metadata:', { metadata: data.meta });
|
logDebug('Search results metadata:', { metadata: data.meta });
|
||||||
|
|
||||||
const tokenTracker = tracker || new TokenTracker();
|
const tokenTracker = tracker || new TokenTracker();
|
||||||
tokenTracker.trackUsage('search', {
|
tokenTracker.trackUsage('search', {
|
||||||
|
|||||||
@@ -82,7 +82,7 @@ export async function reviseAnswer(
|
|||||||
|
|
||||||
|
|
||||||
logInfo(TOOL_NAME, { text: result.text });
|
logInfo(TOOL_NAME, { text: result.text });
|
||||||
logInfo('repaired before/after', { before: mdContent.length, after: result.text.length });
|
logDebug(`repaired before/after: ${mdContent.length} -> ${result.text.length}`);
|
||||||
|
|
||||||
if (result.text.length < mdContent.length * 0.85) {
|
if (result.text.length < mdContent.length * 0.85) {
|
||||||
logError(`repaired content length ${result.text.length} is significantly shorter than original content ${mdContent.length}, return original content instead.`);
|
logError(`repaired content length ${result.text.length} is significantly shorter than original content ${mdContent.length}, return original content instead.`);
|
||||||
|
|||||||
@@ -51,11 +51,7 @@ export async function readUrl(
|
|||||||
throw new Error('Invalid response data');
|
throw new Error('Invalid response data');
|
||||||
}
|
}
|
||||||
|
|
||||||
logInfo('Read:', {
|
logDebug(`Read: ${data.data.title} (${data.data.url})`);
|
||||||
title: data.data.title,
|
|
||||||
url: data.data.url,
|
|
||||||
tokens: data.data.usage?.tokens || 0
|
|
||||||
});
|
|
||||||
|
|
||||||
const tokens = data.data.usage?.tokens || 0;
|
const tokens = data.data.usage?.tokens || 0;
|
||||||
const tokenTracker = tracker || new TokenTracker();
|
const tokenTracker = tracker || new TokenTracker();
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ export async function segmentText(
|
|||||||
|
|
||||||
// Split content into batches
|
// Split content into batches
|
||||||
const batches = splitTextIntoBatches(content, MAX_BATCH_SIZE);
|
const batches = splitTextIntoBatches(content, MAX_BATCH_SIZE);
|
||||||
logDebug(`Processing ${batches.length} batches`);
|
logDebug(`Segmenting ${batches.length} batches`);
|
||||||
|
|
||||||
// Calculate offsets for each batch upfront
|
// Calculate offsets for each batch upfront
|
||||||
const batchOffsets: number[] = [];
|
const batchOffsets: number[] = [];
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import {
|
|||||||
import { TokenTracker } from "./token-tracker";
|
import { TokenTracker } from "./token-tracker";
|
||||||
import { getModel, ToolName, getToolConfig } from "../config";
|
import { getModel, ToolName, getToolConfig } from "../config";
|
||||||
import Hjson from 'hjson'; // Import Hjson library
|
import Hjson from 'hjson'; // Import Hjson library
|
||||||
import { logInfo, logError, logDebug, logWarning } from '../logging';
|
import { logError, logDebug, logWarning } from '../logging';
|
||||||
|
|
||||||
interface GenerateObjectResult<T> {
|
interface GenerateObjectResult<T> {
|
||||||
object: T;
|
object: T;
|
||||||
@@ -169,7 +169,7 @@ export class ObjectGeneratorSafe {
|
|||||||
} catch (parseError) {
|
} catch (parseError) {
|
||||||
|
|
||||||
if (numRetries > 0) {
|
if (numRetries > 0) {
|
||||||
logError(`${model} failed on object generation -> manual parsing failed -> retry with ${numRetries - 1} retries remaining`);
|
logWarning(`${model} failed on object generation -> manual parsing failed -> retry with ${numRetries - 1} retries remaining`);
|
||||||
return this.generateObject({
|
return this.generateObject({
|
||||||
model,
|
model,
|
||||||
schema,
|
schema,
|
||||||
@@ -180,7 +180,7 @@ export class ObjectGeneratorSafe {
|
|||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
// Second fallback: Try with fallback model if provided
|
// Second fallback: Try with fallback model if provided
|
||||||
logError(`${model} failed on object generation -> manual parsing failed -> trying fallback with distilled schema`);
|
logWarning(`${model} failed on object generation -> manual parsing failed -> trying fallback with distilled schema`);
|
||||||
try {
|
try {
|
||||||
let failedOutput = '';
|
let failedOutput = '';
|
||||||
|
|
||||||
@@ -201,7 +201,7 @@ export class ObjectGeneratorSafe {
|
|||||||
});
|
});
|
||||||
|
|
||||||
this.tokenTracker.trackUsage('fallback', fallbackResult.usage); // Track against fallback model
|
this.tokenTracker.trackUsage('fallback', fallbackResult.usage); // Track against fallback model
|
||||||
logInfo('Distilled schema parse success!');
|
logDebug('Distilled schema parse success!');
|
||||||
return fallbackResult;
|
return fallbackResult;
|
||||||
} catch (fallbackError) {
|
} catch (fallbackError) {
|
||||||
// If fallback model also fails, try parsing its error response
|
// If fallback model also fails, try parsing its error response
|
||||||
@@ -221,11 +221,11 @@ export class ObjectGeneratorSafe {
|
|||||||
|
|
||||||
private async handleGenerateObjectError<T>(error: unknown): Promise<GenerateObjectResult<T>> {
|
private async handleGenerateObjectError<T>(error: unknown): Promise<GenerateObjectResult<T>> {
|
||||||
if (NoObjectGeneratedError.isInstance(error)) {
|
if (NoObjectGeneratedError.isInstance(error)) {
|
||||||
logError('Object not generated according to schema, fallback to manual parsing');
|
logWarning('Object not generated according to schema, fallback to manual parsing');
|
||||||
try {
|
try {
|
||||||
// First try standard JSON parsing
|
// First try standard JSON parsing
|
||||||
const partialResponse = JSON.parse((error as any).text);
|
const partialResponse = JSON.parse((error as any).text);
|
||||||
logInfo('JSON parse success!');
|
logDebug('JSON parse success!');
|
||||||
return {
|
return {
|
||||||
object: partialResponse as T,
|
object: partialResponse as T,
|
||||||
usage: (error as any).usage
|
usage: (error as any).usage
|
||||||
@@ -234,7 +234,7 @@ export class ObjectGeneratorSafe {
|
|||||||
// Use Hjson to parse the error response for more lenient parsing
|
// Use Hjson to parse the error response for more lenient parsing
|
||||||
try {
|
try {
|
||||||
const hjsonResponse = Hjson.parse((error as any).text);
|
const hjsonResponse = Hjson.parse((error as any).text);
|
||||||
logInfo('Hjson parse success!');
|
logDebug('Hjson parse success!');
|
||||||
return {
|
return {
|
||||||
object: hjsonResponse as T,
|
object: hjsonResponse as T,
|
||||||
usage: (error as any).usage
|
usage: (error as any).usage
|
||||||
|
|||||||
@@ -118,7 +118,7 @@ export class Schemas {
|
|||||||
|
|
||||||
this.languageCode = result.object.langCode;
|
this.languageCode = result.object.langCode;
|
||||||
this.languageStyle = result.object.langStyle;
|
this.languageStyle = result.object.langStyle;
|
||||||
logInfo(`language`, { object: result.object });
|
logDebug(`language: ${this.languageCode} -> ${this.languageStyle}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
getLanguagePrompt() {
|
getLanguagePrompt() {
|
||||||
|
|||||||
@@ -270,7 +270,7 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers:
|
|||||||
// Step 2: Rerank only the unique contents
|
// Step 2: Rerank only the unique contents
|
||||||
const uniqueContents = Object.keys(uniqueContentMap);
|
const uniqueContents = Object.keys(uniqueContentMap);
|
||||||
const uniqueIndicesMap = Object.values(uniqueContentMap);
|
const uniqueIndicesMap = Object.values(uniqueContentMap);
|
||||||
logInfo(`rerank URLs: ${urlItems.length}->${uniqueContents.length}`);
|
logDebug(`unique URLs: ${urlItems.length}->${uniqueContents.length}`);
|
||||||
rerankDocuments(question, uniqueContents, trackers.tokenTracker)
|
rerankDocuments(question, uniqueContents, trackers.tokenTracker)
|
||||||
.then(({ results }) => {
|
.then(({ results }) => {
|
||||||
// Step 3: Map the scores back to all original items
|
// Step 3: Map the scores back to all original items
|
||||||
@@ -507,7 +507,7 @@ export async function processURLs(
|
|||||||
const { data } = response;
|
const { data } = response;
|
||||||
const guessedTime = await getLastModified(url);
|
const guessedTime = await getLastModified(url);
|
||||||
if (guessedTime) {
|
if (guessedTime) {
|
||||||
logInfo('Guessed time for', { url, guessedTime });
|
logDebug(`Guessed time for ${url}: ${guessedTime}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Early return if no valid data
|
// Early return if no valid data
|
||||||
@@ -520,7 +520,7 @@ export async function processURLs(
|
|||||||
const spamDetectLength = 300;
|
const spamDetectLength = 300;
|
||||||
const isGoodContent = data.content.length > spamDetectLength || !await classifyText(data.content);
|
const isGoodContent = data.content.length > spamDetectLength || !await classifyText(data.content);
|
||||||
if (!isGoodContent) {
|
if (!isGoodContent) {
|
||||||
logError(`Blocked content ${data.content.length}:`, {
|
logWarning(`Blocked content ${data.content.length}:`, {
|
||||||
url,
|
url,
|
||||||
content: data.content.slice(0, spamDetectLength)
|
content: data.content.slice(0, spamDetectLength)
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user