mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
fix: logger
This commit is contained in:
parent
1946fa7beb
commit
4b37ec8d04
@ -38,7 +38,7 @@ export async function getEmbeddings(
|
||||
for (let i = 0; i < texts.length; i += BATCH_SIZE) {
|
||||
const batchTexts = texts.slice(i, i + BATCH_SIZE);
|
||||
const currentBatch = Math.floor(i / BATCH_SIZE) + 1;
|
||||
logDebug(`[embeddings] Processing batch ${currentBatch}/${batchCount} (${batchTexts.length} texts)`);
|
||||
logDebug(`Embedding batch ${currentBatch}/${batchCount} (${batchTexts.length} texts)`);
|
||||
|
||||
// Get embeddings for the batch with retry logic for missing indices
|
||||
const { batchEmbeddings, batchTokens } = await getBatchEmbeddingsWithRetry(
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
import { TokenTracker } from "../utils/token-tracker";
|
||||
import { cosineSimilarity } from "./cosine";
|
||||
import { getEmbeddings } from "./embeddings";
|
||||
import { logInfo, logError, logDebug, logWarning } from '../logging';
|
||||
import {TokenTracker} from "../utils/token-tracker";
|
||||
import {cosineSimilarity} from "./cosine";
|
||||
import {getEmbeddings} from "./embeddings";
|
||||
|
||||
const SIMILARITY_THRESHOLD = 0.86; // Adjustable threshold for cosine similarity
|
||||
|
||||
@ -21,7 +20,7 @@ export async function dedupQueries(
|
||||
|
||||
// Get embeddings for all queries in one batch
|
||||
const allQueries = [...newQueries, ...existingQueries];
|
||||
const { embeddings: allEmbeddings } = await getEmbeddings(allQueries, tracker);
|
||||
const {embeddings: allEmbeddings} = await getEmbeddings(allQueries, tracker);
|
||||
|
||||
// If embeddings is empty (due to 402 error), return all new queries
|
||||
if (!allEmbeddings.length) {
|
||||
@ -67,12 +66,12 @@ export async function dedupQueries(
|
||||
usedIndices.add(i);
|
||||
}
|
||||
}
|
||||
logInfo('Unique queries:', { queries: uniqueQueries });
|
||||
console.log('Dedup:', uniqueQueries);
|
||||
return {
|
||||
unique_queries: uniqueQueries,
|
||||
};
|
||||
} catch (error) {
|
||||
logError('Deduplication error:', { error });
|
||||
console.error('Error in deduplication analysis:', error);
|
||||
|
||||
// return all new queries if there is an error
|
||||
return {
|
||||
|
||||
@ -14,7 +14,7 @@ export async function cherryPick(question: string, longContext: string, options:
|
||||
|
||||
if (longContext.length < snippetLength * 2) {
|
||||
// If the context is shorter than the snippet length, return the whole context
|
||||
logInfo('content is too short, dont bother');
|
||||
logDebug('content is too short, dont bother');
|
||||
return longContext;
|
||||
}
|
||||
|
||||
@ -24,7 +24,7 @@ export async function cherryPick(question: string, longContext: string, options:
|
||||
chunks.push(longContext.substring(i, Math.min(i + chunkSize, longContext.length)));
|
||||
}
|
||||
|
||||
logInfo('late chunking enabled! num chunks:', { count: chunks.length });
|
||||
logDebug(`late chunking enabled! num chunks: ${chunks.length}`);
|
||||
|
||||
trackers.actionTracker.trackThink('late_chunk', schemaGen.languageCode, { url });
|
||||
|
||||
|
||||
@ -44,7 +44,7 @@ export async function rerankDocuments(
|
||||
batches.push(documents.slice(i, i + batchSize));
|
||||
}
|
||||
|
||||
logInfo(`Processing ${documents.length} documents in ${batches.length} batches`);
|
||||
logDebug(`Reranking ${documents.length} documents in ${batches.length} batches`);
|
||||
|
||||
// Process all batches in parallel
|
||||
const batchResults = await Promise.all(
|
||||
|
||||
@ -36,7 +36,7 @@ export async function search(
|
||||
throw new Error('Invalid response format');
|
||||
}
|
||||
|
||||
logInfo('Search results metadata:', { metadata: data.meta });
|
||||
logDebug('Search results metadata:', { metadata: data.meta });
|
||||
|
||||
const tokenTracker = tracker || new TokenTracker();
|
||||
tokenTracker.trackUsage('search', {
|
||||
|
||||
@ -82,7 +82,7 @@ export async function reviseAnswer(
|
||||
|
||||
|
||||
logInfo(TOOL_NAME, { text: result.text });
|
||||
logInfo('repaired before/after', { before: mdContent.length, after: result.text.length });
|
||||
logDebug(`repaired before/after: ${mdContent.length} -> ${result.text.length}`);
|
||||
|
||||
if (result.text.length < mdContent.length * 0.85) {
|
||||
logError(`repaired content length ${result.text.length} is significantly shorter than original content ${mdContent.length}, return original content instead.`);
|
||||
|
||||
@ -51,11 +51,7 @@ export async function readUrl(
|
||||
throw new Error('Invalid response data');
|
||||
}
|
||||
|
||||
logInfo('Read:', {
|
||||
title: data.data.title,
|
||||
url: data.data.url,
|
||||
tokens: data.data.usage?.tokens || 0
|
||||
});
|
||||
logDebug(`Read: ${data.data.title} (${data.data.url})`);
|
||||
|
||||
const tokens = data.data.usage?.tokens || 0;
|
||||
const tokenTracker = tracker || new TokenTracker();
|
||||
|
||||
@ -25,7 +25,7 @@ export async function segmentText(
|
||||
|
||||
// Split content into batches
|
||||
const batches = splitTextIntoBatches(content, MAX_BATCH_SIZE);
|
||||
logDebug(`Processing ${batches.length} batches`);
|
||||
logDebug(`Segmenting ${batches.length} batches`);
|
||||
|
||||
// Calculate offsets for each batch upfront
|
||||
const batchOffsets: number[] = [];
|
||||
|
||||
@ -9,7 +9,7 @@ import {
|
||||
import { TokenTracker } from "./token-tracker";
|
||||
import { getModel, ToolName, getToolConfig } from "../config";
|
||||
import Hjson from 'hjson'; // Import Hjson library
|
||||
import { logInfo, logError, logDebug, logWarning } from '../logging';
|
||||
import { logError, logDebug, logWarning } from '../logging';
|
||||
|
||||
interface GenerateObjectResult<T> {
|
||||
object: T;
|
||||
@ -169,7 +169,7 @@ export class ObjectGeneratorSafe {
|
||||
} catch (parseError) {
|
||||
|
||||
if (numRetries > 0) {
|
||||
logError(`${model} failed on object generation -> manual parsing failed -> retry with ${numRetries - 1} retries remaining`);
|
||||
logWarning(`${model} failed on object generation -> manual parsing failed -> retry with ${numRetries - 1} retries remaining`);
|
||||
return this.generateObject({
|
||||
model,
|
||||
schema,
|
||||
@ -180,7 +180,7 @@ export class ObjectGeneratorSafe {
|
||||
});
|
||||
} else {
|
||||
// Second fallback: Try with fallback model if provided
|
||||
logError(`${model} failed on object generation -> manual parsing failed -> trying fallback with distilled schema`);
|
||||
logWarning(`${model} failed on object generation -> manual parsing failed -> trying fallback with distilled schema`);
|
||||
try {
|
||||
let failedOutput = '';
|
||||
|
||||
@ -201,7 +201,7 @@ export class ObjectGeneratorSafe {
|
||||
});
|
||||
|
||||
this.tokenTracker.trackUsage('fallback', fallbackResult.usage); // Track against fallback model
|
||||
logInfo('Distilled schema parse success!');
|
||||
logDebug('Distilled schema parse success!');
|
||||
return fallbackResult;
|
||||
} catch (fallbackError) {
|
||||
// If fallback model also fails, try parsing its error response
|
||||
@ -221,11 +221,11 @@ export class ObjectGeneratorSafe {
|
||||
|
||||
private async handleGenerateObjectError<T>(error: unknown): Promise<GenerateObjectResult<T>> {
|
||||
if (NoObjectGeneratedError.isInstance(error)) {
|
||||
logError('Object not generated according to schema, fallback to manual parsing');
|
||||
logWarning('Object not generated according to schema, fallback to manual parsing');
|
||||
try {
|
||||
// First try standard JSON parsing
|
||||
const partialResponse = JSON.parse((error as any).text);
|
||||
logInfo('JSON parse success!');
|
||||
logDebug('JSON parse success!');
|
||||
return {
|
||||
object: partialResponse as T,
|
||||
usage: (error as any).usage
|
||||
@ -234,7 +234,7 @@ export class ObjectGeneratorSafe {
|
||||
// Use Hjson to parse the error response for more lenient parsing
|
||||
try {
|
||||
const hjsonResponse = Hjson.parse((error as any).text);
|
||||
logInfo('Hjson parse success!');
|
||||
logDebug('Hjson parse success!');
|
||||
return {
|
||||
object: hjsonResponse as T,
|
||||
usage: (error as any).usage
|
||||
|
||||
@ -118,7 +118,7 @@ export class Schemas {
|
||||
|
||||
this.languageCode = result.object.langCode;
|
||||
this.languageStyle = result.object.langStyle;
|
||||
logInfo(`language`, { object: result.object });
|
||||
logDebug(`language: ${this.languageCode} -> ${this.languageStyle}`);
|
||||
}
|
||||
|
||||
getLanguagePrompt() {
|
||||
|
||||
@ -270,7 +270,7 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers:
|
||||
// Step 2: Rerank only the unique contents
|
||||
const uniqueContents = Object.keys(uniqueContentMap);
|
||||
const uniqueIndicesMap = Object.values(uniqueContentMap);
|
||||
logInfo(`rerank URLs: ${urlItems.length}->${uniqueContents.length}`);
|
||||
logDebug(`unique URLs: ${urlItems.length}->${uniqueContents.length}`);
|
||||
rerankDocuments(question, uniqueContents, trackers.tokenTracker)
|
||||
.then(({ results }) => {
|
||||
// Step 3: Map the scores back to all original items
|
||||
@ -507,7 +507,7 @@ export async function processURLs(
|
||||
const { data } = response;
|
||||
const guessedTime = await getLastModified(url);
|
||||
if (guessedTime) {
|
||||
logInfo('Guessed time for', { url, guessedTime });
|
||||
logDebug(`Guessed time for ${url}: ${guessedTime}`);
|
||||
}
|
||||
|
||||
// Early return if no valid data
|
||||
@ -520,7 +520,7 @@ export async function processURLs(
|
||||
const spamDetectLength = 300;
|
||||
const isGoodContent = data.content.length > spamDetectLength || !await classifyText(data.content);
|
||||
if (!isGoodContent) {
|
||||
logError(`Blocked content ${data.content.length}:`, {
|
||||
logWarning(`Blocked content ${data.content.length}:`, {
|
||||
url,
|
||||
content: data.content.slice(0, spamDetectLength)
|
||||
});
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user