diff --git a/src/tools/embeddings.ts b/src/tools/embeddings.ts
index 326e25a..469d8b4 100644
--- a/src/tools/embeddings.ts
+++ b/src/tools/embeddings.ts
@@ -38,7 +38,7 @@ export async function getEmbeddings(
   for (let i = 0; i < texts.length; i += BATCH_SIZE) {
     const batchTexts = texts.slice(i, i + BATCH_SIZE);
     const currentBatch = Math.floor(i / BATCH_SIZE) + 1;
-    logDebug(`[embeddings] Processing batch ${currentBatch}/${batchCount} (${batchTexts.length} texts)`);
+    logDebug(`Embedding batch ${currentBatch}/${batchCount} (${batchTexts.length} texts)`);

     // Get embeddings for the batch with retry logic for missing indices
     const { batchEmbeddings, batchTokens } = await getBatchEmbeddingsWithRetry(
diff --git a/src/tools/jina-dedup.ts b/src/tools/jina-dedup.ts
index e3d0126..26f4e73 100644
--- a/src/tools/jina-dedup.ts
+++ b/src/tools/jina-dedup.ts
@@ -1,7 +1,6 @@
-import { TokenTracker } from "../utils/token-tracker";
-import { cosineSimilarity } from "./cosine";
-import { getEmbeddings } from "./embeddings";
-import { logInfo, logError, logDebug, logWarning } from '../logging';
+import {TokenTracker} from "../utils/token-tracker";
+import {cosineSimilarity} from "./cosine";
+import {getEmbeddings} from "./embeddings";

 const SIMILARITY_THRESHOLD = 0.86; // Adjustable threshold for cosine similarity

@@ -21,7 +20,7 @@ export async function dedupQueries(
     // Get embeddings for all queries in one batch
     const allQueries = [...newQueries, ...existingQueries];
-    const { embeddings: allEmbeddings } = await getEmbeddings(allQueries, tracker);
+    const {embeddings: allEmbeddings} = await getEmbeddings(allQueries, tracker);

     // If embeddings is empty (due to 402 error), return all new queries
     if (!allEmbeddings.length) {
@@ -67,12 +66,12 @@ export async function dedupQueries(
         usedIndices.add(i);
       }
     }
-    logInfo('Unique queries:', { queries: uniqueQueries });
+    console.log('Dedup:', uniqueQueries);
     return {
       unique_queries: uniqueQueries,
     };
   } catch (error) {
-    logError('Deduplication error:', { error });
+    console.error('Error in deduplication analysis:', error);

     // return all new queries if there is an error
     return {
diff --git a/src/tools/jina-latechunk.ts b/src/tools/jina-latechunk.ts
index d7060de..eb7ab21 100644
--- a/src/tools/jina-latechunk.ts
+++ b/src/tools/jina-latechunk.ts
@@ -14,7 +14,7 @@ export async function cherryPick(question: string, longContext: string, options:
   if (longContext.length < snippetLength * 2) {
     // If the context is shorter than the snippet length, return the whole context
-    logInfo('content is too short, dont bother');
+    logDebug('content is too short, dont bother');
     return longContext;
   }

@@ -24,7 +24,7 @@ export async function cherryPick(question: string, longContext: string, options:
     chunks.push(longContext.substring(i, Math.min(i + chunkSize, longContext.length)));
   }

-  logInfo('late chunking enabled! num chunks:', { count: chunks.length });
+  logDebug(`late chunking enabled! num chunks: ${chunks.length}`);

   trackers.actionTracker.trackThink('late_chunk', schemaGen.languageCode, { url });
diff --git a/src/tools/jina-rerank.ts b/src/tools/jina-rerank.ts
index 716472c..c4dad26 100644
--- a/src/tools/jina-rerank.ts
+++ b/src/tools/jina-rerank.ts
@@ -44,7 +44,7 @@ export async function rerankDocuments(
     batches.push(documents.slice(i, i + batchSize));
   }

-  logInfo(`Processing ${documents.length} documents in ${batches.length} batches`);
+  logDebug(`Reranking ${documents.length} documents in ${batches.length} batches`);

   // Process all batches in parallel
   const batchResults = await Promise.all(
diff --git a/src/tools/jina-search.ts b/src/tools/jina-search.ts
index e47c737..9d5c6f5 100644
--- a/src/tools/jina-search.ts
+++ b/src/tools/jina-search.ts
@@ -36,7 +36,7 @@ export async function search(
     throw new Error('Invalid response format');
   }

-  logInfo('Search results metadata:', { metadata: data.meta });
+  logDebug('Search results metadata:', { metadata: data.meta });

   const tokenTracker = tracker || new TokenTracker();
   tokenTracker.trackUsage('search', {
diff --git a/src/tools/md-fixer.ts b/src/tools/md-fixer.ts
index a068720..4e9e38e 100644
--- a/src/tools/md-fixer.ts
+++ b/src/tools/md-fixer.ts
@@ -82,7 +82,7 @@ export async function reviseAnswer(
   logInfo(TOOL_NAME, { text: result.text });

-  logInfo('repaired before/after', { before: mdContent.length, after: result.text.length });
+  logDebug(`repaired before/after: ${mdContent.length} -> ${result.text.length}`);

   if (result.text.length < mdContent.length * 0.85) {
     logError(`repaired content length ${result.text.length} is significantly shorter than original content ${mdContent.length}, return original content instead.`);
diff --git a/src/tools/read.ts b/src/tools/read.ts
index 64161c4..7d5914e 100644
--- a/src/tools/read.ts
+++ b/src/tools/read.ts
@@ -51,11 +51,7 @@ export async function readUrl(
     throw new Error('Invalid response data');
   }

-  logInfo('Read:', {
-    title: data.data.title,
-    url: data.data.url,
-    tokens: data.data.usage?.tokens || 0
-  });
+  logDebug(`Read: ${data.data.title} (${data.data.url})`);

   const tokens = data.data.usage?.tokens || 0;
   const tokenTracker = tracker || new TokenTracker();
diff --git a/src/tools/segment.ts b/src/tools/segment.ts
index f70c2f1..6e66af4 100644
--- a/src/tools/segment.ts
+++ b/src/tools/segment.ts
@@ -25,7 +25,7 @@ export async function segmentText(
   // Split content into batches
   const batches = splitTextIntoBatches(content, MAX_BATCH_SIZE);
-  logDebug(`Processing ${batches.length} batches`);
+  logDebug(`Segmenting ${batches.length} batches`);

   // Calculate offsets for each batch upfront
   const batchOffsets: number[] = [];
diff --git a/src/utils/safe-generator.ts b/src/utils/safe-generator.ts
index d4936e3..01fa83d 100644
--- a/src/utils/safe-generator.ts
+++ b/src/utils/safe-generator.ts
@@ -9,7 +9,7 @@ import {
 import { TokenTracker } from "./token-tracker";
 import { getModel, ToolName, getToolConfig } from "../config";
 import Hjson from 'hjson'; // Import Hjson library
-import { logInfo, logError, logDebug, logWarning } from '../logging';
+import { logError, logDebug, logWarning } from '../logging';

 interface GenerateObjectResult<T> {
   object: T;
@@ -169,7 +169,7 @@
       } catch (parseError) {
         if (numRetries > 0) {
-          logError(`${model} failed on object generation -> manual parsing failed -> retry with ${numRetries - 1} retries remaining`);
+          logWarning(`${model} failed on object generation -> manual parsing failed -> retry with ${numRetries - 1} retries remaining`);
           return this.generateObject({
             model,
             schema,
@@ -180,7 +180,7 @@
           });
         } else {
           // Second fallback: Try with fallback model if provided
-          logError(`${model} failed on object generation -> manual parsing failed -> trying fallback with distilled schema`);
+          logWarning(`${model} failed on object generation -> manual parsing failed -> trying fallback with distilled schema`);
           try {
             let failedOutput = '';
@@ -201,7 +201,7 @@
             });
             this.tokenTracker.trackUsage('fallback', fallbackResult.usage); // Track against fallback model
-            logInfo('Distilled schema parse success!');
+            logDebug('Distilled schema parse success!');
             return fallbackResult;
           } catch (fallbackError) {
             // If fallback model also fails, try parsing its error response
@@ -221,11 +221,11 @@

   private async handleGenerateObjectError<T>(error: unknown): Promise<GenerateObjectResult<T>> {
     if (NoObjectGeneratedError.isInstance(error)) {
-      logError('Object not generated according to schema, fallback to manual parsing');
+      logWarning('Object not generated according to schema, fallback to manual parsing');
       try {
         // First try standard JSON parsing
         const partialResponse = JSON.parse((error as any).text);
-        logInfo('JSON parse success!');
+        logDebug('JSON parse success!');
         return {
           object: partialResponse as T,
           usage: (error as any).usage
@@ -234,7 +234,7 @@
         // Use Hjson to parse the error response for more lenient parsing
         try {
           const hjsonResponse = Hjson.parse((error as any).text);
-          logInfo('Hjson parse success!');
+          logDebug('Hjson parse success!');
           return {
             object: hjsonResponse as T,
             usage: (error as any).usage
diff --git a/src/utils/schemas.ts b/src/utils/schemas.ts
index 19e9661..3cfea09 100644
--- a/src/utils/schemas.ts
+++ b/src/utils/schemas.ts
@@ -118,7 +118,7 @@ export class Schemas {
     this.languageCode = result.object.langCode;
     this.languageStyle = result.object.langStyle;
-    logInfo(`language`, { object: result.object });
+    logDebug(`language: ${this.languageCode} -> ${this.languageStyle}`);
   }

   getLanguagePrompt() {
diff --git a/src/utils/url-tools.ts b/src/utils/url-tools.ts
index caafd55..20e4260 100644
--- a/src/utils/url-tools.ts
+++ b/src/utils/url-tools.ts
@@ -270,7 +270,7 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers:
   // Step 2: Rerank only the unique contents
   const uniqueContents = Object.keys(uniqueContentMap);
   const uniqueIndicesMap = Object.values(uniqueContentMap);
-  logInfo(`rerank URLs: ${urlItems.length}->${uniqueContents.length}`);
+  logDebug(`unique URLs: ${urlItems.length}->${uniqueContents.length}`);
   rerankDocuments(question, uniqueContents, trackers.tokenTracker)
     .then(({ results }) => {
       // Step 3: Map the scores back to all original items
@@ -507,7 +507,7 @@ export async function processURLs(
     const { data } = response;
     const guessedTime = await getLastModified(url);
     if (guessedTime) {
-      logInfo('Guessed time for', { url, guessedTime });
+      logDebug(`Guessed time for ${url}: ${guessedTime}`);
     }

     // Early return if no valid data
@@ -520,7 +520,7 @@ export async function processURLs(
     const spamDetectLength = 300;
     const isGoodContent = data.content.length > spamDetectLength || !await classifyText(data.content);
     if (!isGoodContent) {
-      logError(`Blocked content ${data.content.length}:`, {
+      logWarning(`Blocked content ${data.content.length}:`, {
        url,
        content: data.content.slice(0, spamDetectLength)
      });