fix: logger — replace console.log/console.error with leveled logger calls and demote noisy logInfo/logError messages to logDebug/logWarning

This commit is contained in:
Han Xiao
2025-06-10 12:34:37 -07:00
parent 1946fa7beb
commit 4b37ec8d04
11 changed files with 25 additions and 30 deletions

View File

@@ -38,7 +38,7 @@ export async function getEmbeddings(
for (let i = 0; i < texts.length; i += BATCH_SIZE) { for (let i = 0; i < texts.length; i += BATCH_SIZE) {
const batchTexts = texts.slice(i, i + BATCH_SIZE); const batchTexts = texts.slice(i, i + BATCH_SIZE);
const currentBatch = Math.floor(i / BATCH_SIZE) + 1; const currentBatch = Math.floor(i / BATCH_SIZE) + 1;
logDebug(`[embeddings] Processing batch ${currentBatch}/${batchCount} (${batchTexts.length} texts)`); logDebug(`Embedding batch ${currentBatch}/${batchCount} (${batchTexts.length} texts)`);
// Get embeddings for the batch with retry logic for missing indices // Get embeddings for the batch with retry logic for missing indices
const { batchEmbeddings, batchTokens } = await getBatchEmbeddingsWithRetry( const { batchEmbeddings, batchTokens } = await getBatchEmbeddingsWithRetry(

View File

@@ -1,7 +1,6 @@
import { TokenTracker } from "../utils/token-tracker"; import {TokenTracker} from "../utils/token-tracker";
import { cosineSimilarity } from "./cosine"; import {cosineSimilarity} from "./cosine";
import { getEmbeddings } from "./embeddings"; import {getEmbeddings} from "./embeddings";
import { logInfo, logError, logDebug, logWarning } from '../logging';
const SIMILARITY_THRESHOLD = 0.86; // Adjustable threshold for cosine similarity const SIMILARITY_THRESHOLD = 0.86; // Adjustable threshold for cosine similarity
@@ -21,7 +20,7 @@ export async function dedupQueries(
// Get embeddings for all queries in one batch // Get embeddings for all queries in one batch
const allQueries = [...newQueries, ...existingQueries]; const allQueries = [...newQueries, ...existingQueries];
const { embeddings: allEmbeddings } = await getEmbeddings(allQueries, tracker); const {embeddings: allEmbeddings} = await getEmbeddings(allQueries, tracker);
// If embeddings is empty (due to 402 error), return all new queries // If embeddings is empty (due to 402 error), return all new queries
if (!allEmbeddings.length) { if (!allEmbeddings.length) {
@@ -67,12 +66,12 @@ export async function dedupQueries(
usedIndices.add(i); usedIndices.add(i);
} }
} }
logInfo('Unique queries:', { queries: uniqueQueries }); console.log('Dedup:', uniqueQueries);
return { return {
unique_queries: uniqueQueries, unique_queries: uniqueQueries,
}; };
} catch (error) { } catch (error) {
logError('Deduplication error:', { error }); console.error('Error in deduplication analysis:', error);
// return all new queries if there is an error // return all new queries if there is an error
return { return {

View File

@@ -14,7 +14,7 @@ export async function cherryPick(question: string, longContext: string, options:
if (longContext.length < snippetLength * 2) { if (longContext.length < snippetLength * 2) {
// If the context is shorter than the snippet length, return the whole context // If the context is shorter than the snippet length, return the whole context
logInfo('content is too short, dont bother'); logDebug('content is too short, dont bother');
return longContext; return longContext;
} }
@@ -24,7 +24,7 @@ export async function cherryPick(question: string, longContext: string, options:
chunks.push(longContext.substring(i, Math.min(i + chunkSize, longContext.length))); chunks.push(longContext.substring(i, Math.min(i + chunkSize, longContext.length)));
} }
logInfo('late chunking enabled! num chunks:', { count: chunks.length }); logDebug(`late chunking enabled! num chunks: ${chunks.length}`);
trackers.actionTracker.trackThink('late_chunk', schemaGen.languageCode, { url }); trackers.actionTracker.trackThink('late_chunk', schemaGen.languageCode, { url });

View File

@@ -44,7 +44,7 @@ export async function rerankDocuments(
batches.push(documents.slice(i, i + batchSize)); batches.push(documents.slice(i, i + batchSize));
} }
logInfo(`Processing ${documents.length} documents in ${batches.length} batches`); logDebug(`Reranking ${documents.length} documents in ${batches.length} batches`);
// Process all batches in parallel // Process all batches in parallel
const batchResults = await Promise.all( const batchResults = await Promise.all(

View File

@@ -36,7 +36,7 @@ export async function search(
throw new Error('Invalid response format'); throw new Error('Invalid response format');
} }
logInfo('Search results metadata:', { metadata: data.meta }); logDebug('Search results metadata:', { metadata: data.meta });
const tokenTracker = tracker || new TokenTracker(); const tokenTracker = tracker || new TokenTracker();
tokenTracker.trackUsage('search', { tokenTracker.trackUsage('search', {

View File

@@ -82,7 +82,7 @@ export async function reviseAnswer(
logInfo(TOOL_NAME, { text: result.text }); logInfo(TOOL_NAME, { text: result.text });
logInfo('repaired before/after', { before: mdContent.length, after: result.text.length }); logDebug(`repaired before/after: ${mdContent.length} -> ${result.text.length}`);
if (result.text.length < mdContent.length * 0.85) { if (result.text.length < mdContent.length * 0.85) {
logError(`repaired content length ${result.text.length} is significantly shorter than original content ${mdContent.length}, return original content instead.`); logError(`repaired content length ${result.text.length} is significantly shorter than original content ${mdContent.length}, return original content instead.`);

View File

@@ -51,11 +51,7 @@ export async function readUrl(
throw new Error('Invalid response data'); throw new Error('Invalid response data');
} }
logInfo('Read:', { logDebug(`Read: ${data.data.title} (${data.data.url})`);
title: data.data.title,
url: data.data.url,
tokens: data.data.usage?.tokens || 0
});
const tokens = data.data.usage?.tokens || 0; const tokens = data.data.usage?.tokens || 0;
const tokenTracker = tracker || new TokenTracker(); const tokenTracker = tracker || new TokenTracker();

View File

@@ -25,7 +25,7 @@ export async function segmentText(
// Split content into batches // Split content into batches
const batches = splitTextIntoBatches(content, MAX_BATCH_SIZE); const batches = splitTextIntoBatches(content, MAX_BATCH_SIZE);
logDebug(`Processing ${batches.length} batches`); logDebug(`Segmenting ${batches.length} batches`);
// Calculate offsets for each batch upfront // Calculate offsets for each batch upfront
const batchOffsets: number[] = []; const batchOffsets: number[] = [];

View File

@@ -9,7 +9,7 @@ import {
import { TokenTracker } from "./token-tracker"; import { TokenTracker } from "./token-tracker";
import { getModel, ToolName, getToolConfig } from "../config"; import { getModel, ToolName, getToolConfig } from "../config";
import Hjson from 'hjson'; // Import Hjson library import Hjson from 'hjson'; // Import Hjson library
import { logInfo, logError, logDebug, logWarning } from '../logging'; import { logError, logDebug, logWarning } from '../logging';
interface GenerateObjectResult<T> { interface GenerateObjectResult<T> {
object: T; object: T;
@@ -169,7 +169,7 @@ export class ObjectGeneratorSafe {
} catch (parseError) { } catch (parseError) {
if (numRetries > 0) { if (numRetries > 0) {
logError(`${model} failed on object generation -> manual parsing failed -> retry with ${numRetries - 1} retries remaining`); logWarning(`${model} failed on object generation -> manual parsing failed -> retry with ${numRetries - 1} retries remaining`);
return this.generateObject({ return this.generateObject({
model, model,
schema, schema,
@@ -180,7 +180,7 @@ export class ObjectGeneratorSafe {
}); });
} else { } else {
// Second fallback: Try with fallback model if provided // Second fallback: Try with fallback model if provided
logError(`${model} failed on object generation -> manual parsing failed -> trying fallback with distilled schema`); logWarning(`${model} failed on object generation -> manual parsing failed -> trying fallback with distilled schema`);
try { try {
let failedOutput = ''; let failedOutput = '';
@@ -201,7 +201,7 @@ export class ObjectGeneratorSafe {
}); });
this.tokenTracker.trackUsage('fallback', fallbackResult.usage); // Track against fallback model this.tokenTracker.trackUsage('fallback', fallbackResult.usage); // Track against fallback model
logInfo('Distilled schema parse success!'); logDebug('Distilled schema parse success!');
return fallbackResult; return fallbackResult;
} catch (fallbackError) { } catch (fallbackError) {
// If fallback model also fails, try parsing its error response // If fallback model also fails, try parsing its error response
@@ -221,11 +221,11 @@ export class ObjectGeneratorSafe {
private async handleGenerateObjectError<T>(error: unknown): Promise<GenerateObjectResult<T>> { private async handleGenerateObjectError<T>(error: unknown): Promise<GenerateObjectResult<T>> {
if (NoObjectGeneratedError.isInstance(error)) { if (NoObjectGeneratedError.isInstance(error)) {
logError('Object not generated according to schema, fallback to manual parsing'); logWarning('Object not generated according to schema, fallback to manual parsing');
try { try {
// First try standard JSON parsing // First try standard JSON parsing
const partialResponse = JSON.parse((error as any).text); const partialResponse = JSON.parse((error as any).text);
logInfo('JSON parse success!'); logDebug('JSON parse success!');
return { return {
object: partialResponse as T, object: partialResponse as T,
usage: (error as any).usage usage: (error as any).usage
@@ -234,7 +234,7 @@ export class ObjectGeneratorSafe {
// Use Hjson to parse the error response for more lenient parsing // Use Hjson to parse the error response for more lenient parsing
try { try {
const hjsonResponse = Hjson.parse((error as any).text); const hjsonResponse = Hjson.parse((error as any).text);
logInfo('Hjson parse success!'); logDebug('Hjson parse success!');
return { return {
object: hjsonResponse as T, object: hjsonResponse as T,
usage: (error as any).usage usage: (error as any).usage

View File

@@ -118,7 +118,7 @@ export class Schemas {
this.languageCode = result.object.langCode; this.languageCode = result.object.langCode;
this.languageStyle = result.object.langStyle; this.languageStyle = result.object.langStyle;
logInfo(`language`, { object: result.object }); logDebug(`language: ${this.languageCode} -> ${this.languageStyle}`);
} }
getLanguagePrompt() { getLanguagePrompt() {

View File

@@ -270,7 +270,7 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers:
// Step 2: Rerank only the unique contents // Step 2: Rerank only the unique contents
const uniqueContents = Object.keys(uniqueContentMap); const uniqueContents = Object.keys(uniqueContentMap);
const uniqueIndicesMap = Object.values(uniqueContentMap); const uniqueIndicesMap = Object.values(uniqueContentMap);
logInfo(`rerank URLs: ${urlItems.length}->${uniqueContents.length}`); logDebug(`unique URLs: ${urlItems.length}->${uniqueContents.length}`);
rerankDocuments(question, uniqueContents, trackers.tokenTracker) rerankDocuments(question, uniqueContents, trackers.tokenTracker)
.then(({ results }) => { .then(({ results }) => {
// Step 3: Map the scores back to all original items // Step 3: Map the scores back to all original items
@@ -507,7 +507,7 @@ export async function processURLs(
const { data } = response; const { data } = response;
const guessedTime = await getLastModified(url); const guessedTime = await getLastModified(url);
if (guessedTime) { if (guessedTime) {
logInfo('Guessed time for', { url, guessedTime }); logDebug(`Guessed time for ${url}: ${guessedTime}`);
} }
// Early return if no valid data // Early return if no valid data
@@ -520,7 +520,7 @@ export async function processURLs(
const spamDetectLength = 300; const spamDetectLength = 300;
const isGoodContent = data.content.length > spamDetectLength || !await classifyText(data.content); const isGoodContent = data.content.length > spamDetectLength || !await classifyText(data.content);
if (!isGoodContent) { if (!isGoodContent) {
logError(`Blocked content ${data.content.length}:`, { logWarning(`Blocked content ${data.content.length}:`, {
url, url,
content: data.content.slice(0, spamDetectLength) content: data.content.slice(0, spamDetectLength)
}); });