fix: logger

This commit is contained in:
Han Xiao 2025-06-10 12:34:37 -07:00
parent 1946fa7beb
commit 4b37ec8d04
11 changed files with 25 additions and 30 deletions

View File

@ -38,7 +38,7 @@ export async function getEmbeddings(
for (let i = 0; i < texts.length; i += BATCH_SIZE) {
const batchTexts = texts.slice(i, i + BATCH_SIZE);
const currentBatch = Math.floor(i / BATCH_SIZE) + 1;
logDebug(`[embeddings] Processing batch ${currentBatch}/${batchCount} (${batchTexts.length} texts)`);
logDebug(`Embedding batch ${currentBatch}/${batchCount} (${batchTexts.length} texts)`);
// Get embeddings for the batch with retry logic for missing indices
const { batchEmbeddings, batchTokens } = await getBatchEmbeddingsWithRetry(

View File

@ -1,7 +1,6 @@
import { TokenTracker } from "../utils/token-tracker";
import { cosineSimilarity } from "./cosine";
import { getEmbeddings } from "./embeddings";
import { logInfo, logError, logDebug, logWarning } from '../logging';
import {TokenTracker} from "../utils/token-tracker";
import {cosineSimilarity} from "./cosine";
import {getEmbeddings} from "./embeddings";
const SIMILARITY_THRESHOLD = 0.86; // Adjustable threshold for cosine similarity
@ -21,7 +20,7 @@ export async function dedupQueries(
// Get embeddings for all queries in one batch
const allQueries = [...newQueries, ...existingQueries];
const { embeddings: allEmbeddings } = await getEmbeddings(allQueries, tracker);
const {embeddings: allEmbeddings} = await getEmbeddings(allQueries, tracker);
// If embeddings is empty (due to 402 error), return all new queries
if (!allEmbeddings.length) {
@ -67,12 +66,12 @@ export async function dedupQueries(
usedIndices.add(i);
}
}
logInfo('Unique queries:', { queries: uniqueQueries });
console.log('Dedup:', uniqueQueries);
return {
unique_queries: uniqueQueries,
};
} catch (error) {
logError('Deduplication error:', { error });
console.error('Error in deduplication analysis:', error);
// return all new queries if there is an error
return {

View File

@ -14,7 +14,7 @@ export async function cherryPick(question: string, longContext: string, options:
if (longContext.length < snippetLength * 2) {
// If the context is shorter than the snippet length, return the whole context
logInfo('content is too short, dont bother');
logDebug('content is too short, dont bother');
return longContext;
}
@ -24,7 +24,7 @@ export async function cherryPick(question: string, longContext: string, options:
chunks.push(longContext.substring(i, Math.min(i + chunkSize, longContext.length)));
}
logInfo('late chunking enabled! num chunks:', { count: chunks.length });
logDebug(`late chunking enabled! num chunks: ${chunks.length}`);
trackers.actionTracker.trackThink('late_chunk', schemaGen.languageCode, { url });

View File

@ -44,7 +44,7 @@ export async function rerankDocuments(
batches.push(documents.slice(i, i + batchSize));
}
logInfo(`Processing ${documents.length} documents in ${batches.length} batches`);
logDebug(`Reranking ${documents.length} documents in ${batches.length} batches`);
// Process all batches in parallel
const batchResults = await Promise.all(

View File

@ -36,7 +36,7 @@ export async function search(
throw new Error('Invalid response format');
}
logInfo('Search results metadata:', { metadata: data.meta });
logDebug('Search results metadata:', { metadata: data.meta });
const tokenTracker = tracker || new TokenTracker();
tokenTracker.trackUsage('search', {

View File

@ -82,7 +82,7 @@ export async function reviseAnswer(
logInfo(TOOL_NAME, { text: result.text });
logInfo('repaired before/after', { before: mdContent.length, after: result.text.length });
logDebug(`repaired before/after: ${mdContent.length} -> ${result.text.length}`);
if (result.text.length < mdContent.length * 0.85) {
logError(`repaired content length ${result.text.length} is significantly shorter than original content ${mdContent.length}, return original content instead.`);

View File

@ -51,11 +51,7 @@ export async function readUrl(
throw new Error('Invalid response data');
}
logInfo('Read:', {
title: data.data.title,
url: data.data.url,
tokens: data.data.usage?.tokens || 0
});
logDebug(`Read: ${data.data.title} (${data.data.url})`);
const tokens = data.data.usage?.tokens || 0;
const tokenTracker = tracker || new TokenTracker();

View File

@ -25,7 +25,7 @@ export async function segmentText(
// Split content into batches
const batches = splitTextIntoBatches(content, MAX_BATCH_SIZE);
logDebug(`Processing ${batches.length} batches`);
logDebug(`Segmenting ${batches.length} batches`);
// Calculate offsets for each batch upfront
const batchOffsets: number[] = [];

View File

@ -9,7 +9,7 @@ import {
import { TokenTracker } from "./token-tracker";
import { getModel, ToolName, getToolConfig } from "../config";
import Hjson from 'hjson'; // Import Hjson library
import { logInfo, logError, logDebug, logWarning } from '../logging';
import { logError, logDebug, logWarning } from '../logging';
interface GenerateObjectResult<T> {
object: T;
@ -169,7 +169,7 @@ export class ObjectGeneratorSafe {
} catch (parseError) {
if (numRetries > 0) {
logError(`${model} failed on object generation -> manual parsing failed -> retry with ${numRetries - 1} retries remaining`);
logWarning(`${model} failed on object generation -> manual parsing failed -> retry with ${numRetries - 1} retries remaining`);
return this.generateObject({
model,
schema,
@ -180,7 +180,7 @@ export class ObjectGeneratorSafe {
});
} else {
// Second fallback: Try with fallback model if provided
logError(`${model} failed on object generation -> manual parsing failed -> trying fallback with distilled schema`);
logWarning(`${model} failed on object generation -> manual parsing failed -> trying fallback with distilled schema`);
try {
let failedOutput = '';
@ -201,7 +201,7 @@ export class ObjectGeneratorSafe {
});
this.tokenTracker.trackUsage('fallback', fallbackResult.usage); // Track against fallback model
logInfo('Distilled schema parse success!');
logDebug('Distilled schema parse success!');
return fallbackResult;
} catch (fallbackError) {
// If fallback model also fails, try parsing its error response
@ -221,11 +221,11 @@ export class ObjectGeneratorSafe {
private async handleGenerateObjectError<T>(error: unknown): Promise<GenerateObjectResult<T>> {
if (NoObjectGeneratedError.isInstance(error)) {
logError('Object not generated according to schema, fallback to manual parsing');
logWarning('Object not generated according to schema, fallback to manual parsing');
try {
// First try standard JSON parsing
const partialResponse = JSON.parse((error as any).text);
logInfo('JSON parse success!');
logDebug('JSON parse success!');
return {
object: partialResponse as T,
usage: (error as any).usage
@ -234,7 +234,7 @@ export class ObjectGeneratorSafe {
// Use Hjson to parse the error response for more lenient parsing
try {
const hjsonResponse = Hjson.parse((error as any).text);
logInfo('Hjson parse success!');
logDebug('Hjson parse success!');
return {
object: hjsonResponse as T,
usage: (error as any).usage

View File

@ -118,7 +118,7 @@ export class Schemas {
this.languageCode = result.object.langCode;
this.languageStyle = result.object.langStyle;
logInfo(`language`, { object: result.object });
logDebug(`language: ${this.languageCode} -> ${this.languageStyle}`);
}
getLanguagePrompt() {

View File

@ -270,7 +270,7 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers:
// Step 2: Rerank only the unique contents
const uniqueContents = Object.keys(uniqueContentMap);
const uniqueIndicesMap = Object.values(uniqueContentMap);
logInfo(`rerank URLs: ${urlItems.length}->${uniqueContents.length}`);
logDebug(`unique URLs: ${urlItems.length}->${uniqueContents.length}`);
rerankDocuments(question, uniqueContents, trackers.tokenTracker)
.then(({ results }) => {
// Step 3: Map the scores back to all original items
@ -507,7 +507,7 @@ export async function processURLs(
const { data } = response;
const guessedTime = await getLastModified(url);
if (guessedTime) {
logInfo('Guessed time for', { url, guessedTime });
logDebug(`Guessed time for ${url}: ${guessedTime}`);
}
// Early return if no valid data
@ -520,7 +520,7 @@ export async function processURLs(
const spamDetectLength = 300;
const isGoodContent = data.content.length > spamDetectLength || !await classifyText(data.content);
if (!isGoodContent) {
logError(`Blocked content ${data.content.length}:`, {
logWarning(`Blocked content ${data.content.length}:`, {
url,
content: data.content.slice(0, spamDetectLength)
});