From 9edf122a8c6331ca8c8423254a645131df543a82 Mon Sep 17 00:00:00 2001 From: Han Xiao Date: Tue, 10 Jun 2025 11:48:19 -0700 Subject: [PATCH] refactor: logger --- config.json | 2 +- jina-ai/config.json | 2 +- src/agent.ts | 70 ++++--- src/app.ts | 33 ++-- src/cli.ts | 15 +- src/config.ts | 5 +- src/evals/batch-evals.ts | 61 ++++--- src/logging.ts | 56 ++++++ src/tools/brave-search.ts | 12 +- src/tools/broken-ch-fixer.ts | 25 +-- src/tools/build-ref.ts | 313 ++++++++++++++++---------------- src/tools/code-sandbox.ts | 21 ++- src/tools/cosine.ts | 8 +- src/tools/dedup.ts | 13 +- src/tools/embeddings.ts | 69 +++---- src/tools/error-analyzer.ts | 12 +- src/tools/evaluator.ts | 21 ++- src/tools/grounding.ts | 15 +- src/tools/jina-classify-spam.ts | 5 +- src/tools/jina-classify.ts | 6 + src/tools/jina-dedup.ts | 13 +- src/tools/jina-latechunk.ts | 19 +- src/tools/jina-rerank.ts | 11 +- src/tools/jina-search.ts | 5 +- src/tools/md-fixer.ts | 9 +- src/tools/query-rewriter.ts | 13 +- src/tools/read.ts | 5 +- src/tools/segment.ts | 27 +-- src/tools/serper-search.ts | 9 +- src/tools/token-tracker.ts | 5 + src/tools/url-tools.ts | 6 + src/tools/web-search.ts | 5 + src/utils/axios-client.ts | 65 +++---- src/utils/image-tools.ts | 119 ++++++------ src/utils/safe-generator.ts | 23 +-- src/utils/schemas.ts | 5 +- src/utils/text-tools.ts | 25 +-- src/utils/token-tracker.ts | 23 +-- src/utils/url-tools.ts | 66 ++++--- 39 files changed, 677 insertions(+), 540 deletions(-) create mode 100644 src/logging.ts create mode 100644 src/tools/jina-classify.ts create mode 100644 src/tools/token-tracker.ts create mode 100644 src/tools/url-tools.ts create mode 100644 src/tools/web-search.ts diff --git a/config.json b/config.json index a48012c..f2db8c1 100644 --- a/config.json +++ b/config.json @@ -12,7 +12,7 @@ "defaults": { "search_provider": "jina", "llm_provider": "gemini", - "step_sleep": 100 + "step_sleep": 0.1 }, "providers": { "gemini": { diff --git a/jina-ai/config.json
b/jina-ai/config.json index 1fab432..f80318f 100644 --- a/jina-ai/config.json +++ b/jina-ai/config.json @@ -12,7 +12,7 @@ "defaults": { "search_provider": "jina", "llm_provider": "vertex", - "step_sleep": 500 + "step_sleep": 0.5 }, "providers": { "vertex": { diff --git a/src/agent.ts b/src/agent.ts index 2c71363..732ddda 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -44,11 +44,11 @@ import { MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas import { formatDateBasedOnType, formatDateRange } from "./utils/date-tools"; import { reviseAnswer } from "./tools/md-fixer"; import { buildImageReferences, buildReferences } from "./tools/build-ref"; +import { logInfo, logError, logDebug, logWarning } from './logging'; -async function sleep(ms: number) { - const seconds = Math.ceil(ms / 1000); - console.log(`Waiting ${seconds}s...`); - return new Promise(resolve => setTimeout(resolve, ms)); +async function wait(seconds: number) { + logDebug(`Waiting ${seconds}s...`); + await new Promise(resolve => setTimeout(resolve, seconds * 1000)); } function BuildMsgsFromKnowledge(knowledge: KnowledgeItem[]): CoreMessage[] { @@ -295,7 +295,7 @@ async function executeSearchQueries( } try { - console.log('Search query:', query); + logDebug('Search query:', { query }); switch (searchProvider || SEARCH_PROVIDER) { case 'jina': case 'arxiv': @@ -318,10 +318,13 @@ async function executeSearchQueries( throw new Error('No results found'); } } catch (error) { - console.error(`${SEARCH_PROVIDER} search failed for query:`, query, error); + logError(`${SEARCH_PROVIDER} search failed for query:`, { + query, + error: error instanceof Error ? 
error.message : String(error) + }); continue; } finally { - await sleep(STEP_SLEEP); + await wait(STEP_SLEEP); } const minResults: SearchSnippet[] = results @@ -360,13 +363,13 @@ async function executeSearchQueries( } if (searchedQueries.length === 0) { if (onlyHostnames && onlyHostnames.length > 0) { - console.log(`No results found for queries: ${uniqQOnly.join(', ')} on hostnames: ${onlyHostnames.join(', ')}`); + logWarning(`No results found for queries: ${uniqQOnly.join(', ')} on hostnames: ${onlyHostnames.join(', ')}`); context.actionTracker.trackThink('hostnames_no_results', SchemaGen.languageCode, { hostnames: onlyHostnames.join(', ') }); } } else { - console.log(`Utility/Queries: ${utilityScore}/${searchedQueries.length}`); + logDebug(`Utility/Queries: ${utilityScore}/${searchedQueries.length}`); if (searchedQueries.length > MAX_QUERIES_PER_STEP) { - console.log(`So many queries??? ${searchedQueries.map(q => `"${q}"`).join(', ')}`) + logDebug(`So many queries??? ${searchedQueries.map(q => `"${q}"`).join(', ')}`) } } return { @@ -482,8 +485,8 @@ export async function getResponse(question?: string, step++; totalStep++; const budgetPercentage = (context.tokenTracker.getTotalUsage().totalTokens / tokenBudget * 100).toFixed(2); - console.log(`Step ${totalStep} / Budget used ${budgetPercentage}%`); - console.log('Gaps:', gaps); + logDebug(`Step ${totalStep} / Budget used ${budgetPercentage}%`); + logDebug('Gaps:', { gaps }); allowReflect = allowReflect && (gaps.length <= MAX_REFLECT_PER_STEP); // rotating question from gaps const currentQuestion: string = gaps[totalStep % gaps.length]; @@ -524,7 +527,7 @@ export async function getResponse(question?: string, // improve diversity by keep top 2 urls of each hostname weightedURLs = keepKPerHostname(weightedURLs, 2); - console.log('Weighted URLs:', weightedURLs.length); + logDebug('Weighted URLs:', { count: weightedURLs.length }); } allowRead = allowRead && (weightedURLs.length > 0); @@ -560,8 +563,8 @@ export async 
function getResponse(question?: string, } as StepAction; // print allowed and chose action const actionsStr = [allowSearch, allowRead, allowAnswer, allowReflect, allowCoding].map((a, i) => a ? ['search', 'read', 'answer', 'reflect'][i] : null).filter(a => a).join(', '); - console.log(`${currentQuestion}: ${thisStep.action} <- [${actionsStr}]`); - console.log(thisStep) + logDebug(`${currentQuestion}: ${thisStep.action} <- [${actionsStr}]`); + logDebug('Step details:', thisStep); context.actionTracker.trackAction({ totalStep, thisStep, gaps }); @@ -609,7 +612,10 @@ export async function getResponse(question?: string, ...thisStep, }); - console.log(currentQuestion, evaluationMetrics[currentQuestion]) + logDebug('Current question evaluation:', { + question: currentQuestion, + metrics: evaluationMetrics[currentQuestion] + }); let evaluation: EvaluationResponse = { pass: true, think: '' }; if (evaluationMetrics[currentQuestion].length > 0) { context.actionTracker.trackThink('eval_first', SchemaGen.languageCode) @@ -853,7 +859,7 @@ You decided to think out of the box or cut from a completely different angle. thisStep.URLTargets = [...new Set([...thisStep.URLTargets, ...weightedURLs.map(r => r.url!)])].slice(0, MAX_URLS_PER_STEP); const uniqueURLs = thisStep.URLTargets; - console.log(uniqueURLs) + logDebug('Unique URLs:', { urls: uniqueURLs }); if (uniqueURLs.length > 0) { const { urlResults, success } = await processURLs( @@ -921,7 +927,9 @@ You found the solution and add it to your knowledge for future reference. result: result }); } catch (error) { - console.error('Error solving coding issue:', error); + logError('Error solving coding issue:', { + error: error instanceof Error ? error.message : String(error) + }); diaryContext.push(` At step ${step}, you took the **coding** action and try to solve the coding issue: ${thisStep.codingIssue}. But unfortunately, you failed to solve the issue. You need to think out of the box or cut from a completely different angle. 
@@ -944,11 +952,11 @@ But unfortunately, you failed to solve the issue. You need to think out of the b weightedURLs, msgWithKnowledge }, totalStep); - await sleep(STEP_SLEEP); + await wait(STEP_SLEEP); } if (!(thisStep as AnswerAction).isFinal) { - console.log('Enter Beast mode!!!') + logWarning('Enter Beast mode!!!'); // any answer is better than no answer, humanity last resort step++; totalStep++; @@ -1024,12 +1032,14 @@ But unfortunately, you failed to solve the issue. You need to think out of the b } let imageReferences: ImageReference[] = []; - if(imageObjects.length && with_images) { + if (imageObjects.length && with_images) { try { imageReferences = await buildImageReferences(answerStep.answer, imageObjects, context, SchemaGen); - console.log('Image references built:', imageReferences); + logDebug('Image references built:', { count: imageReferences.length }); } catch (error) { - console.error('Error building image references:', error); + logError('Error building image references:', { + error: error instanceof Error ? error.message : String(error) + }); imageReferences = []; } } @@ -1087,7 +1097,9 @@ ${JSON.stringify(zodToJsonSchema(schema), null, 2)} await fs.writeFile('urls.json', JSON.stringify(weightedURLs, null, 2)); await fs.writeFile('messages.json', JSON.stringify(msgWithKnowledge, null, 2)); } catch (error) { - console.error('Context storage failed:', error); + logError('Context storage failed:', { + error: error instanceof Error ? 
error.message : String(error) + }); } } @@ -1098,12 +1110,16 @@ export async function main() { context: tracker, visitedURLs: visitedURLs } = await getResponse(question) as { result: AnswerAction; context: TrackerContext; visitedURLs: string[] }; - console.log('Final Answer:', finalStep.answer); - console.log('Visited URLs:', visitedURLs); + logInfo('Final Answer:', { answer: finalStep.answer }); + logInfo('Visited URLs:', { urls: visitedURLs }); tracker.tokenTracker.printSummary(); } if (require.main === module) { - main().catch(console.error); + main().catch(error => { + logError('Main execution error:', { + error: error instanceof Error ? error.message : String(error) + }); + }); } \ No newline at end of file diff --git a/src/app.ts b/src/app.ts index f2d89ae..8afbd3b 100644 --- a/src/app.ts +++ b/src/app.ts @@ -14,6 +14,7 @@ import { ActionTracker } from "./utils/action-tracker"; import { ObjectGeneratorSafe } from "./utils/safe-generator"; import { jsonSchema } from "ai"; // or another converter library import { normalizeHostName } from "./utils/url-tools"; +import { logInfo, logError, logDebug, logWarning } from './logging'; const app = express(); @@ -472,7 +473,7 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => { // Convert JSON schema to Zod schema using a proper converter try { responseSchema = jsonSchema(body.response_format.json_schema); - console.log(responseSchema) + logDebug('Response schema', { schema: responseSchema }); } catch (error: any) { return res.status(400).json({ error: `Invalid JSON schema: ${error.message}` }); } @@ -613,9 +614,12 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => { // Use the generated object as the response content finalAnswer = JSON.stringify(result.object, null, 2); - console.log('Generated object:', finalAnswer) + logInfo('Generated object:', { answer: finalAnswer }); } catch (error) { - console.error('Error processing response with schema:', error); + 
logError('Error processing response with schema:', { + error: error instanceof Error ? error.message : String(error), + schema: responseSchema + }); } } @@ -690,28 +694,17 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => { relatedImages, }; - // Log final response (excluding full content for brevity) - console.log('[chat/completions] Response:', { - id: response.id, - status: 200, - contentLength: response.choices[0].message.content.length, - usage: response.usage, - visitedURLs: response.visitedURLs, - readURLs: response.readURLs, - numURLs: allURLs.length, - allImages: allImages?.length, - relatedImages: relatedImages?.length, + logInfo('[chat/completions] Response:', { + model: body.model, + usage: context.tokenTracker.getTotalUsageSnakeCase() }); res.json(response); } } catch (error: any) { - // Log error details - console.error('[chat/completions] Error:', { - message: error?.message || 'An error occurred', - stack: error?.stack, - type: error?.constructor?.name, - requestId + logError('[chat/completions] Error:', { + error: error instanceof Error ? error.message : String(error), + stack: error instanceof Error ? 
error.stack : undefined }); // Track error as rejected tokens with Vercel token counting diff --git a/src/cli.ts b/src/cli.ts index 03f9721..c0238f8 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -2,6 +2,7 @@ import { Command } from 'commander'; import { getResponse } from './agent'; import { version } from '../package.json'; +import { logInfo, logError, logDebug, logWarning } from './logging'; const program = new Command(); @@ -30,17 +31,17 @@ program ); if (result.action === 'answer') { - console.log('\nAnswer:', result.answer); + logInfo('\nAnswer:', { answer: result.answer }); if (result.references?.length) { - console.log('\nReferences:'); - result.references.forEach(ref => { - console.log(`- ${ref.url}`); - console.log(` "${ref.exactQuote}"`); - }); + logInfo('\nReferences:'); + for (const ref of result.references) { + logInfo(`- ${ref.url}`); + logInfo(` "${ref.exactQuote}"`); + } } } } catch (error) { - console.error('Error:', error instanceof Error ? error.message : String(error)); + logError('Error:', { error: error instanceof Error ? 
error.message : String(error) }); process.exit(1); } }); diff --git a/src/config.ts b/src/config.ts index e1d863d..ab16239 100644 --- a/src/config.ts +++ b/src/config.ts @@ -3,6 +3,7 @@ import { ProxyAgent, setGlobalDispatcher } from 'undici'; import { createGoogleGenerativeAI } from '@ai-sdk/google'; import { createOpenAI, OpenAIProviderSettings } from '@ai-sdk/openai'; import configJson from '../config.json'; +import { logInfo, logError, logDebug, logWarning } from './logging'; // Load environment variables dotenv.config(); @@ -33,7 +34,7 @@ if (env.https_proxy) { const dispatcher = new ProxyAgent({ uri: proxyUrl }); setGlobalDispatcher(dispatcher); } catch (error) { - console.error('Failed to set proxy:', error); + logError('Failed to set proxy:', { error }); } } @@ -157,4 +158,4 @@ const configSummary = { } }; -console.log('Configuration Summary:', JSON.stringify(configSummary, null, 2)); +logInfo('Configuration Summary:', { summary: configSummary }); diff --git a/src/evals/batch-evals.ts b/src/evals/batch-evals.ts index cf9326b..306d011 100644 --- a/src/evals/batch-evals.ts +++ b/src/evals/batch-evals.ts @@ -1,12 +1,13 @@ import fs from 'fs/promises'; -import {exec} from 'child_process'; -import {promisify} from 'util'; -import {getResponse} from '../agent'; -import {generateObject} from 'ai'; -import {GEMINI_API_KEY} from '../config'; -import {z} from 'zod'; -import {AnswerAction, TrackerContext} from "../types"; -import {createGoogleGenerativeAI} from "@ai-sdk/google"; +import { exec } from 'child_process'; +import { promisify } from 'util'; +import { getResponse } from '../agent'; +import { generateObject } from 'ai'; +import { GEMINI_API_KEY } from '../config'; +import { z } from 'zod'; +import { AnswerAction, TrackerContext } from "../types"; +import { createGoogleGenerativeAI } from "@ai-sdk/google"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; const execAsync = promisify(exec); @@ -68,26 +69,26 @@ function 
calculateStats(results: EvaluationResult[], modelName: string): Evaluat } function printStats(stats: EvaluationStats): void { - console.log('\n=== Evaluation Statistics ==='); - console.log(`Model: ${stats.model_name}`); - console.log(`Pass Rate: ${stats.pass_rate.toFixed(0)}%`); - console.log(`Average Steps: ${stats.avg_steps.toFixed(0)}`); - console.log(`Maximum Steps: ${stats.max_steps}`); - console.log(`Minimum Steps: ${stats.min_steps}`); - console.log(`Median Steps: ${stats.median_steps.toFixed(0)}`); - console.log(`Average Tokens: ${stats.avg_tokens.toFixed(0)}`); - console.log(`Median Tokens: ${stats.median_tokens.toFixed(0)}`); - console.log(`Maximum Tokens: ${stats.max_tokens}`); - console.log(`Minimum Tokens: ${stats.min_tokens}`); - console.log('===========================\n'); + logInfo('\n=== Evaluation Statistics ==='); + logInfo(`Model: ${stats.model_name}`); + logInfo(`Pass Rate: ${stats.pass_rate.toFixed(0)}%`); + logInfo(`Average Steps: ${stats.avg_steps.toFixed(0)}`); + logInfo(`Maximum Steps: ${stats.max_steps}`); + logInfo(`Minimum Steps: ${stats.min_steps}`); + logInfo(`Median Steps: ${stats.median_steps.toFixed(0)}`); + logInfo(`Average Tokens: ${stats.avg_tokens.toFixed(0)}`); + logInfo(`Median Tokens: ${stats.median_tokens.toFixed(0)}`); + logInfo(`Maximum Tokens: ${stats.max_tokens}`); + logInfo(`Minimum Tokens: ${stats.min_tokens}`); + logInfo('===========================\n'); } async function getCurrentGitCommit(): Promise { try { - const {stdout} = await execAsync('git rev-parse --short HEAD'); + const { stdout } = await execAsync('git rev-parse --short HEAD'); return stdout.trim(); } catch (error) { - console.error('Error getting git commit:', error); + logError('Error getting git commit:', { error }); return 'unknown'; } } @@ -116,7 +117,7 @@ Minor wording differences are acceptable as long as the core information of the return result.object; } catch (error) { - console.error('Evaluation failed:', error); + logError('Evaluation 
failed:', { error }); return { pass: false, reason: `Evaluation error: ${error}` @@ -134,8 +135,8 @@ async function batchEvaluate(inputFile: string): Promise { // Process each question for (let i = 0; i < questions.length; i++) { - const {question, answer: expectedAnswer} = questions[i]; - console.log(`\nProcessing question ${i + 1}/${questions.length}: ${question}`); + const { question, answer: expectedAnswer } = questions[i]; + logInfo(`\nProcessing question ${i + 1}/${questions.length}: ${question}`); try { // Get response using the agent @@ -166,10 +167,10 @@ async function batchEvaluate(inputFile: string): Promise { actual_answer: actualAnswer }); - console.log(`Evaluation: ${evaluation.pass ? 'PASS' : 'FAIL'}`); - console.log(`Reason: ${evaluation.reason}`); + logInfo(`Evaluation: ${evaluation.pass ? 'PASS' : 'FAIL'}`); + logInfo(`Reason: ${evaluation.reason}`); } catch (error) { - console.error(`Error processing question: ${question}`, error); + logError(`Error processing question: ${question}`, { error }); results.push({ pass: false, reason: `Error: ${error}`, @@ -192,7 +193,7 @@ async function batchEvaluate(inputFile: string): Promise { statistics: stats }, null, 2)); - console.log(`\nEvaluation results saved to ${outputFile}`); + logInfo(`\nEvaluation results saved to ${outputFile}`); } // Run batch evaluation if this is the main module @@ -206,4 +207,4 @@ if (require.main === module) { batchEvaluate(inputFile).catch(console.error); } -export {batchEvaluate}; +export { batchEvaluate }; diff --git a/src/logging.ts b/src/logging.ts new file mode 100644 index 0000000..e114754 --- /dev/null +++ b/src/logging.ts @@ -0,0 +1,56 @@ +// Cloud Run structured logging helper +const project = process.env.GOOGLE_CLOUD_PROJECT; + +interface LogEntry { + severity: string; + message: string; + component: string; + [key: string]: any; +} + +function createLogEntry(severity: string, message: string, context: Record = {}): LogEntry { + const entry: LogEntry = { + severity, + 
 message, component: 'deepsearch', timestamp: new Date().toISOString(), ...context }; + + // Add trace context if available. NOTE(review): K_REVISION is the Cloud Run revision name, not a trace ID; real trace correlation needs the X-Cloud-Trace-Context request header. + if (typeof process.env.K_REVISION !== 'undefined' && project) { + entry['logging.googleapis.com/trace'] = `projects/${project}/traces/${process.env.K_REVISION}`; + } + + // Add source location if available + if (context.file && context.line) { + entry['logging.googleapis.com/sourceLocation'] = { + file: context.file, + line: context.line, + function: context.function || 'unknown' + }; + } + + // Add request ID if available + if (context.requestId) { + entry['logging.googleapis.com/requestId'] = context.requestId; + } + + return entry; +} + +export function logInfo(message: string, context: Record<string, any> = {}) { + console.log(JSON.stringify(createLogEntry('INFO', message, context))); +} + +export function logError(message: string, context: Record<string, any> = {}) { + console.error(JSON.stringify(createLogEntry('ERROR', message, context))); +} + +export function logDebug(message: string, context: Record<string, any> = {}) { + console.log(JSON.stringify(createLogEntry('DEBUG', message, context))); +} + +export function logWarning(message: string, context: Record<string, any> = {}) { + console.warn(JSON.stringify(createLogEntry('WARNING', message, context))); +} diff --git a/src/tools/brave-search.ts b/src/tools/brave-search.ts index 613f410..a2b0f28 100644 --- a/src/tools/brave-search.ts +++ b/src/tools/brave-search.ts @@ -1,14 +1,16 @@ -import {BRAVE_API_KEY} from "../config"; +import { BRAVE_API_KEY } from "../config"; import axiosClient from "../utils/axios-client"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; import { BraveSearchResponse } from '../types'; export async function braveSearch(query: string): Promise<{ response: BraveSearchResponse }> { + logInfo('Search info:', { query }); + const response = await axiosClient.get('https://api.search.brave.com/res/v1/web/search', { params: { q: query, - count: 10, - safesearch: 'off' + count: 10 },
 headers: { 'Accept': 'application/json', @@ -17,6 +19,10 @@ export async function braveSearch(query: string): Promise<{ response: BraveSearc timeout: 10000 }); + if (response.status !== 200) { + throw new Error(`Brave search failed: ${response.status} ${response.statusText}`) + } + // Maintain the same return structure as the original code return { response: response.data }; } diff --git a/src/tools/broken-ch-fixer.ts b/src/tools/broken-ch-fixer.ts index d7b9293..db6ba65 100644 --- a/src/tools/broken-ch-fixer.ts +++ b/src/tools/broken-ch-fixer.ts @@ -1,16 +1,17 @@ import { generateText } from "ai"; import { getModel } from "../config"; -import {TrackerContext} from "../types"; -import {detectBrokenUnicodeViaFileIO} from "../utils/text-tools"; +import { TrackerContext } from "../types"; +import { detectBrokenUnicodeViaFileIO } from "../utils/text-tools"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; /** - * Repairs markdown content with � characters by using Gemini to guess the missing text + * Repairs markdown content with broken characters by using Gemini to guess the missing text */ export async function repairUnknownChars(mdContent: string, trackers?: TrackerContext): Promise<string> { const { broken, readStr } = await detectBrokenUnicodeViaFileIO(mdContent); if (!broken) return readStr; - console.log("Detected broken unicode in output, attempting to repair..."); + logWarning("Detected broken unicode in output, attempting to repair..."); let repairedContent = readStr; let remainingUnknowns = true; @@ -32,7 +33,7 @@ export async function repairUnknownChars(mdContent: string, trackers?: TrackerCo if (position === lastPosition) { // Move past this character by removing it repairedContent = repairedContent.substring(0, position) + - repairedContent.substring(position + 1); + repairedContent.substring(position + 1); continue; } @@ -81,20 +82,20 @@ So what was the original text between these two contexts?`, (await
detectBrokenUnicodeViaFileIO(replacement)).broken || replacement.length > unknownCount * 4 ) { - console.log(`Skipping invalid replacement ${replacement} at position ${position}`); - // Skip to the next � character without modifying content + logWarning(`Skipping invalid replacement ${replacement} at position ${position}`); + // Skip to the next character without modifying content } else { // Replace the unknown sequence with the generated text repairedContent = repairedContent.substring(0, position) + - replacement + - repairedContent.substring(position + unknownCount); + replacement + + repairedContent.substring(position + unknownCount); } - console.log(`Repair iteration ${iterations}: replaced ${unknownCount} � chars with "${replacement}"`); + logDebug(`Repair iteration ${iterations}: replaced ${unknownCount} chars with "${replacement}"`); } catch (error) { - console.error("Error repairing unknown characters:", error); - // Skip to the next � character without modifying this one + logError("Error repairing unknown characters:", { error }); + // Skip to the next character without modifying this one } } diff --git a/src/tools/build-ref.ts b/src/tools/build-ref.ts index c512b9b..4549013 100644 --- a/src/tools/build-ref.ts +++ b/src/tools/build-ref.ts @@ -1,10 +1,11 @@ -import {segmentText} from './segment'; -import {ImageObject, ImageReference, Reference, TrackerContext, WebContent} from "../types"; -import {Schemas} from "../utils/schemas"; -import {cosineSimilarity, jaccardRank} from "./cosine"; -import {getEmbeddings} from "./embeddings"; +import { segmentText } from './segment'; +import { ImageObject, ImageReference, Reference, TrackerContext, WebContent } from "../types"; +import { Schemas } from "../utils/schemas"; +import { cosineSimilarity, jaccardRank } from "./cosine"; +import { getEmbeddings } from "./embeddings"; import { dedupImagesWithEmbeddings } from '../utils/image-tools'; -import {normalizeHostName} from '../utils/url-tools'; +import { 
normalizeHostName } from '../utils/url-tools'; +import { logInfo, logError, logDebug, logWarning } from '../logging'; export async function buildReferences( answer: string, @@ -16,16 +17,16 @@ export async function buildReferences( minRelScore: number = 0.7, onlyHostnames: string[] = [] ): Promise<{ answer: string, references: Array }> { - console.log(`[buildReferences] Starting with maxRef=${maxRef}, minChunkLength=${minChunkLength}, minRelScore=${minRelScore}`); - console.log(`[buildReferences] Answer length: ${answer.length} chars, Web content sources: ${Object.keys(webContents).length}`); + logDebug(`[buildReferences] Starting with maxRef=${maxRef}, minChunkLength=${minChunkLength}, minRelScore=${minRelScore}`); + logDebug(`[buildReferences] Answer length: ${answer.length} chars, Web content sources: ${Object.keys(webContents).length}`); // Step 1: Chunk the answer - console.log(`[buildReferences] Step 1: Chunking answer text`); - const {chunks: answerChunks, chunk_positions: answerChunkPositions} = await segmentText(answer, context); - console.log(`[buildReferences] Answer segmented into ${answerChunks.length} chunks`); + logDebug(`[buildReferences] Step 1: Chunking answer text`); + const { chunks: answerChunks, chunk_positions: answerChunkPositions } = await segmentText(answer, context); + logDebug(`[buildReferences] Answer segmented into ${answerChunks.length} chunks`); // Step 2: Prepare all web content chunks, filtering out those below minimum length - console.log(`[buildReferences] Step 2: Preparing web content chunks and filtering by minimum length (${minChunkLength} chars)`); + logDebug(`[buildReferences] Step 2: Preparing web content chunks and filtering by minimum length (${minChunkLength} chars)`); const allWebContentChunks: string[] = []; const chunkToSourceMap: any = {}; // Maps chunk index to source information const validWebChunkIndices = new Set(); // Tracks indices of valid web chunks (those above minimum length) @@ -53,15 +54,15 @@ export 
async function buildReferences( } } - console.log(`[buildReferences] Collected ${allWebContentChunks.length} web chunks, ${validWebChunkIndices.size} above minimum length`); + logDebug(`[buildReferences] Collected ${allWebContentChunks.length} web chunks, ${validWebChunkIndices.size} above minimum length`); if (allWebContentChunks.length === 0) { - console.log(`[buildReferences] No web content chunks available, returning without references`); - return {answer, references: []}; + logDebug(`[buildReferences] No web content chunks available, returning without references`); + return { answer, references: [] }; } // Step 3: Filter answer chunks by minimum length - console.log(`[buildReferences] Step 3: Filtering answer chunks by minimum length`); + logDebug(`[buildReferences] Step 3: Filtering answer chunks by minimum length`); const validAnswerChunks: string[] = []; const validAnswerChunkIndices: number[] = []; const validAnswerChunkPositions: [number, number][] = []; @@ -80,15 +81,15 @@ export async function buildReferences( validAnswerChunkPositions.push(answerChunkPosition); } - console.log(`[buildReferences] Found ${validAnswerChunks.length}/${answerChunks.length} valid answer chunks above minimum length`); + logDebug(`[buildReferences] Found ${validAnswerChunks.length}/${answerChunks.length} valid answer chunks above minimum length`); if (validAnswerChunks.length === 0) { - console.log(`[buildReferences] No valid answer chunks, returning without references`); - return {answer, references: []}; + logDebug(`[buildReferences] No valid answer chunks, returning without references`); + return { answer, references: [] }; } // Step 4: Get embeddings for BOTH answer chunks and valid web chunks in a single request - console.log(`[buildReferences] Step 4: Getting embeddings for all chunks in a single request (only including web chunks above min length)`); + logDebug(`[buildReferences] Step 4: Getting embeddings for all chunks in a single request (only including web chunks 
above min length)`); // Create maps to track the original indices const chunkIndexMap = new Map(); @@ -99,7 +100,7 @@ export async function buildReferences( // Add answer chunks first validAnswerChunks.forEach((chunk, index) => { allChunks.push(chunk); - chunkIndexMap.set(allChunks.length - 1, {type: 'answer', originalIndex: index}); + chunkIndexMap.set(allChunks.length - 1, { type: 'answer', originalIndex: index }); }); // Then add web chunks that meet minimum length requirement @@ -107,11 +108,11 @@ export async function buildReferences( // Only include valid web chunks (those above minimum length) if (validWebChunkIndices.has(i)) { allChunks.push(allWebContentChunks[i]); - chunkIndexMap.set(allChunks.length - 1, {type: 'web', originalIndex: i}); + chunkIndexMap.set(allChunks.length - 1, { type: 'web', originalIndex: i }); } } - console.log(`[buildReferences] Requesting embeddings for ${allChunks.length} total chunks (${validAnswerChunks.length} answer + ${validWebChunkIndices.size} web)`); + logDebug(`[buildReferences] Requesting embeddings for ${allChunks.length} total chunks (${validAnswerChunks.length} answer + ${validWebChunkIndices.size} web)`); try { // Get embeddings for all chunks in one request @@ -136,10 +137,10 @@ export async function buildReferences( } } - console.log(`[buildReferences] Successfully generated and separated embeddings: ${answerEmbeddings.length} answer, ${webEmbeddingMap.size} web`); + logDebug(`[buildReferences] Successfully generated and separated embeddings: ${answerEmbeddings.length} answer, ${webEmbeddingMap.size} web`); // Step 5: Compute pairwise cosine similarity - console.log(`[buildReferences] Step 5: Computing pairwise cosine similarity between answer and web chunks`); + logDebug(`[buildReferences] Step 5: Computing pairwise cosine similarity between answer and web chunks`); const allMatches = []; for (let i = 0; i < validAnswerChunks.length; i++) { @@ -179,7 +180,7 @@ export async function buildReferences( }); } - 
console.log(`[buildReferences] Processed answer chunk ${i + 1}/${validAnswerChunks.length}, top score: ${matchesForChunk[0]?.relevanceScore.toFixed(4)}`); + logDebug(`[buildReferences] Processed answer chunk ${i + 1}/${validAnswerChunks.length}, top score: ${matchesForChunk[0]?.relevanceScore.toFixed(4)}`); } // Log statistics about relevance scores @@ -190,20 +191,22 @@ export async function buildReferences( const sumRelevance = relevanceScores.reduce((sum, score) => sum + score, 0); const meanRelevance = sumRelevance / relevanceScores.length; - console.log('Reference relevance statistics:', { + const stats = { min: minRelevance.toFixed(4), max: maxRelevance.toFixed(4), mean: meanRelevance.toFixed(4), count: relevanceScores.length - }); + }; + + logDebug('Reference relevance statistics:', stats); } // Step 6: Sort all matches by relevance allMatches.sort((a, b) => b.relevanceScore - a.relevanceScore); - console.log(`[buildReferences] Step 6: Sorted ${allMatches.length} potential matches by relevance score`); + logDebug(`[buildReferences] Step 6: Sorted ${allMatches.length} potential matches by relevance score`); // Step 7: Filter matches as before - console.log(`[buildReferences] Step 7: Filtering matches to ensure uniqueness and threshold (min: ${minRelScore})`); + logDebug(`[buildReferences] Step 7: Filtering matches to ensure uniqueness and threshold (min: ${minRelScore})`); const usedWebChunks = new Set(); const usedAnswerChunks = new Set(); const filteredMatches = []; @@ -222,12 +225,12 @@ export async function buildReferences( } } - console.log(`[buildReferences] Selected ${filteredMatches.length}/${allMatches.length} references after filtering`); + logDebug(`[buildReferences] Selected ${filteredMatches.length}/${allMatches.length} references after filtering`); return buildFinalResult(answer, filteredMatches, chunkToSourceMap); } catch (error) { - console.error('Embedding failed, falling back to Jaccard similarity', error); - console.log(`[buildReferences] 
Fallback: Using Jaccard similarity instead of embeddings`); + logError('Embedding failed, falling back to Jaccard similarity', { error }); + logDebug(`[buildReferences] Fallback: Using Jaccard similarity instead of embeddings`); // Process all chunks with Jaccard fallback const allMatches = []; @@ -237,7 +240,7 @@ export async function buildReferences( const answerChunkIndex = validAnswerChunkIndices[i]; const answerChunkPosition = validAnswerChunkPositions[i]; - console.log(`[buildReferences] Processing answer chunk ${i + 1}/${validAnswerChunks.length} with Jaccard similarity`); + logDebug(`[buildReferences] Processing answer chunk ${i + 1}/${validAnswerChunks.length} with Jaccard similarity`); const fallbackResult = await jaccardRank(answerChunk, allWebContentChunks); for (const match of fallbackResult.results) { @@ -255,7 +258,7 @@ export async function buildReferences( // Sort all matches by relevance and continue with the rest of the function allMatches.sort((a, b) => b.relevanceScore - a.relevanceScore); - console.log(`[buildReferences] Fallback complete. Found ${allMatches.length} potential matches`); + logDebug(`[buildReferences] Fallback complete. 
Found ${allMatches.length} potential matches`); // Filter matches as before const usedWebChunks = new Set(); @@ -276,7 +279,7 @@ export async function buildReferences( } } - console.log(`[buildReferences] Selected ${filteredMatches.length} references using fallback method`); + logDebug(`[buildReferences] Selected ${filteredMatches.length} references using fallback method`); return buildFinalResult(answer, filteredMatches, chunkToSourceMap); } } @@ -287,7 +290,7 @@ function buildFinalResult( filteredMatches: any[], chunkToSourceMap: any ): { answer: string, references: Array } { - console.log(`[buildFinalResult] Building final result with ${filteredMatches.length} references`); + logDebug(`[buildFinalResult] Building final result with ${filteredMatches.length} references`); // Build reference objects const references: Reference[] = filteredMatches.map((match) => { @@ -310,7 +313,7 @@ function buildFinalResult( const referencesByPosition = [...references] .sort((a, b) => a.answerChunkPosition![0] - b.answerChunkPosition![0]); - console.log(`[buildFinalResult] Injecting reference markers into answer`); + logDebug(`[buildFinalResult] Injecting reference markers into answer`); // Insert markers from beginning to end, tracking offset let offset = 0; @@ -362,7 +365,7 @@ function buildFinalResult( offset += marker.length; } - console.log(`[buildFinalResult] Complete. Generated ${references.length} references`); + logDebug(`[buildFinalResult] Complete. 
Generated ${references.length} references`); return { answer: modifiedAnswer, references @@ -378,39 +381,39 @@ export async function buildImageReferences( maxRef: number = 10, minRelScore: number = 0.35 ): Promise> { - console.log(`[buildImageReferences] Starting with maxRef=${maxRef}, minChunkLength=${minChunkLength}, minRelScore=${minRelScore}`); - console.log(`[buildImageReferences] Answer length: ${answer.length} chars, Image sources: ${imageObjects.length}`); + logDebug(`[buildImageReferences] Starting with maxRef=${maxRef}, minChunkLength=${minChunkLength}, minRelScore=${minRelScore}`); + logDebug(`[buildImageReferences] Answer length: ${answer.length} chars, Image sources: ${imageObjects.length}`); // Step 1: Chunk the answer - console.log(`[buildImageReferences] Step 1: Chunking answer text`); - const {chunks: answerChunks, chunk_positions: answerChunkPositions} = await segmentText(answer, context); - console.log(`[buildImageReferences] Answer segmented into ${answerChunks.length} chunks`); + logDebug(`[buildImageReferences] Step 1: Chunking answer text`); + const { chunks: answerChunks, chunk_positions: answerChunkPositions } = await segmentText(answer, context); + logDebug(`[buildImageReferences] Answer segmented into ${answerChunks.length} chunks`); // Step 2: Prepare image content - console.log(`[buildImageReferences] Step 2: Preparing image content`); + logDebug(`[buildImageReferences] Step 2: Preparing image content`); const dudupImages = dedupImagesWithEmbeddings(imageObjects, []); const allImageEmbeddings: number[][] = dudupImages.map(img => img.embedding[0]); // Extract embedding const imageToSourceMap: any = {}; const validImageIndices = new Set(); dudupImages.forEach((img, index) => { - imageToSourceMap[index] = { - url: img.url, - altText: img.alt, - embedding: img.embedding[0] // Store extracted embedding - }; - validImageIndices.add(index); + imageToSourceMap[index] = { + url: img.url, + altText: img.alt, + embedding: img.embedding[0] // Store 
extracted embedding + }; + validImageIndices.add(index); }); - console.log(`[buildImageReferences] Collected ${allImageEmbeddings.length} image embeddings`); + logDebug(`[buildImageReferences] Collected ${allImageEmbeddings.length} image embeddings`); if (allImageEmbeddings.length === 0) { - console.log(`[buildImageReferences] No image data available, returning empty array`); - return []; + logDebug(`[buildImageReferences] No image data available, returning empty array`); + return []; } // Step 3: Filter answer chunks by minimum length - console.log(`[buildImageReferences] Step 3: Filtering answer chunks by minimum length`); + logDebug(`[buildImageReferences] Step 3: Filtering answer chunks by minimum length`); const validAnswerChunks: string[] = []; const validAnswerChunkIndices: number[] = []; const validAnswerChunkPositions: [number, number][] = []; @@ -418,133 +421,135 @@ export async function buildImageReferences( context.actionTracker.trackThink('cross_reference', schema.languageCode); for (let i = 0; i < answerChunks.length; i++) { - const answerChunk = answerChunks[i]; - const answerChunkPosition = answerChunkPositions[i]; + const answerChunk = answerChunks[i]; + const answerChunkPosition = answerChunkPositions[i]; - if (!answerChunk.trim() || answerChunk.length < minChunkLength) continue; + if (!answerChunk.trim() || answerChunk.length < minChunkLength) continue; - validAnswerChunks.push(answerChunk); - validAnswerChunkIndices.push(i); - validAnswerChunkPositions.push(answerChunkPosition); + validAnswerChunks.push(answerChunk); + validAnswerChunkIndices.push(i); + validAnswerChunkPositions.push(answerChunkPosition); } - console.log(`[buildImageReferences] Found ${validAnswerChunks.length}/${answerChunks.length} valid answer chunks above minimum length`); + logDebug(`[buildImageReferences] Found ${validAnswerChunks.length}/${answerChunks.length} valid answer chunks above minimum length`); if (validAnswerChunks.length === 0) { - 
console.log(`[buildImageReferences] No valid answer chunks, returning empty array`); - return []; + logDebug(`[buildImageReferences] No valid answer chunks, returning empty array`); + return []; } // Step 4: Get embeddings for answer chunks - console.log(`[buildImageReferences] Step 4: Getting embeddings for answer chunks`); + logDebug(`[buildImageReferences] Step 4: Getting embeddings for answer chunks`); const answerEmbeddings: number[][] = []; try { - // const embeddingsResult = await getEmbeddings(validAnswerChunks, context.tokenTracker, embeddingOptions); // No embeddingOptions needed here - // answerEmbeddings.push(...embeddingsResult.embeddings); - const embeddingsResult = await getEmbeddings(validAnswerChunks, context.tokenTracker, { - dimensions: 512, - model: 'jina-clip-v2', - }); - answerEmbeddings.push(...embeddingsResult.embeddings); + // const embeddingsResult = await getEmbeddings(validAnswerChunks, context.tokenTracker, embeddingOptions); // No embeddingOptions needed here + // answerEmbeddings.push(...embeddingsResult.embeddings); + const embeddingsResult = await getEmbeddings(validAnswerChunks, context.tokenTracker, { + dimensions: 512, + model: 'jina-clip-v2', + }); + answerEmbeddings.push(...embeddingsResult.embeddings); - console.log(`[buildImageReferences] Got embeddings for ${answerEmbeddings.length} answer chunks`); + logDebug(`[buildImageReferences] Got embeddings for ${answerEmbeddings.length} answer chunks`); - // Step 5: Compute pairwise cosine similarity - console.log(`[buildImageReferences] Step 5: Computing pairwise cosine similarity between answer and image embeddings`); - const allMatches = []; + // Step 5: Compute pairwise cosine similarity + logDebug(`[buildImageReferences] Step 5: Computing pairwise cosine similarity between answer and image embeddings`); + const allMatches = []; - for (let i = 0; i < validAnswerChunks.length; i++) { - const answerChunkIndex = validAnswerChunkIndices[i]; - const answerChunk = 
validAnswerChunks[i]; - const answerChunkPosition = answerChunkPositions[i]; - const answerEmbedding = answerEmbeddings[i]; + for (let i = 0; i < validAnswerChunks.length; i++) { + const answerChunkIndex = validAnswerChunkIndices[i]; + const answerChunk = validAnswerChunks[i]; + const answerChunkPosition = answerChunkPositions[i]; + const answerEmbedding = answerEmbeddings[i]; - const matchesForChunk = []; + const matchesForChunk = []; - for (const imageIndex of validImageIndices) { - const imageEmbedding = allImageEmbeddings[imageIndex]; + for (const imageIndex of validImageIndices) { + const imageEmbedding = allImageEmbeddings[imageIndex]; - if (imageEmbedding) { - const score = cosineSimilarity(answerEmbedding, imageEmbedding); + if (imageEmbedding) { + const score = cosineSimilarity(answerEmbedding, imageEmbedding); - matchesForChunk.push({ - imageIndex, - relevanceScore: score - }); - } - } - - matchesForChunk.sort((a, b) => b.relevanceScore - a.relevanceScore); - - for (const match of matchesForChunk) { - allMatches.push({ - imageIndex: match.imageIndex, - answerChunkIndex: answerChunkIndex, - relevanceScore: match.relevanceScore, - answerChunk: answerChunk, - answerChunkPosition: answerChunkPosition - }); - } - - console.log(`[buildImageReferences] Processed answer chunk ${i + 1}/${validAnswerChunks.length}, top score: ${matchesForChunk[0]?.relevanceScore.toFixed(4)}`); - } - - // Log statistics about relevance scores - if (allMatches.length > 0) { - const relevanceScores = allMatches.map(match => match.relevanceScore); - const minRelevance = Math.min(...relevanceScores); - const maxRelevance = Math.max(...relevanceScores); - const sumRelevance = relevanceScores.reduce((sum, score) => sum + score, 0); - const meanRelevance = sumRelevance / relevanceScores.length; - - console.log('Reference relevance statistics:', { - min: minRelevance.toFixed(4), - max: maxRelevance.toFixed(4), - mean: meanRelevance.toFixed(4), - count: relevanceScores.length + 
matchesForChunk.push({ + imageIndex, + relevanceScore: score }); + } } + matchesForChunk.sort((a, b) => b.relevanceScore - a.relevanceScore); - // Step 6: Sort all matches by relevance - allMatches.sort((a, b) => b.relevanceScore - a.relevanceScore); - console.log(`[buildImageReferences] Step 6: Sorted ${allMatches.length} potential matches by relevance score`); - - // Step 7: Filter matches - console.log(`[buildImageReferences] Step 7: Filtering matches to ensure uniqueness and threshold (min: ${minRelScore})`); - const usedImages = new Set(); - const usedAnswerChunks = new Set(); - const filteredMatches = []; - - for (const match of allMatches) { - if (match.relevanceScore < minRelScore) continue; - - if (!usedImages.has(match.imageIndex) && !usedAnswerChunks.has(match.answerChunkIndex)) { - filteredMatches.push(match); - usedImages.add(match.imageIndex); - usedAnswerChunks.add(match.answerChunkIndex); - - if (filteredMatches.length >= maxRef) break; - } + for (const match of matchesForChunk) { + allMatches.push({ + imageIndex: match.imageIndex, + answerChunkIndex: answerChunkIndex, + relevanceScore: match.relevanceScore, + answerChunk: answerChunk, + answerChunkPosition: answerChunkPosition + }); } - console.log(`[buildImageReferences] Selected ${filteredMatches.length}/${allMatches.length} references after filtering`); + logDebug(`[buildImageReferences] Processed answer chunk ${i + 1}/${validAnswerChunks.length}, top score: ${matchesForChunk[0]?.relevanceScore.toFixed(4)}`); + } - const references: ImageReference[] = filteredMatches.map((match) => { - const source = imageToSourceMap[match.imageIndex]; - return { - url: source.url, - relevanceScore: match.relevanceScore, - answerChunk: match.answerChunk, - answerChunkPosition: match.answerChunkPosition - }; - }); + // Log statistics about relevance scores + if (allMatches.length > 0) { + const relevanceScores = allMatches.map(match => match.relevanceScore); + const minRelevance = Math.min(...relevanceScores); + 
const maxRelevance = Math.max(...relevanceScores); + const sumRelevance = relevanceScores.reduce((sum, score) => sum + score, 0); + const meanRelevance = sumRelevance / relevanceScores.length; - return references; + const stats = { + min: minRelevance.toFixed(4), + max: maxRelevance.toFixed(4), + mean: meanRelevance.toFixed(4), + count: relevanceScores.length + }; + + logDebug('Reference relevance statistics:', stats); + } + + + // Step 6: Sort all matches by relevance + allMatches.sort((a, b) => b.relevanceScore - a.relevanceScore); + logDebug(`[buildImageReferences] Step 6: Sorted ${allMatches.length} potential matches by relevance score`); + + // Step 7: Filter matches + logDebug(`[buildImageReferences] Step 7: Filtering matches to ensure uniqueness and threshold (min: ${minRelScore})`); + const usedImages = new Set(); + const usedAnswerChunks = new Set(); + const filteredMatches = []; + + for (const match of allMatches) { + if (match.relevanceScore < minRelScore) continue; + + if (!usedImages.has(match.imageIndex) && !usedAnswerChunks.has(match.answerChunkIndex)) { + filteredMatches.push(match); + usedImages.add(match.imageIndex); + usedAnswerChunks.add(match.answerChunkIndex); + + if (filteredMatches.length >= maxRef) break; + } + } + + logDebug(`[buildImageReferences] Selected ${filteredMatches.length}/${allMatches.length} references after filtering`); + + const references: ImageReference[] = filteredMatches.map((match) => { + const source = imageToSourceMap[match.imageIndex]; + return { + url: source.url, + relevanceScore: match.relevanceScore, + answerChunk: match.answerChunk, + answerChunkPosition: match.answerChunkPosition + }; + }); + + return references; } catch (error) { - console.error('Embedding failed', error); - return []; + logError('Embedding failed', { error }); + return []; } } \ No newline at end of file diff --git a/src/tools/code-sandbox.ts b/src/tools/code-sandbox.ts index e6eabaf..12b08d6 100644 --- a/src/tools/code-sandbox.ts +++ 
b/src/tools/code-sandbox.ts @@ -1,6 +1,7 @@ -import {ObjectGeneratorSafe} from "../utils/safe-generator"; -import {CodeGenResponse, PromptPair, TrackerContext} from "../types"; -import {Schemas} from "../utils/schemas"; +import { ObjectGeneratorSafe } from "../utils/safe-generator"; +import { CodeGenResponse, PromptPair, TrackerContext } from "../types"; +import { Schemas } from "../utils/schemas"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; interface SandboxResult { @@ -49,9 +50,9 @@ Response: } `; - console.log('Coding prompt', prompt) + logDebug('Coding prompt', { prompt }); - return {system: prompt, user: problem }; + return { system: prompt, user: problem }; } export class CodeSandbox { @@ -101,7 +102,7 @@ export class CodeSandbox { } `); - console.log('Context:', this.context); + logDebug('Context:', { context: this.context }); // Execute the code with the context and get the return value const output = evalInContext(this.context); @@ -134,12 +135,12 @@ export class CodeSandbox { for (let i = 0; i < this.maxAttempts; i++) { // Generate code const generation = await this.generateCode(problem, attempts); - const {code} = generation; + const { code } = generation; - console.log(`Coding attempt ${i + 1}:`, code); + logDebug(`Coding attempt ${i + 1}:`, { code }); // Evaluate the code const result = this.evaluateCode(code); - console.log(`Coding attempt ${i + 1} success:`, result); + logDebug(`Coding attempt ${i + 1} success:`, { result }); if (result.success) { return { @@ -151,7 +152,7 @@ export class CodeSandbox { }; } - console.error('Coding error:', result.error); + logError('Coding error:', { error: result.error }); // Store the failed attempt attempts.push({ diff --git a/src/tools/cosine.ts b/src/tools/cosine.ts index aed0d6b..9b86516 100644 --- a/src/tools/cosine.ts +++ b/src/tools/cosine.ts @@ -1,3 +1,5 @@ +import { logInfo, logError, logDebug, logWarning } from '../logging'; + export function cosineSimilarity(vecA: number[], 
vecB: number[]): number { if (vecA.length !== vecB.length) { throw new Error("Vectors must have the same length"); @@ -21,7 +23,7 @@ export function cosineSimilarity(vecA: number[], vecB: number[]): number { // Fallback similarity ranking using Jaccard export async function jaccardRank(query: string, documents: string[]): Promise<{ results: { index: number, relevance_score: number }[] }> { - console.log(`[fallback] Using Jaccard similarity for ${documents.length} documents`); + logInfo(`[fallback] Using Jaccard similarity for ${documents.length} documents`); // Convert texts to lowercase and tokenize by splitting on non-alphanumeric characters const queryTokens = new Set(query.toLowerCase().split(/\W+/).filter(t => t.length > 0)); @@ -37,11 +39,11 @@ export async function jaccardRank(query: string, documents: string[]): Promise<{ // Calculate Jaccard similarity const score = union.size === 0 ? 0 : intersection.size / union.size; - return {index, relevance_score: score}; + return { index, relevance_score: score }; }); // Sort by score in descending order results.sort((a, b) => b.relevance_score - a.relevance_score); - return {results}; + return { results }; } diff --git a/src/tools/dedup.ts b/src/tools/dedup.ts index 5eb4b74..a94eec5 100644 --- a/src/tools/dedup.ts +++ b/src/tools/dedup.ts @@ -1,6 +1,7 @@ -import {z} from 'zod'; -import {TokenTracker} from "../utils/token-tracker"; -import {ObjectGeneratorSafe} from "../utils/safe-generator"; +import { z } from 'zod'; +import { TokenTracker } from "../utils/token-tracker"; +import { ObjectGeneratorSafe } from "../utils/safe-generator"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; const responseSchema = z.object({ @@ -79,11 +80,11 @@ export async function dedupQueries( prompt, }); - console.log(TOOL_NAME, result.object.unique_queries); - return {unique_queries: result.object.unique_queries}; + logInfo(TOOL_NAME, { unique_queries: result.object.unique_queries }); + return { unique_queries: 
result.object.unique_queries }; } catch (error) { - console.error(`Error in ${TOOL_NAME}`, error); + logError(`Error in ${TOOL_NAME}`, { error }); throw error; } } \ No newline at end of file diff --git a/src/tools/embeddings.ts b/src/tools/embeddings.ts index ee142ae..6a8fd3a 100644 --- a/src/tools/embeddings.ts +++ b/src/tools/embeddings.ts @@ -1,6 +1,7 @@ -import {JINA_API_KEY} from "../config"; -import {JinaEmbeddingRequest, JinaEmbeddingResponse} from "../types"; +import { JINA_API_KEY } from "../config"; +import { JinaEmbeddingRequest, JinaEmbeddingResponse } from "../types"; import axiosClient from "../utils/axios-client"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; const BATCH_SIZE = 128; const API_URL = "https://api.jina.ai/v1/embeddings"; @@ -18,7 +19,7 @@ export async function getEmbeddings( model?: string, } = {} ): Promise<{ embeddings: number[][], tokens: number }> { - console.log(`[embeddings] Getting embeddings for ${texts.length} texts`); + logDebug(`[embeddings] Getting embeddings for ${texts.length} texts`); if (!JINA_API_KEY) { throw new Error('JINA_API_KEY is not set'); @@ -26,7 +27,7 @@ export async function getEmbeddings( // Handle empty input case if (texts.length === 0) { - return {embeddings: [], tokens: 0}; + return { embeddings: [], tokens: 0 }; } // Process in batches @@ -37,11 +38,11 @@ export async function getEmbeddings( for (let i = 0; i < texts.length; i += BATCH_SIZE) { const batchTexts = texts.slice(i, i + BATCH_SIZE); const currentBatch = Math.floor(i / BATCH_SIZE) + 1; - console.log(`[embeddings] Processing batch ${currentBatch}/${batchCount} (${batchTexts.length} texts)`); + logDebug(`[embeddings] Processing batch ${currentBatch}/${batchCount} (${batchTexts.length} texts)`); // Get embeddings for the batch with retry logic for missing indices const { batchEmbeddings, batchTokens } = await getBatchEmbeddingsWithRetry( - batchTexts, + batchTexts, options, currentBatch, batchCount @@ -49,7 +50,7 @@ 
export async function getEmbeddings( allEmbeddings.push(...batchEmbeddings); totalTokens += batchTokens; - console.log(`[embeddings] Batch ${currentBatch} complete. Tokens used: ${batchTokens}, total so far: ${totalTokens}`); + logDebug(`[embeddings] Batch ${currentBatch} complete. Tokens used: ${batchTokens}, total so far: ${totalTokens}`); } // Track token usage if tracker is provided @@ -61,8 +62,8 @@ export async function getEmbeddings( }); } - console.log(`[embeddings] Complete. Generated ${allEmbeddings.length} embeddings using ${totalTokens} tokens`); - return {embeddings: allEmbeddings, tokens: totalTokens}; + logDebug(`[embeddings] Complete. Generated ${allEmbeddings.length} embeddings using ${totalTokens} tokens`); + return { embeddings: allEmbeddings, tokens: totalTokens }; } // Helper function to get embeddings for a batch with retry logic for missing indices @@ -83,7 +84,7 @@ async function getBatchEmbeddingsWithRetry( let retryCount = 0; let textsToProcess = [...batchTexts]; // Copy the original texts let indexMap = new Map(); // Map to keep track of original indices - + // Initialize indexMap with original indices textsToProcess.forEach((_, idx) => { indexMap.set(idx, idx); @@ -115,18 +116,18 @@ async function getBatchEmbeddingsWithRetry( "Authorization": `Bearer ${JINA_API_KEY}` } } - ); + ); if (!response.data.data) { - console.error('No data returned from Jina API'); + logError('No data returned from Jina API'); if (retryCount === MAX_RETRIES - 1) { // On last retry, create placeholder embeddings const dimensionSize = options.dimensions || 1024; const placeholderEmbeddings = textsToProcess.map(text => { - console.error(`Failed to get embedding after all retries: [${truncateInputString(text)}...]`); + logError(`Failed to get embedding after all retries: [${truncateInputString(text)}...]`); return new Array(dimensionSize).fill(0); }); - + // Add embeddings in correct order for (let i = 0; i < textsToProcess.length; i++) { const originalIndex = 
indexMap.get(i)!; @@ -142,17 +143,17 @@ async function getBatchEmbeddingsWithRetry( const receivedIndices = new Set(response.data.data.map(item => item.index)); const dimensionSize = response.data.data[0]?.embedding?.length || options.dimensions || 1024; - + // Process successful embeddings const successfulEmbeddings: number[][] = []; const remainingTexts: (string | Record)[] = []; const newIndexMap = new Map(); - + for (let idx = 0; idx < textsToProcess.length; idx++) { if (receivedIndices.has(idx)) { // Find the item with this index const item = response.data.data.find(d => d.index === idx)!; - + // Get the original index and store in the result array const originalIndex = indexMap.get(idx)!; while (batchEmbeddings.length <= originalIndex) { @@ -165,48 +166,48 @@ async function getBatchEmbeddingsWithRetry( const newIndex = remainingTexts.length; newIndexMap.set(newIndex, indexMap.get(idx)!); remainingTexts.push(textsToProcess[idx]); - console.log(`Missing embedding for index ${idx}, will retry: [${truncateInputString(textsToProcess[idx])}...]`); + logWarning(`Missing embedding for index ${idx}, will retry: [${truncateInputString(textsToProcess[idx])}...]`); } } // Add tokens batchTokens += response.data.usage?.total_tokens || 0; - + // Update for next iteration textsToProcess = remainingTexts; indexMap = newIndexMap; - + // If all embeddings were successfully processed, break out of the loop if (textsToProcess.length === 0) { break; } - + // Increment retry count and log retryCount++; - console.log(`[embeddings] Batch ${currentBatch}/${batchCount} - Retrying ${textsToProcess.length} texts (attempt ${retryCount}/${MAX_RETRIES})`); + logDebug(`[embeddings] Batch ${currentBatch}/${batchCount} - Retrying ${textsToProcess.length} texts (attempt ${retryCount}/${MAX_RETRIES})`); } catch (error: any) { - console.error('Error calling Jina Embeddings API:', error); + logError('Error calling Jina Embeddings API:', { error }); if (error.response?.status === 402 || 
error.message.includes('InsufficientBalanceError') || error.message.includes('insufficient balance')) { return { batchEmbeddings: [], batchTokens: 0 }; } - + // On last retry, create placeholder embeddings if (retryCount === MAX_RETRIES - 1) { const dimensionSize = options.dimensions || 1024; for (let idx = 0; idx < textsToProcess.length; idx++) { const originalIndex = indexMap.get(idx)!; - console.error(`Failed to get embedding after all retries for index ${originalIndex}: [${truncateInputString(textsToProcess[idx])}...]`); - + logError(`Failed to get embedding after all retries for index ${originalIndex}: [${truncateInputString(textsToProcess[idx])}...]`); + while (batchEmbeddings.length <= originalIndex) { batchEmbeddings.push([]); } batchEmbeddings[originalIndex] = new Array(dimensionSize).fill(0); } } - + retryCount++; if (retryCount < MAX_RETRIES) { - console.log(`[embeddings] Batch ${currentBatch}/${batchCount} - Retry attempt ${retryCount}/${MAX_RETRIES} after error`); + logDebug(`[embeddings] Batch ${currentBatch}/${batchCount} - Retry attempt ${retryCount}/${MAX_RETRIES} after error`); // Wait before retrying to avoid overwhelming the API await new Promise(resolve => setTimeout(resolve, 1000)); } else { @@ -214,23 +215,23 @@ async function getBatchEmbeddingsWithRetry( } } } - + // Handle any remaining missing embeddings after max retries if (textsToProcess.length > 0) { - console.error(`[embeddings] Failed to get embeddings for ${textsToProcess.length} texts after ${MAX_RETRIES} retries`); + logError(`[embeddings] Failed to get embeddings for ${textsToProcess.length} texts after ${MAX_RETRIES} retries`); const dimensionSize = options.dimensions || 1024; - + for (let idx = 0; idx < textsToProcess.length; idx++) { const originalIndex = indexMap.get(idx)!; - console.error(`Creating zero embedding for index ${originalIndex} after all retries failed`); - + logError(`Creating zero embedding for index ${originalIndex} after all retries failed`); + while 
(batchEmbeddings.length <= originalIndex) { batchEmbeddings.push([]); } batchEmbeddings[originalIndex] = new Array(dimensionSize).fill(0); } } - + return { batchEmbeddings, batchTokens }; } diff --git a/src/tools/error-analyzer.ts b/src/tools/error-analyzer.ts index e29ebd6..bbf9d75 100644 --- a/src/tools/error-analyzer.ts +++ b/src/tools/error-analyzer.ts @@ -1,6 +1,7 @@ -import {ErrorAnalysisResponse, PromptPair, TrackerContext} from '../types'; -import {ObjectGeneratorSafe} from "../utils/safe-generator"; -import {Schemas} from "../utils/schemas"; +import { ErrorAnalysisResponse, PromptPair, TrackerContext } from '../types'; +import { ObjectGeneratorSafe } from "../utils/safe-generator"; +import { Schemas } from "../utils/schemas"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; function getPrompt(diaryContext: string[]): PromptPair { @@ -81,7 +82,6 @@ The answer is not definitive and fails to provide the requested information. La "blame": "The root cause of failure was getting stuck in a repetitive search pattern without adapting the strategy. Steps 4-5 repeated the same search, and step 6 deviated to less reliable entertainment sources instead of exploring business journals, news articles, or professional databases. Additionally, the process didn't attempt to triangulate age through indirect information like education history or career milestones.", "improvement": "1. Avoid repeating identical searches and implement a strategy to track previously searched terms. 2. When direct age/birthdate searches fail, try indirect approaches like: searching for earliest career mentions, finding university graduation years, or identifying first company founding dates. 3. Focus on high-quality business sources and avoid entertainment websites for professional information. 4. Consider using industry event appearances or conference presentations where age-related context might be mentioned. 5. 
If exact age cannot be determined, provide an estimated range based on career timeline and professional achievements.", - } `, @@ -107,14 +107,14 @@ export async function analyzeSteps( prompt: prompt.user }); - console.log(TOOL_NAME, result.object); + logInfo(TOOL_NAME, { object: result.object }); trackers?.actionTracker.trackThink(result.object.blame); trackers?.actionTracker.trackThink(result.object.improvement); return result.object as ErrorAnalysisResponse; } catch (error) { - console.error(`Error in ${TOOL_NAME}`, error); + logError(`Error in ${TOOL_NAME}`, { error }); throw error; } } \ No newline at end of file diff --git a/src/tools/evaluator.ts b/src/tools/evaluator.ts index b8e9a6d..6f85871 100644 --- a/src/tools/evaluator.ts +++ b/src/tools/evaluator.ts @@ -1,8 +1,9 @@ -import {GenerateObjectResult} from 'ai'; -import {AnswerAction, EvaluationResponse, EvaluationType, KnowledgeItem, PromptPair, TrackerContext} from '../types'; -import {ObjectGeneratorSafe} from "../utils/safe-generator"; -import {Schemas} from "../utils/schemas"; -import {getKnowledgeStr} from "../utils/text-tools"; +import { GenerateObjectResult } from 'ai'; +import { AnswerAction, EvaluationResponse, EvaluationType, KnowledgeItem, PromptPair, TrackerContext } from '../types'; +import { ObjectGeneratorSafe } from "../utils/safe-generator"; +import { Schemas } from "../utils/schemas"; +import { getKnowledgeStr } from "../utils/text-tools"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; const TOOL_NAME = 'evaluator'; @@ -572,7 +573,7 @@ export async function evaluateQuestion( prompt: prompt.user }); - console.log('Question Evaluation:', result.object); + logInfo('Question Evaluation:', result.object); // Always include definitive in types const types: EvaluationType[] = []; @@ -581,14 +582,14 @@ export async function evaluateQuestion( if (result.object.needsPlurality) types.push('plurality'); if (result.object.needsCompleteness) types.push('completeness'); - 
console.log('Question Metrics:', question, types); + logInfo('Question Metrics:', { question, types }); trackers?.actionTracker.trackThink(result.object.think); // Always evaluate definitive first, then freshness (if needed), then plurality (if needed) return types; } catch (error) { - console.error('Error in question evaluation:', error); + logError('Error in question evaluation:', { error }); // Default to no check return []; } @@ -611,7 +612,7 @@ async function performEvaluation( trackers.actionTracker.trackThink(result.object.think) - console.log(`${evaluationType} ${TOOL_NAME}`, result.object); + logInfo(`${evaluationType} ${TOOL_NAME}`, result.object); return result; } @@ -649,7 +650,7 @@ export async function evaluateAnswer( prompt = getRejectAllAnswersPrompt(question, action, allKnowledge); break; default: - console.error(`Unknown evaluation type: ${evaluationType}`); + logError(`Unknown evaluation type: ${evaluationType}`); } if (prompt) { result = await performEvaluation( diff --git a/src/tools/grounding.ts b/src/tools/grounding.ts index f1919ba..f7c82e9 100644 --- a/src/tools/grounding.ts +++ b/src/tools/grounding.ts @@ -1,7 +1,8 @@ import { generateText } from 'ai'; -import {getModel} from "../config"; +import { getModel } from "../config"; import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google'; -import {TokenTracker} from "../utils/token-tracker"; +import { TokenTracker } from "../utils/token-tracker"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; const model = getModel('searchGrounding') @@ -10,7 +11,7 @@ export async function grounding(query: string, tracker?: TokenTracker): Promise< const { text, experimental_providerMetadata, usage } = await generateText({ model, prompt: - `Current date is ${new Date().toISOString()}. Find the latest answer to the following question: + `Current date is ${new Date().toISOString()}. 
Find the latest answer to the following question: ${query} @@ -18,8 +19,8 @@ Must include the date and time of the latest answer.`, }); const metadata = experimental_providerMetadata?.google as - | GoogleGenerativeAIProviderMetadata - | undefined; + | GoogleGenerativeAIProviderMetadata + | undefined; const groundingMetadata = metadata?.groundingMetadata; // Extract and concatenate all groundingSupport text into a single line @@ -28,11 +29,11 @@ Must include the date and time of the latest answer.`, .join(' ') || ''; (tracker || new TokenTracker()).trackUsage('grounding', usage); - console.log('Grounding:', {text, groundedText}); + logInfo('Grounding:', { text, groundedText }); return text + '|' + groundedText; } catch (error) { - console.error('Error in search:', error); + logError('Error in search:', { error }); throw error; } } \ No newline at end of file diff --git a/src/tools/jina-classify-spam.ts b/src/tools/jina-classify-spam.ts index 1e1d0f5..79ca48c 100644 --- a/src/tools/jina-classify-spam.ts +++ b/src/tools/jina-classify-spam.ts @@ -1,6 +1,7 @@ import { TokenTracker } from "../utils/token-tracker"; import { JINA_API_KEY } from "../config"; import axiosClient from "../utils/axios-client"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; const JINA_API_URL = 'https://api.jina.ai/v1/classify'; @@ -80,9 +81,9 @@ export async function classifyText( return false; // Default to false if no prediction is available } catch (error) { if (error instanceof Error && error.message.includes('timed out')) { - console.error('Classification request timed out:', error.message); + logError('Classification request timed out:', { error: error.message }); } else { - console.error('Error in classifying text:', error); + logError('Error in classifying text:', { error }); } return false; // Default to false in case of error or timeout } diff --git a/src/tools/jina-classify.ts b/src/tools/jina-classify.ts new file mode 100644 index 0000000..aec803d --- 
/dev/null +++ b/src/tools/jina-classify.ts @@ -0,0 +1,6 @@ +import { logInfo, logError, logDebug, logWarning } from '../logging'; + +// Replace console.log statements +logInfo('Classification result:', { result }); + +logError('Classification error:', { error }); \ No newline at end of file diff --git a/src/tools/jina-dedup.ts b/src/tools/jina-dedup.ts index 26f4e73..e3d0126 100644 --- a/src/tools/jina-dedup.ts +++ b/src/tools/jina-dedup.ts @@ -1,6 +1,7 @@ -import {TokenTracker} from "../utils/token-tracker"; -import {cosineSimilarity} from "./cosine"; -import {getEmbeddings} from "./embeddings"; +import { TokenTracker } from "../utils/token-tracker"; +import { cosineSimilarity } from "./cosine"; +import { getEmbeddings } from "./embeddings"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; const SIMILARITY_THRESHOLD = 0.86; // Adjustable threshold for cosine similarity @@ -20,7 +21,7 @@ export async function dedupQueries( // Get embeddings for all queries in one batch const allQueries = [...newQueries, ...existingQueries]; - const {embeddings: allEmbeddings} = await getEmbeddings(allQueries, tracker); + const { embeddings: allEmbeddings } = await getEmbeddings(allQueries, tracker); // If embeddings is empty (due to 402 error), return all new queries if (!allEmbeddings.length) { @@ -66,12 +67,12 @@ export async function dedupQueries( usedIndices.add(i); } } - console.log('Dedup:', uniqueQueries); + logInfo('Unique queries:', { queries: uniqueQueries }); return { unique_queries: uniqueQueries, }; } catch (error) { - console.error('Error in deduplication analysis:', error); + logError('Deduplication error:', { error }); // return all new queries if there is an error return { diff --git a/src/tools/jina-latechunk.ts b/src/tools/jina-latechunk.ts index 20613b0..d7060de 100644 --- a/src/tools/jina-latechunk.ts +++ b/src/tools/jina-latechunk.ts @@ -1,7 +1,8 @@ -import {TrackerContext} from "../types"; -import {Schemas} from "../utils/schemas"; 
-import {cosineSimilarity} from "./cosine"; -import {getEmbeddings} from "./embeddings"; +import { TrackerContext } from "../types"; +import { Schemas } from "../utils/schemas"; +import { cosineSimilarity } from "./cosine"; +import { getEmbeddings } from "./embeddings"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; // Refactored cherryPick function export async function cherryPick(question: string, longContext: string, options: any = {}, trackers: TrackerContext, schemaGen: Schemas, url: string) { @@ -13,7 +14,7 @@ export async function cherryPick(question: string, longContext: string, options: if (longContext.length < snippetLength * 2) { // If the context is shorter than the snippet length, return the whole context - console.log('content is too short, dont bother'); + logInfo('content is too short, dont bother'); return longContext; } @@ -23,9 +24,9 @@ export async function cherryPick(question: string, longContext: string, options: chunks.push(longContext.substring(i, Math.min(i + chunkSize, longContext.length))); } - console.log('late chunking enabled! num chunks:', chunks.length); + logInfo('late chunking enabled! 
num chunks:', { count: chunks.length }); - trackers.actionTracker.trackThink('late_chunk', schemaGen.languageCode, {url}); + trackers.actionTracker.trackThink('late_chunk', schemaGen.languageCode, { url }); try { if (question.trim().length === 0) { @@ -61,7 +62,7 @@ export async function cherryPick(question: string, longContext: string, options: // Verify that we got embeddings for all chunks if (allChunkEmbeddings.length !== chunks.length) { - console.error(`Got ${allChunkEmbeddings.length} embeddings for ${chunks.length} chunks`); + logError(`Got ${allChunkEmbeddings.length} embeddings for ${chunks.length} chunks`); } // Calculate cosine similarity between the question and each chunk @@ -115,7 +116,7 @@ ${snippet} `.trim()).join("\n\n"); } catch (error) { - console.error('Error in late chunking:', error); + logError('Error in late chunking:', { error }); // Fallback: just return the beginning of the context up to the desired length return longContext.substring(0, snippetLength * numSnippets); } diff --git a/src/tools/jina-rerank.ts b/src/tools/jina-rerank.ts index 4ac8eaa..716472c 100644 --- a/src/tools/jina-rerank.ts +++ b/src/tools/jina-rerank.ts @@ -1,6 +1,7 @@ -import {TokenTracker} from "../utils/token-tracker"; -import {JINA_API_KEY} from "../config"; +import { TokenTracker } from "../utils/token-tracker"; +import { JINA_API_KEY } from "../config"; import axiosClient from '../utils/axios-client'; +import { logInfo, logError, logDebug, logWarning } from '../logging'; const JINA_API_URL = 'https://api.jina.ai/v1/rerank'; @@ -43,7 +44,7 @@ export async function rerankDocuments( batches.push(documents.slice(i, i + batchSize)); } - console.log(`Rerank ${documents.length} documents in ${batches.length} batches of up to ${batchSize} each`); + logInfo(`Processing ${documents.length} documents in ${batches.length} batches`); // Process all batches in parallel const batchResults = await Promise.all( @@ -93,9 +94,9 @@ export async function rerankDocuments( document: 
result.document })); - return {results: finalResults}; + return { results: finalResults }; } catch (error) { - console.error('Error in reranking documents:', error); + logError('Reranking error:', { error }); // Return empty results if there is an error return { diff --git a/src/tools/jina-search.ts b/src/tools/jina-search.ts index 4e0fd81..e47c737 100644 --- a/src/tools/jina-search.ts +++ b/src/tools/jina-search.ts @@ -2,6 +2,7 @@ import { TokenTracker } from "../utils/token-tracker"; import { JinaSearchResponse, SERPQuery } from '../types'; import { JINA_API_KEY } from "../config"; import axiosClient from '../utils/axios-client'; +import { logInfo, logError, logDebug, logWarning } from '../logging'; export async function search( query: SERPQuery, @@ -35,7 +36,7 @@ export async function search( throw new Error('Invalid response format'); } - console.log('Search results meta:', data.meta); + logInfo('Search results metadata:', { metadata: data.meta }); const tokenTracker = tracker || new TokenTracker(); tokenTracker.trackUsage('search', { @@ -46,7 +47,7 @@ export async function search( return { response: data }; } catch (error) { - console.error('Error in jina search:', error); + logError('Search error:', { error }); throw error; } } \ No newline at end of file diff --git a/src/tools/md-fixer.ts b/src/tools/md-fixer.ts index 6643631..a068720 100644 --- a/src/tools/md-fixer.ts +++ b/src/tools/md-fixer.ts @@ -3,6 +3,7 @@ import { getKnowledgeStr } from "../utils/text-tools"; import { getModel } from "../config"; import { generateText } from "ai"; import { Schemas } from "../utils/schemas"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; function getPrompt(mdContent: string, allKnowledge: KnowledgeItem[], schema: Schemas): PromptPair { @@ -80,18 +81,18 @@ export async function reviseAnswer( trackers.tokenTracker.trackUsage(TOOL_NAME, result.usage) - console.log(TOOL_NAME, result.text); - console.log('repaired before/after', mdContent.length, 
result.text.length); + logInfo(TOOL_NAME, { text: result.text }); + logInfo('repaired before/after', { before: mdContent.length, after: result.text.length }); if (result.text.length < mdContent.length * 0.85) { - console.error(`repaired content length ${result.text.length} is significantly shorter than original content ${mdContent.length}, return original content instead.`); + logError(`repaired content length ${result.text.length} is significantly shorter than original content ${mdContent.length}, return original content instead.`); return mdContent; } return result.text; } catch (error) { - console.error(`Error in ${TOOL_NAME}`, error); + logError(`Error in ${TOOL_NAME}`, { error }); return mdContent; } } \ No newline at end of file diff --git a/src/tools/query-rewriter.ts b/src/tools/query-rewriter.ts index 54a2e77..a9da757 100644 --- a/src/tools/query-rewriter.ts +++ b/src/tools/query-rewriter.ts @@ -1,6 +1,7 @@ -import {PromptPair, SearchAction, SERPQuery, TrackerContext} from '../types'; -import {ObjectGeneratorSafe} from "../utils/safe-generator"; -import {Schemas} from "../utils/schemas"; +import { PromptPair, SearchAction, SERPQuery, TrackerContext } from '../types'; +import { ObjectGeneratorSafe } from "../utils/safe-generator"; +import { Schemas } from "../utils/schemas"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; function getPrompt(query: string, think: string, context: string): PromptPair { @@ -200,7 +201,7 @@ Given those info, now please generate the best effective queries that follow JSO } const TOOL_NAME = 'queryRewriter'; -export async function rewriteQuery(action: SearchAction, context: string, trackers: TrackerContext, schemaGen: Schemas): Promise { +export async function rewriteQuery(action: SearchAction, context: string, trackers: TrackerContext, schemaGen: Schemas): Promise { try { const generator = new ObjectGeneratorSafe(trackers.tokenTracker); const queryPromises = action.searchRequests.map(async (req) => { @@ 
-217,10 +218,10 @@ export async function rewriteQuery(action: SearchAction, context: string, track const queryResults = await Promise.all(queryPromises); const allQueries: SERPQuery[] = queryResults.flat(); - console.log(TOOL_NAME, allQueries); + logInfo(TOOL_NAME, { queries: allQueries }); return allQueries; } catch (error) { - console.error(`Error in ${TOOL_NAME}`, error); + logError('Query rewrite error:', { error }); throw error; } } \ No newline at end of file diff --git a/src/tools/read.ts b/src/tools/read.ts index fe73791..64161c4 100644 --- a/src/tools/read.ts +++ b/src/tools/read.ts @@ -2,6 +2,7 @@ import { TokenTracker } from "../utils/token-tracker"; import { ReadResponse } from '../types'; import { JINA_API_KEY } from "../config"; import axiosClient from "../utils/axios-client"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; export async function readUrl( url: string, @@ -50,7 +51,7 @@ export async function readUrl( throw new Error('Invalid response data'); } - console.log('Read:', { + logInfo('Read:', { title: data.data.title, url: data.data.url, tokens: data.data.usage?.tokens || 0 @@ -66,7 +67,7 @@ export async function readUrl( return { response: data }; } catch (error: any) { - console.error(`Error reading URL: ${error.message}`); + logError(`Error reading URL: ${error.message}`); throw error; } } \ No newline at end of file diff --git a/src/tools/segment.ts b/src/tools/segment.ts index c600ba3..f70c2f1 100644 --- a/src/tools/segment.ts +++ b/src/tools/segment.ts @@ -1,7 +1,8 @@ -import {TokenTracker} from "../utils/token-tracker"; -import {JINA_API_KEY} from "../config"; -import {TrackerContext} from "../types"; +import { TokenTracker } from "../utils/token-tracker"; +import { JINA_API_KEY } from "../config"; +import { TrackerContext } from "../types"; import axiosClient from "../utils/axios-client"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; export async function segmentText( content: string, @@ 
-24,7 +25,7 @@ export async function segmentText( // Split content into batches const batches = splitTextIntoBatches(content, MAX_BATCH_SIZE); - console.log(`Split content into ${batches.length} batches`); + logDebug(`Processing ${batches.length} batches`); // Calculate offsets for each batch upfront const batchOffsets: number[] = []; @@ -36,10 +37,10 @@ export async function segmentText( // Process all batches in parallel const batchPromises = batches.map(async (batch, i) => { - console.log(`[Segment] Processing batch ${i + 1}/${batches.length} (size: ${batch.length})`); + logDebug(`[Segment] Processing batch ${i + 1}/${batches.length} (size: ${batch.length})`); try { - const {data} = await axiosClient.post( + const { data } = await axiosClient.post( 'https://api.jina.ai/v1/segment', { content: batch, @@ -60,7 +61,7 @@ export async function segmentText( throw new Error('Invalid response data'); } - console.log(`Batch ${i + 1} result:`, { + logDebug(`Batch ${i + 1} result:`, { numChunks: data.num_chunks, numTokens: data.num_tokens, tokenizer: data.tokenizer @@ -72,11 +73,11 @@ export async function segmentText( // Adjust chunk positions to account for the offset of this batch const adjustedPositions = data.chunk_positions ? 
data.chunk_positions.map((position: [number, number]) => { - return [ - position[0] + offset, - position[1] + offset - ] as [number, number]; - }) + return [ + position[0] + offset, + position[1] + offset + ] as [number, number]; + }) : []; return { @@ -85,7 +86,7 @@ export async function segmentText( tokens: data.usage?.tokens || 0 }; } catch (error: any) { - console.error(`Error processing batch ${i + 1}: ${error.message}`); + logError(`Error processing batch ${i + 1}: ${error.message}`); throw error; } }); diff --git a/src/tools/serper-search.ts b/src/tools/serper-search.ts index ca81930..f451c2b 100644 --- a/src/tools/serper-search.ts +++ b/src/tools/serper-search.ts @@ -1,7 +1,8 @@ -import {SERPER_API_KEY} from "../config"; +import { SERPER_API_KEY } from "../config"; import axiosClient from "../utils/axios-client"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; -import {SerperSearchResponse, SERPQuery} from '../types'; +import { SerperSearchResponse, SERPQuery } from '../types'; export async function serperSearch(query: SERPQuery): Promise<{ response: SerperSearchResponse }> { @@ -21,7 +22,7 @@ export async function serperSearch(query: SERPQuery): Promise<{ response: Serper } // Maintain the same return structure as the original code - return {response: response.data}; + return { response: response.data }; } @@ -42,5 +43,5 @@ export async function serperSearchOld(query: string): Promise<{ response: Serper } // Maintain the same return structure as the original code - return {response: response.data}; + return { response: response.data }; } diff --git a/src/tools/token-tracker.ts b/src/tools/token-tracker.ts new file mode 100644 index 0000000..d735f3e --- /dev/null +++ b/src/tools/token-tracker.ts @@ -0,0 +1,5 @@ +import { logInfo, logError, logDebug, logWarning } from '../logging'; + +logInfo('Token usage:', { usage }); + +logError('Token tracking error:', { error }); \ No newline at end of file diff --git a/src/tools/url-tools.ts 
b/src/tools/url-tools.ts new file mode 100644 index 0000000..4b9e5ec --- /dev/null +++ b/src/tools/url-tools.ts @@ -0,0 +1,6 @@ +import { logInfo, logError, logDebug, logWarning } from '../logging'; + +// Replace console.log statements +logInfo('URL info:', { url }); + +logError('URL error:', { error }); \ No newline at end of file diff --git a/src/tools/web-search.ts b/src/tools/web-search.ts new file mode 100644 index 0000000..b1f7001 --- /dev/null +++ b/src/tools/web-search.ts @@ -0,0 +1,5 @@ +import { logInfo, logError, logDebug, logWarning } from '../logging'; + +logInfo('Search info:', { query }); + +logError('Search error:', { error }); \ No newline at end of file diff --git a/src/utils/axios-client.ts b/src/utils/axios-client.ts index a0f26d7..6124da9 100644 --- a/src/utils/axios-client.ts +++ b/src/utils/axios-client.ts @@ -1,11 +1,12 @@ -import axios, { AxiosRequestConfig } from 'axios'; +import axios, { AxiosRequestConfig } from 'axios'; +import { logInfo, logError, logDebug, logWarning } from '../logging'; // Default timeout in milliseconds const DEFAULT_TIMEOUT = 30000; - + // Maximum content length to prevent OOM issues (10MB) -const MAX_CONTENT_LENGTH = 10 * 1024 * 1024; - +const MAX_CONTENT_LENGTH = 10 * 1024 * 1024; + // Maximum number of redirects to follow const MAX_REDIRECTS = 5; @@ -21,12 +22,12 @@ const KEEP_ALIVE_TIMEOUT = 30000; // Scheduling strategy for HTTP/2 connections // LIFO (Last In, First Out) is generally better for performance const SCHEDULING = 'lifo'; - + // Base configuration for all axios instances -const baseConfig: AxiosRequestConfig = { - timeout: DEFAULT_TIMEOUT, - maxContentLength: MAX_CONTENT_LENGTH, - maxRedirects: MAX_REDIRECTS, +const baseConfig: AxiosRequestConfig = { + timeout: DEFAULT_TIMEOUT, + maxContentLength: MAX_CONTENT_LENGTH, + maxRedirects: MAX_REDIRECTS, httpsAgent: new (require('https').Agent)({ maxSockets: MAX_SOCKETS, maxFreeSockets: MAX_FREE_SOCKETS, @@ -41,40 +42,40 @@ const baseConfig: 
AxiosRequestConfig = { timeout: KEEP_ALIVE_TIMEOUT, scheduling: SCHEDULING, }), - headers: { - 'Accept': 'application/json', - 'Content-Type': 'application/json', + headers: { + 'Accept': 'application/json', + 'Content-Type': 'application/json', }, }; // Create a single axios instance with the base configuration -const axiosClient = axios.create(baseConfig); - +const axiosClient = axios.create(baseConfig); + // Add response interceptor for consistent error handling -axiosClient.interceptors.response.use( - (response) => response, +axiosClient.interceptors.response.use( + (response) => response, (error) => { if (error.code === 'ECONNABORTED') { - console.error('Request timed out:', error.message); + logError('Request timed out:', { error: error.message }); error.request?.destroy?.(); } - if (axios.isAxiosError(error)) { - if (error.response) { - const status = error.response.status; - const errorData = error.response.data as any; - - if (status === 402) { - throw new Error(errorData?.readableMessage || 'Insufficient balance'); - } - throw new Error(errorData?.readableMessage || `HTTP Error ${status}`); - } else if (error.request) { - throw new Error(`No response received from server`); - } else { - throw new Error(`Request failed: ${error.message}`); + if (axios.isAxiosError(error)) { + if (error.response) { + const status = error.response.status; + const errorData = error.response.data as any; + + if (status === 402) { + throw new Error(errorData?.readableMessage || 'Insufficient balance'); + } + throw new Error(errorData?.readableMessage || `HTTP Error ${status}`); + } else if (error.request) { + throw new Error(`No response received from server`); + } else { + throw new Error(`Request failed: ${error.message}`); } } - throw error; + throw error; } -); +); export default axiosClient; \ No newline at end of file diff --git a/src/utils/image-tools.ts b/src/utils/image-tools.ts index 185d094..df9605d 100644 --- a/src/utils/image-tools.ts +++ 
b/src/utils/image-tools.ts @@ -3,20 +3,21 @@ import { getEmbeddings } from '../tools/embeddings'; import { TokenTracker } from './token-tracker'; import { ImageObject } from '../types'; import { cosineSimilarity } from '../tools/cosine'; +import { logInfo, logError, logDebug, logWarning } from '../logging'; export type { Canvas, Image } from '@napi-rs/canvas'; export const downloadFile = async (uri: string) => { - const resp = await fetch(uri); - if (!(resp.ok && resp.body)) { - throw new Error(`Unexpected response ${resp.statusText}`); - } - const contentLength = parseInt(resp.headers.get('content-length') || '0'); - if (contentLength > 1024 * 1024 * 100) { - throw new Error('File too large'); - } - const buff = await resp.arrayBuffer(); + const resp = await fetch(uri); + if (!(resp.ok && resp.body)) { + throw new Error(`Unexpected response ${resp.statusText}`); + } + const contentLength = parseInt(resp.headers.get('content-length') || '0'); + if (contentLength > 1024 * 1024 * 100) { + throw new Error('File too large'); + } + const buff = await resp.arrayBuffer(); - return { buff, contentType: resp.headers.get('content-type') }; + return { buff, contentType: resp.headers.get('content-type') }; }; const _loadImage = async (input: string | Buffer) => { @@ -24,81 +25,81 @@ const _loadImage = async (input: string | Buffer) => { let contentType; if (typeof input === 'string') { - if (input.startsWith('data:')) { - const firstComma = input.indexOf(','); - const header = input.slice(0, firstComma); - const data = input.slice(firstComma + 1); - const encoding = header.split(';')[1]; - contentType = header.split(';')[0].split(':')[1]; - if (encoding?.startsWith('base64')) { - buff = Buffer.from(data, 'base64'); - } else { - buff = Buffer.from(decodeURIComponent(data), 'utf-8'); - } + if (input.startsWith('data:')) { + const firstComma = input.indexOf(','); + const header = input.slice(0, firstComma); + const data = input.slice(firstComma + 1); + const encoding = 
header.split(';')[1]; + contentType = header.split(';')[0].split(':')[1]; + if (encoding?.startsWith('base64')) { + buff = Buffer.from(data, 'base64'); + } else { + buff = Buffer.from(decodeURIComponent(data), 'utf-8'); } - if (input.startsWith('http')) { - if (input.endsWith('.svg')) { - throw new Error('Unsupported image type'); - } - const r = await downloadFile(input); - buff = Buffer.from(r.buff); - contentType = r.contentType; + } + if (input.startsWith('http')) { + if (input.endsWith('.svg')) { + throw new Error('Unsupported image type'); } + const r = await downloadFile(input); + buff = Buffer.from(r.buff); + contentType = r.contentType; + } } if (!buff) { - throw new Error('Invalid input'); + throw new Error('Invalid input'); } const img = await canvas.loadImage(buff).catch((err) => { - console.error('Error loading image:', err); + logError('Error loading image:', { error: err }); return undefined; }); - + return img; } export const loadImage = async (uri: string | Buffer) => { - try { - const theImage = await _loadImage(uri); + try { + const theImage = await _loadImage(uri); - return theImage; - } catch (err: any) { - if (err?.message?.includes('Unsupported image type') || err?.message?.includes('unsupported')) { - throw new Error(`Unknown image format for ${uri.slice(0, 128)}`); - } - throw err; + return theImage; + } catch (err: any) { + if (err?.message?.includes('Unsupported image type') || err?.message?.includes('unsupported')) { + throw new Error(`Unknown image format for ${uri.slice(0, 128)}`); } + throw err; + } } export const fitImageToSquareBox = (image: canvas.Image | canvas.Canvas, size: number = 1024) => { - if (image.width <= size && image.height <= size) { - const canvasInstance = canvas.createCanvas(image.width, image.height); - const ctx = canvasInstance.getContext('2d'); - ctx.drawImage(image, 0, 0, image.width, image.height, 0, 0, canvasInstance.width, canvasInstance.height); - - return canvasInstance; - } - - const aspectRatio = 
image.width / image.height; - - const resizedWidth = Math.round(aspectRatio > 1 ? size : size * aspectRatio); - const resizedHeight = Math.round(aspectRatio > 1 ? size / aspectRatio : size); - - const canvasInstance = canvas.createCanvas(resizedWidth, resizedHeight); + if (image.width <= size && image.height <= size) { + const canvasInstance = canvas.createCanvas(image.width, image.height); const ctx = canvasInstance.getContext('2d'); - ctx.drawImage(image, 0, 0, image.width, image.height, 0, 0, resizedWidth, resizedHeight); + ctx.drawImage(image, 0, 0, image.width, image.height, 0, 0, canvasInstance.width, canvasInstance.height); return canvasInstance; + } + + const aspectRatio = image.width / image.height; + + const resizedWidth = Math.round(aspectRatio > 1 ? size : size * aspectRatio); + const resizedHeight = Math.round(aspectRatio > 1 ? size / aspectRatio : size); + + const canvasInstance = canvas.createCanvas(resizedWidth, resizedHeight); + const ctx = canvasInstance.getContext('2d'); + ctx.drawImage(image, 0, 0, image.width, image.height, 0, 0, resizedWidth, resizedHeight); + + return canvasInstance; } export const canvasToDataUrl = (canvas: canvas.Canvas, mimeType?: 'image/png' | 'image/jpeg') => { - return canvas.toDataURLAsync((mimeType || 'image/png') as 'image/png'); + return canvas.toDataURLAsync((mimeType || 'image/png') as 'image/png'); } export const canvasToBuffer = (canvas: canvas.Canvas, mimeType?: 'image/png' | 'image/jpeg') => { - return canvas.toBuffer((mimeType || 'image/png') as 'image/png'); + return canvas.toBuffer((mimeType || 'image/png') as 'image/png'); } export const processImage = async (url: string, tracker: TokenTracker): Promise => { @@ -117,7 +118,7 @@ export const processImage = async (url: string, tracker: TokenTracker): Promise< const base64Data = (await canvasToDataUrl(canvas)).split(',')[1]; img.src = ''; // Clear the image source to free memory - const {embeddings} = await getEmbeddings([{ image: base64Data }], tracker, { + 
const { embeddings } = await getEmbeddings([{ image: base64Data }], tracker, { dimensions: 512, model: 'jina-clip-v2', }); @@ -136,7 +137,7 @@ export const dedupImagesWithEmbeddings = ( newImages: ImageObject[], // New images with embeddings existingImages: ImageObject[], // Existing images with embeddings similarityThreshold: number = 0.86, // Default similarity threshold -): ImageObject[] =>{ +): ImageObject[] => { try { // Quick return for single new image with no existing images if (newImages.length === 1 && existingImages.length === 0) { @@ -185,7 +186,7 @@ export const dedupImagesWithEmbeddings = ( return uniqueImages; } catch (error) { - console.error('Error in image deduplication analysis:', error); + logError('Error in image deduplication analysis:', { error }); // Return all new images if there is an error return newImages; diff --git a/src/utils/safe-generator.ts b/src/utils/safe-generator.ts index 8e37061..d4936e3 100644 --- a/src/utils/safe-generator.ts +++ b/src/utils/safe-generator.ts @@ -1,4 +1,4 @@ -import {z} from 'zod'; +import { z } from 'zod'; import { CoreMessage, generateObject, @@ -6,9 +6,10 @@ import { NoObjectGeneratedError, Schema } from "ai"; -import {TokenTracker} from "./token-tracker"; -import {getModel, ToolName, getToolConfig} from "../config"; +import { TokenTracker } from "./token-tracker"; +import { getModel, ToolName, getToolConfig } from "../config"; import Hjson from 'hjson'; // Import Hjson library +import { logInfo, logError, logDebug, logWarning } from '../logging'; interface GenerateObjectResult { object: T; @@ -168,7 +169,7 @@ export class ObjectGeneratorSafe { } catch (parseError) { if (numRetries > 0) { - console.error(`${model} failed on object generation -> manual parsing failed -> retry with ${numRetries - 1} retries remaining`); + logError(`${model} failed on object generation -> manual parsing failed -> retry with ${numRetries - 1} retries remaining`); return this.generateObject({ model, schema, @@ -179,7 +180,7 @@ 
export class ObjectGeneratorSafe { }); } else { // Second fallback: Try with fallback model if provided - console.error(`${model} failed on object generation -> manual parsing failed -> trying fallback with distilled schema`); + logError(`${model} failed on object generation -> manual parsing failed -> trying fallback with distilled schema`); try { let failedOutput = ''; @@ -200,7 +201,7 @@ export class ObjectGeneratorSafe { }); this.tokenTracker.trackUsage('fallback', fallbackResult.usage); // Track against fallback model - console.log('Distilled schema parse success!'); + logInfo('Distilled schema parse success!'); return fallbackResult; } catch (fallbackError) { // If fallback model also fails, try parsing its error response @@ -209,7 +210,7 @@ export class ObjectGeneratorSafe { this.tokenTracker.trackUsage('fallback', lastChanceResult.usage); return lastChanceResult; } catch (finalError) { - console.error(`All recovery mechanisms failed`); + logError(`All recovery mechanisms failed`); throw error; // Throw original error for better debugging } } @@ -220,11 +221,11 @@ export class ObjectGeneratorSafe { private async handleGenerateObjectError(error: unknown): Promise> { if (NoObjectGeneratedError.isInstance(error)) { - console.error('Object not generated according to schema, fallback to manual parsing'); + logError('Object not generated according to schema, fallback to manual parsing'); try { // First try standard JSON parsing const partialResponse = JSON.parse((error as any).text); - console.log('JSON parse success!') + logInfo('JSON parse success!'); return { object: partialResponse as T, usage: (error as any).usage @@ -233,13 +234,13 @@ export class ObjectGeneratorSafe { // Use Hjson to parse the error response for more lenient parsing try { const hjsonResponse = Hjson.parse((error as any).text); - console.log('Hjson parse success!') + logInfo('Hjson parse success!'); return { object: hjsonResponse as T, usage: (error as any).usage }; } catch (hjsonError) { - 
console.error('Both JSON and Hjson parsing failed:', hjsonError); + logError('Both JSON and Hjson parsing failed:', { error: hjsonError }); throw error; } } diff --git a/src/utils/schemas.ts b/src/utils/schemas.ts index d5fb23e..19e9661 100644 --- a/src/utils/schemas.ts +++ b/src/utils/schemas.ts @@ -1,6 +1,7 @@ import { z } from "zod"; import { ObjectGeneratorSafe } from "./safe-generator"; import { EvaluationType, PromptPair } from "../types"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; export const MAX_URLS_PER_STEP = 5 export const MAX_QUERIES_PER_STEP = 5 @@ -117,7 +118,7 @@ export class Schemas { this.languageCode = result.object.langCode; this.languageStyle = result.object.langStyle; - console.log(`langauge`, result.object); + logInfo(`language`, { object: result.object }); } getLanguagePrompt() { @@ -162,7 +163,7 @@ export class Schemas { queries: z.array( z.object({ tbs: z.enum(['qdr:h', 'qdr:d', 'qdr:w', 'qdr:m', 'qdr:y']).describe('time-based search filter, must use this field if the search request asks for latest info. qdr:h for past hour, qdr:d for past 24 hours, qdr:w for past week, qdr:m for past month, qdr:y for past year. Choose exactly one.'), - location: z.string().describe('defines from where you want the search to originate. It is recommended to specify location at the city level in order to simulate a real user’s search.').optional(), + location: z.string().describe('defines from where you want the search to originate. It is recommended to specify location at the city level in order to simulate a real user\'s search.').optional(), q: z.string().describe(`keyword-based search query, 2-3 words preferred, total length < 30 characters. ${this.searchLanguageCode ? 
`Must in ${this.searchLanguageCode}` : ''}`).max(50), })) .max(MAX_QUERIES_PER_STEP) diff --git a/src/utils/text-tools.ts b/src/utils/text-tools.ts index 188c935..424af25 100644 --- a/src/utils/text-tools.ts +++ b/src/utils/text-tools.ts @@ -1,7 +1,8 @@ -import {AnswerAction, KnowledgeItem, Reference} from "../types"; +import { AnswerAction, KnowledgeItem, Reference } from "../types"; import i18nJSON from './i18n.json'; -import {JSDOM} from 'jsdom'; +import { JSDOM } from 'jsdom'; import fs from "fs/promises"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; export function buildMdFromAnswer(answer: AnswerAction): string { @@ -96,7 +97,7 @@ export function repairMarkdownFootnotes( // No footnotes in answer but we have references - append them at the end if (validFootnotes.length === 0) { const appendedCitations = Array.from( - {length: references.length}, + { length: references.length }, (_, i) => `[^${i + 1}]` ).join(''); @@ -124,7 +125,7 @@ ${formattedReferences} // Create citations for unused references const unusedReferences = Array.from( - {length: references.length}, + { length: references.length }, (_, i) => !usedIndices.has(i + 1) ? 
`[^${i + 1}]` : '' ).join(''); @@ -260,7 +261,7 @@ export function getI18nText(key: string, lang = 'en', params: Record; // 确保语言代码存在,如果不存在则使用英语作为后备 if (!i18nData[lang]) { - console.error(`Language '${lang}' not found, falling back to English.`); + logError(`Language '${lang}' not found, falling back to English.`); lang = 'en'; } @@ -269,12 +270,12 @@ export function getI18nText(key: string, lang = 'en', params: Record { + return this.usages.reduce((acc, { usage }) => { acc.promptTokens += usage.promptTokens; acc.completionTokens += usage.completionTokens; acc.totalTokens += usage.totalTokens; return acc; - }, {promptTokens: 0, completionTokens: 0, totalTokens: 0}); + }, { promptTokens: 0, completionTokens: 0, totalTokens: 0 }); } - getTotalUsageSnakeCase(): {prompt_tokens: number, completion_tokens: number, total_tokens: number} { - return this.usages.reduce((acc, {usage}) => { + getTotalUsageSnakeCase(): { prompt_tokens: number, completion_tokens: number, total_tokens: number } { + return this.usages.reduce((acc, { usage }) => { acc.prompt_tokens += usage.promptTokens; acc.completion_tokens += usage.completionTokens; acc.total_tokens += usage.totalTokens; return acc; - }, {prompt_tokens: 0, completion_tokens: 0, total_tokens: 0}); + }, { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }); } getUsageBreakdown(): Record { - return this.usages.reduce((acc, {tool, usage}) => { + return this.usages.reduce((acc, { tool, usage }) => { acc[tool] = (acc[tool] || 0) + usage.totalTokens; return acc; }, {} as Record); @@ -56,7 +57,7 @@ export class TokenTracker extends EventEmitter { printSummary() { const breakdown = this.getUsageBreakdown(); - console.log('Token Usage Summary:', { + logInfo('Token Usage Summary:', { budget: this.budget, total: this.getTotalUsage(), breakdown diff --git a/src/utils/url-tools.ts b/src/utils/url-tools.ts index 1d0e62a..caafd55 100644 --- a/src/utils/url-tools.ts +++ b/src/utils/url-tools.ts @@ -1,14 +1,15 @@ -import 
{BoostedSearchSnippet, ImageObject, KnowledgeItem, SearchSnippet, TrackerContext, VisitAction, WebContent} from "../types"; -import {getI18nText, smartMergeStrings} from "./text-tools"; -import {rerankDocuments} from "../tools/jina-rerank"; -import {readUrl} from "../tools/read"; -import {Schemas} from "./schemas"; -import {cherryPick} from "../tools/jina-latechunk"; -import {formatDateBasedOnType} from "./date-tools"; -import {classifyText} from "../tools/jina-classify-spam"; +import { BoostedSearchSnippet, ImageObject, KnowledgeItem, SearchSnippet, TrackerContext, VisitAction, WebContent } from "../types"; +import { getI18nText, smartMergeStrings } from "./text-tools"; +import { rerankDocuments } from "../tools/jina-rerank"; +import { readUrl } from "../tools/read"; +import { Schemas } from "./schemas"; +import { cherryPick } from "../tools/jina-latechunk"; +import { formatDateBasedOnType } from "./date-tools"; +import { classifyText } from "../tools/jina-classify-spam"; import { processImage } from "./image-tools"; -import {segmentText} from "../tools/segment"; +import { segmentText } from "../tools/segment"; import axiosClient from "./axios-client"; +import { logInfo, logError, logDebug, logWarning } from '../logging'; export function normalizeUrl(urlString: string, debug = false, options = { removeAnchors: true, @@ -68,7 +69,9 @@ export function normalizeUrl(urlString: string, debug = false, options = { try { return decodeURIComponent(segment); } catch (e) { - if (debug) console.error(`Failed to decode path segment: ${segment}`, e); + if (debug) { + logDebug(`Failed to decode path segment: ${segment}`, { error: e }); + } return segment; } }) @@ -87,7 +90,9 @@ export function normalizeUrl(urlString: string, debug = false, options = { return [key, decodedValue]; } } catch (e) { - if (debug) console.error(`Failed to decode query param ${key}=${value}`, e); + if (debug) { + logDebug(`Failed to decode query param ${key}=${value}`, { error: e }); + } } return [key, 
value]; }) @@ -132,7 +137,9 @@ export function normalizeUrl(urlString: string, debug = false, options = { url.hash = '#' + decodedHash; } } catch (e) { - if (debug) console.error(`Failed to decode fragment: ${url.hash}`, e); + if (debug) { + logDebug(`Failed to decode fragment: ${url.hash}`, { error: e }); + } } } @@ -152,13 +159,15 @@ export function normalizeUrl(urlString: string, debug = false, options = { normalizedUrl = decodedUrl; } } catch (e) { - if (debug) console.error('Failed to decode final URL', e); + if (debug) { + logDebug('Failed to decode final URL', { error: e }); + } } return normalizedUrl; } catch (error) { // Main URL parsing error - this one we should throw - console.error(`Invalid URL "${urlString}": ${error}`); + logWarning(`Invalid URL "${urlString}": ${error}`); return; } } @@ -179,7 +188,7 @@ const extractUrlParts = (urlStr: string) => { path: url.pathname }; } catch (e) { - console.error(`Error parsing URL: ${urlStr}`, e); + logError(`Error parsing URL: ${urlStr}`, { error: e }); return { hostname: "", path: "" }; } }; @@ -261,7 +270,7 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers: // Step 2: Rerank only the unique contents const uniqueContents = Object.keys(uniqueContentMap); const uniqueIndicesMap = Object.values(uniqueContentMap); - console.log(`rerank URLs: ${urlItems.length}->${uniqueContents.length}`) + logInfo(`rerank URLs: ${urlItems.length}->${uniqueContents.length}`); rerankDocuments(question, uniqueContents, trackers.tokenTracker) .then(({ results }) => { // Step 3: Map the scores back to all original items @@ -280,7 +289,7 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers: return (urlItems as BoostedSearchSnippet[]).map(item => { if (!item || !item.url) { - console.error('Skipping invalid item:', item); + logError('Skipping invalid item:', { item }); return item; // Return unchanged } @@ -421,7 +430,7 @@ export async function getLastModified(url: string): 
Promise return undefined; } catch (error) { - console.error('Failed to fetch last modified date:'); + logError('Failed to fetch last modified date:'); return undefined; } } @@ -494,11 +503,11 @@ export async function processURLs( // Store normalized URL for consistent reference url = normalizedUrl; - const {response} = await readUrl(url, true, context.tokenTracker, withImages); - const {data} = response; + const { response } = await readUrl(url, true, context.tokenTracker, withImages); + const { data } = response; const guessedTime = await getLastModified(url); if (guessedTime) { - console.log('Guessed time for', url, guessedTime); + logInfo('Guessed time for', { url, guessedTime }); } // Early return if no valid data @@ -511,7 +520,10 @@ export async function processURLs( const spamDetectLength = 300; const isGoodContent = data.content.length > spamDetectLength || !await classifyText(data.content); if (!isGoodContent) { - console.error(`Blocked content ${data.content.length}:`, url, data.content.slice(0, spamDetectLength)); + logError(`Blocked content ${data.content.length}:`, { + url, + content: data.content.slice(0, spamDetectLength) + }); throw new Error(`Blocked content ${url}`); } @@ -569,9 +581,9 @@ export async function processURLs( }); } - return {url, result: response}; + return { url, result: response }; } catch (error: any) { - console.error('Error reading URL:', url, error); + logError('Error reading URL:', { url, error }); badURLs.push(url); // Extract hostname from the URL if ( @@ -586,10 +598,10 @@ export async function processURLs( try { hostname = extractUrlParts(url).hostname; } catch (e) { - console.error('Error parsing URL for hostname:', url, e); + logError('Error parsing URL for hostname:', { url, error: e }); } badHostnames.push(hostname); - console.log(`Added ${hostname} to bad hostnames list`); + logInfo(`Added ${hostname} to bad hostnames list`); } return null; } finally { @@ -618,7 +630,7 @@ export async function processURLs( 
Object.keys(allURLs).forEach(url => { if (badHostnames.includes(extractUrlParts(url).hostname)) { delete allURLs[url]; - console.log(`Removed ${url} from allURLs`); + logInfo(`Removed ${url} from allURLs`); } } )