refactor: logger

This commit is contained in:
Han Xiao 2025-06-10 11:48:19 -07:00
parent 0ee295b83e
commit 9edf122a8c
39 changed files with 677 additions and 540 deletions

View File

@ -12,7 +12,7 @@
"defaults": {
"search_provider": "jina",
"llm_provider": "gemini",
"step_sleep": 100
"step_sleep": 1
},
"providers": {
"gemini": {

View File

@ -12,7 +12,7 @@
"defaults": {
"search_provider": "jina",
"llm_provider": "vertex",
"step_sleep": 500
"step_sleep": 0.5
},
"providers": {
"vertex": {

View File

@ -44,11 +44,11 @@ import { MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas
import { formatDateBasedOnType, formatDateRange } from "./utils/date-tools";
import { reviseAnswer } from "./tools/md-fixer";
import { buildImageReferences, buildReferences } from "./tools/build-ref";
import { logInfo, logError, logDebug, logWarning } from './logging';
async function sleep(ms: number) {
const seconds = Math.ceil(ms / 1000);
console.log(`Waiting ${seconds}s...`);
return new Promise(resolve => setTimeout(resolve, ms));
/** Pause execution for the given number of seconds. */
async function wait(seconds: number) {
  logDebug(`Waiting ${seconds}s...`);
  const delayMs = seconds * 1000;
  await new Promise<void>((resolve) => setTimeout(resolve, delayMs));
}
function BuildMsgsFromKnowledge(knowledge: KnowledgeItem[]): CoreMessage[] {
@ -295,7 +295,7 @@ async function executeSearchQueries(
}
try {
console.log('Search query:', query);
logDebug('Search query:', { query });
switch (searchProvider || SEARCH_PROVIDER) {
case 'jina':
case 'arxiv':
@ -318,10 +318,13 @@ async function executeSearchQueries(
throw new Error('No results found');
}
} catch (error) {
console.error(`${SEARCH_PROVIDER} search failed for query:`, query, error);
logError(`${SEARCH_PROVIDER} search failed for query:`, {
query,
error: error instanceof Error ? error.message : String(error)
});
continue;
} finally {
await sleep(STEP_SLEEP);
await wait(STEP_SLEEP);
}
const minResults: SearchSnippet[] = results
@ -360,13 +363,13 @@ async function executeSearchQueries(
}
if (searchedQueries.length === 0) {
if (onlyHostnames && onlyHostnames.length > 0) {
console.log(`No results found for queries: ${uniqQOnly.join(', ')} on hostnames: ${onlyHostnames.join(', ')}`);
logWarning(`No results found for queries: ${uniqQOnly.join(', ')} on hostnames: ${onlyHostnames.join(', ')}`);
context.actionTracker.trackThink('hostnames_no_results', SchemaGen.languageCode, { hostnames: onlyHostnames.join(', ') });
}
} else {
console.log(`Utility/Queries: ${utilityScore}/${searchedQueries.length}`);
logDebug(`Utility/Queries: ${utilityScore}/${searchedQueries.length}`);
if (searchedQueries.length > MAX_QUERIES_PER_STEP) {
console.log(`So many queries??? ${searchedQueries.map(q => `"${q}"`).join(', ')}`)
logDebug(`So many queries??? ${searchedQueries.map(q => `"${q}"`).join(', ')}`)
}
}
return {
@ -482,8 +485,8 @@ export async function getResponse(question?: string,
step++;
totalStep++;
const budgetPercentage = (context.tokenTracker.getTotalUsage().totalTokens / tokenBudget * 100).toFixed(2);
console.log(`Step ${totalStep} / Budget used ${budgetPercentage}%`);
console.log('Gaps:', gaps);
logDebug(`Step ${totalStep} / Budget used ${budgetPercentage}%`);
logDebug('Gaps:', { gaps });
allowReflect = allowReflect && (gaps.length <= MAX_REFLECT_PER_STEP);
// rotating question from gaps
const currentQuestion: string = gaps[totalStep % gaps.length];
@ -524,7 +527,7 @@ export async function getResponse(question?: string,
// improve diversity by keep top 2 urls of each hostname
weightedURLs = keepKPerHostname(weightedURLs, 2);
console.log('Weighted URLs:', weightedURLs.length);
logDebug('Weighted URLs:', { count: weightedURLs.length });
}
allowRead = allowRead && (weightedURLs.length > 0);
@ -560,8 +563,8 @@ export async function getResponse(question?: string,
} as StepAction;
// print allowed and chose action
const actionsStr = [allowSearch, allowRead, allowAnswer, allowReflect, allowCoding].map((a, i) => a ? ['search', 'read', 'answer', 'reflect'][i] : null).filter(a => a).join(', ');
console.log(`${currentQuestion}: ${thisStep.action} <- [${actionsStr}]`);
console.log(thisStep)
logDebug(`${currentQuestion}: ${thisStep.action} <- [${actionsStr}]`);
logDebug('Step details:', thisStep);
context.actionTracker.trackAction({ totalStep, thisStep, gaps });
@ -609,7 +612,10 @@ export async function getResponse(question?: string,
...thisStep,
});
console.log(currentQuestion, evaluationMetrics[currentQuestion])
logDebug('Current question evaluation:', {
question: currentQuestion,
metrics: evaluationMetrics[currentQuestion]
});
let evaluation: EvaluationResponse = { pass: true, think: '' };
if (evaluationMetrics[currentQuestion].length > 0) {
context.actionTracker.trackThink('eval_first', SchemaGen.languageCode)
@ -853,7 +859,7 @@ You decided to think out of the box or cut from a completely different angle.
thisStep.URLTargets = [...new Set([...thisStep.URLTargets, ...weightedURLs.map(r => r.url!)])].slice(0, MAX_URLS_PER_STEP);
const uniqueURLs = thisStep.URLTargets;
console.log(uniqueURLs)
logDebug('Unique URLs:', { urls: uniqueURLs });
if (uniqueURLs.length > 0) {
const { urlResults, success } = await processURLs(
@ -921,7 +927,9 @@ You found the solution and add it to your knowledge for future reference.
result: result
});
} catch (error) {
console.error('Error solving coding issue:', error);
logError('Error solving coding issue:', {
error: error instanceof Error ? error.message : String(error)
});
diaryContext.push(`
At step ${step}, you took the **coding** action and try to solve the coding issue: ${thisStep.codingIssue}.
But unfortunately, you failed to solve the issue. You need to think out of the box or cut from a completely different angle.
@ -944,11 +952,11 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
weightedURLs,
msgWithKnowledge
}, totalStep);
await sleep(STEP_SLEEP);
await wait(STEP_SLEEP);
}
if (!(thisStep as AnswerAction).isFinal) {
console.log('Enter Beast mode!!!')
logWarning('Enter Beast mode!!!');
// any answer is better than no answer, humanity last resort
step++;
totalStep++;
@ -1024,12 +1032,14 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
}
let imageReferences: ImageReference[] = [];
if(imageObjects.length && with_images) {
if (imageObjects.length && with_images) {
try {
imageReferences = await buildImageReferences(answerStep.answer, imageObjects, context, SchemaGen);
console.log('Image references built:', imageReferences);
logDebug('Image references built:', { count: imageReferences.length });
} catch (error) {
console.error('Error building image references:', error);
logError('Error building image references:', {
error: error instanceof Error ? error.message : String(error)
});
imageReferences = [];
}
}
@ -1087,7 +1097,9 @@ ${JSON.stringify(zodToJsonSchema(schema), null, 2)}
await fs.writeFile('urls.json', JSON.stringify(weightedURLs, null, 2));
await fs.writeFile('messages.json', JSON.stringify(msgWithKnowledge, null, 2));
} catch (error) {
console.error('Context storage failed:', error);
logError('Context storage failed:', {
error: error instanceof Error ? error.message : String(error)
});
}
}
@ -1098,12 +1110,16 @@ export async function main() {
context: tracker,
visitedURLs: visitedURLs
} = await getResponse(question) as { result: AnswerAction; context: TrackerContext; visitedURLs: string[] };
console.log('Final Answer:', finalStep.answer);
console.log('Visited URLs:', visitedURLs);
logInfo('Final Answer:', { answer: finalStep.answer });
logInfo('Visited URLs:', { urls: visitedURLs });
tracker.tokenTracker.printSummary();
}
if (require.main === module) {
main().catch(console.error);
main().catch(error => {
logError('Main execution error:', {
error: error instanceof Error ? error.message : String(error)
});
});
}

View File

@ -14,6 +14,7 @@ import { ActionTracker } from "./utils/action-tracker";
import { ObjectGeneratorSafe } from "./utils/safe-generator";
import { jsonSchema } from "ai"; // or another converter library
import { normalizeHostName } from "./utils/url-tools";
import { logInfo, logError, logDebug, logWarning } from './logging';
const app = express();
@ -472,7 +473,7 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
// Convert JSON schema to Zod schema using a proper converter
try {
responseSchema = jsonSchema(body.response_format.json_schema);
console.log(responseSchema)
logDebug('Response schema', { schema: responseSchema });
} catch (error: any) {
return res.status(400).json({ error: `Invalid JSON schema: ${error.message}` });
}
@ -613,9 +614,12 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
// Use the generated object as the response content
finalAnswer = JSON.stringify(result.object, null, 2);
console.log('Generated object:', finalAnswer)
logInfo('Generated object:', { answer: finalAnswer });
} catch (error) {
console.error('Error processing response with schema:', error);
logError('Error processing response with schema:', {
error: error instanceof Error ? error.message : String(error),
schema: responseSchema
});
}
}
@ -690,28 +694,17 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
relatedImages,
};
// Log final response (excluding full content for brevity)
console.log('[chat/completions] Response:', {
id: response.id,
status: 200,
contentLength: response.choices[0].message.content.length,
usage: response.usage,
visitedURLs: response.visitedURLs,
readURLs: response.readURLs,
numURLs: allURLs.length,
allImages: allImages?.length,
relatedImages: relatedImages?.length,
logInfo('[chat/completions] Response:', {
model: body.model,
usage: context.tokenTracker.getTotalUsageSnakeCase()
});
res.json(response);
}
} catch (error: any) {
// Log error details
console.error('[chat/completions] Error:', {
message: error?.message || 'An error occurred',
stack: error?.stack,
type: error?.constructor?.name,
requestId
logError('[chat/completions] Error:', {
error: error instanceof Error ? error.message : String(error),
stack: error instanceof Error ? error.stack : undefined
});
// Track error as rejected tokens with Vercel token counting

View File

@ -2,6 +2,7 @@
import { Command } from 'commander';
import { getResponse } from './agent';
import { version } from '../package.json';
import { logInfo, logError, logDebug, logWarning } from './logging';
const program = new Command();
@ -30,17 +31,17 @@ program
);
if (result.action === 'answer') {
console.log('\nAnswer:', result.answer);
logInfo('\nAnswer:', { answer: result.answer });
if (result.references?.length) {
console.log('\nReferences:');
result.references.forEach(ref => {
console.log(`- ${ref.url}`);
console.log(` "${ref.exactQuote}"`);
});
logInfo('\nReferences:');
for (const ref of result.references) {
logInfo(`- ${ref.url}`);
logInfo(` "${ref.exactQuote}"`);
}
}
}
} catch (error) {
console.error('Error:', error instanceof Error ? error.message : String(error));
logError('Error:', { error: error instanceof Error ? error.message : String(error) });
process.exit(1);
}
});

View File

@ -3,6 +3,7 @@ import { ProxyAgent, setGlobalDispatcher } from 'undici';
import { createGoogleGenerativeAI } from '@ai-sdk/google';
import { createOpenAI, OpenAIProviderSettings } from '@ai-sdk/openai';
import configJson from '../config.json';
import { logInfo, logError, logDebug, logWarning } from './logging';
// Load environment variables
dotenv.config();
@ -33,7 +34,7 @@ if (env.https_proxy) {
const dispatcher = new ProxyAgent({ uri: proxyUrl });
setGlobalDispatcher(dispatcher);
} catch (error) {
console.error('Failed to set proxy:', error);
logError('Failed to set proxy:', { error });
}
}
@ -157,4 +158,4 @@ const configSummary = {
}
};
console.log('Configuration Summary:', JSON.stringify(configSummary, null, 2));
logInfo('Configuration Summary:', { summary: configSummary });

View File

@ -1,12 +1,13 @@
import fs from 'fs/promises';
import {exec} from 'child_process';
import {promisify} from 'util';
import {getResponse} from '../agent';
import {generateObject} from 'ai';
import {GEMINI_API_KEY} from '../config';
import {z} from 'zod';
import {AnswerAction, TrackerContext} from "../types";
import {createGoogleGenerativeAI} from "@ai-sdk/google";
import { exec } from 'child_process';
import { promisify } from 'util';
import { getResponse } from '../agent';
import { generateObject } from 'ai';
import { GEMINI_API_KEY } from '../config';
import { z } from 'zod';
import { AnswerAction, TrackerContext } from "../types";
import { createGoogleGenerativeAI } from "@ai-sdk/google";
import { logInfo, logError, logDebug, logWarning } from '../logging';
const execAsync = promisify(exec);
@ -68,26 +69,26 @@ function calculateStats(results: EvaluationResult[], modelName: string): Evaluat
}
function printStats(stats: EvaluationStats): void {
console.log('\n=== Evaluation Statistics ===');
console.log(`Model: ${stats.model_name}`);
console.log(`Pass Rate: ${stats.pass_rate.toFixed(0)}%`);
console.log(`Average Steps: ${stats.avg_steps.toFixed(0)}`);
console.log(`Maximum Steps: ${stats.max_steps}`);
console.log(`Minimum Steps: ${stats.min_steps}`);
console.log(`Median Steps: ${stats.median_steps.toFixed(0)}`);
console.log(`Average Tokens: ${stats.avg_tokens.toFixed(0)}`);
console.log(`Median Tokens: ${stats.median_tokens.toFixed(0)}`);
console.log(`Maximum Tokens: ${stats.max_tokens}`);
console.log(`Minimum Tokens: ${stats.min_tokens}`);
console.log('===========================\n');
logInfo('\n=== Evaluation Statistics ===');
logInfo(`Model: ${stats.model_name}`);
logInfo(`Pass Rate: ${stats.pass_rate.toFixed(0)}%`);
logInfo(`Average Steps: ${stats.avg_steps.toFixed(0)}`);
logInfo(`Maximum Steps: ${stats.max_steps}`);
logInfo(`Minimum Steps: ${stats.min_steps}`);
logInfo(`Median Steps: ${stats.median_steps.toFixed(0)}`);
logInfo(`Average Tokens: ${stats.avg_tokens.toFixed(0)}`);
logInfo(`Median Tokens: ${stats.median_tokens.toFixed(0)}`);
logInfo(`Maximum Tokens: ${stats.max_tokens}`);
logInfo(`Minimum Tokens: ${stats.min_tokens}`);
logInfo('===========================\n');
}
async function getCurrentGitCommit(): Promise<string> {
try {
const {stdout} = await execAsync('git rev-parse --short HEAD');
const { stdout } = await execAsync('git rev-parse --short HEAD');
return stdout.trim();
} catch (error) {
console.error('Error getting git commit:', error);
logError('Error getting git commit:', { error });
return 'unknown';
}
}
@ -116,7 +117,7 @@ Minor wording differences are acceptable as long as the core information of the
return result.object;
} catch (error) {
console.error('Evaluation failed:', error);
logError('Evaluation failed:', { error });
return {
pass: false,
reason: `Evaluation error: ${error}`
@ -134,8 +135,8 @@ async function batchEvaluate(inputFile: string): Promise<void> {
// Process each question
for (let i = 0; i < questions.length; i++) {
const {question, answer: expectedAnswer} = questions[i];
console.log(`\nProcessing question ${i + 1}/${questions.length}: ${question}`);
const { question, answer: expectedAnswer } = questions[i];
logInfo(`\nProcessing question ${i + 1}/${questions.length}: ${question}`);
try {
// Get response using the agent
@ -166,10 +167,10 @@ async function batchEvaluate(inputFile: string): Promise<void> {
actual_answer: actualAnswer
});
console.log(`Evaluation: ${evaluation.pass ? 'PASS' : 'FAIL'}`);
console.log(`Reason: ${evaluation.reason}`);
logInfo(`Evaluation: ${evaluation.pass ? 'PASS' : 'FAIL'}`);
logInfo(`Reason: ${evaluation.reason}`);
} catch (error) {
console.error(`Error processing question: ${question}`, error);
logError(`Error processing question: ${question}`, { error });
results.push({
pass: false,
reason: `Error: ${error}`,
@ -192,7 +193,7 @@ async function batchEvaluate(inputFile: string): Promise<void> {
statistics: stats
}, null, 2));
console.log(`\nEvaluation results saved to ${outputFile}`);
logInfo(`\nEvaluation results saved to ${outputFile}`);
}
// Run batch evaluation if this is the main module
@ -206,4 +207,4 @@ if (require.main === module) {
batchEvaluate(inputFile).catch(console.error);
}
export {batchEvaluate};
export { batchEvaluate };

56
src/logging.ts Normal file
View File

@ -0,0 +1,56 @@
// Cloud Run structured logging helper
// Emits one JSON object per line so Cloud Logging can parse severity,
// message, and any extra context fields automatically.

// GCP project ID used to build the trace resource name (unset when run locally).
const project = process.env.GOOGLE_CLOUD_PROJECT;

// Shape of a single structured log record; arbitrary caller-supplied
// context keys are spread into the entry via the index signature.
interface LogEntry {
  severity: string;
  message: string;
  component: string;
  [key: string]: any;
}
// Build one structured log record for Cloud Logging.
// Note: `...context` is spread last, so caller-supplied keys can override
// severity/message/component, and file/line/requestId also appear as plain
// top-level keys in addition to the special fields set below.
function createLogEntry(severity: string, message: string, context: Record<string, any> = {}): LogEntry {
  const entry: LogEntry = {
    severity,
    message,
    component: 'deepsearch',
    timestamp: new Date().toISOString(),
    ...context
  };
  // Add trace context if available
  // NOTE(review): K_REVISION is the Cloud Run revision name, not a trace ID.
  // The trace special field normally carries the ID parsed from the
  // X-Cloud-Trace-Context request header — confirm this is intentional
  // before relying on log/trace correlation in the console.
  if (typeof process.env.K_REVISION !== 'undefined' && project) {
    entry['logging.googleapis.com/trace'] = `projects/${project}/traces/${process.env.K_REVISION}`;
  }
  // Add source location if available (note: a falsy line number such as 0
  // skips this branch)
  if (context.file && context.line) {
    entry['logging.googleapis.com/sourceLocation'] = {
      file: context.file,
      line: context.line,
      function: context.function || 'unknown'
    };
  }
  // Add request ID if available
  // NOTE(review): 'logging.googleapis.com/requestId' is not a documented
  // structured-logging special field — verify the intended field (e.g.
  // labels or httpRequest.requestId) against the Cloud Logging docs.
  if (context.requestId) {
    entry['logging.googleapis.com/requestId'] = context.requestId;
  }
  return entry;
}
/** Log a message at INFO severity as a structured JSON line on stdout. */
export function logInfo(message: string, context: Record<string, any> = {}) {
  const entry = createLogEntry('INFO', message, context);
  console.log(JSON.stringify(entry));
}
/** Log a message at ERROR severity as a structured JSON line on stderr. */
export function logError(message: string, context: Record<string, any> = {}) {
  const entry = createLogEntry('ERROR', message, context);
  console.error(JSON.stringify(entry));
}
/** Log a message at DEBUG severity as a structured JSON line on stdout. */
export function logDebug(message: string, context: Record<string, any> = {}) {
  const entry = createLogEntry('DEBUG', message, context);
  console.log(JSON.stringify(entry));
}
/** Log a message at WARNING severity as a structured JSON line via console.warn. */
export function logWarning(message: string, context: Record<string, any> = {}) {
  const entry = createLogEntry('WARNING', message, context);
  console.warn(JSON.stringify(entry));
}

View File

@ -1,14 +1,16 @@
import {BRAVE_API_KEY} from "../config";
import { BRAVE_API_KEY } from "../config";
import axiosClient from "../utils/axios-client";
import { logInfo, logError, logDebug, logWarning } from '../logging';
import { BraveSearchResponse } from '../types';
export async function braveSearch(query: string): Promise<{ response: BraveSearchResponse }> {
logInfo('Search info:', { query });
const response = await axiosClient.get<BraveSearchResponse>('https://api.search.brave.com/res/v1/web/search', {
params: {
q: query,
count: 10,
safesearch: 'off'
count: 10
},
headers: {
'Accept': 'application/json',
@ -17,6 +19,10 @@ export async function braveSearch(query: string): Promise<{ response: BraveSearc
timeout: 10000
});
if (response.status !== 200) {
throw new Error(`Brave search failed: ${response.status} ${response.statusText}`)
}
// Maintain the same return structure as the original code
return { response: response.data };
}

View File

@ -1,16 +1,17 @@
import { generateText } from "ai";
import { getModel } from "../config";
import {TrackerContext} from "../types";
import {detectBrokenUnicodeViaFileIO} from "../utils/text-tools";
import { TrackerContext } from "../types";
import { detectBrokenUnicodeViaFileIO } from "../utils/text-tools";
import { logInfo, logError, logDebug, logWarning } from '../logging';
/**
* Repairs markdown content with <EFBFBD> characters by using Gemini to guess the missing text
* Repairs markdown content containing U+FFFD replacement characters by using Gemini to guess the missing text
*/
export async function repairUnknownChars(mdContent: string, trackers?: TrackerContext): Promise<string> {
const { broken, readStr } = await detectBrokenUnicodeViaFileIO(mdContent);
if (!broken) return readStr;
console.log("Detected broken unicode in output, attempting to repair...");
logWarning("Detected broken unicode in output, attempting to repair...");
let repairedContent = readStr;
let remainingUnknowns = true;
@ -32,7 +33,7 @@ export async function repairUnknownChars(mdContent: string, trackers?: TrackerCo
if (position === lastPosition) {
// Move past this character by removing it
repairedContent = repairedContent.substring(0, position) +
repairedContent.substring(position + 1);
repairedContent.substring(position + 1);
continue;
}
@ -81,20 +82,20 @@ So what was the original text between these two contexts?`,
(await detectBrokenUnicodeViaFileIO(replacement)).broken ||
replacement.length > unknownCount * 4
) {
console.log(`Skipping invalid replacement ${replacement} at position ${position}`);
// Skip to the next <EFBFBD> character without modifying content
logWarning(`Skipping invalid replacement ${replacement} at position ${position}`);
// Skip to the next replacement character without modifying content
} else {
// Replace the unknown sequence with the generated text
repairedContent = repairedContent.substring(0, position) +
replacement +
repairedContent.substring(position + unknownCount);
replacement +
repairedContent.substring(position + unknownCount);
}
console.log(`Repair iteration ${iterations}: replaced ${unknownCount} <20> chars with "${replacement}"`);
logDebug(`Repair iteration ${iterations}: replaced ${unknownCount} chars with "${replacement}"`);
} catch (error) {
console.error("Error repairing unknown characters:", error);
// Skip to the next <EFBFBD> character without modifying this one
logError("Error repairing unknown characters:", { error });
// Skip to the next replacement character without modifying this one
}
}

View File

@ -1,10 +1,11 @@
import {segmentText} from './segment';
import {ImageObject, ImageReference, Reference, TrackerContext, WebContent} from "../types";
import {Schemas} from "../utils/schemas";
import {cosineSimilarity, jaccardRank} from "./cosine";
import {getEmbeddings} from "./embeddings";
import { segmentText } from './segment';
import { ImageObject, ImageReference, Reference, TrackerContext, WebContent } from "../types";
import { Schemas } from "../utils/schemas";
import { cosineSimilarity, jaccardRank } from "./cosine";
import { getEmbeddings } from "./embeddings";
import { dedupImagesWithEmbeddings } from '../utils/image-tools';
import {normalizeHostName} from '../utils/url-tools';
import { normalizeHostName } from '../utils/url-tools';
import { logInfo, logError, logDebug, logWarning } from '../logging';
export async function buildReferences(
answer: string,
@ -16,16 +17,16 @@ export async function buildReferences(
minRelScore: number = 0.7,
onlyHostnames: string[] = []
): Promise<{ answer: string, references: Array<Reference> }> {
console.log(`[buildReferences] Starting with maxRef=${maxRef}, minChunkLength=${minChunkLength}, minRelScore=${minRelScore}`);
console.log(`[buildReferences] Answer length: ${answer.length} chars, Web content sources: ${Object.keys(webContents).length}`);
logDebug(`[buildReferences] Starting with maxRef=${maxRef}, minChunkLength=${minChunkLength}, minRelScore=${minRelScore}`);
logDebug(`[buildReferences] Answer length: ${answer.length} chars, Web content sources: ${Object.keys(webContents).length}`);
// Step 1: Chunk the answer
console.log(`[buildReferences] Step 1: Chunking answer text`);
const {chunks: answerChunks, chunk_positions: answerChunkPositions} = await segmentText(answer, context);
console.log(`[buildReferences] Answer segmented into ${answerChunks.length} chunks`);
logDebug(`[buildReferences] Step 1: Chunking answer text`);
const { chunks: answerChunks, chunk_positions: answerChunkPositions } = await segmentText(answer, context);
logDebug(`[buildReferences] Answer segmented into ${answerChunks.length} chunks`);
// Step 2: Prepare all web content chunks, filtering out those below minimum length
console.log(`[buildReferences] Step 2: Preparing web content chunks and filtering by minimum length (${minChunkLength} chars)`);
logDebug(`[buildReferences] Step 2: Preparing web content chunks and filtering by minimum length (${minChunkLength} chars)`);
const allWebContentChunks: string[] = [];
const chunkToSourceMap: any = {}; // Maps chunk index to source information
const validWebChunkIndices = new Set<number>(); // Tracks indices of valid web chunks (those above minimum length)
@ -53,15 +54,15 @@ export async function buildReferences(
}
}
console.log(`[buildReferences] Collected ${allWebContentChunks.length} web chunks, ${validWebChunkIndices.size} above minimum length`);
logDebug(`[buildReferences] Collected ${allWebContentChunks.length} web chunks, ${validWebChunkIndices.size} above minimum length`);
if (allWebContentChunks.length === 0) {
console.log(`[buildReferences] No web content chunks available, returning without references`);
return {answer, references: []};
logDebug(`[buildReferences] No web content chunks available, returning without references`);
return { answer, references: [] };
}
// Step 3: Filter answer chunks by minimum length
console.log(`[buildReferences] Step 3: Filtering answer chunks by minimum length`);
logDebug(`[buildReferences] Step 3: Filtering answer chunks by minimum length`);
const validAnswerChunks: string[] = [];
const validAnswerChunkIndices: number[] = [];
const validAnswerChunkPositions: [number, number][] = [];
@ -80,15 +81,15 @@ export async function buildReferences(
validAnswerChunkPositions.push(answerChunkPosition);
}
console.log(`[buildReferences] Found ${validAnswerChunks.length}/${answerChunks.length} valid answer chunks above minimum length`);
logDebug(`[buildReferences] Found ${validAnswerChunks.length}/${answerChunks.length} valid answer chunks above minimum length`);
if (validAnswerChunks.length === 0) {
console.log(`[buildReferences] No valid answer chunks, returning without references`);
return {answer, references: []};
logDebug(`[buildReferences] No valid answer chunks, returning without references`);
return { answer, references: [] };
}
// Step 4: Get embeddings for BOTH answer chunks and valid web chunks in a single request
console.log(`[buildReferences] Step 4: Getting embeddings for all chunks in a single request (only including web chunks above min length)`);
logDebug(`[buildReferences] Step 4: Getting embeddings for all chunks in a single request (only including web chunks above min length)`);
// Create maps to track the original indices
const chunkIndexMap = new Map<number, { type: 'answer' | 'web', originalIndex: number }>();
@ -99,7 +100,7 @@ export async function buildReferences(
// Add answer chunks first
validAnswerChunks.forEach((chunk, index) => {
allChunks.push(chunk);
chunkIndexMap.set(allChunks.length - 1, {type: 'answer', originalIndex: index});
chunkIndexMap.set(allChunks.length - 1, { type: 'answer', originalIndex: index });
});
// Then add web chunks that meet minimum length requirement
@ -107,11 +108,11 @@ export async function buildReferences(
// Only include valid web chunks (those above minimum length)
if (validWebChunkIndices.has(i)) {
allChunks.push(allWebContentChunks[i]);
chunkIndexMap.set(allChunks.length - 1, {type: 'web', originalIndex: i});
chunkIndexMap.set(allChunks.length - 1, { type: 'web', originalIndex: i });
}
}
console.log(`[buildReferences] Requesting embeddings for ${allChunks.length} total chunks (${validAnswerChunks.length} answer + ${validWebChunkIndices.size} web)`);
logDebug(`[buildReferences] Requesting embeddings for ${allChunks.length} total chunks (${validAnswerChunks.length} answer + ${validWebChunkIndices.size} web)`);
try {
// Get embeddings for all chunks in one request
@ -136,10 +137,10 @@ export async function buildReferences(
}
}
console.log(`[buildReferences] Successfully generated and separated embeddings: ${answerEmbeddings.length} answer, ${webEmbeddingMap.size} web`);
logDebug(`[buildReferences] Successfully generated and separated embeddings: ${answerEmbeddings.length} answer, ${webEmbeddingMap.size} web`);
// Step 5: Compute pairwise cosine similarity
console.log(`[buildReferences] Step 5: Computing pairwise cosine similarity between answer and web chunks`);
logDebug(`[buildReferences] Step 5: Computing pairwise cosine similarity between answer and web chunks`);
const allMatches = [];
for (let i = 0; i < validAnswerChunks.length; i++) {
@ -179,7 +180,7 @@ export async function buildReferences(
});
}
console.log(`[buildReferences] Processed answer chunk ${i + 1}/${validAnswerChunks.length}, top score: ${matchesForChunk[0]?.relevanceScore.toFixed(4)}`);
logDebug(`[buildReferences] Processed answer chunk ${i + 1}/${validAnswerChunks.length}, top score: ${matchesForChunk[0]?.relevanceScore.toFixed(4)}`);
}
// Log statistics about relevance scores
@ -190,20 +191,22 @@ export async function buildReferences(
const sumRelevance = relevanceScores.reduce((sum, score) => sum + score, 0);
const meanRelevance = sumRelevance / relevanceScores.length;
console.log('Reference relevance statistics:', {
const stats = {
min: minRelevance.toFixed(4),
max: maxRelevance.toFixed(4),
mean: meanRelevance.toFixed(4),
count: relevanceScores.length
});
};
logDebug('Reference relevance statistics:', stats);
}
// Step 6: Sort all matches by relevance
allMatches.sort((a, b) => b.relevanceScore - a.relevanceScore);
console.log(`[buildReferences] Step 6: Sorted ${allMatches.length} potential matches by relevance score`);
logDebug(`[buildReferences] Step 6: Sorted ${allMatches.length} potential matches by relevance score`);
// Step 7: Filter matches as before
console.log(`[buildReferences] Step 7: Filtering matches to ensure uniqueness and threshold (min: ${minRelScore})`);
logDebug(`[buildReferences] Step 7: Filtering matches to ensure uniqueness and threshold (min: ${minRelScore})`);
const usedWebChunks = new Set();
const usedAnswerChunks = new Set();
const filteredMatches = [];
@ -222,12 +225,12 @@ export async function buildReferences(
}
}
console.log(`[buildReferences] Selected ${filteredMatches.length}/${allMatches.length} references after filtering`);
logDebug(`[buildReferences] Selected ${filteredMatches.length}/${allMatches.length} references after filtering`);
return buildFinalResult(answer, filteredMatches, chunkToSourceMap);
} catch (error) {
console.error('Embedding failed, falling back to Jaccard similarity', error);
console.log(`[buildReferences] Fallback: Using Jaccard similarity instead of embeddings`);
logError('Embedding failed, falling back to Jaccard similarity', { error });
logDebug(`[buildReferences] Fallback: Using Jaccard similarity instead of embeddings`);
// Process all chunks with Jaccard fallback
const allMatches = [];
@ -237,7 +240,7 @@ export async function buildReferences(
const answerChunkIndex = validAnswerChunkIndices[i];
const answerChunkPosition = validAnswerChunkPositions[i];
console.log(`[buildReferences] Processing answer chunk ${i + 1}/${validAnswerChunks.length} with Jaccard similarity`);
logDebug(`[buildReferences] Processing answer chunk ${i + 1}/${validAnswerChunks.length} with Jaccard similarity`);
const fallbackResult = await jaccardRank(answerChunk, allWebContentChunks);
for (const match of fallbackResult.results) {
@ -255,7 +258,7 @@ export async function buildReferences(
// Sort all matches by relevance and continue with the rest of the function
allMatches.sort((a, b) => b.relevanceScore - a.relevanceScore);
console.log(`[buildReferences] Fallback complete. Found ${allMatches.length} potential matches`);
logDebug(`[buildReferences] Fallback complete. Found ${allMatches.length} potential matches`);
// Filter matches as before
const usedWebChunks = new Set();
@ -276,7 +279,7 @@ export async function buildReferences(
}
}
console.log(`[buildReferences] Selected ${filteredMatches.length} references using fallback method`);
logDebug(`[buildReferences] Selected ${filteredMatches.length} references using fallback method`);
return buildFinalResult(answer, filteredMatches, chunkToSourceMap);
}
}
@ -287,7 +290,7 @@ function buildFinalResult(
filteredMatches: any[],
chunkToSourceMap: any
): { answer: string, references: Array<Reference> } {
console.log(`[buildFinalResult] Building final result with ${filteredMatches.length} references`);
logDebug(`[buildFinalResult] Building final result with ${filteredMatches.length} references`);
// Build reference objects
const references: Reference[] = filteredMatches.map((match) => {
@ -310,7 +313,7 @@ function buildFinalResult(
const referencesByPosition = [...references]
.sort((a, b) => a.answerChunkPosition![0] - b.answerChunkPosition![0]);
console.log(`[buildFinalResult] Injecting reference markers into answer`);
logDebug(`[buildFinalResult] Injecting reference markers into answer`);
// Insert markers from beginning to end, tracking offset
let offset = 0;
@ -362,7 +365,7 @@ function buildFinalResult(
offset += marker.length;
}
console.log(`[buildFinalResult] Complete. Generated ${references.length} references`);
logDebug(`[buildFinalResult] Complete. Generated ${references.length} references`);
return {
answer: modifiedAnswer,
references
@ -378,39 +381,39 @@ export async function buildImageReferences(
maxRef: number = 10,
minRelScore: number = 0.35
): Promise<Array<ImageReference>> {
console.log(`[buildImageReferences] Starting with maxRef=${maxRef}, minChunkLength=${minChunkLength}, minRelScore=${minRelScore}`);
console.log(`[buildImageReferences] Answer length: ${answer.length} chars, Image sources: ${imageObjects.length}`);
logDebug(`[buildImageReferences] Starting with maxRef=${maxRef}, minChunkLength=${minChunkLength}, minRelScore=${minRelScore}`);
logDebug(`[buildImageReferences] Answer length: ${answer.length} chars, Image sources: ${imageObjects.length}`);
// Step 1: Chunk the answer
console.log(`[buildImageReferences] Step 1: Chunking answer text`);
const {chunks: answerChunks, chunk_positions: answerChunkPositions} = await segmentText(answer, context);
console.log(`[buildImageReferences] Answer segmented into ${answerChunks.length} chunks`);
logDebug(`[buildImageReferences] Step 1: Chunking answer text`);
const { chunks: answerChunks, chunk_positions: answerChunkPositions } = await segmentText(answer, context);
logDebug(`[buildImageReferences] Answer segmented into ${answerChunks.length} chunks`);
// Step 2: Prepare image content
console.log(`[buildImageReferences] Step 2: Preparing image content`);
logDebug(`[buildImageReferences] Step 2: Preparing image content`);
const dudupImages = dedupImagesWithEmbeddings(imageObjects, []);
const allImageEmbeddings: number[][] = dudupImages.map(img => img.embedding[0]); // Extract embedding
const imageToSourceMap: any = {};
const validImageIndices = new Set<number>();
dudupImages.forEach((img, index) => {
imageToSourceMap[index] = {
url: img.url,
altText: img.alt,
embedding: img.embedding[0] // Store extracted embedding
};
validImageIndices.add(index);
imageToSourceMap[index] = {
url: img.url,
altText: img.alt,
embedding: img.embedding[0] // Store extracted embedding
};
validImageIndices.add(index);
});
console.log(`[buildImageReferences] Collected ${allImageEmbeddings.length} image embeddings`);
logDebug(`[buildImageReferences] Collected ${allImageEmbeddings.length} image embeddings`);
if (allImageEmbeddings.length === 0) {
console.log(`[buildImageReferences] No image data available, returning empty array`);
return [];
logDebug(`[buildImageReferences] No image data available, returning empty array`);
return [];
}
// Step 3: Filter answer chunks by minimum length
console.log(`[buildImageReferences] Step 3: Filtering answer chunks by minimum length`);
logDebug(`[buildImageReferences] Step 3: Filtering answer chunks by minimum length`);
const validAnswerChunks: string[] = [];
const validAnswerChunkIndices: number[] = [];
const validAnswerChunkPositions: [number, number][] = [];
@ -418,133 +421,135 @@ export async function buildImageReferences(
context.actionTracker.trackThink('cross_reference', schema.languageCode);
for (let i = 0; i < answerChunks.length; i++) {
const answerChunk = answerChunks[i];
const answerChunkPosition = answerChunkPositions[i];
const answerChunk = answerChunks[i];
const answerChunkPosition = answerChunkPositions[i];
if (!answerChunk.trim() || answerChunk.length < minChunkLength) continue;
if (!answerChunk.trim() || answerChunk.length < minChunkLength) continue;
validAnswerChunks.push(answerChunk);
validAnswerChunkIndices.push(i);
validAnswerChunkPositions.push(answerChunkPosition);
validAnswerChunks.push(answerChunk);
validAnswerChunkIndices.push(i);
validAnswerChunkPositions.push(answerChunkPosition);
}
console.log(`[buildImageReferences] Found ${validAnswerChunks.length}/${answerChunks.length} valid answer chunks above minimum length`);
logDebug(`[buildImageReferences] Found ${validAnswerChunks.length}/${answerChunks.length} valid answer chunks above minimum length`);
if (validAnswerChunks.length === 0) {
console.log(`[buildImageReferences] No valid answer chunks, returning empty array`);
return [];
logDebug(`[buildImageReferences] No valid answer chunks, returning empty array`);
return [];
}
// Step 4: Get embeddings for answer chunks
console.log(`[buildImageReferences] Step 4: Getting embeddings for answer chunks`);
logDebug(`[buildImageReferences] Step 4: Getting embeddings for answer chunks`);
const answerEmbeddings: number[][] = [];
try {
// const embeddingsResult = await getEmbeddings(validAnswerChunks, context.tokenTracker, embeddingOptions); // No embeddingOptions needed here
// answerEmbeddings.push(...embeddingsResult.embeddings);
const embeddingsResult = await getEmbeddings(validAnswerChunks, context.tokenTracker, {
dimensions: 512,
model: 'jina-clip-v2',
});
answerEmbeddings.push(...embeddingsResult.embeddings);
// const embeddingsResult = await getEmbeddings(validAnswerChunks, context.tokenTracker, embeddingOptions); // No embeddingOptions needed here
// answerEmbeddings.push(...embeddingsResult.embeddings);
const embeddingsResult = await getEmbeddings(validAnswerChunks, context.tokenTracker, {
dimensions: 512,
model: 'jina-clip-v2',
});
answerEmbeddings.push(...embeddingsResult.embeddings);
console.log(`[buildImageReferences] Got embeddings for ${answerEmbeddings.length} answer chunks`);
logDebug(`[buildImageReferences] Got embeddings for ${answerEmbeddings.length} answer chunks`);
// Step 5: Compute pairwise cosine similarity
console.log(`[buildImageReferences] Step 5: Computing pairwise cosine similarity between answer and image embeddings`);
const allMatches = [];
// Step 5: Compute pairwise cosine similarity
logDebug(`[buildImageReferences] Step 5: Computing pairwise cosine similarity between answer and image embeddings`);
const allMatches = [];
for (let i = 0; i < validAnswerChunks.length; i++) {
const answerChunkIndex = validAnswerChunkIndices[i];
const answerChunk = validAnswerChunks[i];
const answerChunkPosition = answerChunkPositions[i];
const answerEmbedding = answerEmbeddings[i];
for (let i = 0; i < validAnswerChunks.length; i++) {
const answerChunkIndex = validAnswerChunkIndices[i];
const answerChunk = validAnswerChunks[i];
const answerChunkPosition = answerChunkPositions[i];
const answerEmbedding = answerEmbeddings[i];
const matchesForChunk = [];
const matchesForChunk = [];
for (const imageIndex of validImageIndices) {
const imageEmbedding = allImageEmbeddings[imageIndex];
for (const imageIndex of validImageIndices) {
const imageEmbedding = allImageEmbeddings[imageIndex];
if (imageEmbedding) {
const score = cosineSimilarity(answerEmbedding, imageEmbedding);
if (imageEmbedding) {
const score = cosineSimilarity(answerEmbedding, imageEmbedding);
matchesForChunk.push({
imageIndex,
relevanceScore: score
});
}
}
matchesForChunk.sort((a, b) => b.relevanceScore - a.relevanceScore);
for (const match of matchesForChunk) {
allMatches.push({
imageIndex: match.imageIndex,
answerChunkIndex: answerChunkIndex,
relevanceScore: match.relevanceScore,
answerChunk: answerChunk,
answerChunkPosition: answerChunkPosition
});
}
console.log(`[buildImageReferences] Processed answer chunk ${i + 1}/${validAnswerChunks.length}, top score: ${matchesForChunk[0]?.relevanceScore.toFixed(4)}`);
}
// Log statistics about relevance scores
if (allMatches.length > 0) {
const relevanceScores = allMatches.map(match => match.relevanceScore);
const minRelevance = Math.min(...relevanceScores);
const maxRelevance = Math.max(...relevanceScores);
const sumRelevance = relevanceScores.reduce((sum, score) => sum + score, 0);
const meanRelevance = sumRelevance / relevanceScores.length;
console.log('Reference relevance statistics:', {
min: minRelevance.toFixed(4),
max: maxRelevance.toFixed(4),
mean: meanRelevance.toFixed(4),
count: relevanceScores.length
matchesForChunk.push({
imageIndex,
relevanceScore: score
});
}
}
matchesForChunk.sort((a, b) => b.relevanceScore - a.relevanceScore);
// Step 6: Sort all matches by relevance
allMatches.sort((a, b) => b.relevanceScore - a.relevanceScore);
console.log(`[buildImageReferences] Step 6: Sorted ${allMatches.length} potential matches by relevance score`);
// Step 7: Filter matches
console.log(`[buildImageReferences] Step 7: Filtering matches to ensure uniqueness and threshold (min: ${minRelScore})`);
const usedImages = new Set();
const usedAnswerChunks = new Set();
const filteredMatches = [];
for (const match of allMatches) {
if (match.relevanceScore < minRelScore) continue;
if (!usedImages.has(match.imageIndex) && !usedAnswerChunks.has(match.answerChunkIndex)) {
filteredMatches.push(match);
usedImages.add(match.imageIndex);
usedAnswerChunks.add(match.answerChunkIndex);
if (filteredMatches.length >= maxRef) break;
}
for (const match of matchesForChunk) {
allMatches.push({
imageIndex: match.imageIndex,
answerChunkIndex: answerChunkIndex,
relevanceScore: match.relevanceScore,
answerChunk: answerChunk,
answerChunkPosition: answerChunkPosition
});
}
console.log(`[buildImageReferences] Selected ${filteredMatches.length}/${allMatches.length} references after filtering`);
logDebug(`[buildImageReferences] Processed answer chunk ${i + 1}/${validAnswerChunks.length}, top score: ${matchesForChunk[0]?.relevanceScore.toFixed(4)}`);
}
const references: ImageReference[] = filteredMatches.map((match) => {
const source = imageToSourceMap[match.imageIndex];
return {
url: source.url,
relevanceScore: match.relevanceScore,
answerChunk: match.answerChunk,
answerChunkPosition: match.answerChunkPosition
};
});
// Log statistics about relevance scores
if (allMatches.length > 0) {
const relevanceScores = allMatches.map(match => match.relevanceScore);
const minRelevance = Math.min(...relevanceScores);
const maxRelevance = Math.max(...relevanceScores);
const sumRelevance = relevanceScores.reduce((sum, score) => sum + score, 0);
const meanRelevance = sumRelevance / relevanceScores.length;
return references;
const stats = {
min: minRelevance.toFixed(4),
max: maxRelevance.toFixed(4),
mean: meanRelevance.toFixed(4),
count: relevanceScores.length
};
logDebug('Reference relevance statistics:', stats);
}
// Step 6: Sort all matches by relevance
allMatches.sort((a, b) => b.relevanceScore - a.relevanceScore);
logDebug(`[buildImageReferences] Step 6: Sorted ${allMatches.length} potential matches by relevance score`);
// Step 7: Filter matches
logDebug(`[buildImageReferences] Step 7: Filtering matches to ensure uniqueness and threshold (min: ${minRelScore})`);
const usedImages = new Set();
const usedAnswerChunks = new Set();
const filteredMatches = [];
for (const match of allMatches) {
if (match.relevanceScore < minRelScore) continue;
if (!usedImages.has(match.imageIndex) && !usedAnswerChunks.has(match.answerChunkIndex)) {
filteredMatches.push(match);
usedImages.add(match.imageIndex);
usedAnswerChunks.add(match.answerChunkIndex);
if (filteredMatches.length >= maxRef) break;
}
}
logDebug(`[buildImageReferences] Selected ${filteredMatches.length}/${allMatches.length} references after filtering`);
const references: ImageReference[] = filteredMatches.map((match) => {
const source = imageToSourceMap[match.imageIndex];
return {
url: source.url,
relevanceScore: match.relevanceScore,
answerChunk: match.answerChunk,
answerChunkPosition: match.answerChunkPosition
};
});
return references;
} catch (error) {
console.error('Embedding failed', error);
return [];
logError('Embedding failed', { error });
return [];
}
}

View File

@ -1,6 +1,7 @@
import {ObjectGeneratorSafe} from "../utils/safe-generator";
import {CodeGenResponse, PromptPair, TrackerContext} from "../types";
import {Schemas} from "../utils/schemas";
import { ObjectGeneratorSafe } from "../utils/safe-generator";
import { CodeGenResponse, PromptPair, TrackerContext } from "../types";
import { Schemas } from "../utils/schemas";
import { logInfo, logError, logDebug, logWarning } from '../logging';
interface SandboxResult {
@ -49,9 +50,9 @@ Response:
}
</example>`;
console.log('Coding prompt', prompt)
logDebug('Coding prompt', { prompt });
return {system: prompt, user: problem };
return { system: prompt, user: problem };
}
export class CodeSandbox {
@ -101,7 +102,7 @@ export class CodeSandbox {
}
`);
console.log('Context:', this.context);
logDebug('Context:', { context: this.context });
// Execute the code with the context and get the return value
const output = evalInContext(this.context);
@ -134,12 +135,12 @@ export class CodeSandbox {
for (let i = 0; i < this.maxAttempts; i++) {
// Generate code
const generation = await this.generateCode(problem, attempts);
const {code} = generation;
const { code } = generation;
console.log(`Coding attempt ${i + 1}:`, code);
logDebug(`Coding attempt ${i + 1}:`, { code });
// Evaluate the code
const result = this.evaluateCode(code);
console.log(`Coding attempt ${i + 1} success:`, result);
logDebug(`Coding attempt ${i + 1} success:`, { result });
if (result.success) {
return {
@ -151,7 +152,7 @@ export class CodeSandbox {
};
}
console.error('Coding error:', result.error);
logError('Coding error:', { error: result.error });
// Store the failed attempt
attempts.push({

View File

@ -1,3 +1,5 @@
import { logInfo, logError, logDebug, logWarning } from '../logging';
export function cosineSimilarity(vecA: number[], vecB: number[]): number {
if (vecA.length !== vecB.length) {
throw new Error("Vectors must have the same length");
@ -21,7 +23,7 @@ export function cosineSimilarity(vecA: number[], vecB: number[]): number {
// Fallback similarity ranking using Jaccard
export async function jaccardRank(query: string, documents: string[]): Promise<{ results: { index: number, relevance_score: number }[] }> {
console.log(`[fallback] Using Jaccard similarity for ${documents.length} documents`);
logInfo(`[fallback] Using Jaccard similarity for ${documents.length} documents`);
// Convert texts to lowercase and tokenize by splitting on non-alphanumeric characters
const queryTokens = new Set(query.toLowerCase().split(/\W+/).filter(t => t.length > 0));
@ -37,11 +39,11 @@ export async function jaccardRank(query: string, documents: string[]): Promise<{
// Calculate Jaccard similarity
const score = union.size === 0 ? 0 : intersection.size / union.size;
return {index, relevance_score: score};
return { index, relevance_score: score };
});
// Sort by score in descending order
results.sort((a, b) => b.relevance_score - a.relevance_score);
return {results};
return { results };
}

View File

@ -1,6 +1,7 @@
import {z} from 'zod';
import {TokenTracker} from "../utils/token-tracker";
import {ObjectGeneratorSafe} from "../utils/safe-generator";
import { z } from 'zod';
import { TokenTracker } from "../utils/token-tracker";
import { ObjectGeneratorSafe } from "../utils/safe-generator";
import { logInfo, logError, logDebug, logWarning } from '../logging';
const responseSchema = z.object({
@ -79,11 +80,11 @@ export async function dedupQueries(
prompt,
});
console.log(TOOL_NAME, result.object.unique_queries);
return {unique_queries: result.object.unique_queries};
logInfo(TOOL_NAME, { unique_queries: result.object.unique_queries });
return { unique_queries: result.object.unique_queries };
} catch (error) {
console.error(`Error in ${TOOL_NAME}`, error);
logError(`Error in ${TOOL_NAME}`, { error });
throw error;
}
}

View File

@ -1,6 +1,7 @@
import {JINA_API_KEY} from "../config";
import {JinaEmbeddingRequest, JinaEmbeddingResponse} from "../types";
import { JINA_API_KEY } from "../config";
import { JinaEmbeddingRequest, JinaEmbeddingResponse } from "../types";
import axiosClient from "../utils/axios-client";
import { logInfo, logError, logDebug, logWarning } from '../logging';
const BATCH_SIZE = 128;
const API_URL = "https://api.jina.ai/v1/embeddings";
@ -18,7 +19,7 @@ export async function getEmbeddings(
model?: string,
} = {}
): Promise<{ embeddings: number[][], tokens: number }> {
console.log(`[embeddings] Getting embeddings for ${texts.length} texts`);
logDebug(`[embeddings] Getting embeddings for ${texts.length} texts`);
if (!JINA_API_KEY) {
throw new Error('JINA_API_KEY is not set');
@ -26,7 +27,7 @@ export async function getEmbeddings(
// Handle empty input case
if (texts.length === 0) {
return {embeddings: [], tokens: 0};
return { embeddings: [], tokens: 0 };
}
// Process in batches
@ -37,11 +38,11 @@ export async function getEmbeddings(
for (let i = 0; i < texts.length; i += BATCH_SIZE) {
const batchTexts = texts.slice(i, i + BATCH_SIZE);
const currentBatch = Math.floor(i / BATCH_SIZE) + 1;
console.log(`[embeddings] Processing batch ${currentBatch}/${batchCount} (${batchTexts.length} texts)`);
logDebug(`[embeddings] Processing batch ${currentBatch}/${batchCount} (${batchTexts.length} texts)`);
// Get embeddings for the batch with retry logic for missing indices
const { batchEmbeddings, batchTokens } = await getBatchEmbeddingsWithRetry(
batchTexts,
batchTexts,
options,
currentBatch,
batchCount
@ -49,7 +50,7 @@ export async function getEmbeddings(
allEmbeddings.push(...batchEmbeddings);
totalTokens += batchTokens;
console.log(`[embeddings] Batch ${currentBatch} complete. Tokens used: ${batchTokens}, total so far: ${totalTokens}`);
logDebug(`[embeddings] Batch ${currentBatch} complete. Tokens used: ${batchTokens}, total so far: ${totalTokens}`);
}
// Track token usage if tracker is provided
@ -61,8 +62,8 @@ export async function getEmbeddings(
});
}
console.log(`[embeddings] Complete. Generated ${allEmbeddings.length} embeddings using ${totalTokens} tokens`);
return {embeddings: allEmbeddings, tokens: totalTokens};
logDebug(`[embeddings] Complete. Generated ${allEmbeddings.length} embeddings using ${totalTokens} tokens`);
return { embeddings: allEmbeddings, tokens: totalTokens };
}
// Helper function to get embeddings for a batch with retry logic for missing indices
@ -83,7 +84,7 @@ async function getBatchEmbeddingsWithRetry(
let retryCount = 0;
let textsToProcess = [...batchTexts]; // Copy the original texts
let indexMap = new Map<number, number>(); // Map to keep track of original indices
// Initialize indexMap with original indices
textsToProcess.forEach((_, idx) => {
indexMap.set(idx, idx);
@ -115,18 +116,18 @@ async function getBatchEmbeddingsWithRetry(
"Authorization": `Bearer ${JINA_API_KEY}`
}
}
);
);
if (!response.data.data) {
console.error('No data returned from Jina API');
logError('No data returned from Jina API');
if (retryCount === MAX_RETRIES - 1) {
// On last retry, create placeholder embeddings
const dimensionSize = options.dimensions || 1024;
const placeholderEmbeddings = textsToProcess.map(text => {
console.error(`Failed to get embedding after all retries: [${truncateInputString(text)}...]`);
logError(`Failed to get embedding after all retries: [${truncateInputString(text)}...]`);
return new Array(dimensionSize).fill(0);
});
// Add embeddings in correct order
for (let i = 0; i < textsToProcess.length; i++) {
const originalIndex = indexMap.get(i)!;
@ -142,17 +143,17 @@ async function getBatchEmbeddingsWithRetry(
const receivedIndices = new Set(response.data.data.map(item => item.index));
const dimensionSize = response.data.data[0]?.embedding?.length || options.dimensions || 1024;
// Process successful embeddings
const successfulEmbeddings: number[][] = [];
const remainingTexts: (string | Record<string, string>)[] = [];
const newIndexMap = new Map<number, number>();
for (let idx = 0; idx < textsToProcess.length; idx++) {
if (receivedIndices.has(idx)) {
// Find the item with this index
const item = response.data.data.find(d => d.index === idx)!;
// Get the original index and store in the result array
const originalIndex = indexMap.get(idx)!;
while (batchEmbeddings.length <= originalIndex) {
@ -165,48 +166,48 @@ async function getBatchEmbeddingsWithRetry(
const newIndex = remainingTexts.length;
newIndexMap.set(newIndex, indexMap.get(idx)!);
remainingTexts.push(textsToProcess[idx]);
console.log(`Missing embedding for index ${idx}, will retry: [${truncateInputString(textsToProcess[idx])}...]`);
logWarning(`Missing embedding for index ${idx}, will retry: [${truncateInputString(textsToProcess[idx])}...]`);
}
}
// Add tokens
batchTokens += response.data.usage?.total_tokens || 0;
// Update for next iteration
textsToProcess = remainingTexts;
indexMap = newIndexMap;
// If all embeddings were successfully processed, break out of the loop
if (textsToProcess.length === 0) {
break;
}
// Increment retry count and log
retryCount++;
console.log(`[embeddings] Batch ${currentBatch}/${batchCount} - Retrying ${textsToProcess.length} texts (attempt ${retryCount}/${MAX_RETRIES})`);
logDebug(`[embeddings] Batch ${currentBatch}/${batchCount} - Retrying ${textsToProcess.length} texts (attempt ${retryCount}/${MAX_RETRIES})`);
} catch (error: any) {
console.error('Error calling Jina Embeddings API:', error);
logError('Error calling Jina Embeddings API:', { error });
if (error.response?.status === 402 || error.message.includes('InsufficientBalanceError') || error.message.includes('insufficient balance')) {
return { batchEmbeddings: [], batchTokens: 0 };
}
// On last retry, create placeholder embeddings
if (retryCount === MAX_RETRIES - 1) {
const dimensionSize = options.dimensions || 1024;
for (let idx = 0; idx < textsToProcess.length; idx++) {
const originalIndex = indexMap.get(idx)!;
console.error(`Failed to get embedding after all retries for index ${originalIndex}: [${truncateInputString(textsToProcess[idx])}...]`);
logError(`Failed to get embedding after all retries for index ${originalIndex}: [${truncateInputString(textsToProcess[idx])}...]`);
while (batchEmbeddings.length <= originalIndex) {
batchEmbeddings.push([]);
}
batchEmbeddings[originalIndex] = new Array(dimensionSize).fill(0);
}
}
retryCount++;
if (retryCount < MAX_RETRIES) {
console.log(`[embeddings] Batch ${currentBatch}/${batchCount} - Retry attempt ${retryCount}/${MAX_RETRIES} after error`);
logDebug(`[embeddings] Batch ${currentBatch}/${batchCount} - Retry attempt ${retryCount}/${MAX_RETRIES} after error`);
// Wait before retrying to avoid overwhelming the API
await new Promise(resolve => setTimeout(resolve, 1000));
} else {
@ -214,23 +215,23 @@ async function getBatchEmbeddingsWithRetry(
}
}
}
// Handle any remaining missing embeddings after max retries
if (textsToProcess.length > 0) {
console.error(`[embeddings] Failed to get embeddings for ${textsToProcess.length} texts after ${MAX_RETRIES} retries`);
logError(`[embeddings] Failed to get embeddings for ${textsToProcess.length} texts after ${MAX_RETRIES} retries`);
const dimensionSize = options.dimensions || 1024;
for (let idx = 0; idx < textsToProcess.length; idx++) {
const originalIndex = indexMap.get(idx)!;
console.error(`Creating zero embedding for index ${originalIndex} after all retries failed`);
logError(`Creating zero embedding for index ${originalIndex} after all retries failed`);
while (batchEmbeddings.length <= originalIndex) {
batchEmbeddings.push([]);
}
batchEmbeddings[originalIndex] = new Array(dimensionSize).fill(0);
}
}
return { batchEmbeddings, batchTokens };
}

View File

@ -1,6 +1,7 @@
import {ErrorAnalysisResponse, PromptPair, TrackerContext} from '../types';
import {ObjectGeneratorSafe} from "../utils/safe-generator";
import {Schemas} from "../utils/schemas";
import { ErrorAnalysisResponse, PromptPair, TrackerContext } from '../types';
import { ObjectGeneratorSafe } from "../utils/safe-generator";
import { Schemas } from "../utils/schemas";
import { logInfo, logError, logDebug, logWarning } from '../logging';
function getPrompt(diaryContext: string[]): PromptPair {
@ -81,7 +82,6 @@ The answer is not definitive and fails to provide the requested information. La
"blame": "The root cause of failure was getting stuck in a repetitive search pattern without adapting the strategy. Steps 4-5 repeated the same search, and step 6 deviated to less reliable entertainment sources instead of exploring business journals, news articles, or professional databases. Additionally, the process didn't attempt to triangulate age through indirect information like education history or career milestones.",
"improvement": "1. Avoid repeating identical searches and implement a strategy to track previously searched terms. 2. When direct age/birthdate searches fail, try indirect approaches like: searching for earliest career mentions, finding university graduation years, or identifying first company founding dates. 3. Focus on high-quality business sources and avoid entertainment websites for professional information. 4. Consider using industry event appearances or conference presentations where age-related context might be mentioned. 5. If exact age cannot be determined, provide an estimated range based on career timeline and professional achievements.",
}
</output>
</example>`,
@ -107,14 +107,14 @@ export async function analyzeSteps(
prompt: prompt.user
});
console.log(TOOL_NAME, result.object);
logInfo(TOOL_NAME, { object: result.object });
trackers?.actionTracker.trackThink(result.object.blame);
trackers?.actionTracker.trackThink(result.object.improvement);
return result.object as ErrorAnalysisResponse;
} catch (error) {
console.error(`Error in ${TOOL_NAME}`, error);
logError(`Error in ${TOOL_NAME}`, { error });
throw error;
}
}

View File

@ -1,8 +1,9 @@
import {GenerateObjectResult} from 'ai';
import {AnswerAction, EvaluationResponse, EvaluationType, KnowledgeItem, PromptPair, TrackerContext} from '../types';
import {ObjectGeneratorSafe} from "../utils/safe-generator";
import {Schemas} from "../utils/schemas";
import {getKnowledgeStr} from "../utils/text-tools";
import { GenerateObjectResult } from 'ai';
import { AnswerAction, EvaluationResponse, EvaluationType, KnowledgeItem, PromptPair, TrackerContext } from '../types';
import { ObjectGeneratorSafe } from "../utils/safe-generator";
import { Schemas } from "../utils/schemas";
import { getKnowledgeStr } from "../utils/text-tools";
import { logInfo, logError, logDebug, logWarning } from '../logging';
const TOOL_NAME = 'evaluator';
@ -572,7 +573,7 @@ export async function evaluateQuestion(
prompt: prompt.user
});
console.log('Question Evaluation:', result.object);
logInfo('Question Evaluation:', result.object);
// Always include definitive in types
const types: EvaluationType[] = [];
@ -581,14 +582,14 @@ export async function evaluateQuestion(
if (result.object.needsPlurality) types.push('plurality');
if (result.object.needsCompleteness) types.push('completeness');
console.log('Question Metrics:', question, types);
logInfo('Question Metrics:', { question, types });
trackers?.actionTracker.trackThink(result.object.think);
// Always evaluate definitive first, then freshness (if needed), then plurality (if needed)
return types;
} catch (error) {
console.error('Error in question evaluation:', error);
logError('Error in question evaluation:', { error });
// Default to no check
return [];
}
@ -611,7 +612,7 @@ async function performEvaluation<T>(
trackers.actionTracker.trackThink(result.object.think)
console.log(`${evaluationType} ${TOOL_NAME}`, result.object);
logInfo(`${evaluationType} ${TOOL_NAME}`, result.object);
return result;
}
@ -649,7 +650,7 @@ export async function evaluateAnswer(
prompt = getRejectAllAnswersPrompt(question, action, allKnowledge);
break;
default:
console.error(`Unknown evaluation type: ${evaluationType}`);
logError(`Unknown evaluation type: ${evaluationType}`);
}
if (prompt) {
result = await performEvaluation(

View File

@ -1,7 +1,8 @@
import { generateText } from 'ai';
import {getModel} from "../config";
import { getModel } from "../config";
import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
import {TokenTracker} from "../utils/token-tracker";
import { TokenTracker } from "../utils/token-tracker";
import { logInfo, logError, logDebug, logWarning } from '../logging';
const model = getModel('searchGrounding')
@ -10,7 +11,7 @@ export async function grounding(query: string, tracker?: TokenTracker): Promise<
const { text, experimental_providerMetadata, usage } = await generateText({
model,
prompt:
`Current date is ${new Date().toISOString()}. Find the latest answer to the following question:
`Current date is ${new Date().toISOString()}. Find the latest answer to the following question:
<query>
${query}
</query>
@ -18,8 +19,8 @@ Must include the date and time of the latest answer.`,
});
const metadata = experimental_providerMetadata?.google as
| GoogleGenerativeAIProviderMetadata
| undefined;
| GoogleGenerativeAIProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
// Extract and concatenate all groundingSupport text into a single line
@ -28,11 +29,11 @@ Must include the date and time of the latest answer.`,
.join(' ') || '';
(tracker || new TokenTracker()).trackUsage('grounding', usage);
console.log('Grounding:', {text, groundedText});
logInfo('Grounding:', { text, groundedText });
return text + '|' + groundedText;
} catch (error) {
console.error('Error in search:', error);
logError('Error in search:', { error });
throw error;
}
}

View File

@ -1,6 +1,7 @@
import { TokenTracker } from "../utils/token-tracker";
import { JINA_API_KEY } from "../config";
import axiosClient from "../utils/axios-client";
import { logInfo, logError, logDebug, logWarning } from '../logging';
const JINA_API_URL = 'https://api.jina.ai/v1/classify';
@ -80,9 +81,9 @@ export async function classifyText(
return false; // Default to false if no prediction is available
} catch (error) {
if (error instanceof Error && error.message.includes('timed out')) {
console.error('Classification request timed out:', error.message);
logError('Classification request timed out:', { error: error.message });
} else {
console.error('Error in classifying text:', error);
logError('Error in classifying text:', { error });
}
return false; // Default to false in case of error or timeout
}

View File

@ -0,0 +1,6 @@
import { logInfo, logError, logDebug, logWarning } from '../logging';
// Replace console.log statements
logInfo('Classification result:', { result });
logError('Classification error:', { error });

View File

@ -1,6 +1,7 @@
import {TokenTracker} from "../utils/token-tracker";
import {cosineSimilarity} from "./cosine";
import {getEmbeddings} from "./embeddings";
import { TokenTracker } from "../utils/token-tracker";
import { cosineSimilarity } from "./cosine";
import { getEmbeddings } from "./embeddings";
import { logInfo, logError, logDebug, logWarning } from '../logging';
const SIMILARITY_THRESHOLD = 0.86; // Adjustable threshold for cosine similarity
@ -20,7 +21,7 @@ export async function dedupQueries(
// Get embeddings for all queries in one batch
const allQueries = [...newQueries, ...existingQueries];
const {embeddings: allEmbeddings} = await getEmbeddings(allQueries, tracker);
const { embeddings: allEmbeddings } = await getEmbeddings(allQueries, tracker);
// If embeddings is empty (due to 402 error), return all new queries
if (!allEmbeddings.length) {
@ -66,12 +67,12 @@ export async function dedupQueries(
usedIndices.add(i);
}
}
console.log('Dedup:', uniqueQueries);
logInfo('Unique queries:', { queries: uniqueQueries });
return {
unique_queries: uniqueQueries,
};
} catch (error) {
console.error('Error in deduplication analysis:', error);
logError('Deduplication error:', { error });
// return all new queries if there is an error
return {

View File

@ -1,7 +1,8 @@
import {TrackerContext} from "../types";
import {Schemas} from "../utils/schemas";
import {cosineSimilarity} from "./cosine";
import {getEmbeddings} from "./embeddings";
import { TrackerContext } from "../types";
import { Schemas } from "../utils/schemas";
import { cosineSimilarity } from "./cosine";
import { getEmbeddings } from "./embeddings";
import { logInfo, logError, logDebug, logWarning } from '../logging';
// Refactored cherryPick function
export async function cherryPick(question: string, longContext: string, options: any = {}, trackers: TrackerContext, schemaGen: Schemas, url: string) {
@ -13,7 +14,7 @@ export async function cherryPick(question: string, longContext: string, options:
if (longContext.length < snippetLength * 2) {
// If the context is shorter than the snippet length, return the whole context
console.log('content is too short, dont bother');
logInfo('content is too short, dont bother');
return longContext;
}
@ -23,9 +24,9 @@ export async function cherryPick(question: string, longContext: string, options:
chunks.push(longContext.substring(i, Math.min(i + chunkSize, longContext.length)));
}
console.log('late chunking enabled! num chunks:', chunks.length);
logInfo('late chunking enabled! num chunks:', { count: chunks.length });
trackers.actionTracker.trackThink('late_chunk', schemaGen.languageCode, {url});
trackers.actionTracker.trackThink('late_chunk', schemaGen.languageCode, { url });
try {
if (question.trim().length === 0) {
@ -61,7 +62,7 @@ export async function cherryPick(question: string, longContext: string, options:
// Verify that we got embeddings for all chunks
if (allChunkEmbeddings.length !== chunks.length) {
console.error(`Got ${allChunkEmbeddings.length} embeddings for ${chunks.length} chunks`);
logError(`Got ${allChunkEmbeddings.length} embeddings for ${chunks.length} chunks`);
}
// Calculate cosine similarity between the question and each chunk
@ -115,7 +116,7 @@ ${snippet}
</snippet-${index + 1}>`.trim()).join("\n\n");
} catch (error) {
console.error('Error in late chunking:', error);
logError('Error in late chunking:', { error });
// Fallback: just return the beginning of the context up to the desired length
return longContext.substring(0, snippetLength * numSnippets);
}

View File

@ -1,6 +1,7 @@
import {TokenTracker} from "../utils/token-tracker";
import {JINA_API_KEY} from "../config";
import { TokenTracker } from "../utils/token-tracker";
import { JINA_API_KEY } from "../config";
import axiosClient from '../utils/axios-client';
import { logInfo, logError, logDebug, logWarning } from '../logging';
const JINA_API_URL = 'https://api.jina.ai/v1/rerank';
@ -43,7 +44,7 @@ export async function rerankDocuments(
batches.push(documents.slice(i, i + batchSize));
}
console.log(`Rerank ${documents.length} documents in ${batches.length} batches of up to ${batchSize} each`);
logInfo(`Processing ${documents.length} documents in ${batches.length} batches`);
// Process all batches in parallel
const batchResults = await Promise.all(
@ -93,9 +94,9 @@ export async function rerankDocuments(
document: result.document
}));
return {results: finalResults};
return { results: finalResults };
} catch (error) {
console.error('Error in reranking documents:', error);
logError('Reranking error:', { error });
// Return empty results if there is an error
return {

View File

@ -2,6 +2,7 @@ import { TokenTracker } from "../utils/token-tracker";
import { JinaSearchResponse, SERPQuery } from '../types';
import { JINA_API_KEY } from "../config";
import axiosClient from '../utils/axios-client';
import { logInfo, logError, logDebug, logWarning } from '../logging';
export async function search(
query: SERPQuery,
@ -35,7 +36,7 @@ export async function search(
throw new Error('Invalid response format');
}
console.log('Search results meta:', data.meta);
logInfo('Search results metadata:', { metadata: data.meta });
const tokenTracker = tracker || new TokenTracker();
tokenTracker.trackUsage('search', {
@ -46,7 +47,7 @@ export async function search(
return { response: data };
} catch (error) {
console.error('Error in jina search:', error);
logError('Search error:', { error });
throw error;
}
}

View File

@ -3,6 +3,7 @@ import { getKnowledgeStr } from "../utils/text-tools";
import { getModel } from "../config";
import { generateText } from "ai";
import { Schemas } from "../utils/schemas";
import { logInfo, logError, logDebug, logWarning } from '../logging';
function getPrompt(mdContent: string, allKnowledge: KnowledgeItem[], schema: Schemas): PromptPair {
@ -80,18 +81,18 @@ export async function reviseAnswer(
trackers.tokenTracker.trackUsage(TOOL_NAME, result.usage)
console.log(TOOL_NAME, result.text);
console.log('repaired before/after', mdContent.length, result.text.length);
logInfo(TOOL_NAME, { text: result.text });
logInfo('repaired before/after', { before: mdContent.length, after: result.text.length });
if (result.text.length < mdContent.length * 0.85) {
console.error(`repaired content length ${result.text.length} is significantly shorter than original content ${mdContent.length}, return original content instead.`);
logError(`repaired content length ${result.text.length} is significantly shorter than original content ${mdContent.length}, return original content instead.`);
return mdContent;
}
return result.text;
} catch (error) {
console.error(`Error in ${TOOL_NAME}`, error);
logError(`Error in ${TOOL_NAME}`, { error });
return mdContent;
}
}

View File

@ -1,6 +1,7 @@
import {PromptPair, SearchAction, SERPQuery, TrackerContext} from '../types';
import {ObjectGeneratorSafe} from "../utils/safe-generator";
import {Schemas} from "../utils/schemas";
import { PromptPair, SearchAction, SERPQuery, TrackerContext } from '../types';
import { ObjectGeneratorSafe } from "../utils/safe-generator";
import { Schemas } from "../utils/schemas";
import { logInfo, logError, logDebug, logWarning } from '../logging';
function getPrompt(query: string, think: string, context: string): PromptPair {
@ -200,7 +201,7 @@ Given those info, now please generate the best effective queries that follow JSO
}
const TOOL_NAME = 'queryRewriter';
export async function rewriteQuery(action: SearchAction, context: string, trackers: TrackerContext, schemaGen: Schemas): Promise<SERPQuery[] > {
export async function rewriteQuery(action: SearchAction, context: string, trackers: TrackerContext, schemaGen: Schemas): Promise<SERPQuery[]> {
try {
const generator = new ObjectGeneratorSafe(trackers.tokenTracker);
const queryPromises = action.searchRequests.map(async (req) => {
@ -217,10 +218,10 @@ export async function rewriteQuery(action: SearchAction, context: string, track
const queryResults = await Promise.all(queryPromises);
const allQueries: SERPQuery[] = queryResults.flat();
console.log(TOOL_NAME, allQueries);
logInfo(TOOL_NAME, { queries: allQueries });
return allQueries;
} catch (error) {
console.error(`Error in ${TOOL_NAME}`, error);
logError('Query rewrite error:', { error });
throw error;
}
}

View File

@ -2,6 +2,7 @@ import { TokenTracker } from "../utils/token-tracker";
import { ReadResponse } from '../types';
import { JINA_API_KEY } from "../config";
import axiosClient from "../utils/axios-client";
import { logInfo, logError, logDebug, logWarning } from '../logging';
export async function readUrl(
url: string,
@ -50,7 +51,7 @@ export async function readUrl(
throw new Error('Invalid response data');
}
console.log('Read:', {
logInfo('Read:', {
title: data.data.title,
url: data.data.url,
tokens: data.data.usage?.tokens || 0
@ -66,7 +67,7 @@ export async function readUrl(
return { response: data };
} catch (error: any) {
console.error(`Error reading URL: ${error.message}`);
logError(`Error reading URL: ${error.message}`);
throw error;
}
}

View File

@ -1,7 +1,8 @@
import {TokenTracker} from "../utils/token-tracker";
import {JINA_API_KEY} from "../config";
import {TrackerContext} from "../types";
import { TokenTracker } from "../utils/token-tracker";
import { JINA_API_KEY } from "../config";
import { TrackerContext } from "../types";
import axiosClient from "../utils/axios-client";
import { logInfo, logError, logDebug, logWarning } from '../logging';
export async function segmentText(
content: string,
@ -24,7 +25,7 @@ export async function segmentText(
// Split content into batches
const batches = splitTextIntoBatches(content, MAX_BATCH_SIZE);
console.log(`Split content into ${batches.length} batches`);
logDebug(`Processing ${batches.length} batches`);
// Calculate offsets for each batch upfront
const batchOffsets: number[] = [];
@ -36,10 +37,10 @@ export async function segmentText(
// Process all batches in parallel
const batchPromises = batches.map(async (batch, i) => {
console.log(`[Segment] Processing batch ${i + 1}/${batches.length} (size: ${batch.length})`);
logDebug(`[Segment] Processing batch ${i + 1}/${batches.length} (size: ${batch.length})`);
try {
const {data} = await axiosClient.post(
const { data } = await axiosClient.post(
'https://api.jina.ai/v1/segment',
{
content: batch,
@ -60,7 +61,7 @@ export async function segmentText(
throw new Error('Invalid response data');
}
console.log(`Batch ${i + 1} result:`, {
logDebug(`Batch ${i + 1} result:`, {
numChunks: data.num_chunks,
numTokens: data.num_tokens,
tokenizer: data.tokenizer
@ -72,11 +73,11 @@ export async function segmentText(
// Adjust chunk positions to account for the offset of this batch
const adjustedPositions = data.chunk_positions
? data.chunk_positions.map((position: [number, number]) => {
return [
position[0] + offset,
position[1] + offset
] as [number, number];
})
return [
position[0] + offset,
position[1] + offset
] as [number, number];
})
: [];
return {
@ -85,7 +86,7 @@ export async function segmentText(
tokens: data.usage?.tokens || 0
};
} catch (error: any) {
console.error(`Error processing batch ${i + 1}: ${error.message}`);
logError(`Error processing batch ${i + 1}: ${error.message}`);
throw error;
}
});

View File

@ -1,7 +1,8 @@
import {SERPER_API_KEY} from "../config";
import { SERPER_API_KEY } from "../config";
import axiosClient from "../utils/axios-client";
import { logInfo, logError, logDebug, logWarning } from '../logging';
import {SerperSearchResponse, SERPQuery} from '../types';
import { SerperSearchResponse, SERPQuery } from '../types';
export async function serperSearch(query: SERPQuery): Promise<{ response: SerperSearchResponse }> {
@ -21,7 +22,7 @@ export async function serperSearch(query: SERPQuery): Promise<{ response: Serper
}
// Maintain the same return structure as the original code
return {response: response.data};
return { response: response.data };
}
@ -42,5 +43,5 @@ export async function serperSearchOld(query: string): Promise<{ response: Serper
}
// Maintain the same return structure as the original code
return {response: response.data};
return { response: response.data };
}

View File

@ -0,0 +1,5 @@
import { logInfo, logError, logDebug, logWarning } from '../logging';
logInfo('Token usage:', { usage });
logError('Token tracking error:', { error });

6
src/tools/url-tools.ts Normal file
View File

@ -0,0 +1,6 @@
import { logInfo, logError, logDebug, logWarning } from '../logging';
// Replace console.log statements
logInfo('URL info:', { url });
logError('URL error:', { error });

5
src/tools/web-search.ts Normal file
View File

@ -0,0 +1,5 @@
import { logInfo, logError, logDebug, logWarning } from '../logging';
logInfo('Search info:', { query });
logError('Search error:', { error });

View File

@ -1,11 +1,12 @@
import axios, { AxiosRequestConfig } from 'axios';
import axios, { AxiosRequestConfig } from 'axios';
import { logInfo, logError, logDebug, logWarning } from '../logging';
// Default timeout in milliseconds
const DEFAULT_TIMEOUT = 30000;
// Maximum content length to prevent OOM issues (10MB)
const MAX_CONTENT_LENGTH = 10 * 1024 * 1024;
const MAX_CONTENT_LENGTH = 10 * 1024 * 1024;
// Maximum number of redirects to follow
const MAX_REDIRECTS = 5;
@ -21,12 +22,12 @@ const KEEP_ALIVE_TIMEOUT = 30000;
// Scheduling strategy for HTTP/2 connections
// LIFO (Last In, First Out) is generally better for performance
const SCHEDULING = 'lifo';
// Base configuration for all axios instances
const baseConfig: AxiosRequestConfig = {
timeout: DEFAULT_TIMEOUT,
maxContentLength: MAX_CONTENT_LENGTH,
maxRedirects: MAX_REDIRECTS,
const baseConfig: AxiosRequestConfig = {
timeout: DEFAULT_TIMEOUT,
maxContentLength: MAX_CONTENT_LENGTH,
maxRedirects: MAX_REDIRECTS,
httpsAgent: new (require('https').Agent)({
maxSockets: MAX_SOCKETS,
maxFreeSockets: MAX_FREE_SOCKETS,
@ -41,40 +42,40 @@ const baseConfig: AxiosRequestConfig = {
timeout: KEEP_ALIVE_TIMEOUT,
scheduling: SCHEDULING,
}),
headers: {
'Accept': 'application/json',
'Content-Type': 'application/json',
headers: {
'Accept': 'application/json',
'Content-Type': 'application/json',
},
};
// Create a single axios instance with the base configuration
const axiosClient = axios.create(baseConfig);
const axiosClient = axios.create(baseConfig);
// Add response interceptor for consistent error handling
axiosClient.interceptors.response.use(
(response) => response,
axiosClient.interceptors.response.use(
(response) => response,
(error) => {
if (error.code === 'ECONNABORTED') {
console.error('Request timed out:', error.message);
logError('Request timed out:', { error: error.message });
error.request?.destroy?.();
}
if (axios.isAxiosError(error)) {
if (error.response) {
const status = error.response.status;
const errorData = error.response.data as any;
if (status === 402) {
throw new Error(errorData?.readableMessage || 'Insufficient balance');
}
throw new Error(errorData?.readableMessage || `HTTP Error ${status}`);
} else if (error.request) {
throw new Error(`No response received from server`);
} else {
throw new Error(`Request failed: ${error.message}`);
if (axios.isAxiosError(error)) {
if (error.response) {
const status = error.response.status;
const errorData = error.response.data as any;
if (status === 402) {
throw new Error(errorData?.readableMessage || 'Insufficient balance');
}
throw new Error(errorData?.readableMessage || `HTTP Error ${status}`);
} else if (error.request) {
throw new Error(`No response received from server`);
} else {
throw new Error(`Request failed: ${error.message}`);
}
}
throw error;
throw error;
}
);
);
export default axiosClient;

View File

@ -3,20 +3,21 @@ import { getEmbeddings } from '../tools/embeddings';
import { TokenTracker } from './token-tracker';
import { ImageObject } from '../types';
import { cosineSimilarity } from '../tools/cosine';
import { logInfo, logError, logDebug, logWarning } from '../logging';
export type { Canvas, Image } from '@napi-rs/canvas';
export const downloadFile = async (uri: string) => {
const resp = await fetch(uri);
if (!(resp.ok && resp.body)) {
throw new Error(`Unexpected response ${resp.statusText}`);
}
const contentLength = parseInt(resp.headers.get('content-length') || '0');
if (contentLength > 1024 * 1024 * 100) {
throw new Error('File too large');
}
const buff = await resp.arrayBuffer();
const resp = await fetch(uri);
if (!(resp.ok && resp.body)) {
throw new Error(`Unexpected response ${resp.statusText}`);
}
const contentLength = parseInt(resp.headers.get('content-length') || '0');
if (contentLength > 1024 * 1024 * 100) {
throw new Error('File too large');
}
const buff = await resp.arrayBuffer();
return { buff, contentType: resp.headers.get('content-type') };
return { buff, contentType: resp.headers.get('content-type') };
};
const _loadImage = async (input: string | Buffer) => {
@ -24,81 +25,81 @@ const _loadImage = async (input: string | Buffer) => {
let contentType;
if (typeof input === 'string') {
if (input.startsWith('data:')) {
const firstComma = input.indexOf(',');
const header = input.slice(0, firstComma);
const data = input.slice(firstComma + 1);
const encoding = header.split(';')[1];
contentType = header.split(';')[0].split(':')[1];
if (encoding?.startsWith('base64')) {
buff = Buffer.from(data, 'base64');
} else {
buff = Buffer.from(decodeURIComponent(data), 'utf-8');
}
if (input.startsWith('data:')) {
const firstComma = input.indexOf(',');
const header = input.slice(0, firstComma);
const data = input.slice(firstComma + 1);
const encoding = header.split(';')[1];
contentType = header.split(';')[0].split(':')[1];
if (encoding?.startsWith('base64')) {
buff = Buffer.from(data, 'base64');
} else {
buff = Buffer.from(decodeURIComponent(data), 'utf-8');
}
if (input.startsWith('http')) {
if (input.endsWith('.svg')) {
throw new Error('Unsupported image type');
}
const r = await downloadFile(input);
buff = Buffer.from(r.buff);
contentType = r.contentType;
}
if (input.startsWith('http')) {
if (input.endsWith('.svg')) {
throw new Error('Unsupported image type');
}
const r = await downloadFile(input);
buff = Buffer.from(r.buff);
contentType = r.contentType;
}
}
if (!buff) {
throw new Error('Invalid input');
throw new Error('Invalid input');
}
const img = await canvas.loadImage(buff).catch((err) => {
console.error('Error loading image:', err);
logError('Error loading image:', { error: err });
return undefined;
});
return img;
}
export const loadImage = async (uri: string | Buffer) => {
try {
const theImage = await _loadImage(uri);
try {
const theImage = await _loadImage(uri);
return theImage;
} catch (err: any) {
if (err?.message?.includes('Unsupported image type') || err?.message?.includes('unsupported')) {
throw new Error(`Unknown image format for ${uri.slice(0, 128)}`);
}
throw err;
return theImage;
} catch (err: any) {
if (err?.message?.includes('Unsupported image type') || err?.message?.includes('unsupported')) {
throw new Error(`Unknown image format for ${uri.slice(0, 128)}`);
}
throw err;
}
}
export const fitImageToSquareBox = (image: canvas.Image | canvas.Canvas, size: number = 1024) => {
if (image.width <= size && image.height <= size) {
const canvasInstance = canvas.createCanvas(image.width, image.height);
const ctx = canvasInstance.getContext('2d');
ctx.drawImage(image, 0, 0, image.width, image.height, 0, 0, canvasInstance.width, canvasInstance.height);
return canvasInstance;
}
const aspectRatio = image.width / image.height;
const resizedWidth = Math.round(aspectRatio > 1 ? size : size * aspectRatio);
const resizedHeight = Math.round(aspectRatio > 1 ? size / aspectRatio : size);
const canvasInstance = canvas.createCanvas(resizedWidth, resizedHeight);
if (image.width <= size && image.height <= size) {
const canvasInstance = canvas.createCanvas(image.width, image.height);
const ctx = canvasInstance.getContext('2d');
ctx.drawImage(image, 0, 0, image.width, image.height, 0, 0, resizedWidth, resizedHeight);
ctx.drawImage(image, 0, 0, image.width, image.height, 0, 0, canvasInstance.width, canvasInstance.height);
return canvasInstance;
}
const aspectRatio = image.width / image.height;
const resizedWidth = Math.round(aspectRatio > 1 ? size : size * aspectRatio);
const resizedHeight = Math.round(aspectRatio > 1 ? size / aspectRatio : size);
const canvasInstance = canvas.createCanvas(resizedWidth, resizedHeight);
const ctx = canvasInstance.getContext('2d');
ctx.drawImage(image, 0, 0, image.width, image.height, 0, 0, resizedWidth, resizedHeight);
return canvasInstance;
}
export const canvasToDataUrl = (canvas: canvas.Canvas, mimeType?: 'image/png' | 'image/jpeg') => {
return canvas.toDataURLAsync((mimeType || 'image/png') as 'image/png');
return canvas.toDataURLAsync((mimeType || 'image/png') as 'image/png');
}
export const canvasToBuffer = (canvas: canvas.Canvas, mimeType?: 'image/png' | 'image/jpeg') => {
return canvas.toBuffer((mimeType || 'image/png') as 'image/png');
return canvas.toBuffer((mimeType || 'image/png') as 'image/png');
}
export const processImage = async (url: string, tracker: TokenTracker): Promise<ImageObject | undefined> => {
@ -117,7 +118,7 @@ export const processImage = async (url: string, tracker: TokenTracker): Promise<
const base64Data = (await canvasToDataUrl(canvas)).split(',')[1];
img.src = ''; // Clear the image source to free memory
const {embeddings} = await getEmbeddings([{ image: base64Data }], tracker, {
const { embeddings } = await getEmbeddings([{ image: base64Data }], tracker, {
dimensions: 512,
model: 'jina-clip-v2',
});
@ -136,7 +137,7 @@ export const dedupImagesWithEmbeddings = (
newImages: ImageObject[], // New images with embeddings
existingImages: ImageObject[], // Existing images with embeddings
similarityThreshold: number = 0.86, // Default similarity threshold
): ImageObject[] =>{
): ImageObject[] => {
try {
// Quick return for single new image with no existing images
if (newImages.length === 1 && existingImages.length === 0) {
@ -185,7 +186,7 @@ export const dedupImagesWithEmbeddings = (
return uniqueImages;
} catch (error) {
console.error('Error in image deduplication analysis:', error);
logError('Error in image deduplication analysis:', { error });
// Return all new images if there is an error
return newImages;

View File

@ -1,4 +1,4 @@
import {z} from 'zod';
import { z } from 'zod';
import {
CoreMessage,
generateObject,
@ -6,9 +6,10 @@ import {
NoObjectGeneratedError,
Schema
} from "ai";
import {TokenTracker} from "./token-tracker";
import {getModel, ToolName, getToolConfig} from "../config";
import { TokenTracker } from "./token-tracker";
import { getModel, ToolName, getToolConfig } from "../config";
import Hjson from 'hjson'; // Import Hjson library
import { logInfo, logError, logDebug, logWarning } from '../logging';
interface GenerateObjectResult<T> {
object: T;
@ -168,7 +169,7 @@ export class ObjectGeneratorSafe {
} catch (parseError) {
if (numRetries > 0) {
console.error(`${model} failed on object generation -> manual parsing failed -> retry with ${numRetries - 1} retries remaining`);
logError(`${model} failed on object generation -> manual parsing failed -> retry with ${numRetries - 1} retries remaining`);
return this.generateObject({
model,
schema,
@ -179,7 +180,7 @@ export class ObjectGeneratorSafe {
});
} else {
// Second fallback: Try with fallback model if provided
console.error(`${model} failed on object generation -> manual parsing failed -> trying fallback with distilled schema`);
logError(`${model} failed on object generation -> manual parsing failed -> trying fallback with distilled schema`);
try {
let failedOutput = '';
@ -200,7 +201,7 @@ export class ObjectGeneratorSafe {
});
this.tokenTracker.trackUsage('fallback', fallbackResult.usage); // Track against fallback model
console.log('Distilled schema parse success!');
logInfo('Distilled schema parse success!');
return fallbackResult;
} catch (fallbackError) {
// If fallback model also fails, try parsing its error response
@ -209,7 +210,7 @@ export class ObjectGeneratorSafe {
this.tokenTracker.trackUsage('fallback', lastChanceResult.usage);
return lastChanceResult;
} catch (finalError) {
console.error(`All recovery mechanisms failed`);
logError(`All recovery mechanisms failed`);
throw error; // Throw original error for better debugging
}
}
@ -220,11 +221,11 @@ export class ObjectGeneratorSafe {
private async handleGenerateObjectError<T>(error: unknown): Promise<GenerateObjectResult<T>> {
if (NoObjectGeneratedError.isInstance(error)) {
console.error('Object not generated according to schema, fallback to manual parsing');
logError('Object not generated according to schema, fallback to manual parsing');
try {
// First try standard JSON parsing
const partialResponse = JSON.parse((error as any).text);
console.log('JSON parse success!')
logInfo('JSON parse success!');
return {
object: partialResponse as T,
usage: (error as any).usage
@ -233,13 +234,13 @@ export class ObjectGeneratorSafe {
// Use Hjson to parse the error response for more lenient parsing
try {
const hjsonResponse = Hjson.parse((error as any).text);
console.log('Hjson parse success!')
logInfo('Hjson parse success!');
return {
object: hjsonResponse as T,
usage: (error as any).usage
};
} catch (hjsonError) {
console.error('Both JSON and Hjson parsing failed:', hjsonError);
logError('Both JSON and Hjson parsing failed:', { error: hjsonError });
throw error;
}
}

View File

@ -1,6 +1,7 @@
import { z } from "zod";
import { ObjectGeneratorSafe } from "./safe-generator";
import { EvaluationType, PromptPair } from "../types";
import { logInfo, logError, logDebug, logWarning } from '../logging';
export const MAX_URLS_PER_STEP = 5
export const MAX_QUERIES_PER_STEP = 5
@ -117,7 +118,7 @@ export class Schemas {
this.languageCode = result.object.langCode;
this.languageStyle = result.object.langStyle;
console.log(`langauge`, result.object);
logInfo(`language`, { object: result.object });
}
getLanguagePrompt() {
@ -162,7 +163,7 @@ export class Schemas {
queries: z.array(
z.object({
tbs: z.enum(['qdr:h', 'qdr:d', 'qdr:w', 'qdr:m', 'qdr:y']).describe('time-based search filter, must use this field if the search request asks for latest info. qdr:h for past hour, qdr:d for past 24 hours, qdr:w for past week, qdr:m for past month, qdr:y for past year. Choose exactly one.'),
location: z.string().describe('defines from where you want the search to originate. It is recommended to specify location at the city level in order to simulate a real users search.').optional(),
location: z.string().describe('defines from where you want the search to originate. It is recommended to specify location at the city level in order to simulate a real user\'s search.').optional(),
q: z.string().describe(`keyword-based search query, 2-3 words preferred, total length < 30 characters. ${this.searchLanguageCode ? `Must in ${this.searchLanguageCode}` : ''}`).max(50),
}))
.max(MAX_QUERIES_PER_STEP)

View File

@ -1,7 +1,8 @@
import {AnswerAction, KnowledgeItem, Reference} from "../types";
import { AnswerAction, KnowledgeItem, Reference } from "../types";
import i18nJSON from './i18n.json';
import {JSDOM} from 'jsdom';
import { JSDOM } from 'jsdom';
import fs from "fs/promises";
import { logInfo, logError, logDebug, logWarning } from '../logging';
export function buildMdFromAnswer(answer: AnswerAction): string {
@ -96,7 +97,7 @@ export function repairMarkdownFootnotes(
// No footnotes in answer but we have references - append them at the end
if (validFootnotes.length === 0) {
const appendedCitations = Array.from(
{length: references.length},
{ length: references.length },
(_, i) => `[^${i + 1}]`
).join('');
@ -124,7 +125,7 @@ ${formattedReferences}
// Create citations for unused references
const unusedReferences = Array.from(
{length: references.length},
{ length: references.length },
(_, i) => !usedIndices.has(i + 1) ? `[^${i + 1}]` : ''
).join('');
@ -260,7 +261,7 @@ export function getI18nText(key: string, lang = 'en', params: Record<string, str
const i18nData = i18nJSON as Record<string, any>;
// 确保语言代码存在,如果不存在则使用英语作为后备
if (!i18nData[lang]) {
console.error(`Language '${lang}' not found, falling back to English.`);
logError(`Language '${lang}' not found, falling back to English.`);
lang = 'en';
}
@ -269,12 +270,12 @@ export function getI18nText(key: string, lang = 'en', params: Record<string, str
// 如果文本不存在,则使用英语作为后备
if (!text) {
console.error(`Key '${key}' not found for language '${lang}', falling back to English.`);
logError(`Key '${key}' not found for language '${lang}', falling back to English.`);
text = i18nData['en'][key];
// 如果英语版本也不存在,则返回键名
if (!text) {
console.error(`Key '${key}' not found for English either.`);
logError(`Key '${key}' not found for English either.`);
return key;
}
}
@ -363,7 +364,7 @@ export function fixCodeBlockIndentation(markdownText: string): string {
}
}
codeBlockStack.push({indent, language: restOfLine, listIndent});
codeBlockStack.push({ indent, language: restOfLine, listIndent });
result.push(line);
} else {
// This is a closing code fence
@ -484,7 +485,7 @@ export function convertHtmlTablesToMd(mdString: string): string {
return result;
} catch (error) {
console.error('Error converting HTML tables to Markdown:', error);
logError('Error converting HTML tables to Markdown:', { error });
return mdString; // Return original string if conversion fails
}
}
@ -625,7 +626,7 @@ function convertSingleHtmlTableToMd(htmlTable: string): string | null {
return mdTable;
} catch (error) {
console.error('Error converting single HTML table:', error);
logError('Error converting single HTML table:', { error });
return null;
}
}
@ -661,7 +662,7 @@ function sanitizeCell(content: string): string {
if (typeof window === 'undefined') {
global.DOMParser = class DOMParser {
parseFromString(htmlString: string, mimeType: string) {
const dom = new JSDOM(htmlString, {contentType: mimeType});
const dom = new JSDOM(htmlString, { contentType: mimeType });
return dom.window.document;
}
};
@ -821,5 +822,5 @@ export async function detectBrokenUnicodeViaFileIO(str: string) {
await fs.unlink(tempFilePath);
// Now check for the visible replacement character
return {broken: readStr.includes('<27>'), readStr};
return { broken: readStr.includes('<27>'), readStr };
}

View File

@ -1,7 +1,8 @@
import {EventEmitter} from 'events';
import { EventEmitter } from 'events';
import {TokenUsage} from '../types';
import {LanguageModelUsage} from "ai";
import { TokenUsage } from '../types';
import { LanguageModelUsage } from "ai";
import { logInfo, logError, logDebug, logWarning } from '../logging';
export class TokenTracker extends EventEmitter {
private usages: TokenUsage[] = [];
@ -23,31 +24,31 @@ export class TokenTracker extends EventEmitter {
}
trackUsage(tool: string, usage: LanguageModelUsage) {
const u = {tool, usage};
const u = { tool, usage };
this.usages.push(u);
this.emit('usage', usage);
}
getTotalUsage(): LanguageModelUsage {
return this.usages.reduce((acc, {usage}) => {
return this.usages.reduce((acc, { usage }) => {
acc.promptTokens += usage.promptTokens;
acc.completionTokens += usage.completionTokens;
acc.totalTokens += usage.totalTokens;
return acc;
}, {promptTokens: 0, completionTokens: 0, totalTokens: 0});
}, { promptTokens: 0, completionTokens: 0, totalTokens: 0 });
}
getTotalUsageSnakeCase(): {prompt_tokens: number, completion_tokens: number, total_tokens: number} {
return this.usages.reduce((acc, {usage}) => {
getTotalUsageSnakeCase(): { prompt_tokens: number, completion_tokens: number, total_tokens: number } {
return this.usages.reduce((acc, { usage }) => {
acc.prompt_tokens += usage.promptTokens;
acc.completion_tokens += usage.completionTokens;
acc.total_tokens += usage.totalTokens;
return acc;
}, {prompt_tokens: 0, completion_tokens: 0, total_tokens: 0});
}, { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 });
}
getUsageBreakdown(): Record<string, number> {
return this.usages.reduce((acc, {tool, usage}) => {
return this.usages.reduce((acc, { tool, usage }) => {
acc[tool] = (acc[tool] || 0) + usage.totalTokens;
return acc;
}, {} as Record<string, number>);
@ -56,7 +57,7 @@ export class TokenTracker extends EventEmitter {
printSummary() {
const breakdown = this.getUsageBreakdown();
console.log('Token Usage Summary:', {
logInfo('Token Usage Summary:', {
budget: this.budget,
total: this.getTotalUsage(),
breakdown

View File

@ -1,14 +1,15 @@
import {BoostedSearchSnippet, ImageObject, KnowledgeItem, SearchSnippet, TrackerContext, VisitAction, WebContent} from "../types";
import {getI18nText, smartMergeStrings} from "./text-tools";
import {rerankDocuments} from "../tools/jina-rerank";
import {readUrl} from "../tools/read";
import {Schemas} from "./schemas";
import {cherryPick} from "../tools/jina-latechunk";
import {formatDateBasedOnType} from "./date-tools";
import {classifyText} from "../tools/jina-classify-spam";
import { BoostedSearchSnippet, ImageObject, KnowledgeItem, SearchSnippet, TrackerContext, VisitAction, WebContent } from "../types";
import { getI18nText, smartMergeStrings } from "./text-tools";
import { rerankDocuments } from "../tools/jina-rerank";
import { readUrl } from "../tools/read";
import { Schemas } from "./schemas";
import { cherryPick } from "../tools/jina-latechunk";
import { formatDateBasedOnType } from "./date-tools";
import { classifyText } from "../tools/jina-classify-spam";
import { processImage } from "./image-tools";
import {segmentText} from "../tools/segment";
import { segmentText } from "../tools/segment";
import axiosClient from "./axios-client";
import { logInfo, logError, logDebug, logWarning } from '../logging';
export function normalizeUrl(urlString: string, debug = false, options = {
removeAnchors: true,
@ -68,7 +69,9 @@ export function normalizeUrl(urlString: string, debug = false, options = {
try {
return decodeURIComponent(segment);
} catch (e) {
if (debug) console.error(`Failed to decode path segment: ${segment}`, e);
if (debug) {
logDebug(`Failed to decode path segment: ${segment}`, { error: e });
}
return segment;
}
})
@ -87,7 +90,9 @@ export function normalizeUrl(urlString: string, debug = false, options = {
return [key, decodedValue];
}
} catch (e) {
if (debug) console.error(`Failed to decode query param ${key}=${value}`, e);
if (debug) {
logDebug(`Failed to decode query param ${key}=${value}`, { error: e });
}
}
return [key, value];
})
@ -132,7 +137,9 @@ export function normalizeUrl(urlString: string, debug = false, options = {
url.hash = '#' + decodedHash;
}
} catch (e) {
if (debug) console.error(`Failed to decode fragment: ${url.hash}`, e);
if (debug) {
logDebug(`Failed to decode fragment: ${url.hash}`, { error: e });
}
}
}
@ -152,13 +159,15 @@ export function normalizeUrl(urlString: string, debug = false, options = {
normalizedUrl = decodedUrl;
}
} catch (e) {
if (debug) console.error('Failed to decode final URL', e);
if (debug) {
logDebug('Failed to decode final URL', { error: e });
}
}
return normalizedUrl;
} catch (error) {
// Main URL parsing error - this one we should throw
console.error(`Invalid URL "${urlString}": ${error}`);
logWarning(`Invalid URL "${urlString}": ${error}`);
return;
}
}
@ -179,7 +188,7 @@ const extractUrlParts = (urlStr: string) => {
path: url.pathname
};
} catch (e) {
console.error(`Error parsing URL: ${urlStr}`, e);
logError(`Error parsing URL: ${urlStr}`, { error: e });
return { hostname: "", path: "" };
}
};
@ -261,7 +270,7 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers:
// Step 2: Rerank only the unique contents
const uniqueContents = Object.keys(uniqueContentMap);
const uniqueIndicesMap = Object.values(uniqueContentMap);
console.log(`rerank URLs: ${urlItems.length}->${uniqueContents.length}`)
logInfo(`rerank URLs: ${urlItems.length}->${uniqueContents.length}`);
rerankDocuments(question, uniqueContents, trackers.tokenTracker)
.then(({ results }) => {
// Step 3: Map the scores back to all original items
@ -280,7 +289,7 @@ export const rankURLs = (urlItems: SearchSnippet[], options: any = {}, trackers:
return (urlItems as BoostedSearchSnippet[]).map(item => {
if (!item || !item.url) {
console.error('Skipping invalid item:', item);
logError('Skipping invalid item:', { item });
return item; // Return unchanged
}
@ -421,7 +430,7 @@ export async function getLastModified(url: string): Promise<string | undefined>
return undefined;
} catch (error) {
console.error('Failed to fetch last modified date:');
logError('Failed to fetch last modified date:');
return undefined;
}
}
@ -494,11 +503,11 @@ export async function processURLs(
// Store normalized URL for consistent reference
url = normalizedUrl;
const {response} = await readUrl(url, true, context.tokenTracker, withImages);
const {data} = response;
const { response } = await readUrl(url, true, context.tokenTracker, withImages);
const { data } = response;
const guessedTime = await getLastModified(url);
if (guessedTime) {
console.log('Guessed time for', url, guessedTime);
logInfo('Guessed time for', { url, guessedTime });
}
// Early return if no valid data
@ -511,7 +520,10 @@ export async function processURLs(
const spamDetectLength = 300;
const isGoodContent = data.content.length > spamDetectLength || !await classifyText(data.content);
if (!isGoodContent) {
console.error(`Blocked content ${data.content.length}:`, url, data.content.slice(0, spamDetectLength));
logError(`Blocked content ${data.content.length}:`, {
url,
content: data.content.slice(0, spamDetectLength)
});
throw new Error(`Blocked content ${url}`);
}
@ -569,9 +581,9 @@ export async function processURLs(
});
}
return {url, result: response};
return { url, result: response };
} catch (error: any) {
console.error('Error reading URL:', url, error);
logError('Error reading URL:', { url, error });
badURLs.push(url);
// Extract hostname from the URL
if (
@ -586,10 +598,10 @@ export async function processURLs(
try {
hostname = extractUrlParts(url).hostname;
} catch (e) {
console.error('Error parsing URL for hostname:', url, e);
logError('Error parsing URL for hostname:', { url, error: e });
}
badHostnames.push(hostname);
console.log(`Added ${hostname} to bad hostnames list`);
logInfo(`Added ${hostname} to bad hostnames list`);
}
return null;
} finally {
@ -618,7 +630,7 @@ export async function processURLs(
Object.keys(allURLs).forEach(url => {
if (badHostnames.includes(extractUrlParts(url).hostname)) {
delete allURLs[url];
console.log(`Removed ${url} from allURLs`);
logInfo(`Removed ${url} from allURLs`);
}
}
)