style: format imports and add language_code to request

2025-12-26 06:28:56 +08:00 · 2025-05-09 14:51:49 +02:00 · 2025-05-09 14:51:49 +02:00 · 16f01546f5
commit 16f01546f5
parent 6f41539587
5 changed files with 115 additions and 73 deletions
--- a/src/agent.ts
+++ b/src/agent.ts
@ -1,15 +1,15 @@
-import {ZodObject} from 'zod';
-import {CoreMessage} from 'ai';
-import {SEARCH_PROVIDER, STEP_SLEEP} from "./config";
+import { ZodObject } from 'zod';
+import { CoreMessage } from 'ai';
+import { SEARCH_PROVIDER, STEP_SLEEP } from "./config";
 import fs from 'fs/promises';
-import {SafeSearchType, search as duckSearch} from "duck-duck-scrape";
-import {braveSearch} from "./tools/brave-search";
-import {rewriteQuery} from "./tools/query-rewriter";
-import {dedupQueries} from "./tools/jina-dedup";
-import {evaluateAnswer, evaluateQuestion} from "./tools/evaluator";
-import {analyzeSteps} from "./tools/error-analyzer";
-import {TokenTracker} from "./utils/token-tracker";
-import {ActionTracker} from "./utils/action-tracker";
+import { SafeSearchType, search as duckSearch } from "duck-duck-scrape";
+import { braveSearch } from "./tools/brave-search";
+import { rewriteQuery } from "./tools/query-rewriter";
+import { dedupQueries } from "./tools/jina-dedup";
+import { evaluateAnswer, evaluateQuestion } from "./tools/evaluator";
+import { analyzeSteps } from "./tools/error-analyzer";
+import { TokenTracker } from "./utils/token-tracker";
+import { ActionTracker } from "./utils/action-tracker";
 import {
  StepAction,
  AnswerAction,
@ -18,13 +18,13 @@ import {
  BoostedSearchSnippet,
  SearchSnippet, EvaluationResponse, Reference, SERPQuery, RepeatEvaluationType, UnNormalizedSearchSnippet, WebContent
 } from "./types";
-import {TrackerContext} from "./types";
-import {search} from "./tools/jina-search";
+import { TrackerContext } from "./types";
+import { search } from "./tools/jina-search";
 // import {grounding} from "./tools/grounding";
-import {zodToJsonSchema} from "zod-to-json-schema";
-import {ObjectGeneratorSafe} from "./utils/safe-generator";
-import {CodeSandbox} from "./tools/code-sandbox";
-import {serperSearch} from './tools/serper-search';
+import { zodToJsonSchema } from "zod-to-json-schema";
+import { ObjectGeneratorSafe } from "./utils/safe-generator";
+import { CodeSandbox } from "./tools/code-sandbox";
+import { serperSearch } from './tools/serper-search';
 import {
  addToAllURLs,
  rankURLs,
@ -38,11 +38,11 @@ import {
  removeExtraLineBreaks,
  removeHTMLtags, repairMarkdownFinal, repairMarkdownFootnotesOuter
 } from "./utils/text-tools";
-import {MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas} from "./utils/schemas";
-import {formatDateBasedOnType, formatDateRange} from "./utils/date-tools";
-import {repairUnknownChars} from "./tools/broken-ch-fixer";
-import {reviseAnswer} from "./tools/md-fixer";
-import {buildReferences} from "./tools/build-ref";
+import { MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas } from "./utils/schemas";
+import { formatDateBasedOnType, formatDateRange } from "./utils/date-tools";
+import { repairUnknownChars } from "./tools/broken-ch-fixer";
+import { reviseAnswer } from "./tools/md-fixer";
+import { buildReferences } from "./tools/build-ref";

 async function sleep(ms: number) {
  const seconds = Math.ceil(ms / 1000);
@ -54,7 +54,7 @@ function BuildMsgsFromKnowledge(knowledge: KnowledgeItem[]): CoreMessage[] {
  // build user, assistant pair messages from knowledge
  const messages: CoreMessage[] = [];
  knowledge.forEach(k => {
-    messages.push({role: 'user', content: k.question.trim()});
+    messages.push({ role: 'user', content: k.question.trim() });
    const aMsg = `
 ${k.updated && (k.type === 'url' || k.type === 'side-info') ? `
 <answer-datetime>
@ -71,7 +71,7 @@ ${k.references[0]}

 ${k.answer}
      `.trim();
-    messages.push({role: 'assistant', content: removeExtraLineBreaks(aMsg)});
+    messages.push({ role: 'assistant', content: removeExtraLineBreaks(aMsg) });
  });
  return messages;
 }
@ -96,7 +96,7 @@ ${p}
 </answer-requirements>` : ''}
    `.trim();

-  msgs.push({role: 'user', content: removeExtraLineBreaks(userContent)});
+  msgs.push({ role: 'user', content: removeExtraLineBreaks(userContent) });
  return msgs;
 }

@ -289,7 +289,7 @@ async function executeSearchQueries(
  const uniqQOnly = keywordsQueries.map(q => q.q);
  const newKnowledge: KnowledgeItem[] = [];
  const searchedQueries: string[] = [];
-  context.actionTracker.trackThink('search_for', SchemaGen.languageCode, {keywords: uniqQOnly.join(', ')});
+  context.actionTracker.trackThink('search_for', SchemaGen.languageCode, { keywords: uniqQOnly.join(', ') });
  let utilityScore = 0;
  for (const query of keywordsQueries) {
    let results: UnNormalizedSearchSnippet[] = [];
@ -305,7 +305,7 @@ async function executeSearchQueries(
          results = (await search(query, context.tokenTracker)).response?.data || [];
          break;
        case 'duck':
-          results = (await duckSearch(query.q, {safeSearch: SafeSearchType.STRICT})).results;
+          results = (await duckSearch(query.q, { safeSearch: SafeSearchType.STRICT })).results;
          break;
        case 'brave':
          results = (await braveSearch(query.q)).response.web?.results || [];
@ -364,7 +364,7 @@ async function executeSearchQueries(
  if (searchedQueries.length === 0) {
    if (onlyHostnames && onlyHostnames.length > 0) {
      console.log(`No results found for queries: ${uniqQOnly.join(', ')} on hostnames: ${onlyHostnames.join(', ')}`);
-      context.actionTracker.trackThink('hostnames_no_results', SchemaGen.languageCode, {hostnames: onlyHostnames.join(', ')});
+      context.actionTracker.trackThink('hostnames_no_results', SchemaGen.languageCode, { hostnames: onlyHostnames.join(', ') });
    }
  } else {
    console.log(`Utility/Queries: ${utilityScore}/${searchedQueries.length}`);
@ -383,17 +383,18 @@ function includesEval(allChecks: RepeatEvaluationType[], evalType: EvaluationTyp
 }

 export async function getResponse(question?: string,
-                                  tokenBudget: number = 1_000_000,
-                                  maxBadAttempts: number = 2,
-                                  existingContext?: Partial<TrackerContext>,
-                                  messages?: Array<CoreMessage>,
-                                  numReturnedURLs: number = 100,
-                                  noDirectAnswer: boolean = false,
-                                  boostHostnames: string[] = [],
-                                  badHostnames: string[] = [],
-                                  onlyHostnames: string[] = [],
-                                  maxRef: number = 10,
-                                  minRelScore: number = 0.75
+  tokenBudget: number = 1_000_000,
+  maxBadAttempts: number = 2,
+  existingContext?: Partial<TrackerContext>,
+  messages?: Array<CoreMessage>,
+  numReturnedURLs: number = 100,
+  noDirectAnswer: boolean = false,
+  boostHostnames: string[] = [],
+  badHostnames: string[] = [],
+  onlyHostnames: string[] = [],
+  maxRef: number = 10,
+  minRelScore: number = 0.75,
+  languageCode: string | undefined = 'zh-CN'
 ): Promise<{ result: StepAction; context: TrackerContext; visitedURLs: string[], readURLs: string[], allURLs: string[] }> {

  let step = 0;
@ -413,11 +414,11 @@ export async function getResponse(question?: string,
      question = lastContent.filter(c => c.type === 'text').pop()?.text || '';
    }
  } else {
-    messages = [{role: 'user', content: question.trim()}]
+    messages = [{ role: 'user', content: question.trim() }]
  }

  const SchemaGen = new Schemas();
-  await SchemaGen.setLanguage(question)
+  await SchemaGen.setLanguage(languageCode || question)
  const context: TrackerContext = {
    tokenTracker: existingContext?.tokenTracker || new TokenTracker(tokenBudget),
    actionTracker: existingContext?.actionTracker || new ActionTracker()
@ -439,7 +440,7 @@ export async function getResponse(question?: string,
  let allowReflect = true;
  let allowCoding = false;
  let msgWithKnowledge: CoreMessage[] = [];
-  let thisStep: StepAction = {action: 'answer', answer: '', references: [], think: '', isFinal: false};
+  let thisStep: StepAction = { action: 'answer', answer: '', references: [], think: '', isFinal: false };

  const allURLs: Record<string, SearchSnippet> = {};
  const allWebContents: Record<string, WebContent> = {};
@ -491,7 +492,7 @@ export async function getResponse(question?: string,
          } as RepeatEvaluationType
        })
      // force strict eval for the original question, at last, only once.
-      evaluationMetrics[currentQuestion].push({type: 'strict', numEvalsRequired: maxBadAttempts});
+      evaluationMetrics[currentQuestion].push({ type: 'strict', numEvalsRequired: maxBadAttempts });
    } else if (currentQuestion.trim() !== question) {
      evaluationMetrics[currentQuestion] = []
    }
@ -520,7 +521,7 @@ export async function getResponse(question?: string,
    allowSearch = allowSearch && (weightedURLs.length < 50);  // disable search when too many urls already

    // generate prompt for this step
-    const {system, urlList} = getPrompt(
+    const { system, urlList } = getPrompt(
      diaryContext,
      allQuestions,
      allKeywords,
@ -552,7 +553,7 @@ export async function getResponse(question?: string,
    console.log(`${currentQuestion}: ${thisStep.action} <- [${actionsStr}]`);
    console.log(thisStep)

-    context.actionTracker.trackAction({totalStep, thisStep, gaps});
+    context.actionTracker.trackAction({ totalStep, thisStep, gaps });

    // reset allow* to true
    allowAnswer = true;
@ -599,7 +600,7 @@ export async function getResponse(question?: string,
      });

      console.log(currentQuestion, evaluationMetrics[currentQuestion])
-      let evaluation: EvaluationResponse = {pass: true, think: ''};
+      let evaluation: EvaluationResponse = { pass: true, think: '' };
      if (evaluationMetrics[currentQuestion].length > 0) {
        context.actionTracker.trackThink('eval_first', SchemaGen.languageCode)
        evaluation = await evaluateAnswer(
@ -756,8 +757,8 @@ But then you realized you have asked them before. You decided to to think out of
      thisStep.searchRequests = chooseK((await dedupQueries(thisStep.searchRequests, [], context.tokenTracker)).unique_queries, MAX_QUERIES_PER_STEP);

      // do first search
-      const {searchedQueries, newKnowledge} = await executeSearchQueries(
-        thisStep.searchRequests.map(q => ({q})),
+      const { searchedQueries, newKnowledge } = await executeSearchQueries(
+        thisStep.searchRequests.map(q => ({ q })),
        context,
        allURLs,
        SchemaGen,
@ -777,13 +778,13 @@ But then you realized you have asked them before. You decided to to think out of
      keywordsQueries = keywordsQueries = uniqQOnly.map(q => {
        const matches = keywordsQueries.filter(kq => kq.q === q);
        // if there are multiple matches, keep the original query as the wider search
-        return matches.length > 1 ? {q} : matches[0];
+        return matches.length > 1 ? { q } : matches[0];
      }) as SERPQuery[];

      let anyResult = false;

      if (keywordsQueries.length > 0) {
-        const {searchedQueries, newKnowledge} =
+        const { searchedQueries, newKnowledge } =
          await executeSearchQueries(
            keywordsQueries,
            context,
@ -842,7 +843,7 @@ You decided to think out of the box or cut from a completely different angle.
      console.log(uniqueURLs)

      if (uniqueURLs.length > 0) {
-        const {urlResults, success} = await processURLs(
+        const { urlResults, success } = await processURLs(
          uniqueURLs,
          context,
          allKnowledge,
@ -885,7 +886,7 @@ You decided to think out of the box or cut from a completely different angle.`);
      }
      allowRead = false;
    } else if (thisStep.action === 'coding' && thisStep.codingIssue) {
-      const sandbox = new CodeSandbox({allContext, URLs: weightedURLs.slice(0, 20), allKnowledge}, context, SchemaGen);
+      const sandbox = new CodeSandbox({ allContext, URLs: weightedURLs.slice(0, 20), allKnowledge }, context, SchemaGen);
      try {
        const result = await sandbox.solve(thisStep.codingIssue);
        allKnowledge.push({
@ -936,7 +937,7 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
    // any answer is better than no answer, humanity last resort
    step++;
    totalStep++;
-    const {system} = getPrompt(
+    const { system } = getPrompt(
      diaryContext,
      allQuestions,
      allKeywords,
@ -966,7 +967,7 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
    } as AnswerAction;
    // await updateReferences(thisStep, allURLs);
    (thisStep as AnswerAction).isFinal = true;
-    context.actionTracker.trackAction({totalStep, thisStep, gaps});
+    context.actionTracker.trackAction({ totalStep, thisStep, gaps });
  }

  const answerStep = thisStep as AnswerAction;
@ -988,7 +989,7 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
          ),
          allURLs)));

-    const {answer, references} = await buildReferences(
+    const { answer, references } = await buildReferences(
      answerStep.answer,
      allWebContents,
      context,
@ -1021,16 +1022,16 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
 }

 async function storeContext(prompt: string, schema: any, memory: {
-                              allContext: StepAction[];
-                              allKeywords: string[];
-                              allQuestions: string[];
-                              allKnowledge: KnowledgeItem[];
-                              weightedURLs: BoostedSearchSnippet[];
-                              msgWithKnowledge: CoreMessage[];
-                            }
+  allContext: StepAction[];
+  allKeywords: string[];
+  allQuestions: string[];
+  allKnowledge: KnowledgeItem[];
+  weightedURLs: BoostedSearchSnippet[];
+  msgWithKnowledge: CoreMessage[];
+}
  , step: number) {

-  const {allContext, allKeywords, allQuestions, allKnowledge, weightedURLs, msgWithKnowledge} = memory;
+  const { allContext, allKeywords, allQuestions, allKnowledge, weightedURLs, msgWithKnowledge } = memory;
  if ((process as any).asyncLocalContext?.available?.()) {

    (process as any).asyncLocalContext.ctx.promptContext = {
--- a/src/app.ts
+++ b/src/app.ts
@ -566,7 +566,8 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
      body.bad_hostnames?.map(i => normalizeHostName(i)),
      body.only_hostnames?.map(i => normalizeHostName(i)),
      body.max_annotations,
-      body.min_annotation_relevance
+      body.min_annotation_relevance,
+      body.language_code
      )
    let finalAnswer = (finalStep as AnswerAction).mdAnswer;

--- a/src/cli.ts
+++ b/src/cli.ts
@ -26,9 +26,9 @@ program
      const { result } = await getResponse(
        query,
        parseInt(options.tokenBudget),
-        parseInt(options.maxAttempts)
+        parseInt(options.maxAttempts),
      );
-      
+
      if (result.action === 'answer') {
        console.log('\nAnswer:', result.answer);
        if (result.references?.length) {
--- a/src/types.ts
+++ b/src/types.ts
@ -1,5 +1,5 @@
 // Action Types
-import {CoreMessage, LanguageModelUsage} from "ai";
+import { CoreMessage, LanguageModelUsage } from "ai";

 type BaseAction = {
  action: "search" | "answer" | "reflect" | "visit" | "coding";
@ -245,6 +245,7 @@ export interface ChatCompletionRequest {

  max_annotations?: number;
  min_annotation_relevance?: number;
+  language_code?: string;
 }

 export interface URLAnnotation {
@ -304,8 +305,8 @@ export interface ChatCompletionChunk {
 }

 // Tracker Types
-import {TokenTracker} from './utils/token-tracker';
-import {ActionTracker} from './utils/action-tracker';
+import { TokenTracker } from './utils/token-tracker';
+import { ActionTracker } from './utils/action-tracker';

 export interface TrackerContext {
  tokenTracker: TokenTracker;
--- a/src/utils/schemas.ts
+++ b/src/utils/schemas.ts
@ -1,6 +1,6 @@
-import {z} from "zod";
-import {ObjectGeneratorSafe} from "./safe-generator";
-import {EvaluationType, PromptPair} from "../types";
+import { z } from "zod";
+import { ObjectGeneratorSafe } from "./safe-generator";
+import { EvaluationType, PromptPair } from "../types";

 export const MAX_URLS_PER_STEP = 5
 export const MAX_QUERIES_PER_STEP = 5
@ -59,12 +59,51 @@ Evaluation: {
  };
 }

+const languageISO6391Map: Record<string, string> = { // language tag -> English language name; a hit lets Schemas.setLanguage return before running the language-detection generator
+  'en': 'English',
+  'zh': 'Chinese',
+  'zh-CN': 'Simplified Chinese', // region-qualified tag (language-REGION), not a bare two-letter ISO 639-1 code as the map name suggests
+  'zh-TW': 'Traditional Chinese', // region-qualified tag, same remark as zh-CN
+  'de': 'German',
+  'fr': 'French',
+  'es': 'Spanish',
+  'it': 'Italian',
+  'ja': 'Japanese',
+  'ko': 'Korean',
+  'pt': 'Portuguese',
+  'ru': 'Russian',
+  'ar': 'Arabic',
+  'hi': 'Hindi',
+  'bn': 'Bengali',
+  'tr': 'Turkish',
+  'nl': 'Dutch',
+  'pl': 'Polish',
+  'sv': 'Swedish',
+  'no': 'Norwegian',
+  'da': 'Danish',
+  'fi': 'Finnish',
+  'el': 'Greek',
+  'he': 'Hebrew',
+  'hu': 'Hungarian',
+  'id': 'Indonesian',
+  'ms': 'Malay',
+  'th': 'Thai',
+  'vi': 'Vietnamese',
+  'ro': 'Romanian',
+  'bg': 'Bulgarian',
+} // NOTE(review): plain object literal, so inherited keys (constructor, toString, ...) are truthy on bracket lookup; keys are matched case-sensitively as written. Consider an own-property check where this map is read.
+
 export class Schemas {
  public languageStyle: string = 'formal English';
  public languageCode: string = 'en';


  async setLanguage(query: string) {
+    if (languageISO6391Map[query]) {
+      this.languageCode = query;
+      this.languageStyle = `formal ${languageISO6391Map[query]}`;
+      return;
+    }
    const generator = new ObjectGeneratorSafe();
    const prompt = getLanguagePrompt(query.slice(0, 100))

@ -194,7 +233,7 @@ export class Schemas {
  }

  getAgentSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean, allowCoding: boolean,
-                 currentQuestion?: string): z.ZodObject<any> {
+    currentQuestion?: string): z.ZodObject<any> {
    const actionSchemas: Record<string, z.ZodOptional<any>> = {};

    if (allowSearch) {