feat: add mdFixer tool config and update agent logic

2025-12-26 06:28:56 +08:00 · 2025-06-09 17:47:19 -07:00 · 2025-06-09 17:47:19 -07:00 · 78b2cbb2cf
commit 78b2cbb2cf
parent 2affd41c79
5 changed files with 73 additions and 41 deletions
--- a/config.json
+++ b/config.json
@@ -56,6 +56,9 @@
        "agentBeastMode": {
          "temperature": 0.7
        },
+        "mdFixer": {
+          "model": "gemini-2.5-flash-preview-05-20"
+        },
        "fallback": {
          "maxTokens": 8000,
          "model": "gemini-2.0-flash-lite"
@@ -91,7 +94,8 @@
        },
        "fallback": {
          "temperature": 0
-        }
+        },
+        "mdFixer": {}
      }
    }
  }
--- a/jina-ai/config.json
+++ b/jina-ai/config.json
@@ -39,15 +39,30 @@
        "maxTokens": 8000
      },
      "tools": {
-        "coder": { "maxTokens": 2000, "model": "gemini-2.0-flash-lite" },
+        "coder": {
+          "maxTokens": 2000,
+          "model": "gemini-2.0-flash-lite"
+        },
        "searchGrounding": {},
-        "dedup": { },
-        "evaluator": {"maxTokens": 2000 },
-        "errorAnalyzer": {"maxTokens": 1000},
-        "queryRewriter": {"maxTokens": 2000},
+        "dedup": {},
+        "evaluator": {
+          "maxTokens": 2000
+        },
+        "errorAnalyzer": {
+          "maxTokens": 1000
+        },
+        "queryRewriter": {
+          "maxTokens": 2000
+        },
        "agent": {},
        "agentBeastMode": {},
-        "fallback": {"maxTokens": 8000, "model": "gemini-2.0-flash-lite"}
+        "fallback": {
+          "maxTokens": 8000,
+          "model": "gemini-2.0-flash-lite"
+        },
+        "mdFixer": {
+          "model": "gemini-2.5-flash-preview-05-20"
+        }
      }
    },
    "openai": {
@@ -57,16 +72,31 @@
        "maxTokens": 8000
      },
      "tools": {
-        "coder": { "temperature": 0.7 },
-        "searchGrounding": { "temperature": 0 },
-        "dedup": { "temperature": 0.1 },
+        "coder": {
+          "temperature": 0.7
+        },
+        "searchGrounding": {
+          "temperature": 0
+        },
+        "dedup": {
+          "temperature": 0.1
+        },
        "evaluator": {},
        "errorAnalyzer": {},
-        "queryRewriter": { "temperature": 0.1 },
-        "agent": { "temperature": 0.7 },
-        "agentBeastMode": { "temperature": 0.7 },
-        "fallback": { "temperature": 0 }
+        "queryRewriter": {
+          "temperature": 0.1
+        },
+        "agent": {
+          "temperature": 0.7
+        },
+        "agentBeastMode": {
+          "temperature": 0.7
+        },
+        "fallback": {
+          "temperature": 0
+        },
+        "mdFixer": {}
      }
    }
  }
-}
+}
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -40,7 +40,6 @@ import {
 } from "./utils/text-tools";
 import { MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas } from "./utils/schemas";
 import { formatDateBasedOnType, formatDateRange } from "./utils/date-tools";
-import { repairUnknownChars } from "./tools/broken-ch-fixer";
 import { reviseAnswer } from "./tools/md-fixer";
 import { buildReferences } from "./tools/build-ref";
 import { arxivSearch } from './tools/arxiv-search';
@@ -988,13 +987,13 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
        fixBadURLMdLinks(
          fixCodeBlockIndentation(
            repairMarkdownFootnotesOuter(
-              await repairUnknownChars(
-                await reviseAnswer(
-                  answerStep.answer,
-                  allKnowledge,
-                  context,
-                  SchemaGen),
-                context))
+              await reviseAnswer(
+                answerStep.answer,
+                allKnowledge,
+                context,
+                SchemaGen
+              )
+            )
          ),
          allURLs)));

--- a/src/config.ts
+++ b/src/config.ts
@@ -67,6 +67,7 @@ interface ToolConfig {
 }

 interface ToolOverrides {
+  model?: string;
  temperature?: number;
  maxTokens?: number;
 }
@@ -78,7 +79,7 @@ export function getToolConfig(toolName: ToolName): ToolConfig {
  const toolOverrides = providerConfig.tools[toolName] as ToolOverrides;

  return {
-    model: process.env.DEFAULT_MODEL_NAME || defaultConfig.model,
+    model: toolOverrides.model ?? defaultConfig.model,
    temperature: toolOverrides.temperature ?? defaultConfig.temperature,
    maxTokens: toolOverrides.maxTokens ?? defaultConfig.maxTokens
  };
--- a/src/tools/md-fixer.ts
+++ b/src/tools/md-fixer.ts
@@ -1,8 +1,8 @@
-import {KnowledgeItem, PromptPair, TrackerContext} from '../types';
-import {getKnowledgeStr} from "../utils/text-tools";
-import {getModel} from "../config";
-import {generateText} from "ai";
-import {Schemas} from "../utils/schemas";
+import { KnowledgeItem, PromptPair, TrackerContext } from '../types';
+import { getKnowledgeStr } from "../utils/text-tools";
+import { getModel } from "../config";
+import { generateText } from "ai";
+import { Schemas } from "../utils/schemas";


 function getPrompt(mdContent: string, allKnowledge: KnowledgeItem[], schema: Schemas): PromptPair {
@@ -12,12 +12,12 @@ function getPrompt(mdContent: string, allKnowledge: KnowledgeItem[], schema: Sch
  return {
    system: `You are a senior editor with multiple best-selling books and columns published in top magazines. You break conventional thinking, establish unique cross-disciplinary connections, and bring new perspectives to the user.

-Your task is to revise the provided markdown content (written by your junior intern) while preserving its original vibe, structure, delivering a polished and professional version.
+Your task is to revise the provided markdown content (written by your junior intern) while preserving its original vibe, delivering a polished and professional version.

 <structure>
 - Begin with a blunt, fact-driven, and unapologetically statement of the main question or issue you'll address
 - Develop your argument using a logical progression of ideas while allowing for occasional contemplative digressions that enrich the reader's understanding
- Organize paragraphs with clear topic sentences but vary paragraph length to create rhythm and emphasis
+- Organize paragraphs with clear topic sentences but vary paragraph length to create rhythm and emphasis, do not use bullet points or numbered lists.
 - Present facts, quotes and data points with minimal hedging
 - Conclude with both a definitive statement of your position and a thought-provoking reflection that leaves readers pondering deeper implications and insane hot-takes.
 </structure>
@@ -40,14 +40,12 @@ Your task is to revise the provided markdown content (written by your junior int
 </content-approach>

 <rules>
-1. Extend the content with 5W1H strategy and add more details to make it more informative and engaging. Use available knowledge to ground facts and fill in missing information.
-2. Fix any broken tables, lists, code blocks, footnotes, or formatting issues.
-3. Make sure nested lists are correctly indented, especially code blocks within the nested structure. Code block should be fenced with triple backticks, except HTML table.
+1. Avoid any bullet points or numbered lists, use natural language instead.
+2. Extend the content with 5W1H strategy and add more details to make it more informative and engaging. Use available knowledge to ground facts and fill in missing information.
+3. Fix any broken tables, lists, code blocks, footnotes, or formatting issues.
 4. Tables are good! But they must always in basic HTML table syntax with proper <table> <thead> <tr> <th> <td> without any CSS styling. STRICTLY AVOID any markdown table syntax. HTML Table should NEVER BE fenced with (\`\`\`html) triple backticks.
-5. Avoid over-using bullet points by elaborate deeply nested structure into natural language sections/paragraphs to make the content more readable. 
-6. Replace any obvious placeholders or Lorem Ipsum values such as "example.com" with the actual content derived from the knowledge.
-7. Conclusion section if exists should provide deep, unexpected insights, identifying hidden patterns and connections, and creating "aha moments.".
-8. Your output language must be the same as user input language.
+5. Replace any obvious placeholders or Lorem Ipsum values such as "example.com" with the actual content derived from the knowledge.
+6. Your output language must be the same as user input language.
 </rules>


@@ -59,7 +57,7 @@ IMPORTANT: Do not begin your response with phrases like "Sure", "Here is", "Belo
  }
 }

-const TOOL_NAME = 'md-fixer';
+const TOOL_NAME = 'mdFixer';

 export async function reviseAnswer(
  mdContent: string,
@@ -72,12 +70,12 @@ export async function reviseAnswer(
    trackers?.actionTracker.trackThink('final_answer', schema.languageCode)

    const result = await generateText({
-      model: getModel('agent'),
+      model: getModel(TOOL_NAME),
      system: prompt.system,
      prompt: prompt.user,
    });

-    trackers.tokenTracker.trackUsage('md-fixer', result.usage)
+    trackers.tokenTracker.trackUsage(TOOL_NAME, result.usage)


    console.log(TOOL_NAME, result.text);