feat: add mdFixer tool config and update agent logic

2025-12-26 06:28:56 +08:00 · 2025-06-09 17:47:19 -07:00 · 2025-06-09 17:47:19 -07:00 · 78b2cbb2cf
commit 78b2cbb2cf
parent 2affd41c79
5 changed files with 73 additions and 41 deletions
--- a/config.json
+++ b/config.json
@ -56,6 +56,9 @@
        "agentBeastMode": {
          "temperature": 0.7
        },
        "mdFixer": {
          "model": "gemini-2.5-flash-preview-05-20"
        },
        "fallback": {
          "maxTokens": 8000,
          "model": "gemini-2.0-flash-lite"
@ -91,7 +94,8 @@
        },
        "fallback": {
          "temperature": 0
-        }
+        },
        "mdFixer": {}
      }
    }
  }
--- a/jina-ai/config.json
+++ b/jina-ai/config.json
@ -39,15 +39,30 @@
        "maxTokens": 8000
      },
      "tools": {
-        "coder": { "maxTokens": 2000, "model": "gemini-2.0-flash-lite" },
+        "coder": {
          "maxTokens": 2000,
          "model": "gemini-2.0-flash-lite"
        },
        "searchGrounding": {},
-        "dedup": { },
+        "dedup": {},
-        "evaluator": {"maxTokens": 2000 },
+        "evaluator": {
-        "errorAnalyzer": {"maxTokens": 1000},
+          "maxTokens": 2000
-        "queryRewriter": {"maxTokens": 2000},
+        },
        "errorAnalyzer": {
          "maxTokens": 1000
        },
        "queryRewriter": {
          "maxTokens": 2000
        },
        "agent": {},
        "agentBeastMode": {},
-        "fallback": {"maxTokens": 8000, "model": "gemini-2.0-flash-lite"}
+        "fallback": {
          "maxTokens": 8000,
          "model": "gemini-2.0-flash-lite"
        },
        "mdFixer": {
          "model": "gemini-2.5-flash-preview-05-20"
        }
      }
    },
    "openai": {
@ -57,16 +72,31 @@
        "maxTokens": 8000
      },
      "tools": {
-        "coder": { "temperature": 0.7 },
+        "coder": {
-        "searchGrounding": { "temperature": 0 },
+          "temperature": 0.7
-        "dedup": { "temperature": 0.1 },
+        },
        "searchGrounding": {
          "temperature": 0
        },
        "dedup": {
          "temperature": 0.1
        },
        "evaluator": {},
        "errorAnalyzer": {},
-        "queryRewriter": { "temperature": 0.1 },
+        "queryRewriter": {
-        "agent": { "temperature": 0.7 },
+          "temperature": 0.1
-        "agentBeastMode": { "temperature": 0.7 },
+        },
-        "fallback": { "temperature": 0 }
+        "agent": {
          "temperature": 0.7
        },
        "agentBeastMode": {
          "temperature": 0.7
        },
        "fallback": {
          "temperature": 0
        },
        "mdFixer": {}
      }
    }
  }
-}
+}
--- a/src/agent.ts
+++ b/src/agent.ts
@ -40,7 +40,6 @@ import {
 } from "./utils/text-tools";
 import { MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas } from "./utils/schemas";
 import { formatDateBasedOnType, formatDateRange } from "./utils/date-tools";
 import { repairUnknownChars } from "./tools/broken-ch-fixer";
 import { reviseAnswer } from "./tools/md-fixer";
 import { buildReferences } from "./tools/build-ref";
 import { arxivSearch } from './tools/arxiv-search';
@ -988,13 +987,13 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
        fixBadURLMdLinks(
          fixCodeBlockIndentation(
            repairMarkdownFootnotesOuter(
-              await repairUnknownChars(
+              await reviseAnswer(
-                await reviseAnswer(
+                answerStep.answer,
-                  answerStep.answer,
+                allKnowledge,
-                  allKnowledge,
+                context,
-                  context,
+                SchemaGen
-                  SchemaGen),
+              )
-                context))
+            )
          ),
          allURLs)));
--- a/src/config.ts
+++ b/src/config.ts
@ -67,6 +67,7 @@ interface ToolConfig {
 }
 /**
  * Optional per-tool settings, as read from a provider's `tools[toolName]` entry.
  * Every field may be omitted; getToolConfig substitutes the corresponding
  * default-config value for any field that is null or undefined.
  */
 interface ToolOverrides {
  /** Model name to use for this tool instead of the default model. */
  model?: string;
  /** Temperature to use for this tool instead of the default temperature. */
  temperature?: number;
  /** maxTokens value to use for this tool instead of the default maxTokens. */
  maxTokens?: number;
 }
@ -78,7 +79,7 @@ export function getToolConfig(toolName: ToolName): ToolConfig {
  const toolOverrides = providerConfig.tools[toolName] as ToolOverrides;
  return {
-    model: process.env.DEFAULT_MODEL_NAME || defaultConfig.model,
+    model: toolOverrides.model ?? defaultConfig.model,
    temperature: toolOverrides.temperature ?? defaultConfig.temperature,
    maxTokens: toolOverrides.maxTokens ?? defaultConfig.maxTokens
  };
--- a/src/tools/md-fixer.ts
+++ b/src/tools/md-fixer.ts
@ -1,8 +1,8 @@
-import {KnowledgeItem, PromptPair, TrackerContext} from '../types';
+import { KnowledgeItem, PromptPair, TrackerContext } from '../types';
-import {getKnowledgeStr} from "../utils/text-tools";
+import { getKnowledgeStr } from "../utils/text-tools";
-import {getModel} from "../config";
+import { getModel } from "../config";
-import {generateText} from "ai";
+import { generateText } from "ai";
-import {Schemas} from "../utils/schemas";
+import { Schemas } from "../utils/schemas";
 function getPrompt(mdContent: string, allKnowledge: KnowledgeItem[], schema: Schemas): PromptPair {
@ -12,12 +12,12 @@ function getPrompt(mdContent: string, allKnowledge: KnowledgeItem[], schema: Sch
  return {
    system: `You are a senior editor with multiple best-selling books and columns published in top magazines. You break conventional thinking, establish unique cross-disciplinary connections, and bring new perspectives to the user.
-Your task is to revise the provided markdown content (written by your junior intern) while preserving its original vibe, structure, delivering a polished and professional version.
+Your task is to revise the provided markdown content (written by your junior intern) while preserving its original vibe, delivering a polished and professional version.
 <structure>
 - Begin with a blunt, fact-driven, and unapologetically statement of the main question or issue you'll address
 - Develop your argument using a logical progression of ideas while allowing for occasional contemplative digressions that enrich the reader's understanding
- Organize paragraphs with clear topic sentences but vary paragraph length to create rhythm and emphasis
+- Organize paragraphs with clear topic sentences but vary paragraph length to create rhythm and emphasis, do not use bullet points or numbered lists.
 - Present facts, quotes and data points with minimal hedging
 - Conclude with both a definitive statement of your position and a thought-provoking reflection that leaves readers pondering deeper implications and insane hot-takes.
 </structure>
@ -40,14 +40,12 @@ Your task is to revise the provided markdown content (written by your junior int
 </content-approach>
 <rules>
-1. Extend the content with 5W1H strategy and add more details to make it more informative and engaging. Use available knowledge to ground facts and fill in missing information.
+1. Avoid any bullet points or numbered lists, use natural language instead.
-2. Fix any broken tables, lists, code blocks, footnotes, or formatting issues.
+2. Extend the content with 5W1H strategy and add more details to make it more informative and engaging. Use available knowledge to ground facts and fill in missing information.
-3. Make sure nested lists are correctly indented, especially code blocks within the nested structure. Code block should be fenced with triple backticks, except HTML table.
+3. Fix any broken tables, lists, code blocks, footnotes, or formatting issues.
 4. Tables are good! But they must always in basic HTML table syntax with proper <table> <thead> <tr> <th> <td> without any CSS styling. STRICTLY AVOID any markdown table syntax. HTML Table should NEVER BE fenced with (\`\`\`html) triple backticks.
-5. Avoid over-using bullet points by elaborate deeply nested structure into natural language sections/paragraphs to make the content more readable. 
+5. Replace any obvious placeholders or Lorem Ipsum values such as "example.com" with the actual content derived from the knowledge.
-6. Replace any obvious placeholders or Lorem Ipsum values such as "example.com" with the actual content derived from the knowledge.
+6. Your output language must be the same as user input language.
 7. Conclusion section if exists should provide deep, unexpected insights, identifying hidden patterns and connections, and creating "aha moments.".
 8. Your output language must be the same as user input language.
 </rules>
@ -59,7 +57,7 @@ IMPORTANT: Do not begin your response with phrases like "Sure", "Here is", "Belo
  }
 }
-const TOOL_NAME = 'md-fixer';
+const TOOL_NAME = 'mdFixer';
 export async function reviseAnswer(
  mdContent: string,
@ -72,12 +70,12 @@ export async function reviseAnswer(
    trackers?.actionTracker.trackThink('final_answer', schema.languageCode)
    const result = await generateText({
-      model: getModel('agent'),
+      model: getModel(TOOL_NAME),
      system: prompt.system,
      prompt: prompt.user,
    });
-    trackers.tokenTracker.trackUsage('md-fixer', result.usage)
+    trackers.tokenTracker.trackUsage(TOOL_NAME, result.usage)
    console.log(TOOL_NAME, result.text);