Merge branch 'main' of github.com:jina-ai/node-DeepResearch

2025-12-26 06:28:56 +08:00 · 2025-06-13 12:32:33 +08:00 · 2025-06-13 12:32:33 +08:00 · 56b3c3c88e
commit 56b3c3c88e
parent 45c668106f a664e4d851
2 changed files with 65 additions and 45 deletions
--- a/src/agent.ts
+++ b/src/agent.ts
@ -449,6 +449,7 @@ export async function getResponse(question?: string,
  const gaps: string[] = [question];  // All questions to be answered including the orginal question
  const allQuestions = [question];
  const allKeywords: string[] = [];
+  let candidateAnswers: string[] = [];
  const allKnowledge: KnowledgeItem[] = [];  // knowledge are intermedidate questions that are answered

  let diaryContext = [];
@ -795,37 +796,44 @@ But then you realized you have asked them before. You decided to to think out of

      if (teamSize > 1) {
        const subproblems = await researchPlan(question, teamSize, soundBites, context, SchemaGen);
-        // parallel call getResponse for each subproblem with exact same parameters from the current step, but their teamSize is 1
-        const subproblemResponses = await Promise.all(subproblems.map(subproblem => getResponse(subproblem,
-          tokenBudget,
-          maxBadAttempts,
-          context,
-          messages,
-          numReturnedURLs,
-          noDirectAnswer,
-          boostHostnames,
-          badHostnames,
-          onlyHostnames,
-          maxRef,
-          minRelScore, languageCode, searchLanguageCode, searchProvider, withImages, 1)));
-        // convert current step to AnswerAction
-        thisStep = {
-          action: 'answer',
-          think: thisStep.think,
-          answer: subproblemResponses.map(r => (r.result as AnswerAction).answer).join('\n\n'),
-          mdAnswer: subproblemResponses.map(r => (r.result as AnswerAction).mdAnswer).join('\n\n'),
-          references: subproblemResponses.map(r => (r.result as AnswerAction).references).flat(),
-          imageReferences: subproblemResponses.map(r => (r.result as AnswerAction).imageReferences).flat(),
-          isFinal: true,
-          isAggregated: true
-        } as AnswerAction;
+        if (subproblems.length > 1) {

-        // aggregate urls
-        visitedURLs.push(...subproblemResponses.map(r => r.readURLs).flat());
-        weightedURLs = subproblemResponses.map(r => r.allURLs.map(url => ({ url, title: '' } as BoostedSearchSnippet))).flat();
+          // parallel call getResponse for each subproblem with exact same parameters from the current step, but their teamSize is 1
+          const subproblemResponses = await Promise.all(subproblems.map(subproblem => getResponse(subproblem,
+            tokenBudget,
+            maxBadAttempts,
+            context,
+            messages,
+            numReturnedURLs,
+            noDirectAnswer,
+            boostHostnames,
+            badHostnames,
+            onlyHostnames,
+            maxRef,
+            minRelScore, languageCode, searchLanguageCode, searchProvider, withImages, 1)));
+          // convert current step to AnswerAction
+          thisStep = {
+            action: 'answer',
+            think: thisStep.think,
+            answer: subproblemResponses.map(r => (r.result as AnswerAction).answer).join('\n\n'),
+            mdAnswer: subproblemResponses.map(r => (r.result as AnswerAction).mdAnswer).join('\n\n'),
+            references: subproblemResponses.map(r => (r.result as AnswerAction).references).flat(),
+            imageReferences: subproblemResponses.map(r => (r.result as AnswerAction).imageReferences).flat(),
+            isFinal: true,
+            isAggregated: true
+          } as AnswerAction;
+          candidateAnswers = subproblemResponses.map(r => (r.result as AnswerAction).mdAnswer).filter(a => a) as string[];

-        // break the loop, jump directly final boxing
-        break;
+          // aggregate urls
+          visitedURLs.push(...subproblemResponses.map(r => r.readURLs).flat());
+          weightedURLs = subproblemResponses.map(r => r.allURLs.map(url => ({ url, title: '' } as BoostedSearchSnippet))).flat();
+
+          // break the loop, jump directly final boxing
+          break;
+        } else {
+          // if there is only one subproblem, then we skip the recursion
+          gaps.push(subproblems[0]);
+        }
      }

      // rewrite queries with initial soundbites
@ -1083,7 +1091,7 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
      }
    }
  } else if (answerStep.isAggregated) {
-    answerStep.answer = await reduceAnswers(answerStep.answer, context, SchemaGen);
+    answerStep.answer = candidateAnswers.join('\n\n'); // await reduceAnswers(candidateAnswers, context, SchemaGen);
    answerStep.mdAnswer = repairMarkdownFootnotesOuter(buildMdFromAnswer(answerStep));
    logDebug('[agent] all image references:', { count: answerStep.imageReferences?.length });
    const dedupImages = dedupImagesWithEmbeddings(answerStep.imageReferences as ImageObject[], []);
--- a/src/tools/reducer.ts
+++ b/src/tools/reducer.ts
@ -2,10 +2,10 @@ import { PromptPair, TrackerContext } from '../types';
 import { getModel } from "../config";
 import { generateText } from "ai";
 import { Schemas } from "../utils/schemas";
-import { logInfo, logError, logDebug, logWarning } from '../logging';
+import { logError, logDebug, logWarning } from '../logging';


-function getPrompt(mdContent: string): PromptPair {
+function getPrompt(answers: string[]): PromptPair {


  return {
@ -14,7 +14,7 @@ You are an article aggregator that creates a coherent, high-quality article by s

 <core-instructions>
 1. Content Preservation
-ALWAYS preserve original sentences verbatim - do not paraphrase or rewrite
+ALWAYS preserve original sentences verbatim - do not delete
 Select the highest quality version when multiple articles cover the same point
 Maintain the original author's voice and technical accuracy
 Keep direct quotes, statistics, and factual claims exactly as written
@ -46,22 +46,29 @@ No attribution to individual sources (present as unified piece)

 Do not add your own commentary or analysis
 Do not change technical terms, names, or specific details
-
-Your final output should read as a cohesive, high-quality article that appears to be written by a single author, while actually being a careful curation of the best sentences from all input sources.
    `,
-    user: mdContent
+    user: `
+    Here are the answers to merge:
+${answers.map((a, i) => `
+<answer-${i + 1}>
+${a}
+</answer-${i + 1}>
+
+Your output should read as a coherent, high-quality article that appears to be written by a single author, while actually being a careful curation of the best sentences from all input sources.
+`).join('\n\n')}
+    `
  }
 }

 const TOOL_NAME = 'reducer';

 export async function reduceAnswers(
-  mdContent: string,
+  answers: string[],
  trackers: TrackerContext,
  schema: Schemas
 ): Promise<string> {
  try {
-    const prompt = getPrompt(mdContent);
+    const prompt = getPrompt(answers);
    trackers?.actionTracker.trackThink('reduce_answer', schema.languageCode)

    const result = await generateText({
@ -71,25 +78,30 @@ export async function reduceAnswers(
    });

    trackers.tokenTracker.trackUsage(TOOL_NAME, result.usage)
+    const totalLength = answers.reduce((acc, curr) => acc + curr.length, 0);
+    const reducedLength = result.text.length;


-    logDebug(`${TOOL_NAME} before/after: ${mdContent.length} -> ${result.text.length}`, {
-      originalContent: mdContent,
+    logDebug(`${TOOL_NAME} before/after: ${totalLength} -> ${reducedLength}`, {
+      answers,
      reducedContent: result.text
    });

-    if (result.text.length < mdContent.length * 0.5) {
-      logWarning(`reduce content length ${result.text.length} is significantly shorter than original content ${mdContent.length}, return original content instead.`, {
-        originalContent: mdContent,
+
+    const reductionRatio = reducedLength / totalLength;
+    if (reductionRatio < 0.6) {
+      logWarning(`reducer content length ${reducedLength} is significantly shorter than original content ${totalLength}, return original content instead.`, {
+        originalContent: answers,
        repairedContent: result.text
      });
-      return mdContent;
+      // return simple join of answers
+      return answers.join('\n\n');
    }

    return result.text;

  } catch (error) {
    logError(TOOL_NAME, { error });
-    return mdContent;
+    return answers.join('\n\n');
  }
 }