feat: add coding tools

2026-03-22 07:29:35 +08:00 · 2025-02-17 14:19:36 +08:00
parent b487563882
commit f8aa2b1353
6 changed files with 313 additions and 15 deletions
--- a/config.json
+++ b/config.json
@@ -32,6 +32,7 @@
        "maxTokens": 8000
      },
      "tools": {
        "coder": { "temperature": 0.7 },
        "searchGrounding": { "temperature": 0 },
        "dedup": { "temperature": 0.1 },
        "evaluator": {},
@@ -49,6 +50,7 @@
        "maxTokens": 8000
      },
      "tools": {
        "coder": { "temperature": 0.7 },
        "searchGrounding": { "temperature": 0 },
        "dedup": { "temperature": 0.1 },
        "evaluator": {},
--- a/jina-ai/config.json
+++ b/jina-ai/config.json
@@ -38,6 +38,7 @@
        "maxTokens": 8000
      },
      "tools": {
        "coder": { "temperature": 0.7 },
        "searchGrounding": { "temperature": 0 },
        "dedup": { "temperature": 0.1 },
        "evaluator": {},
@@ -55,6 +56,7 @@
        "maxTokens": 8000
      },
      "tools": {
        "coder": { "temperature": 0.7 },
        "searchGrounding": { "temperature": 0 },
        "dedup": { "temperature": 0.1 },
        "evaluator": {},
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -17,6 +17,7 @@ import {search} from "./tools/jina-search";
 // import {grounding} from "./tools/grounding";
 import {zodToJsonSchema} from "zod-to-json-schema";
 import {ObjectGeneratorSafe} from "./utils/safe-generator";
 import {CodeSandbox} from "./tools/code-sandbox";
 async function sleep(ms: number) {
  const seconds = Math.ceil(ms / 1000);
@@ -24,7 +25,7 @@ async function sleep(ms: number) {
  return new Promise(resolve => setTimeout(resolve, ms));
 }
-function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean, languageStyle: string = 'same language as the question') {
+function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean, allowCoding: boolean, languageStyle: string = 'same language as the question') {
  const actions: string[] = [];
  const properties: Record<string, z.ZodTypeAny> = {
    action: z.enum(['placeholder']), // Will update later with actual actions
@@ -37,11 +38,17 @@ function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boole
      .describe("Required when action='search'. Must be a short, keyword-based query that BM25, tf-idf based search engines can understand. Existing queries must be avoided").optional();
  }
  if (allowCoding) {
    actions.push("coding");
    properties.codingIssue = z.string().max(500)
      .describe("Required when action='coding'. Describe what issue to solve with coding, format like a github issue ticket. Specify the input value when it is short.").optional();
  }
  if (allowAnswer) {
    actions.push("answer");
    properties.references = z.array(
      z.object({
-        exactQuote: z.string().describe("Exact relevant quote from the document"),
+        exactQuote: z.string().describe("Exact relevant quote from the document, must be a soundbite, short and to the point, no fluff").max(30),
        url: z.string().describe("source URL; must be directly from the context")
      }).required()
    ).describe("Required when action='answer'. Must be an array of references that support the answer, each reference must contain an exact quote and the URL of the document").optional();
@@ -83,6 +90,7 @@ function getPrompt(
  allowAnswer: boolean = true,
  allowRead: boolean = true,
  allowSearch: boolean = true,
  allowCoding: boolean = true,
  badContext?: { question: string, answer: string, evaluation: string, recap: string; blame: string; improvement: string; }[],
  knowledge?: KnowledgeItem[],
  allURLs?: Record<string, string>,
@@ -148,7 +156,6 @@ ${knowledgeItems}
  }
  // Add context section if exists
  if (context?.length) {
    sections.push(`
@@ -215,6 +222,15 @@ ${urlList}
 `);
  }
  if (allowCoding) {
    actionSections.push(`
 <action-coding>
 - This action allows you to solve the problem with coding in javascript. This is useful when you need some programming logic, like counting, filtering, or transforming, sorting, regex extraction, pre-processing, or post-processing of the data.
 - You only need to describe the issue you aim to solve in the "codingIssue" field. Specify the input either with real values or variable names. 
 - You do not need to generate any actual code. Some senior engineers will help you with actual implementation.
 </action-coding>`);
  }
  if (allowSearch) {
    actionSections.push(`
@@ -259,7 +275,7 @@ FAILURE IS NOT AN OPTION. EXECUTE WITH EXTREME PREJUDICE! ⚡️
  if (allowReflect) {
    actionSections.push(`
 <action-reflect>    
- Perform critical analysis through hypothetical scenarios or systematic breakdowns
+- Perform critical reflection through hypothetical scenarios or systematic breakdowns
 - Identify knowledge gaps and formulate essential clarifying questions
 </action-reflect>
 `);
@@ -313,7 +329,7 @@ export async function getResponse(question: string,
  let step = 0;
  let totalStep = 0;
  let badAttempts = 0;
-  let schema: ZodObject<any> = getSchema(true, true, true, true)
+  let schema: ZodObject<any> = getSchema(true, true, true, true, true)
  question = question.trim()
  const gaps: string[] = [question];  // All questions to be answered including the orginal question
  const allQuestions = [question];
@@ -344,6 +360,7 @@ export async function getResponse(question: string,
  let allowSearch = true;
  let allowRead = true;
  let allowReflect = true;
  let allowCoding = true;
  let prompt = '';
  let thisStep: StepAction = {action: 'answer', answer: '', references: [], think: '', isFinal: false};
@@ -379,12 +396,13 @@ export async function getResponse(question: string,
      allowAnswer,
      allowRead,
      allowSearch,
      allowCoding,
      badContext,
      allKnowledge,
      allURLs,
      false,
    );
-    schema = getSchema(allowReflect, allowRead, allowAnswer, allowSearch,
+    schema = getSchema(allowReflect, allowRead, allowAnswer, allowSearch, allowCoding,
      evaluationMetrics[currentQuestion].languageStyle)
    const generator = new ObjectGeneratorSafe(context.tokenTracker);
    const result = await generator.generateObject({
@@ -394,7 +412,7 @@ export async function getResponse(question: string,
    });
    thisStep = result.object as StepAction;
    // print allowed and chose action
-    const actionsStr = [allowSearch, allowRead, allowAnswer, allowReflect].map((a, i) => a ? ['search', 'read', 'answer', 'reflect'][i] : null).filter(a => a).join(', ');
+    const actionsStr = [allowSearch, allowRead, allowAnswer, allowReflect, allowCoding].map((a, i) => a ? ['search', 'read', 'answer', 'reflect'][i] : null).filter(a => a).join(', ');
    console.log(`${thisStep.action} <- [${actionsStr}]`);
    console.log(thisStep)
@@ -423,7 +441,10 @@ export async function getResponse(question: string,
      context.actionTracker.trackThink(`But wait, let me evaluate the answer first.`)
      const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep,
-        evaluationMetrics[currentQuestion], [context.tokenTracker, context.actionTracker]);
+        evaluationMetrics[currentQuestion],
        [context.tokenTracker, context.actionTracker],
        visitedURLs
        );
      if (currentQuestion.trim() === question) {
        if (evaluation.pass) {
@@ -530,6 +551,11 @@ You will now figure out the answers to these sub-questions and see if they can h
        gaps.push(...newGapQuestions.slice(0, 2));
        allQuestions.push(...newGapQuestions.slice(0, 2));
        gaps.push(question);  // always keep the original question in the gaps
        updateContext({
          totalStep,
          ...thisStep,
        });
      } else {
        diaryContext.push(`
 At step ${step}, you took **reflect** and think about the knowledge gaps. You tried to break down the question "${currentQuestion}" into gap-questions like this: ${oldQuestions.join(', ')} 
@@ -701,8 +727,41 @@ You decided to think out of the box or cut from a completely different angle.`);
        allowRead = false;
      }
    } else if (thisStep.action === 'coding' && thisStep.codingIssue) {
      const sandbox = new CodeSandbox({allContext}, context.tokenTracker);
      try {
        const result = await sandbox.solve(thisStep.codingIssue);
        allKnowledge.push({
          question: `What is the solution to the coding issue: ${thisStep.codingIssue}?`,
          answer: result.solution.output,
          type: 'coding',
          updated: new Date().toISOString()
        });
        diaryContext.push(`
 At step ${step}, you took the **coding** action and try to solve the coding issue: ${thisStep.codingIssue}.
 You found the solution and add it to your knowledge for future reference.
 `);
        updateContext({
          totalStep,
          ...thisStep,
          result: result
        });
      } catch (error) {
        console.error('Error solving coding issue:', error);
        diaryContext.push(`
 At step ${step}, you took the **coding** action and try to solve the coding issue: ${thisStep.codingIssue}.
 But unfortunately, you failed to solve the issue. You need to think out of the box or cut from a completely different angle.
 `);
        updateContext({
          totalStep,
          ...thisStep,
          result: 'You have tried all possible solutions and found no new information. You must think out of the box or different angle!!!'
        });
        allowCoding = false;
      }
    }
    await storeContext(prompt, schema, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
  }
@@ -722,13 +781,14 @@ You decided to think out of the box or cut from a completely different angle.`);
      false,
      false,
      false,
      false,
      badContext,
      allKnowledge,
      allURLs,
      true,
    );
-    schema = getSchema(false, false, true, false,
+    schema = getSchema(false, false, true, false, false,
      evaluationMetrics[question]?.languageStyle || 'same language as the question');
    const generator = new ObjectGeneratorSafe(context.tokenTracker);
    const result = await generator.generateObject({
--- a/src/tools/code-sandbox.ts
+++ b/src/tools/code-sandbox.ts
@@ -0,0 +1,228 @@
 import { z } from 'zod';
 import { TokenTracker } from "../utils/token-tracker";
 import { ObjectGeneratorSafe } from "../utils/safe-generator";
 // Response schema for code generation: an object with a single string field `code`.
 // The field description is phrased as guidance for the generator (always `return`
 // the result, no error handling). It is passed as `schema` to
 // `generator.generateObject` in CodeSandbox.generateCode.
 const codeGenerationSchema = z.object({
  code: z.string().describe('The JavaScript code that solves the problem and always use \'return\' statement to return the result. Focus on solving the core problem; No need for error handling or try-catch blocks.'),
 });
 // Types shared by the prompt builder and the CodeSandbox class below.

 /** Shape returned by CodeSandbox.generateCode; has the same single `code` field as codeGenerationSchema. */
 interface CodeGenerationResponse {
  // Source text that CodeSandbox.evaluateCode wraps in a function body and runs.
  code: string;
 }
 /**
  * Outcome of one CodeSandbox.evaluateCode call.
  * On success `output` holds the value the code returned; on failure `error`
  * holds a message (the thrown error's message, or 'No value was returned'
  * when the code returned `undefined`).
  */
 interface SandboxResult {
  success: boolean;
  output?: any;
  error?: string;
 }
 /**
  * Description of one variable listed in the code-generation prompt,
  * as built by CodeSandbox.createVariableInfo.
  */
 interface AvailableVariable {
  // Identifier: a key of the context object or of the global object.
  name: string;
  // `typeof` result, or `Array<typeof firstElement>` for arrays.
  type: string;
  // JSON preview of the value; left undefined for null/undefined values or when stringifying fails.
  sample?: string;
 }
 /**
  * Builds the code-generation prompt text.
  *
  * @param problem - Description of the problem to solve; placed at the very end of the prompt.
  * @param availableVars - Variables listed under rule 2, one per line as `name (type) e.g. sample`.
  * @param previousAttempts - Earlier failed attempts; when non-empty they are listed with their
  *   code and error so the next generation can take them into account.
  * @returns The complete prompt string.
  */
 function getPrompt(
  problem: string,
  availableVars: AvailableVariable[],
  previousAttempts: Array<{ code: string; error?: string }> = []
 ): string {
  // One text block per earlier attempt, numbered from 1.
  const attemptBlocks: string[] = [];
  previousAttempts.forEach((attempt, index) => {
    const errorLine = attempt.error ? `Error: ${attempt.error}` : '';
    attemptBlocks.push(`
 Attempt ${index + 1}:
 ${attempt.code}
 ${errorLine}
 `);
  });
  const previousAttemptsContext = attemptBlocks.join('\n');

  // One line per variable; the example suffix is only added when a sample exists.
  const varLines: string[] = [];
  for (const variable of availableVars) {
    const example = variable.sample ? ` e.g. ${variable.sample}` : '';
    varLines.push(`${variable.name} (${variable.type})${example}`);
  }
  const varsContext = varLines.join('\n');

  // The retry section is present only when there is at least one earlier attempt.
  const retrySection = previousAttempts.length > 0 ? `Previous attempts and their errors:
 ${previousAttemptsContext}
 ` : '';

  return `You are an expert JavaScript programmer. Your task is to generate JavaScript code to solve the given problem.
 <rules>
 1. Generate plain JavaScript code that returns the result directly
 2. You can use any of these available variables directly:
 ${varsContext}
 3. No need to declare variables that are already available, especially big long strings or arrays; try to always start with using "allContext" object
 4. Focus on solving the core problem; No need for error handling or try-catch blocks; Always use 'return' statement to return the result
 </rules>
 ${retrySection}
 <example>
 Available variables:
 numbers (Array<number>) e.g. [1, 2, 3, 4, 5, 6]
 threshold (number) e.g. 4
 Problem: Sum all numbers above threshold
 Response:
 {
  "code": "return numbers.filter(n => n > threshold).reduce((a, b) => a + b, 0);"
 }
 </example>
 Problem to solve:
 ${problem}`;
 }
 /**
  * Generates JavaScript for a described problem with the 'coder' model, runs it against
  * a caller-supplied context object, and retries (feeding failed attempts back into the
  * prompt) until the code returns a value or the attempt budget is exhausted.
  *
  * SECURITY NOTE(review): generated code is executed in-process through `new Function`
  * and has full access to the host's globals. This is not an isolation boundary; if the
  * problem description can be influenced by untrusted input, run this in a real sandbox
  * (separate process / isolate) instead.
  */
 export class CodeSandbox {
  private tracker?: TokenTracker;
  private generator: ObjectGeneratorSafe;
  private maxAttempts: number;
  private availableVars: AvailableVariable[];
  private context: Record<string, any>;

  /**
   * @param context - Object whose keys become variables visible to the generated code.
   * @param tracker - Optional token tracker forwarded to the object generator.
   * @param maxAttempts - Maximum number of generate-and-run rounds per `solve` call.
   */
  constructor(
    context: Record<string, any> = {},
    tracker?: TokenTracker,
    maxAttempts: number = 3
  ) {
    this.tracker = tracker;
    this.generator = new ObjectGeneratorSafe(tracker);
    this.maxAttempts = maxAttempts;
    this.context = context;
    this.availableVars = this.collectVariables(context);
  }

  /**
   * Lists the variables the generated code may reference: every key of `context`
   * first, then every enumerable non-function property of the global object that
   * is not already listed.
   */
  private collectVariables(context: Record<string, any>): AvailableVariable[] {
    const vars: AvailableVariable[] = [];
    const seen = new Set<string>();

    // Collect from provided context
    for (const [name, value] of Object.entries(context)) {
      vars.push(this.createVariableInfo(name, value));
      seen.add(name);
    }

    // Collect from global scope. `globalThis` resolves in browsers, workers and Node,
    // whereas probing `window`/`global` throws where neither identifier exists.
    const globalObj: object = globalThis;
    for (const key of Object.keys(globalObj)) {
      if (key === 'window' || key === 'global' || key === 'globalThis') continue;
      if (seen.has(key)) continue; // Avoid duplicates

      let value: unknown;
      try {
        // Some globals are lazy getters that may throw; one bad property must not
        // prevent the sandbox from being constructed.
        value = Reflect.get(globalObj, key);
      } catch {
        continue;
      }
      if (typeof value === 'function') continue; // Skip functions

      vars.push(this.createVariableInfo(key, value));
      seen.add(key);
    }
    return vars;
  }

  /**
   * Appends a ", ..." marker just before the closing bracket of a JSON array/object
   * preview. Works on the LAST character so nested brackets inside the preview are
   * left intact.
   */
  private markTruncated(json: string | undefined): string | undefined {
    if (json === undefined || json.length < 2) return json;
    const open = json.slice(0, 1);
    const close = json.slice(-1);
    const inner = json.slice(1, -1);
    return inner ? `${open}${inner}, ...${close}` : `${open}...${close}`;
  }

  /**
   * Describes one variable for the prompt: its name, a coarse type label
   * (`Array<typeof firstElement>` for arrays, `typeof value` otherwise) and a short
   * JSON preview (first 3 array elements or first 2 object entries).
   */
  private createVariableInfo(name: string, value: any): AvailableVariable {
    const type = Array.isArray(value)
      ? `Array<${typeof value[0]}>`
      : typeof value;

    let sample: string | undefined;
    try {
      if (Array.isArray(value)) {
        sample = JSON.stringify(value.slice(0, 3));
        if (value.length > 3) sample = this.markTruncated(sample);
      } else if (typeof value === 'object' && value !== null) {
        const entries = Object.entries(value);
        sample = JSON.stringify(Object.fromEntries(entries.slice(0, 2)));
        if (entries.length > 2) sample = this.markTruncated(sample);
      } else if (value !== undefined && value !== null) {
        sample = JSON.stringify(value);
      }
    } catch {
      // If we can't stringify the value (cycles, throwing getters, BigInt), skip the sample
      sample = undefined;
    }
    return { name, type, sample };
  }

  /**
   * Asks the 'coder' model for code solving `problem`, including earlier failed
   * attempts in the prompt.
   */
  private async generateCode(
    problem: string,
    previousAttempts: Array<{ code: string; error?: string }> = []
  ): Promise<CodeGenerationResponse> {
    const prompt = getPrompt(problem, this.availableVars, previousAttempts);
    const result = await this.generator.generateObject({
      model: 'coder',
      schema: codeGenerationSchema,
      prompt,
    });
    return result.object;
  }

  /**
   * Runs `code` as a function body with the context's keys in scope.
   * Returns a failure result when the code throws (including syntax errors raised
   * while building the function) or returns `undefined`.
   */
  private evaluateCode(code: string): SandboxResult {
    try {
      // SECURITY NOTE(review): executes model-generated code with full host access.
      // 'with' exposes the context's keys as bare identifiers to the generated code.
      const evalInContext = new Function('context', `
        with (context) {
          ${code}
        }
      `);

      // Execute the code with the context and get the return value
      const output = evalInContext(this.context);
      if (output === undefined) {
        return { success: false, error: 'No value was returned' };
      }
      return { success: true, output };
    } catch (error) {
      let message = 'Unknown error occurred';
      if (error instanceof Error) {
        message = error.message;
      } else if (typeof error === 'string' && error.length > 0) {
        // Generated code may `throw "text"`; keep the text so the retry prompt is useful.
        message = error;
      }
      return { success: false, error: message };
    }
  }

  /**
   * Generates and runs code for `problem`, retrying up to `maxAttempts` times.
   *
   * @param problem - Description of the issue to solve.
   * @returns The first code that returned a value together with that value, plus the
   *   list of failed attempts that preceded it.
   * @throws Error when no attempt produced a value within the attempt budget; errors
   *   raised by the underlying generator propagate unchanged.
   */
  async solve(problem: string): Promise<{
    solution: { code: string; output: any };
    attempts: Array<{ code: string; error?: string }>;
  }> {
    const attempts: Array<{ code: string; error?: string }> = [];

    for (let i = 0; i < this.maxAttempts; i++) {
      // Generate code
      const { code } = await this.generateCode(problem, attempts);
      console.log(`Coding attempt ${i + 1}:`, code);

      // Evaluate the code
      const result = this.evaluateCode(code);
      if (result.success) {
        return {
          solution: { code, output: result.output },
          attempts
        };
      }

      console.error('Coding error:', result.error);
      // Store the failed attempt so the next prompt can learn from it
      attempts.push({ code, error: result.error });
    }

    // Every attempt failed (or the budget was zero)
    throw new Error(`Failed to generate working code after ${this.maxAttempts} attempts`);
  }
 }
--- a/src/tools/evaluator.ts
+++ b/src/tools/evaluator.ts
@@ -460,12 +460,13 @@ export async function evaluateAnswer(
  question: string,
  action: AnswerAction,
  evaluationCri: EvaluationCriteria,
-  trackers: [TokenTracker, ActionTracker]
+  trackers: [TokenTracker, ActionTracker],
  visitedURLs: string[] = []
 ): Promise<{ response: EvaluationResponse }> {
  let result;
  // Only add attribution if we have valid references
-  if (action.references && action.references.length > 0) {
+  if (action.references && action.references.length > 0 && action.references.some(ref => ref.url.startsWith('http'))) {
    evaluationCri.types = ['attribution', ...evaluationCri.types];
  }
@@ -473,7 +474,7 @@ export async function evaluateAnswer(
    switch (evaluationType) {
      case 'attribution': {
        // Safely handle references and ensure we have content
-        const urls = action.references?.map(ref => ref.url) ?? [];
+        const urls = action.references?.filter(ref => ref.url.startsWith('http') && !visitedURLs.includes(ref.url)).map(ref => ref.url) || [];
        const uniqueURLs = [...new Set(urls)];
        const allKnowledge = await fetchSourceContent(uniqueURLs, trackers);
--- a/src/types.ts
+++ b/src/types.ts
@@ -2,7 +2,7 @@
 import {CoreAssistantMessage, CoreUserMessage, LanguageModelUsage} from "ai";
 type BaseAction = {
-  action: "search" | "answer" | "reflect" | "visit";
+  action: "search" | "answer" | "reflect" | "visit" | "coding";
  think: string;
 };
@@ -29,7 +29,7 @@ export type KnowledgeItem = {
    exactQuote: string;
    url: string;
  }> | Array<any>;
-  type: 'qa' | 'side-info' | 'chat-history' | 'url',
+  type: 'qa' | 'side-info' | 'chat-history' | 'url' | 'coding',
  updated: string,
 }
@@ -43,7 +43,12 @@ export type VisitAction = BaseAction & {
  URLTargets: string[];
 };
-export type StepAction = SearchAction | AnswerAction | ReflectAction | VisitAction;
+export type CodingAction = BaseAction & {
  action: "coding";
  codingIssue: string;
 };
 export type StepAction = SearchAction | AnswerAction | ReflectAction | VisitAction | CodingAction;
 export type EvaluationType = 'definitive' | 'freshness' | 'plurality' | 'attribution';
 export type EvaluationCriteria = {