feat: add coding tools

2025-12-26 06:28:56 +08:00 · 2025-02-17 14:19:36 +08:00 · 2025-02-17 14:19:36 +08:00 · f8aa2b1353
commit f8aa2b1353
parent b487563882
6 changed files with 313 additions and 15 deletions
--- a/config.json
+++ b/config.json
@ -32,6 +32,7 @@
        "maxTokens": 8000
      },
      "tools": {
+        "coder": { "temperature": 0.7 },
        "searchGrounding": { "temperature": 0 },
        "dedup": { "temperature": 0.1 },
        "evaluator": {},
@ -49,6 +50,7 @@
        "maxTokens": 8000
      },
      "tools": {
+        "coder": { "temperature": 0.7 },
        "searchGrounding": { "temperature": 0 },
        "dedup": { "temperature": 0.1 },
        "evaluator": {},
--- a/jina-ai/config.json
+++ b/jina-ai/config.json
@ -38,6 +38,7 @@
        "maxTokens": 8000
      },
      "tools": {
+        "coder": { "temperature": 0.7 },
        "searchGrounding": { "temperature": 0 },
        "dedup": { "temperature": 0.1 },
        "evaluator": {},
@ -55,6 +56,7 @@
        "maxTokens": 8000
      },
      "tools": {
+        "coder": { "temperature": 0.7 },
        "searchGrounding": { "temperature": 0 },
        "dedup": { "temperature": 0.1 },
        "evaluator": {},
--- a/src/agent.ts
+++ b/src/agent.ts
@ -17,6 +17,7 @@ import {search} from "./tools/jina-search";
 // import {grounding} from "./tools/grounding";
 import {zodToJsonSchema} from "zod-to-json-schema";
 import {ObjectGeneratorSafe} from "./utils/safe-generator";
+import {CodeSandbox} from "./tools/code-sandbox";

 async function sleep(ms: number) {
  const seconds = Math.ceil(ms / 1000);
@ -24,7 +25,7 @@ async function sleep(ms: number) {
  return new Promise(resolve => setTimeout(resolve, ms));
 }

-function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean, languageStyle: string = 'same language as the question') {
+function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean, allowCoding: boolean, languageStyle: string = 'same language as the question') {
  const actions: string[] = [];
  const properties: Record<string, z.ZodTypeAny> = {
    action: z.enum(['placeholder']), // Will update later with actual actions
@ -37,11 +38,17 @@ function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boole
      .describe("Required when action='search'. Must be a short, keyword-based query that BM25, tf-idf based search engines can understand. Existing queries must be avoided").optional();
  }

+  if (allowCoding) {
+    actions.push("coding");
+    properties.codingIssue = z.string().max(500)
+      .describe("Required when action='coding'. Describe what issue to solve with coding, format like a github issue ticket. Specify the input value when it is short.").optional();
+  }
+
  if (allowAnswer) {
    actions.push("answer");
    properties.references = z.array(
      z.object({
-        exactQuote: z.string().describe("Exact relevant quote from the document"),
+        exactQuote: z.string().describe("Exact relevant quote from the document, must be a soundbite, short and to the point, no fluff").max(30),
        url: z.string().describe("source URL; must be directly from the context")
      }).required()
    ).describe("Required when action='answer'. Must be an array of references that support the answer, each reference must contain an exact quote and the URL of the document").optional();
@ -83,6 +90,7 @@ function getPrompt(
  allowAnswer: boolean = true,
  allowRead: boolean = true,
  allowSearch: boolean = true,
+  allowCoding: boolean = true,
  badContext?: { question: string, answer: string, evaluation: string, recap: string; blame: string; improvement: string; }[],
  knowledge?: KnowledgeItem[],
  allURLs?: Record<string, string>,
@ -148,7 +156,6 @@ ${knowledgeItems}
  }


-
  // Add context section if exists
  if (context?.length) {
    sections.push(`
@ -215,6 +222,15 @@ ${urlList}
 `);
  }

+  if (allowCoding) {
+    actionSections.push(`
+<action-coding>
+- This action allows you to solve the problem with coding in javascript. This is useful when you need some programming logic, like counting, filtering, or transforming, sorting, regex extraction, pre-processing, or post-processing of the data.
+- You only need to describe the issue you aim to solve in the "codingIssue" field. Specify the input either with real values or variable names. 
+- You do not need to generate any actual code. Some senior engineers will help you with actual implementation.
+</action-coding>`);
+  }
+
  if (allowSearch) {

    actionSections.push(`
@ -259,7 +275,7 @@ FAILURE IS NOT AN OPTION. EXECUTE WITH EXTREME PREJUDICE! ⚡️
  if (allowReflect) {
    actionSections.push(`
 <action-reflect>    
- Perform critical analysis through hypothetical scenarios or systematic breakdowns
+- Perform critical reflection through hypothetical scenarios or systematic breakdowns
 - Identify knowledge gaps and formulate essential clarifying questions
 </action-reflect>
 `);
@ -313,7 +329,7 @@ export async function getResponse(question: string,
  let step = 0;
  let totalStep = 0;
  let badAttempts = 0;
-  let schema: ZodObject<any> = getSchema(true, true, true, true)
+  let schema: ZodObject<any> = getSchema(true, true, true, true, true)
  question = question.trim()
  const gaps: string[] = [question];  // All questions to be answered including the orginal question
  const allQuestions = [question];
@ -344,6 +360,7 @@ export async function getResponse(question: string,
  let allowSearch = true;
  let allowRead = true;
  let allowReflect = true;
+  let allowCoding = true;
  let prompt = '';
  let thisStep: StepAction = {action: 'answer', answer: '', references: [], think: '', isFinal: false};

@ -379,12 +396,13 @@ export async function getResponse(question: string,
      allowAnswer,
      allowRead,
      allowSearch,
+      allowCoding,
      badContext,
      allKnowledge,
      allURLs,
      false,
    );
-    schema = getSchema(allowReflect, allowRead, allowAnswer, allowSearch,
+    schema = getSchema(allowReflect, allowRead, allowAnswer, allowSearch, allowCoding,
      evaluationMetrics[currentQuestion].languageStyle)
    const generator = new ObjectGeneratorSafe(context.tokenTracker);
    const result = await generator.generateObject({
@ -394,7 +412,7 @@ export async function getResponse(question: string,
    });
    thisStep = result.object as StepAction;
    // print allowed and chose action
-    const actionsStr = [allowSearch, allowRead, allowAnswer, allowReflect].map((a, i) => a ? ['search', 'read', 'answer', 'reflect'][i] : null).filter(a => a).join(', ');
+    const actionsStr = [allowSearch, allowRead, allowAnswer, allowReflect, allowCoding].map((a, i) => a ? ['search', 'read', 'answer', 'reflect', 'coding'][i] : null).filter(a => a).join(', ');
    console.log(`${thisStep.action} <- [${actionsStr}]`);
    console.log(thisStep)

@ -423,7 +441,10 @@ export async function getResponse(question: string,
      context.actionTracker.trackThink(`But wait, let me evaluate the answer first.`)

      const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep,
-        evaluationMetrics[currentQuestion], [context.tokenTracker, context.actionTracker]);
+        evaluationMetrics[currentQuestion],
+        [context.tokenTracker, context.actionTracker],
+        visitedURLs
+        );

      if (currentQuestion.trim() === question) {
        if (evaluation.pass) {
@ -530,6 +551,11 @@ You will now figure out the answers to these sub-questions and see if they can h
        gaps.push(...newGapQuestions.slice(0, 2));
        allQuestions.push(...newGapQuestions.slice(0, 2));
        gaps.push(question);  // always keep the original question in the gaps
+        updateContext({
+          totalStep,
+          ...thisStep,
+        });
+
      } else {
        diaryContext.push(`
 At step ${step}, you took **reflect** and think about the knowledge gaps. You tried to break down the question "${currentQuestion}" into gap-questions like this: ${oldQuestions.join(', ')} 
@ -701,8 +727,41 @@ You decided to think out of the box or cut from a completely different angle.`);

        allowRead = false;
      }
+    } else if (thisStep.action === 'coding' && thisStep.codingIssue) {
+      const sandbox = new CodeSandbox({allContext}, context.tokenTracker);
+      try {
+        const result = await sandbox.solve(thisStep.codingIssue);
+        allKnowledge.push({
+          question: `What is the solution to the coding issue: ${thisStep.codingIssue}?`,
+          answer: result.solution.output,
+          type: 'coding',
+          updated: new Date().toISOString()
+        });
+        diaryContext.push(`
+At step ${step}, you took the **coding** action and try to solve the coding issue: ${thisStep.codingIssue}.
+You found the solution and add it to your knowledge for future reference.
+`);
+        updateContext({
+          totalStep,
+          ...thisStep,
+          result: result
+        });
+      } catch (error) {
+        console.error('Error solving coding issue:', error);
+        diaryContext.push(`
+At step ${step}, you took the **coding** action and try to solve the coding issue: ${thisStep.codingIssue}.
+But unfortunately, you failed to solve the issue. You need to think out of the box or cut from a completely different angle.
+`);
+        updateContext({
+          totalStep,
+          ...thisStep,
+          result: 'You have tried all possible solutions and found no new information. You must think out of the box or different angle!!!'
+        });
+        allowCoding = false;
+      }
    }

+
    await storeContext(prompt, schema, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
  }

@ -722,13 +781,14 @@ You decided to think out of the box or cut from a completely different angle.`);
      false,
      false,
      false,
+      false,
      badContext,
      allKnowledge,
      allURLs,
      true,
    );

-    schema = getSchema(false, false, true, false,
+    schema = getSchema(false, false, true, false, false,
      evaluationMetrics[question]?.languageStyle || 'same language as the question');
    const generator = new ObjectGeneratorSafe(context.tokenTracker);
    const result = await generator.generateObject({
--- a/src/tools/code-sandbox.ts
+++ b/src/tools/code-sandbox.ts
@ -0,0 +1,228 @@
+import { z } from 'zod';
+import { TokenTracker } from "../utils/token-tracker";
+import { ObjectGeneratorSafe } from "../utils/safe-generator";
+
+// Response schema for code generation: an object with a single `code` string.
+// The field description doubles as an instruction to the model (return the result
+// with a `return` statement, skip error handling).
+const codeGenerationSchema = z.object({
+  code: z.string().describe('The JavaScript code that solves the problem and always use \'return\' statement to return the result. Focus on solving the core problem; No need for error handling or try-catch blocks.'),
+});
+
+// Define the types
+/** Object returned by code generation; has the same single field as `codeGenerationSchema`. */
+interface CodeGenerationResponse {
+  /** Generated JavaScript source. */
+  code: string;
+}
+
+/** Outcome of one evaluation of generated code. */
+interface SandboxResult {
+  /** True when the code ran without throwing and returned something other than `undefined`. */
+  success: boolean;
+  /** Value returned by the code; set only on success. */
+  output?: any;
+  /** Failure message; set only when `success` is false. */
+  error?: string;
+}
+
+/** Description of one variable that is listed in the code-generation prompt. */
+interface AvailableVariable {
+  /** Identifier under which the value is listed. */
+  name: string;
+  /** `typeof` of the value, or `Array<...>` built from the `typeof` of the first element for arrays. */
+  type: string;
+  /** Shortened JSON preview of the value; absent when no preview could be produced. */
+  sample?: string;
+}
+
+/**
+ * Builds the text prompt for the code-generation model.
+ *
+ * @param problem Description of the problem to solve; appended verbatim at the end of the prompt.
+ * @param availableVars Variables listed under rule 2 as usable without declaration.
+ * @param previousAttempts Earlier code (and error, if any) shown to the model; the
+ *   section is omitted entirely when the array is empty.
+ * @returns The complete prompt string.
+ */
+function getPrompt(
+  problem: string,
+  availableVars: AvailableVariable[],
+  previousAttempts: Array<{ code: string; error?: string }> = []
+): string {
+  // One numbered entry per earlier attempt: the code, then its error line when present.
+  const previousAttemptsContext = previousAttempts.map((attempt, index) => `
+Attempt ${index + 1}:
+${attempt.code}
+${attempt.error ? `Error: ${attempt.error}` : ''}
+`).join('\n');
+
+  // One line per variable: "name (type)" plus an "e.g." sample when one exists.
+  const varsContext = availableVars.map(v =>
+    `${v.name} (${v.type})${v.sample ? ` e.g. ${v.sample}` : ''}`
+  ).join('\n');
+
+  return `You are an expert JavaScript programmer. Your task is to generate JavaScript code to solve the given problem.
+
+<rules>
+1. Generate plain JavaScript code that returns the result directly
+2. You can use any of these available variables directly:
+${varsContext}
+3. No need to declare variables that are already available, especially big long strings or arrays; try to always start with using "allContext" object
+4. Focus on solving the core problem; No need for error handling or try-catch blocks; Always use 'return' statement to return the result
+</rules>
+
+${previousAttempts.length > 0 ? `Previous attempts and their errors:
+${previousAttemptsContext}
+` : ''}
+
+<example>
+Available variables:
+numbers (Array<number>) e.g. [1, 2, 3, 4, 5, 6]
+threshold (number) e.g. 4
+
+Problem: Sum all numbers above threshold
+
+Response:
+{
+  "code": "return numbers.filter(n => n > threshold).reduce((a, b) => a + b, 0);"
+}
+</example>
+
+Problem to solve:
+${problem}`;
+}
+
+/**
+ * Asks the 'coder' model for JavaScript that solves a described problem, runs that
+ * code against a caller-supplied context object, and retries with the failed
+ * attempts fed back into the prompt.
+ */
+export class CodeSandbox {
+  private tracker?: TokenTracker;
+  private generator: ObjectGeneratorSafe;
+  private maxAttempts: number;
+  private availableVars: AvailableVariable[];
+  private context: Record<string, any>;
+
+  /**
+   * @param context Values the generated code can reference by key name.
+   * @param tracker Optional token tracker handed to the object generator.
+   * @param maxAttempts Maximum number of generate-and-run rounds performed by `solve`.
+   */
+  constructor(
+    context: Record<string, any> = {},
+    tracker?: TokenTracker,
+    maxAttempts: number = 3
+  ) {
+    this.tracker = tracker;
+    this.generator = new ObjectGeneratorSafe(tracker);
+    this.maxAttempts = maxAttempts;
+    this.context = context;
+    this.availableVars = this.collectVariables(context);
+  }
+
+  /**
+   * Lists the variables to advertise in the prompt: every key of `context`, followed by
+   * every enumerable non-function property of the global object not already listed.
+   */
+  private collectVariables(context: Record<string, any>): AvailableVariable[] {
+    const vars: AvailableVariable[] = [];
+
+    // Collect from provided context
+    for (const [name, value] of Object.entries(context)) {
+      vars.push(this.createVariableInfo(name, value));
+    }
+
+    // Collect from global scope (window in browser, global in Node)
+    const globalObj = typeof window !== 'undefined' ? window : global;
+    for (const key of Object.keys(globalObj)) {
+      if (key === 'window' || key === 'global' || key === 'globalThis') continue;
+      const value = (globalObj as any)[key];
+      if (typeof value === 'function') continue; // Skip functions
+      if (!vars.some(v => v.name === key)) { // Avoid duplicates
+        vars.push(this.createVariableInfo(key, value));
+      }
+    }
+
+    return vars;
+  }
+
+  /**
+   * Describes one variable for the prompt: its name, a type label, and a shortened
+   * JSON sample (first 3 array elements or first 2 object entries, with a trailing
+   * ", ..." marker when more exist).
+   */
+  private createVariableInfo(name: string, value: any): AvailableVariable {
+    const type = Array.isArray(value)
+      ? `Array<${typeof value[0]}>`
+      : typeof value;
+
+    let sample: string | undefined;
+    try {
+      if (Array.isArray(value)) {
+        sample = JSON.stringify(value.slice(0, 3));
+        // Insert the marker before the FINAL bracket. Replacing the first ']' would put
+        // it inside a nested element, e.g. [[1],[2],[3],[4]] -> [[1, ...],[2],[3]].
+        if (value.length > 3) sample = `${sample.slice(0, -1)}, ...]`;
+      } else if (typeof value === 'object' && value !== null) {
+        const entries = Object.entries(value).slice(0, 2);
+        sample = JSON.stringify(Object.fromEntries(entries));
+        // Same reasoning as for arrays: the closing brace of the sample is the last character.
+        if (Object.keys(value).length > 2) sample = `${sample.slice(0, -1)}, ...}`;
+      } else if (value !== undefined && value !== null) {
+        sample = JSON.stringify(value);
+      }
+    } catch (e) {
+      // If we can't stringify the value, skip the sample
+    }
+
+    return { name, type, sample };
+  }
+
+  /**
+   * Requests code for `problem` from the 'coder' model.
+   *
+   * @param problem Description of the problem to solve.
+   * @param previousAttempts Failed attempts to include in the prompt.
+   * @returns The generated object containing the `code` string.
+   */
+  private async generateCode(
+    problem: string,
+    previousAttempts: Array<{ code: string; error?: string }> = []
+  ): Promise<CodeGenerationResponse> {
+    const prompt = getPrompt(problem, this.availableVars, previousAttempts);
+
+    const result = await this.generator.generateObject({
+      model: 'coder',
+      schema: codeGenerationSchema,
+      prompt,
+    });
+
+    return result.object;
+  }
+
+  /**
+   * Runs `code` as a function body with the keys of `this.context` in scope.
+   *
+   * @returns `success: true` with the returned value, or `success: false` with a
+   *   message when the code throws or returns `undefined`.
+   */
+  private evaluateCode(code: string): SandboxResult {
+    try {
+      // NOTE(review/security): `code` is model-generated and is executed in-process via
+      // `new Function`. `with` only puts `context` on the scope chain; it does not
+      // isolate the code from globals. Confirm this is acceptable for the deployment.
+      // Create a function that uses 'with' to evaluate in the context and return the result
+      const evalInContext = new Function('context', `
+        with (context) {
+          ${code}
+        }
+      `);
+
+      console.log('Context:', this.context);
+
+      // Execute the code with the context and get the return value
+      const output = evalInContext(this.context);
+
+      if (output === undefined) {
+        return {
+          success: false,
+          error: 'No value was returned'
+        };
+      }
+
+      return {
+        success: true,
+        output
+      };
+    } catch (error) {
+      return {
+        success: false,
+        error: error instanceof Error ? error.message : 'Unknown error occurred'
+      };
+    }
+  }
+
+  /**
+   * Generates and runs code for `problem`, up to `maxAttempts` times.
+   *
+   * @param problem Description of the problem to solve.
+   * @returns The first successful code with its output, plus the failed attempts before it.
+   * @throws Error when every attempt fails, or when `maxAttempts` is not positive.
+   */
+  async solve(problem: string): Promise<{
+    solution: { code: string; output: any };
+    attempts: Array<{ code: string; error?: string }>;
+  }> {
+    const attempts: Array<{ code: string; error?: string }> = [];
+
+    for (let i = 0; i < this.maxAttempts; i++) {
+      // Generate code
+      const generation = await this.generateCode(problem, attempts);
+      const { code } = generation;
+
+      console.log(`Coding attempt ${i + 1}:`, code);
+      // Evaluate the code
+      const result = this.evaluateCode(code);
+
+      if (result.success) {
+        return {
+          solution: {
+            code,
+            output: result.output
+          },
+          attempts
+        };
+      }
+
+      console.error('Coding error:', result.error);
+
+      // Store the failed attempt
+      attempts.push({
+        code,
+        error: result.error
+      });
+
+      // If we've reached max attempts, throw an error
+      if (i === this.maxAttempts - 1) {
+        throw new Error(`Failed to generate working code after ${this.maxAttempts} attempts`);
+      }
+    }
+
+    // Only reached when maxAttempts <= 0, i.e. the loop body never ran
+    throw new Error('Unexpected end of execution');
+  }
+}
--- a/src/tools/evaluator.ts
+++ b/src/tools/evaluator.ts
@ -460,12 +460,13 @@ export async function evaluateAnswer(
  question: string,
  action: AnswerAction,
  evaluationCri: EvaluationCriteria,
-  trackers: [TokenTracker, ActionTracker]
+  trackers: [TokenTracker, ActionTracker],
+  visitedURLs: string[] = []
 ): Promise<{ response: EvaluationResponse }> {
  let result;

  // Only add attribution if we have valid references
-  if (action.references && action.references.length > 0) {
+  if (action.references && action.references.length > 0 && action.references.some(ref => ref.url.startsWith('http'))) {
    evaluationCri.types = ['attribution', ...evaluationCri.types];
  }

@ -473,7 +474,7 @@ export async function evaluateAnswer(
    switch (evaluationType) {
      case 'attribution': {
        // Safely handle references and ensure we have content
-        const urls = action.references?.map(ref => ref.url) ?? [];
+        const urls = action.references?.filter(ref => ref.url.startsWith('http') && !visitedURLs.includes(ref.url)).map(ref => ref.url) || [];
        const uniqueURLs = [...new Set(urls)];
        const allKnowledge = await fetchSourceContent(uniqueURLs, trackers);

--- a/src/types.ts
+++ b/src/types.ts
@ -2,7 +2,7 @@
 import {CoreAssistantMessage, CoreUserMessage, LanguageModelUsage} from "ai";

 type BaseAction = {
-  action: "search" | "answer" | "reflect" | "visit";
+  action: "search" | "answer" | "reflect" | "visit" | "coding";
  think: string;
 };

@ -29,7 +29,7 @@ export type KnowledgeItem = {
    exactQuote: string;
    url: string;
  }> | Array<any>;
-  type: 'qa' | 'side-info' | 'chat-history' | 'url',
+  type: 'qa' | 'side-info' | 'chat-history' | 'url' | 'coding',
  updated: string,
 }

@ -43,7 +43,12 @@ export type VisitAction = BaseAction & {
  URLTargets: string[];
 };

-export type StepAction = SearchAction | AnswerAction | ReflectAction | VisitAction;
+/** Action variant tagged `action: "coding"`; carries the issue text in `codingIssue`. */
+export type CodingAction = BaseAction & {
+  action: "coding";
+  codingIssue: string;
+};
+
+export type StepAction = SearchAction | AnswerAction | ReflectAction | VisitAction | CodingAction;

 export type EvaluationType = 'definitive' | 'freshness' | 'plurality' | 'attribution';
 export type EvaluationCriteria = {