feat: add coding tools

This commit is contained in:
Han Xiao 2025-02-17 14:19:36 +08:00
parent b487563882
commit f8aa2b1353
6 changed files with 313 additions and 15 deletions

View File

@ -32,6 +32,7 @@
"maxTokens": 8000
},
"tools": {
"coder": { "temperature": 0.7 },
"searchGrounding": { "temperature": 0 },
"dedup": { "temperature": 0.1 },
"evaluator": {},
@ -49,6 +50,7 @@
"maxTokens": 8000
},
"tools": {
"coder": { "temperature": 0.7 },
"searchGrounding": { "temperature": 0 },
"dedup": { "temperature": 0.1 },
"evaluator": {},

View File

@ -38,6 +38,7 @@
"maxTokens": 8000
},
"tools": {
"coder": { "temperature": 0.7 },
"searchGrounding": { "temperature": 0 },
"dedup": { "temperature": 0.1 },
"evaluator": {},
@ -55,6 +56,7 @@
"maxTokens": 8000
},
"tools": {
"coder": { "temperature": 0.7 },
"searchGrounding": { "temperature": 0 },
"dedup": { "temperature": 0.1 },
"evaluator": {},

View File

@ -17,6 +17,7 @@ import {search} from "./tools/jina-search";
// import {grounding} from "./tools/grounding";
import {zodToJsonSchema} from "zod-to-json-schema";
import {ObjectGeneratorSafe} from "./utils/safe-generator";
import {CodeSandbox} from "./tools/code-sandbox";
async function sleep(ms: number) {
const seconds = Math.ceil(ms / 1000);
@ -24,7 +25,7 @@ async function sleep(ms: number) {
return new Promise(resolve => setTimeout(resolve, ms));
}
function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean, languageStyle: string = 'same language as the question') {
function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean, allowCoding: boolean, languageStyle: string = 'same language as the question') {
const actions: string[] = [];
const properties: Record<string, z.ZodTypeAny> = {
action: z.enum(['placeholder']), // Will update later with actual actions
@ -37,11 +38,17 @@ function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boole
.describe("Required when action='search'. Must be a short, keyword-based query that BM25, tf-idf based search engines can understand. Existing queries must be avoided").optional();
}
if (allowCoding) {
actions.push("coding");
properties.codingIssue = z.string().max(500)
.describe("Required when action='coding'. Describe what issue to solve with coding, format like a github issue ticket. Specify the input value when it is short.").optional();
}
if (allowAnswer) {
actions.push("answer");
properties.references = z.array(
z.object({
exactQuote: z.string().describe("Exact relevant quote from the document"),
exactQuote: z.string().describe("Exact relevant quote from the document, must be a soundbite, short and to the point, no fluff").max(30),
url: z.string().describe("source URL; must be directly from the context")
}).required()
).describe("Required when action='answer'. Must be an array of references that support the answer, each reference must contain an exact quote and the URL of the document").optional();
@ -83,6 +90,7 @@ function getPrompt(
allowAnswer: boolean = true,
allowRead: boolean = true,
allowSearch: boolean = true,
allowCoding: boolean = true,
badContext?: { question: string, answer: string, evaluation: string, recap: string; blame: string; improvement: string; }[],
knowledge?: KnowledgeItem[],
allURLs?: Record<string, string>,
@ -148,7 +156,6 @@ ${knowledgeItems}
}
// Add context section if exists
if (context?.length) {
sections.push(`
@ -215,6 +222,15 @@ ${urlList}
`);
}
if (allowCoding) {
actionSections.push(`
<action-coding>
- This action allows you to solve the problem with coding in javascript. This is useful when you need some programming logic, like counting, filtering, or transforming, sorting, regex extraction, pre-processing, or post-processing of the data.
- You only need to describe the issue you aim to solve in the "codingIssue" field. Specify the input either with real values or variable names.
- You do not need to generate any actual code. Some senior engineers will help you with actual implementation.
</action-coding>`);
}
if (allowSearch) {
actionSections.push(`
@ -259,7 +275,7 @@ FAILURE IS NOT AN OPTION. EXECUTE WITH EXTREME PREJUDICE! ⚡️
if (allowReflect) {
actionSections.push(`
<action-reflect>
- Perform critical analysis through hypothetical scenarios or systematic breakdowns
- Perform critical reflection through hypothetical scenarios or systematic breakdowns
- Identify knowledge gaps and formulate essential clarifying questions
</action-reflect>
`);
@ -313,7 +329,7 @@ export async function getResponse(question: string,
let step = 0;
let totalStep = 0;
let badAttempts = 0;
let schema: ZodObject<any> = getSchema(true, true, true, true)
let schema: ZodObject<any> = getSchema(true, true, true, true, true)
question = question.trim()
const gaps: string[] = [question]; // All questions to be answered including the orginal question
const allQuestions = [question];
@ -344,6 +360,7 @@ export async function getResponse(question: string,
let allowSearch = true;
let allowRead = true;
let allowReflect = true;
let allowCoding = true;
let prompt = '';
let thisStep: StepAction = {action: 'answer', answer: '', references: [], think: '', isFinal: false};
@ -379,12 +396,13 @@ export async function getResponse(question: string,
allowAnswer,
allowRead,
allowSearch,
allowCoding,
badContext,
allKnowledge,
allURLs,
false,
);
schema = getSchema(allowReflect, allowRead, allowAnswer, allowSearch,
schema = getSchema(allowReflect, allowRead, allowAnswer, allowSearch, allowCoding,
evaluationMetrics[currentQuestion].languageStyle)
const generator = new ObjectGeneratorSafe(context.tokenTracker);
const result = await generator.generateObject({
@ -394,7 +412,7 @@ export async function getResponse(question: string,
});
thisStep = result.object as StepAction;
// print allowed and chose action
const actionsStr = [allowSearch, allowRead, allowAnswer, allowReflect].map((a, i) => a ? ['search', 'read', 'answer', 'reflect'][i] : null).filter(a => a).join(', ');
const actionsStr = [allowSearch, allowRead, allowAnswer, allowReflect, allowCoding].map((a, i) => a ? ['search', 'read', 'answer', 'reflect'][i] : null).filter(a => a).join(', ');
console.log(`${thisStep.action} <- [${actionsStr}]`);
console.log(thisStep)
@ -423,7 +441,10 @@ export async function getResponse(question: string,
context.actionTracker.trackThink(`But wait, let me evaluate the answer first.`)
const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep,
evaluationMetrics[currentQuestion], [context.tokenTracker, context.actionTracker]);
evaluationMetrics[currentQuestion],
[context.tokenTracker, context.actionTracker],
visitedURLs
);
if (currentQuestion.trim() === question) {
if (evaluation.pass) {
@ -530,6 +551,11 @@ You will now figure out the answers to these sub-questions and see if they can h
gaps.push(...newGapQuestions.slice(0, 2));
allQuestions.push(...newGapQuestions.slice(0, 2));
gaps.push(question); // always keep the original question in the gaps
updateContext({
totalStep,
...thisStep,
});
} else {
diaryContext.push(`
At step ${step}, you took **reflect** and think about the knowledge gaps. You tried to break down the question "${currentQuestion}" into gap-questions like this: ${oldQuestions.join(', ')}
@ -701,8 +727,41 @@ You decided to think out of the box or cut from a completely different angle.`);
allowRead = false;
}
} else if (thisStep.action === 'coding' && thisStep.codingIssue) {
const sandbox = new CodeSandbox({allContext}, context.tokenTracker);
try {
const result = await sandbox.solve(thisStep.codingIssue);
allKnowledge.push({
question: `What is the solution to the coding issue: ${thisStep.codingIssue}?`,
answer: result.solution.output,
type: 'coding',
updated: new Date().toISOString()
});
diaryContext.push(`
At step ${step}, you took the **coding** action and try to solve the coding issue: ${thisStep.codingIssue}.
You found the solution and add it to your knowledge for future reference.
`);
updateContext({
totalStep,
...thisStep,
result: result
});
} catch (error) {
console.error('Error solving coding issue:', error);
diaryContext.push(`
At step ${step}, you took the **coding** action and try to solve the coding issue: ${thisStep.codingIssue}.
But unfortunately, you failed to solve the issue. You need to think out of the box or cut from a completely different angle.
`);
updateContext({
totalStep,
...thisStep,
result: 'You have tried all possible solutions and found no new information. You must think out of the box or different angle!!!'
});
allowCoding = false;
}
}
await storeContext(prompt, schema, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
}
@ -722,13 +781,14 @@ You decided to think out of the box or cut from a completely different angle.`);
false,
false,
false,
false,
badContext,
allKnowledge,
allURLs,
true,
);
schema = getSchema(false, false, true, false,
schema = getSchema(false, false, true, false, false,
evaluationMetrics[question]?.languageStyle || 'same language as the question');
const generator = new ObjectGeneratorSafe(context.tokenTracker);
const result = await generator.generateObject({

228
src/tools/code-sandbox.ts Normal file
View File

@ -0,0 +1,228 @@
import { z } from 'zod';
import { TokenTracker } from "../utils/token-tracker";
import { ObjectGeneratorSafe } from "../utils/safe-generator";
// Response contract for the code-generation model: one object holding a single
// `code` string. The description below is passed along with the schema.
const codeFieldDescription =
  "The JavaScript code that solves the problem and always use 'return' statement to return the result. Focus on solving the core problem; No need for error handling or try-catch blocks.";

const codeGenerationSchema = z.object({
  code: z.string().describe(codeFieldDescription),
});
// Define the types
/** Shape of the object the code-generation model is asked to return. */
interface CodeGenerationResponse {
// JavaScript source text produced by the model.
code: string;
}
/** Outcome of running one piece of generated code. */
interface SandboxResult {
// True when the code ran and produced a value.
success: boolean;
// The value the code returned; only meaningful when `success` is true.
output?: any;
// Failure description; only meaningful when `success` is false.
error?: string;
}
/** Description of one variable that is advertised to the model in the prompt. */
interface AvailableVariable {
// Identifier the generated code can refer to.
name: string;
// Human-readable type label for the variable.
type: string;
// Optional short preview of the value, rendered after "e.g." in the prompt.
sample?: string;
}
/**
 * Builds the instruction text sent to the code-generation model.
 *
 * @param problem Description of the task the generated code must solve.
 * @param availableVars Variables the generated code may reference; each is
 *   listed on its own line as `name (type)` plus an optional `e.g.` sample.
 * @param previousAttempts Earlier generated code and the error each produced;
 *   when non-empty they are included so the model can avoid repeating them.
 * @returns The complete prompt string.
 */
function getPrompt(
  problem: string,
  availableVars: AvailableVariable[],
  previousAttempts: Array<{ code: string; error?: string }> = []
): string {
  // One line per variable: "name (type)" followed by an optional example.
  const describeVariable = (variable: AvailableVariable): string => {
    const example = variable.sample ? ` e.g. ${variable.sample}` : '';
    return `${variable.name} (${variable.type})${example}`;
  };
  const variableLines = availableVars.map(describeVariable).join('\n');

  // Each earlier attempt becomes a block padded with a leading and trailing newline.
  const attemptBlocks: string[] = [];
  previousAttempts.forEach((attempt, position) => {
    const errorLine = attempt.error ? `Error: ${attempt.error}` : '';
    attemptBlocks.push(`\nAttempt ${position + 1}:\n${attempt.code}\n${errorLine}\n`);
  });

  // The history section is dropped entirely when there is nothing to report.
  let historySection = '';
  if (previousAttempts.length > 0) {
    historySection = `Previous attempts and their errors:\n${attemptBlocks.join('\n')}\n`;
  }

  const promptLines = [
    'You are an expert JavaScript programmer. Your task is to generate JavaScript code to solve the given problem.',
    '<rules>',
    '1. Generate plain JavaScript code that returns the result directly',
    '2. You can use any of these available variables directly:',
    variableLines,
    '3. No need to declare variables that are already available, especially big long strings or arrays; try to always start with using "allContext" object',
    "4. Focus on solving the core problem; No need for error handling or try-catch blocks; Always use 'return' statement to return the result",
    '</rules>',
    historySection,
    '<example>',
    'Available variables:',
    'numbers (Array<number>) e.g. [1, 2, 3, 4, 5, 6]',
    'threshold (number) e.g. 4',
    'Problem: Sum all numbers above threshold',
    'Response:',
    '{',
    '"code": "return numbers.filter(n => n > threshold).reduce((a, b) => a + b, 0);"',
    '}',
    '</example>',
    'Problem to solve:',
    problem,
  ];
  return promptLines.join('\n');
}
/**
 * Asks the `coder` model for JavaScript that solves a described problem and
 * runs the generated code in-process against a caller-supplied context object,
 * retrying with the earlier failures fed back into the prompt.
 *
 * SECURITY NOTE(review): despite the name, nothing here isolates the generated
 * code. It is executed through the `Function` constructor inside the current
 * process, so it can reach every global the host can. Do not treat this as a
 * security boundary for untrusted input.
 */
export class CodeSandbox {
  private tracker?: TokenTracker;
  private generator: ObjectGeneratorSafe;
  private maxAttempts: number;
  private availableVars: AvailableVariable[];
  private context: Record<string, any>;

  /**
   * @param context Values exposed to the generated code as bare identifiers.
   * @param tracker Optional token tracker handed to the object generator.
   * @param maxAttempts Maximum number of generate-and-run rounds in `solve`.
   */
  constructor(
    context: Record<string, any> = {},
    tracker?: TokenTracker,
    maxAttempts: number = 3
  ) {
    this.tracker = tracker;
    this.generator = new ObjectGeneratorSafe(tracker);
    this.maxAttempts = maxAttempts;
    this.context = context;
    this.availableVars = this.collectVariables(context);
  }

  /**
   * Lists every variable the generated code can see: the context entries first,
   * then the enumerable non-function properties of the global object.
   */
  private collectVariables(context: Record<string, any>): AvailableVariable[] {
    const vars: AvailableVariable[] = Object.entries(context).map(
      ([name, value]) => this.createVariableInfo(name, value)
    );
    const seen = new Set(vars.map(v => v.name));

    // `globalThis` is the global object in browsers, workers and Node alike.
    for (const key of Object.keys(globalThis)) {
      // Skip self-references and anything the context already provides.
      if (key === 'window' || key === 'global' || key === 'globalThis' || seen.has(key)) continue;

      let value: unknown;
      try {
        value = (globalThis as any)[key];
      } catch {
        // An accessor on the global object threw; it is not usable as a variable.
        continue;
      }
      if (typeof value === 'function') continue; // Skip functions

      vars.push(this.createVariableInfo(key, value));
      seen.add(key);
    }
    return vars;
  }

  /**
   * Turns a serialized, already-truncated collection into a preview that ends
   * in an ellipsis marker, e.g. `[1,2,3]` -> `[1,2,3, ...]`.
   *
   * Only the final closing bracket is rewritten, so brackets inside nested
   * values or inside strings are left untouched.
   */
  private static appendEllipsis(json: string, closer: ']' | '}'): string {
    const body = json.slice(0, -1);
    // `body` is just the opening bracket when no entry survived serialization.
    return body.length > 1 ? `${body}, ...${closer}` : `${body}...${closer}`;
  }

  /**
   * Describes one variable for the prompt: its name, a type label and, when
   * the value can be serialized, a short JSON preview (first three array
   * elements or first two object entries).
   */
  private createVariableInfo(name: string, value: any): AvailableVariable {
    const type = Array.isArray(value)
      // An empty array has no first element to inspect.
      ? `Array<${value.length > 0 ? typeof value[0] : 'unknown'}>`
      : typeof value;

    let sample: string | undefined;
    try {
      if (Array.isArray(value)) {
        const head = JSON.stringify(value.slice(0, 3));
        sample = value.length > 3 ? CodeSandbox.appendEllipsis(head, ']') : head;
      } else if (typeof value === 'object' && value !== null) {
        const firstEntries = Object.entries(value).slice(0, 2);
        const head = JSON.stringify(Object.fromEntries(firstEntries));
        sample = Object.keys(value).length > 2 ? CodeSandbox.appendEllipsis(head, '}') : head;
      } else if (value !== undefined && value !== null) {
        sample = JSON.stringify(value);
      }
    } catch (e) {
      // If we can't stringify the value (cycles, BigInt, ...), skip the sample
    }
    return { name, type, sample };
  }

  /**
   * Requests one candidate solution from the `coder` model.
   *
   * @param problem Description of the task.
   * @param previousAttempts Earlier code and errors, included in the prompt.
   */
  private async generateCode(
    problem: string,
    previousAttempts: Array<{ code: string; error?: string }> = []
  ): Promise<CodeGenerationResponse> {
    const prompt = getPrompt(problem, this.availableVars, previousAttempts);
    const result = await this.generator.generateObject({
      model: 'coder',
      schema: codeGenerationSchema,
      prompt,
    });
    return result.object;
  }

  /**
   * Runs generated code as a function body with the context entries visible as
   * bare identifiers, and reports either the returned value or the failure.
   *
   * A run that returns `undefined` is reported as a failure so the caller can
   * retry. Thrown errors are captured, never propagated.
   *
   * NOTE(review): the call is synchronous, so code that returns a Promise is
   * reported as successful with the unresolved Promise as its output.
   */
  private evaluateCode(code: string): SandboxResult {
    try {
      // `with` makes every context key resolvable by name inside the generated
      // code. Function-constructor bodies are non-strict, so `with` is allowed.
      const evalInContext = new Function('context', `
with (context) {
${code}
}
`);
      const output = evalInContext(this.context);
      if (output === undefined) {
        return {
          success: false,
          error: 'No value was returned'
        };
      }
      return {
        success: true,
        output
      };
    } catch (error) {
      return {
        success: false,
        error: error instanceof Error ? error.message : 'Unknown error occurred'
      };
    }
  }

  /**
   * Generates and runs code until one attempt returns a value.
   *
   * @param problem Description of the task to solve.
   * @returns The successful code with its output, plus the failed attempts
   *   that preceded it.
   * @throws Error when no attempt succeeds within `maxAttempts` rounds.
   */
  async solve(problem: string): Promise<{
    solution: { code: string; output: any };
    attempts: Array<{ code: string; error?: string }>;
  }> {
    const attempts: Array<{ code: string; error?: string }> = [];

    for (let i = 0; i < this.maxAttempts; i++) {
      const { code } = await this.generateCode(problem, attempts);
      console.log(`Coding attempt ${i + 1}:`, code);

      const result = this.evaluateCode(code);
      if (result.success) {
        return {
          solution: {
            code,
            output: result.output
          },
          attempts
        };
      }

      console.error('Coding error:', result.error);
      // Remember the failure so the next prompt can show it to the model.
      attempts.push({
        code,
        error: result.error
      });
    }

    // Every round failed (or maxAttempts allowed no rounds at all).
    throw new Error(`Failed to generate working code after ${this.maxAttempts} attempts`);
  }
}

View File

@ -460,12 +460,13 @@ export async function evaluateAnswer(
question: string,
action: AnswerAction,
evaluationCri: EvaluationCriteria,
trackers: [TokenTracker, ActionTracker]
trackers: [TokenTracker, ActionTracker],
visitedURLs: string[] = []
): Promise<{ response: EvaluationResponse }> {
let result;
// Only add attribution if we have valid references
if (action.references && action.references.length > 0) {
if (action.references && action.references.length > 0 && action.references.some(ref => ref.url.startsWith('http'))) {
evaluationCri.types = ['attribution', ...evaluationCri.types];
}
@ -473,7 +474,7 @@ export async function evaluateAnswer(
switch (evaluationType) {
case 'attribution': {
// Safely handle references and ensure we have content
const urls = action.references?.map(ref => ref.url) ?? [];
const urls = action.references?.filter(ref => ref.url.startsWith('http') && !visitedURLs.includes(ref.url)).map(ref => ref.url) || [];
const uniqueURLs = [...new Set(urls)];
const allKnowledge = await fetchSourceContent(uniqueURLs, trackers);

View File

@ -2,7 +2,7 @@
import {CoreAssistantMessage, CoreUserMessage, LanguageModelUsage} from "ai";
type BaseAction = {
action: "search" | "answer" | "reflect" | "visit";
action: "search" | "answer" | "reflect" | "visit" | "coding";
think: string;
};
@ -29,7 +29,7 @@ export type KnowledgeItem = {
exactQuote: string;
url: string;
}> | Array<any>;
type: 'qa' | 'side-info' | 'chat-history' | 'url',
type: 'qa' | 'side-info' | 'chat-history' | 'url' | 'coding',
updated: string,
}
@ -43,7 +43,12 @@ export type VisitAction = BaseAction & {
URLTargets: string[];
};
export type StepAction = SearchAction | AnswerAction | ReflectAction | VisitAction;
export type CodingAction = BaseAction & {
action: "coding";
codingIssue: string;
};
export type StepAction = SearchAction | AnswerAction | ReflectAction | VisitAction | CodingAction;
export type EvaluationType = 'definitive' | 'freshness' | 'plurality' | 'attribution';
export type EvaluationCriteria = {