diff --git a/src/agent.ts b/src/agent.ts index 38ed071..f1457ce 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -3,7 +3,8 @@ import dotenv from 'dotenv'; import {ProxyAgent, setGlobalDispatcher} from "undici"; import {readUrl} from "./tools/read"; import {search} from "./tools/search"; -// 获取代理URL并设置代理 + +// Proxy setup remains the same if (process.env.https_proxy) { try { const proxyUrl = new URL(process.env.https_proxy).toString(); @@ -15,169 +16,210 @@ if (process.env.https_proxy) { } dotenv.config(); -const schema = { - type: SchemaType.OBJECT, +type ResponseSchema = { + type: SchemaType.OBJECT; properties: { action: { - type: SchemaType.STRING, - enum: ["search", "readURL", "answer", "reflect"], - description: "Must match exactly one action type" - }, - questionsToAnswer: { - type: SchemaType.ARRAY, - items: { - type: SchemaType.STRING, - description: "each question must be a single line, concise and clear. not composite or compound, less than 20 words.", - }, - description: "Only required when choosing 'reflect' action, list of most important questions to answer to fill the knowledge gaps.", - maxItems: 2 - }, - searchKeywords: { - type: SchemaType.ARRAY, - items: { - type: SchemaType.STRING - }, - description: "Only required when choosing 'search' action, must be an array of keywords", - maxItems: 3 - }, + type: SchemaType.STRING; + enum: string[]; + description: string; + }; + searchQuery: { + type: SchemaType.STRING; + description: string; + }; URLTargets: { - type: SchemaType.ARRAY, + type: SchemaType.ARRAY; items: { - type: SchemaType.STRING - }, - description: "Only required when choosing 'readURL' action, must be an array of URLs" - }, + type: SchemaType.STRING; + }; + description: string; + }; answer: { - type: SchemaType.STRING, - description: "Only required when choosing 'answer' action, must be the final answer in natural language" - }, + type: SchemaType.STRING; + description: string; + }; references: { - type: SchemaType.ARRAY, + type: 
SchemaType.ARRAY; items: { - type: SchemaType.OBJECT, + type: SchemaType.OBJECT; properties: { title: { - type: SchemaType.STRING, - description: "Title of the document; must be directly from the context" - }, + type: SchemaType.STRING; + description: string; + }; url: { - type: SchemaType.STRING, - description: "URL of the document; must be directly from the context" - } - }, - required: ["title", "url"] - }, - description: "Only required when choosing 'answer' action, must be an array of references" - }, + type: SchemaType.STRING; + description: string; + }; + }; + required: string[]; + }; + minItems: number; + description: string; + }; reasoning: { - type: SchemaType.STRING, - description: "Explain why choose this action?" - }, + type: SchemaType.STRING; + description: string; + }; confidence: { - type: SchemaType.NUMBER, - minimum: 0.0, - maximum: 1.0, - description: "Represents the confidence level of in answering the question BEFORE taking the action. Must be a float between 0.0 and 1.0", - } - }, - required: ["action", "reasoning", "confidence"], + type: SchemaType.NUMBER; + minimum: number; + maximum: number; + description: string; + }; + questionsToAnswer?: { + type: SchemaType.ARRAY; + items: { + type: SchemaType.STRING; + description: string; + }; + description: string; + maxItems: number; + }; + }; + required: string[]; }; -const apiKey = process.env.GEMINI_API_KEY as string; -const jinaToken = process.env.JINA_API_KEY as string; -if (!apiKey) { - throw new Error("GEMINI_API_KEY not found"); -} -if (!jinaToken) { - throw new Error("JINA_API_KEY not found"); +function getSchema(allowReflect: boolean): ResponseSchema { + return { + type: SchemaType.OBJECT, + properties: { + action: { + type: SchemaType.STRING, + enum: allowReflect ? ["search", "readURL", "answer", "reflect"] : ["search", "readURL", "answer"], + description: "Must match exactly one action type" + }, + questionsToAnswer: allowReflect ? 
{ + type: SchemaType.ARRAY, + items: { + type: SchemaType.STRING, + description: "each question must be a single line, concise and clear. not composite or compound, less than 20 words.", + }, + description: "Only required when choosing 'reflect' action, list of most important questions to answer to fill the knowledge gaps.", + maxItems: 2 + } : undefined, + searchQuery: { + type: SchemaType.STRING, + description: "Only required when choosing 'search' action, must be a short, keyword-based query that BM25, tf-idf based search engines can understand.", + }, + URLTargets: { + type: SchemaType.ARRAY, + items: { + type: SchemaType.STRING + }, + description: "Only required when choosing 'readURL' action, must be an array of URLs" + }, + answer: { + type: SchemaType.STRING, + description: "Only required when choosing 'answer' action, must be the final answer in natural language" + }, + references: { + type: SchemaType.ARRAY, + items: { + type: SchemaType.OBJECT, + properties: { + title: { + type: SchemaType.STRING, + description: "Title of the document; must be directly from the context", + }, + url: { + type: SchemaType.STRING, + description: "URL of the document; must be directly from the context" + } + }, + required: ["title", "url"] + }, + minItems: 1, + description: "Only required when choosing 'answer' action, must be an array of references" + }, + reasoning: { + type: SchemaType.STRING, + description: "Explain why choose this action?" + }, + confidence: { + type: SchemaType.NUMBER, + minimum: 0.0, + maximum: 1.0, + description: "Represents the confidence level of in answering the question BEFORE taking the action. 
Must be a float between 0.0 and 1.0", + } + }, + required: ["action", "reasoning", "confidence"], + }; } -const modelName = 'gemini-1.5-flash'; -const genAI = new GoogleGenerativeAI(apiKey); -const model = genAI.getGenerativeModel({ - model: modelName, - generationConfig: { - temperature: 0.7, - responseMimeType: "application/json", - responseSchema: schema - } -}); +function getPrompt(question: string, context?: string, allowReflect: boolean = false) { + const contextIntro = context ? + `\nYour current context contains these previous actions:\n\n ${context}\n` + : ''; -function getPrompt(question: string, context?: string, allowReflect:boolean = false) { - let contextIntro = ``; - if (!!context) { - contextIntro = ` -You have the following actions records in your context: - - ${context} - `; - } - - let reflectAction = ''; - if (allowReflect) { - reflectAction = ` -If you are not 100% confident in your answer, then identify the gaps in your knowledge with "reflect" action: - -**reflect**: -- Challenge existing knowledge with what-if or divide-and-conquer thinking. -- Reflect on the gaps in your knowledge and ask for most important questions to fill those gaps. -- You use this action when you feel like you need to first answer those questions before proceeding with the current one. -- Should not similar to the original question or existing questionsToAnswer in the context. -- Each question must be concise and clear less than 20 words and not composite or compound. - - ` - } - - - - return `You are an AI research analyst capable of multi-step reasoning. 
- -${contextIntro} - -Based on the previous actions and the knowledge in your training data, you must answer the following question with 100% confidence: + let actionsDescription = ` +Using your training data and prior context, answer the following question with absolute certainty: ${question} -${reflectAction} -Or you can take one of the following actions: +When uncertain or needing additional information, select one of these actions: **search**: -- Search external real-world information via a public search engine. -- The search engine works best with short, keyword-based queries. -- You use this action when you need more world knowledge or up to date information that is not covered in your training data or cut-off knowledge base. +- Query external sources using a public search engine +- Optimize for concise, keyword-based searches +- Use for recent information (post-training data) or missing domain knowledge **readURL**: -- Provide a specific URL to fetch and read its content in detail. -- Any URL must come from the current context. -- You use this action when you feel like that particular URL might have the information you need to answer the question. +- Access content from specific URLs found in current context +- Requires existing URLs from previous actions +- Use when confident a contextual URL contains needed information **answer**: -- Provide your answer to the user, **only** if you are completely sure. +- Provide final response only when 100% certain +- Responses must be definitive (no ambiguity, uncertainty, or disclaimers) +${allowReflect ? `- If doubts remain, use "reflect" instead` : ''}`; -When you decide on your action, respond **only** in valid JSON format according to the schema below. 
+ if (allowReflect) { + actionsDescription += `\n\n**reflect**: +- Perform critical analysis through hypothetical scenarios or systematic breakdowns +- Identify knowledge gaps and formulate essential clarifying questions +- Questions must be: + - Original (not variations of existing questions) + - Focused on single concepts + - Under 20 words + - Non-compound/non-complex`; + } -**Important**: -- Do not include any extra keys. -- Do not include explanatory text, markdown formatting, or reasoning in the final output. -- Output exactly one JSON object in your response. - `; + return `You are an advanced AI research analyst specializing in multi-step reasoning.${contextIntro}${actionsDescription} +Respond exclusively in valid JSON format matching exact JSON schema. + +Critical Requirements: +- Include ONLY ONE action type +- Never add unsupported keys +- Exclude all non-JSON text, markdown, or explanations +- Maintain strict JSON syntax`; } - async function getResponse(question: string) { let tokenBudget = 30000000; let totalTokens = 0; - let context = ''; // global context to store all the actions records + let context = ''; let step = 0; - let gaps: string[] = []; + let gaps: string[] = [question]; // All questions to be answered including the original question + while (totalTokens < tokenBudget) { - const allowReflect = gaps.length === 0; + console.log('Gaps:', gaps) + const allowReflect = gaps.length <= 1; const currentQuestion = gaps.length > 0 ? gaps.shift()! 
: question; const prompt = getPrompt(currentQuestion, context, allowReflect); - console.log('Prompt length:', prompt.length); - console.log('Context:', context.length); - console.log('Gaps:', gaps.length); + console.log('Prompt:', prompt.length) + + const model = genAI.getGenerativeModel({ + model: modelName, + generationConfig: { + temperature: 0.7, + responseMimeType: "application/json", + responseSchema: getSchema(allowReflect) + } + }); + const result = await model.generateContent(prompt); const response = await result.response; const usage = response.usageMetadata; @@ -191,37 +233,36 @@ async function getResponse(question: string) { if (action.action === 'answer') { if (currentQuestion === question) { - return action; // Exit only for original question's answer not the gap question + return action; } else { - const contextRecord = JSON.stringify({ + context = `${context}\n${JSON.stringify({ step, ...action, question: currentQuestion - }); - context = `${context}\n${contextRecord}`; + })}`; } } if (action.action === 'reflect' && action.questionsToAnswer) { gaps.push(...action.questionsToAnswer); - const contextRecord = JSON.stringify({ + gaps.push(question); // always keep the original question in the gaps + context = `${context}\n${JSON.stringify({ step, ...action, question: currentQuestion - }); - context = `${context}\n${contextRecord}`; + })}`; } + // Actions that fetch external data (search, readURL) are handled below try { - if (action.action === 'search' && action.searchKeywords) { - const results = await search(action.searchKeywords.join(' '), jinaToken); - const contextRecord = JSON.stringify({ + if (action.action === 'search' && action.searchQuery) { + const results = await search(action.searchQuery, jinaToken); + context = `${context}\n${JSON.stringify({ step, ...action, question: currentQuestion, result: results.data - }); - context = `${context}\n${contextRecord}`; + })}`; totalTokens += results.data.reduce((sum, r) => sum + r.usage.tokens, 0); } else if (action.action 
=== 'readURL' && action.URLTargets?.length) { const urlResults = await Promise.all( @@ -231,25 +272,13 @@ async function getResponse(question: string) { }) ); - const contextRecord = JSON.stringify({ + context = `${context}\n${JSON.stringify({ step, ...action, question: currentQuestion, result: urlResults - }); - context = `${context}\n${contextRecord}`; + })}`; totalTokens += urlResults.reduce((sum, r) => sum + r.result.data.usage.tokens, 0); - } else if (action.action === 'rewrite' && action.rewriteQuery) { - // Immediately search with the new rewriteQuery - const results = await search(action.rewriteQuery, jinaToken); - const contextRecord = JSON.stringify({ - step, - ...action, - question: currentQuestion, - result: results.data - }); - context = `${context}\n${contextRecord}`; - totalTokens += results.data.reduce((sum, r) => sum + r.usage.tokens, 0); } } catch (error) { console.error('Error fetching data:', error); @@ -257,6 +286,13 @@ async function getResponse(question: string) { } } +const apiKey = process.env.GEMINI_API_KEY as string; +const jinaToken = process.env.JINA_API_KEY as string; +if (!apiKey) throw new Error("GEMINI_API_KEY not found"); +if (!jinaToken) throw new Error("JINA_API_KEY not found"); + +const modelName = 'gemini-1.5-flash'; +const genAI = new GoogleGenerativeAI(apiKey); const question = process.argv[2] || ""; -getResponse(question); +getResponse(question); \ No newline at end of file