chore: first commit

This commit is contained in:
Han Xiao 2025-01-27 15:21:40 +08:00
parent 2415ec3ebd
commit 21af8a6c82
3 changed files with 213 additions and 22 deletions

View File

@ -16,3 +16,68 @@ npm run dev "what is the twitter account of jina ai's founder"
npm run dev "who will be president of US in 2028?"
npm run dev "what should be jina ai strategy for 2025?"
```
```mermaid
flowchart TD
subgraph Inputs[System Inputs]
OrigQuestion[Original Question]
TokenBudget[Token Budget]
end
subgraph States[Global States]
direction TB
GapQueue[Question Queue]
ContextStore[Action History]
BadStore[Failed Attempts]
QuestionStore[Question History]
KeywordStore[Keyword History]
end
subgraph Outputs[System Outputs]
FinalAnswer[Answer]
end
TokenBudget -->|check| End[System End]
OrigQuestion -->|initialize| GapQueue
GapQueue -->|pop| NextQ[Question]
NextQ -->|generate| AIResponse[Response]
AIResponse -->|analyze| ActionType{Action Type}
ActionType -->|is search| SearchOp[Search Results]
SearchOp -->|store| ContextStore
SearchOp -->|add| KeywordStore
SearchOp -->|continue| TokenBudget
ActionType -->|is read| URLData[URL Content]
URLData -->|store| ContextStore
URLData -->|continue| TokenBudget
ActionType -->|is reflect| NewQuestions[Questions]
NewQuestions -->|check against| QuestionStore
NewQuestions -->|filter| UniqueQuestions[Unique Questions]
UniqueQuestions -->|push to| GapQueue
UniqueQuestions -->|add to| QuestionStore
UniqueQuestions -->|continue| TokenBudget
ActionType -->|is answer| AnswerCheck{Original Question}
AnswerCheck -->|compare with| OrigQuestion
AnswerCheck -->|is not| ContextStore
ContextStore -->|continue| TokenBudget
AnswerCheck -->|is| Evaluation[Answer Quality]
Evaluation -->|check| ValidCheck{Quality}
ValidCheck -->|passes| FinalAnswer
FinalAnswer -->|return| End
ValidCheck -->|fails| BadStore
ValidCheck -->|fails and clear| ContextStore
classDef state fill:#e1f5fe,stroke:#01579b
classDef input fill:#e8f5e9,stroke:#2e7d32
classDef output fill:#fce4ec,stroke:#c2185b
class GapQueue,ContextStore,BadStore,QuestionStore,KeywordStore state
class OrigQuestion,TokenBudget input
class FinalAnswer output
```

View File

@ -6,6 +6,7 @@ import fs from 'fs/promises';
import {SafeSearchType, search} from "duck-duck-scrape";
import {rewriteQuery} from "./tools/query-rewriter";
import {dedupQueries} from "./tools/dedup";
import {evaluateAnswer} from "./tools/evaluator";
// Proxy setup remains the same
if (process.env.https_proxy) {
@ -90,12 +91,6 @@ type ResponseSchema = {
type: SchemaType.STRING;
description: string;
};
confidence: {
type: SchemaType.NUMBER;
minimum: number;
maximum: number;
description: string;
};
questionsToAnswer?: {
type: SchemaType.ARRAY;
items: {
@ -164,18 +159,20 @@ function getSchema(allowReflect: boolean): ResponseSchema {
type: SchemaType.STRING,
description: "Explain why choose this action?"
},
confidence: {
type: SchemaType.NUMBER,
minimum: 0.0,
maximum: 1.0,
description: "Represents the confidence level of in answering the question BEFORE taking the action.",
}
},
required: ["action", "reasoning", "confidence"],
required: ["action", "reasoning"],
};
}
function getPrompt(question: string, context?: any[], allQuestions?: string[], allowReflect: boolean = false) {
function getPrompt(question: string, context?: any[], allQuestions?: string[], allowReflect: boolean = false, badContext?: any[] ) {
const badContextIntro = badContext?.length ?
`Your last unsuccessful answer contains these previous actions and knowledge:
${JSON.stringify(badContext, null, 2)}
Learn to avoid these mistakes and think of a new approach, from a different angle, e.g. search for different keywords, read different URLs, or ask different questions.
`
: '';
const contextIntro = context?.length ?
`Your current context contains these previous actions and knowledge:
${JSON.stringify(context, null, 2)}
@ -183,7 +180,7 @@ function getPrompt(question: string, context?: any[], allQuestions?: string[], a
: '';
let actionsDescription = `
Using your training data and prior context, answer the following question with absolute certainty:
Using your training data and prior lessons learned, answer the following question with absolute certainty:
${question}
@ -216,7 +213,7 @@ ${allQuestions?.length ? `Existing questions you have asked, make sure to not re
`;
}
return `You are an advanced AI research analyst specializing in multi-step reasoning.${contextIntro}${actionsDescription}
return `You are an advanced AI research analyst specializing in multi-step reasoning.${contextIntro}${badContextIntro}${actionsDescription}
Respond exclusively in valid JSON format matching exact JSON schema.
@ -227,14 +224,14 @@ Critical Requirements:
- Maintain strict JSON syntax`;
}
async function getResponse(question: string) {
let tokenBudget = 30000000;
async function getResponse(question: string, tokenBudget: number=30000000) {
let totalTokens = 0;
let context = [];
let step = 0;
let gaps: string[] = [question]; // All questions to be answered including the original question
let allQuestions = [question];
let allKeywords = [];
let badContext = [];
while (totalTokens < tokenBudget) {
// add 1s delay to avoid rate limiting
@ -267,14 +264,19 @@ async function getResponse(question: string) {
console.log('Question-Action:', currentQuestion, action);
if (action.action === 'answer') {
if (currentQuestion === question) {
return action;
} else {
context.push({
context.push({
step,
question: currentQuestion,
...action,
});
if (currentQuestion === question) {
const evaluation = await evaluateAnswer(currentQuestion, action.answer);
if (evaluation) {
return action;
} else {
badContext.push(...context);
context = [];
}
}
}

124
src/tools/evaluator.ts Normal file
View File

@ -0,0 +1,124 @@
import { GoogleGenerativeAI, SchemaType } from "@google/generative-ai";
import dotenv from 'dotenv';
import { ProxyAgent, setGlobalDispatcher } from "undici";
// Proxy setup: route all undici-based HTTP(S) traffic (including the Gemini
// SDK's fetch calls) through the proxy named in the `https_proxy` env var.
if (process.env.https_proxy) {
  try {
    // new URL(...) validates the proxy address before it is installed.
    const proxyUrl = new URL(process.env.https_proxy).toString();
    const dispatcher = new ProxyAgent({ uri: proxyUrl });
    // Process-wide: affects every undici consumer, not just this module.
    setGlobalDispatcher(dispatcher);
  } catch (error) {
    // Best-effort: a malformed proxy URL is logged and ignored so the
    // script can still attempt a direct connection.
    console.error('Failed to set proxy:', error);
  }
}

// Load .env before reading GEMINI_API_KEY below.
dotenv.config();

const apiKey = process.env.GEMINI_API_KEY;
if (!apiKey) {
  // Fail fast at import time: nothing in this module works without a key.
  throw new Error("GEMINI_API_KEY not found in environment variables");
}
// Shape of the JSON object the model is forced to return
// (mirrors `responseSchema` below).
type EvaluationResponse = {
  // Verdict: does the answer properly address the question?
  is_valid_answer: boolean;
  // Model-written explanation of the verdict.
  reasoning: string;
};

// Structured-output schema handed to the Gemini SDK via
// `generationConfig.responseSchema`, constraining the model's reply
// to exactly this JSON object.
const responseSchema = {
  type: SchemaType.OBJECT,
  properties: {
    is_valid_answer: {
      type: SchemaType.BOOLEAN,
      description: "Whether the answer properly addresses the question"
    },
    reasoning: {
      type: SchemaType.STRING,
      description: "Detailed explanation of the evaluation"
    }
  },
  required: ["is_valid_answer", "reasoning"]
};
// Model configured for evaluation calls.
const modelName = 'gemini-1.5-flash';
const genAI = new GoogleGenerativeAI(apiKey);
const model = genAI.getGenerativeModel({
  model: modelName,
  generationConfig: {
    // Low temperature: evaluations should be close to deterministic.
    temperature: 0.1,
    // Request strict JSON output conforming to `responseSchema` above.
    responseMimeType: "application/json",
    responseSchema: responseSchema
  }
});
/**
 * Builds the evaluation prompt for one question/answer pair.
 *
 * Both inputs are JSON-stringified before interpolation so embedded quotes
 * or newlines cannot break the prompt structure.
 *
 * @param question - The question being evaluated.
 * @param answer - The candidate answer to judge.
 * @returns The full prompt text sent to the model.
 */
function getPrompt(question: string, answer: string): string {
  const questionJson = JSON.stringify(question);
  const answerJson = JSON.stringify(answer);
  return `You are an expert evaluator of question-answer pairs. Analyze if the given answer properly addresses the question and provides meaningful information.
Core Evaluation Criteria:
1. Completeness: Answer must directly address the main point of the question
2. Clarity: Answer should be clear and unambiguous
3. Informativeness: Answer must provide substantial, useful information
4. Specificity: Generic or vague responses are not acceptable
5. Definitiveness: "I don't know" or highly uncertain responses are not valid
6. Relevance: Answer must be directly related to the question topic
7. Accuracy: Information provided should be factually sound (if verifiable)
Examples:
Question: "What are the system requirements for running Python 3.9?"
Answer: "I'm not entirely sure, but I think you need a computer with some RAM."
Evaluation: {
"is_valid_answer": false,
"reasoning": "The answer is vague, uncertain, and lacks specific information about actual system requirements. It fails the specificity and informativeness criteria."
}
Question: "What are the system requirements for running Python 3.9?"
Answer: "Python 3.9 requires: Windows 7 or later, macOS 10.11 or later, or Linux. Minimum 4GB RAM recommended, 2GB disk space, and x86-64 processor. For Windows, you'll need Microsoft Visual C++ 2015 or later."
Evaluation: {
"is_valid_answer": true,
"reasoning": "The answer is comprehensive, specific, and covers all key system requirements across different operating systems. It provides concrete numbers and necessary additional components."
}
Now evaluate this pair:
Question: ${questionJson}
Answer: ${answerJson}`;
}
/**
 * Asks the model whether `answer` adequately answers `question`.
 *
 * Builds the evaluation prompt, calls Gemini, parses the structured JSON
 * reply, and returns its `is_valid_answer` verdict.
 *
 * @param question - The question being evaluated.
 * @param answer - The candidate answer to judge.
 * @returns true when the model judges the answer valid, false otherwise.
 * @throws Re-throws any API or JSON-parse error after logging it, leaving
 *         failure handling to the caller.
 */
export async function evaluateAnswer(question: string, answer: string): Promise<boolean> {
  try {
    const prompt = getPrompt(question, answer);
    const result = await model.generateContent(prompt);
    const response = await result.response;
    // response.text() is the raw JSON string; the schema in generationConfig
    // should keep it well-formed, but a malformed reply still lands in catch.
    const json = JSON.parse(response.text()) as EvaluationResponse;
    console.log('Evaluation:', json);
    return json.is_valid_answer;
  } catch (error) {
    console.error('Error in answer evaluation:', error);
    throw error;
  }
}
// Example usage
/**
 * CLI entry point: `node evaluator "<question>" "<answer>"`.
 * Prints the pair and the boolean evaluation result; exits with code 1
 * when either argument is missing.
 */
async function main() {
  const [question = '', answer = ''] = process.argv.slice(2);
  if (!question || !answer) {
    console.error('Please provide both question and answer as command line arguments');
    process.exit(1);
  }

  console.log('\nQuestion:', question);
  console.log('Answer:', answer);

  try {
    const verdict = await evaluateAnswer(question, answer);
    console.log('\nEvaluation Result:', verdict);
  } catch (error) {
    console.error('Failed to evaluate answer:', error);
  }
}

// Run only when executed directly, not when imported as a module.
if (require.main === module) {
  main().catch(console.error);
}