mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
fix: update schema types and token tracking
Co-Authored-By: Han Xiao <han.xiao@jina.ai>
This commit is contained in:
parent
8b1263e42c
commit
f0ce6a1f06
44
src/agent.ts
44
src/agent.ts
@ -1,4 +1,4 @@
|
||||
import {GoogleGenerativeAI, SchemaType} from "@google/generative-ai";
|
||||
import { GoogleGenerativeAI, SchemaType } from '@google/generative-ai';
|
||||
import {readUrl} from "./tools/read";
|
||||
import fs from 'fs/promises';
|
||||
import {SafeSearchType, search as duckSearch} from "duck-duck-scrape";
|
||||
@ -22,14 +22,14 @@ async function sleep(ms: number) {
|
||||
|
||||
function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean): ResponseSchema {
|
||||
const actions: string[] = [];
|
||||
const properties: Record<string, SchemaProperty> = {
|
||||
const properties: Record<string, any> = {
|
||||
action: {
|
||||
type: SchemaType.STRING,
|
||||
type: 'STRING',
|
||||
enum: actions,
|
||||
description: "Must match exactly one action type"
|
||||
},
|
||||
think: {
|
||||
type: SchemaType.STRING,
|
||||
type: 'STRING',
|
||||
description: "Explain why choose this action, what's the thought process behind choosing this action"
|
||||
}
|
||||
};
|
||||
@ -37,7 +37,7 @@ function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boole
|
||||
if (allowSearch) {
|
||||
actions.push("search");
|
||||
properties.searchQuery = {
|
||||
type: SchemaType.STRING,
|
||||
type: 'STRING',
|
||||
description: "Only required when choosing 'search' action, must be a short, keyword-based query that BM25, tf-idf based search engines can understand."
|
||||
};
|
||||
}
|
||||
@ -45,20 +45,20 @@ function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boole
|
||||
if (allowAnswer) {
|
||||
actions.push("answer");
|
||||
properties.answer = {
|
||||
type: SchemaType.STRING,
|
||||
type: 'STRING',
|
||||
description: "Only required when choosing 'answer' action, must be the final answer in natural language"
|
||||
};
|
||||
properties.references = {
|
||||
type: SchemaType.ARRAY,
|
||||
type: 'ARRAY',
|
||||
items: {
|
||||
type: SchemaType.OBJECT,
|
||||
type: 'OBJECT',
|
||||
properties: {
|
||||
exactQuote: {
|
||||
type: SchemaType.STRING,
|
||||
type: 'STRING',
|
||||
description: "Exact relevant quote from the document"
|
||||
},
|
||||
url: {
|
||||
type: SchemaType.STRING,
|
||||
type: 'STRING',
|
||||
description: "URL of the document; must be directly from the context"
|
||||
}
|
||||
},
|
||||
@ -71,9 +71,9 @@ function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boole
|
||||
if (allowReflect) {
|
||||
actions.push("reflect");
|
||||
properties.questionsToAnswer = {
|
||||
type: SchemaType.ARRAY,
|
||||
type: 'ARRAY',
|
||||
items: {
|
||||
type: SchemaType.STRING,
|
||||
type: 'STRING',
|
||||
description: "each question must be a single line, concise and clear. not composite or compound, less than 20 words."
|
||||
},
|
||||
description: "List of most important questions to fill the knowledge gaps of finding the answer to the original question",
|
||||
@ -84,9 +84,9 @@ function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boole
|
||||
if (allowRead) {
|
||||
actions.push("visit");
|
||||
properties.URLTargets = {
|
||||
type: SchemaType.ARRAY,
|
||||
type: 'ARRAY',
|
||||
items: {
|
||||
type: SchemaType.STRING
|
||||
type: 'STRING'
|
||||
},
|
||||
maxItems: 2,
|
||||
description: "Must be an array of URLs, choose up the most relevant 2 URLs to visit"
|
||||
@ -356,6 +356,7 @@ export async function getResponse(question: string, tokenBudget: number = 1_000_
|
||||
false
|
||||
);
|
||||
|
||||
const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY || '');
|
||||
const model = genAI.getGenerativeModel({
|
||||
model: modelConfigs.agent.model,
|
||||
generationConfig: {
|
||||
@ -364,14 +365,14 @@ export async function getResponse(question: string, tokenBudget: number = 1_000_
|
||||
responseSchema: getSchema(allowReflect, allowRead, allowAnswer, allowSearch)
|
||||
}
|
||||
});
|
||||
|
||||
const result = await model.generateContent(prompt);
|
||||
const response = await result.response;
|
||||
const usage = response.usageMetadata;
|
||||
const json = JSON.parse(response.text());
|
||||
|
||||
context.tokenTracker.trackUsage('agent', usage?.totalTokenCount || 0);
|
||||
|
||||
|
||||
thisStep = JSON.parse(response.text());
|
||||
thisStep = json;
|
||||
// print allowed and chose action
|
||||
const actionsStr = [allowSearch, allowRead, allowAnswer, allowReflect].map((a, i) => a ? ['search', 'read', 'answer', 'reflect'][i] : null).filter(a => a).join(', ');
|
||||
console.log(`${thisStep.action} <- [${actionsStr}]`);
|
||||
@ -699,6 +700,7 @@ You decided to think out of the box or cut from a completely different angle.`);
|
||||
true
|
||||
);
|
||||
|
||||
const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY || '');
|
||||
const model = genAI.getGenerativeModel({
|
||||
model: modelConfigs.agentBeastMode.model,
|
||||
generationConfig: {
|
||||
@ -707,14 +709,15 @@ You decided to think out of the box or cut from a completely different angle.`);
|
||||
responseSchema: getSchema(false, false, allowAnswer, false)
|
||||
}
|
||||
});
|
||||
|
||||
const result = await model.generateContent(prompt);
|
||||
const response = await result.response;
|
||||
const usage = response.usageMetadata;
|
||||
const json = JSON.parse(response.text());
|
||||
|
||||
context.tokenTracker.trackUsage('agent', usage?.totalTokenCount || 0);
|
||||
|
||||
await storeContext(prompt, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
|
||||
thisStep = JSON.parse(response.text());
|
||||
thisStep = json;
|
||||
console.log(thisStep)
|
||||
return {result: thisStep, context};
|
||||
}
|
||||
@ -733,9 +736,6 @@ async function storeContext(prompt: string, memory: any[][], step: number) {
|
||||
}
|
||||
}
|
||||
|
||||
const genAI = new GoogleGenerativeAI(GEMINI_API_KEY);
|
||||
|
||||
|
||||
export async function main() {
|
||||
const question = process.argv[2] || "";
|
||||
const {
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
import { GoogleGenerativeAI, SchemaType } from "@google/generative-ai";
|
||||
import { GEMINI_API_KEY, modelConfigs } from "../config";
|
||||
import { GoogleGenerativeAI, SchemaType, ResponseSchema } from '@google/generative-ai';
|
||||
import { modelConfigs } from "../config";
|
||||
import { TokenTracker } from "../utils/token-tracker";
|
||||
|
||||
import { DedupResponse } from '../types';
|
||||
|
||||
|
||||
const responseSchema = {
|
||||
type: SchemaType.OBJECT,
|
||||
@ -23,16 +23,6 @@ const responseSchema = {
|
||||
required: ["think", "unique_queries"]
|
||||
};
|
||||
|
||||
const genAI = new GoogleGenerativeAI(GEMINI_API_KEY);
|
||||
const model = genAI.getGenerativeModel({
|
||||
model: modelConfigs.dedup.model,
|
||||
generationConfig: {
|
||||
temperature: modelConfigs.dedup.temperature,
|
||||
responseMimeType: "application/json",
|
||||
responseSchema: responseSchema
|
||||
}
|
||||
});
|
||||
|
||||
function getPrompt(newQueries: string[], existingQueries: string[]): string {
|
||||
return `You are an expert in semantic similarity analysis. Given a set of queries (setA) and a set of queries (setB)
|
||||
|
||||
@ -88,10 +78,24 @@ SetB: ${JSON.stringify(existingQueries)}`;
|
||||
export async function dedupQueries(newQueries: string[], existingQueries: string[], tracker?: TokenTracker): Promise<{ unique_queries: string[], tokens: number }> {
|
||||
try {
|
||||
const prompt = getPrompt(newQueries, existingQueries);
|
||||
const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY || '');
|
||||
const model = genAI.getGenerativeModel({
|
||||
model: modelConfigs.dedup.model,
|
||||
generationConfig: {
|
||||
temperature: modelConfigs.dedup.temperature,
|
||||
responseMimeType: "application/json",
|
||||
responseSchema: responseSchema
|
||||
}
|
||||
});
|
||||
const result = await model.generateContent(prompt);
|
||||
const response = await result.response;
|
||||
const usage = response.usageMetadata;
|
||||
const json = JSON.parse(response.text()) as DedupResponse;
|
||||
const json = JSON.parse(response.text());
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
console.log('Dedup:', json.unique_queries);
|
||||
const tokens = usage?.totalTokenCount || 0;
|
||||
(tracker || new TokenTracker()).trackUsage('dedup', tokens);
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import {GoogleGenerativeAI, SchemaType} from "@google/generative-ai";
|
||||
import { GEMINI_API_KEY, modelConfigs } from "../config";
|
||||
import { GoogleGenerativeAI, SchemaType, ResponseSchema } from '@google/generative-ai';
|
||||
import { modelConfigs } from "../config";
|
||||
import { TokenTracker } from "../utils/token-tracker";
|
||||
|
||||
import { ErrorAnalysisResponse } from '../types';
|
||||
@ -23,16 +23,6 @@ const responseSchema = {
|
||||
required: ["recap", "blame", "improvement"]
|
||||
};
|
||||
|
||||
const genAI = new GoogleGenerativeAI(GEMINI_API_KEY);
|
||||
const model = genAI.getGenerativeModel({
|
||||
model: modelConfigs.errorAnalyzer.model,
|
||||
generationConfig: {
|
||||
temperature: modelConfigs.errorAnalyzer.temperature,
|
||||
responseMimeType: "application/json",
|
||||
responseSchema: responseSchema
|
||||
}
|
||||
});
|
||||
|
||||
function getPrompt(diaryContext: string[]): string {
|
||||
return `You are an expert at analyzing search and reasoning processes. Your task is to analyze the given sequence of steps and identify what went wrong in the search process.
|
||||
|
||||
@ -124,10 +114,24 @@ ${diaryContext.join('\n')}
|
||||
export async function analyzeSteps(diaryContext: string[], tracker?: TokenTracker): Promise<{ response: ErrorAnalysisResponse, tokens: number }> {
|
||||
try {
|
||||
const prompt = getPrompt(diaryContext);
|
||||
const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY || '');
|
||||
const model = genAI.getGenerativeModel({
|
||||
model: modelConfigs.errorAnalyzer.model,
|
||||
generationConfig: {
|
||||
temperature: modelConfigs.errorAnalyzer.temperature,
|
||||
responseMimeType: "application/json",
|
||||
responseSchema: responseSchema
|
||||
}
|
||||
});
|
||||
const result = await model.generateContent(prompt);
|
||||
const response = await result.response;
|
||||
const usage = response.usageMetadata;
|
||||
const json = JSON.parse(response.text()) as ErrorAnalysisResponse;
|
||||
const json = JSON.parse(response.text());
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
console.log('Error analysis:', {
|
||||
is_valid: !json.blame,
|
||||
reason: json.blame || 'No issues found'
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import { GoogleGenerativeAI, SchemaType } from "@google/generative-ai";
|
||||
import { GEMINI_API_KEY, modelConfigs } from "../config";
|
||||
import { GoogleGenerativeAI, SchemaType, ResponseSchema } from '@google/generative-ai';
|
||||
import { modelConfigs } from "../config";
|
||||
import { TokenTracker } from "../utils/token-tracker";
|
||||
|
||||
import { EvaluationResponse } from '../types';
|
||||
@ -19,16 +19,6 @@ const responseSchema = {
|
||||
required: ["is_definitive", "reasoning"]
|
||||
};
|
||||
|
||||
const genAI = new GoogleGenerativeAI(GEMINI_API_KEY);
|
||||
const model = genAI.getGenerativeModel({
|
||||
model: modelConfigs.evaluator.model,
|
||||
generationConfig: {
|
||||
temperature: modelConfigs.evaluator.temperature,
|
||||
responseMimeType: "application/json",
|
||||
responseSchema: responseSchema
|
||||
}
|
||||
});
|
||||
|
||||
function getPrompt(question: string, answer: string): string {
|
||||
return `You are an evaluator of answer definitiveness. Analyze if the given answer provides a definitive response or not.
|
||||
|
||||
@ -66,10 +56,24 @@ Answer: ${JSON.stringify(answer)}`;
|
||||
export async function evaluateAnswer(question: string, answer: string, tracker?: TokenTracker): Promise<{ response: EvaluationResponse, tokens: number }> {
|
||||
try {
|
||||
const prompt = getPrompt(question, answer);
|
||||
const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY || '');
|
||||
const model = genAI.getGenerativeModel({
|
||||
model: modelConfigs.evaluator.model,
|
||||
generationConfig: {
|
||||
temperature: modelConfigs.evaluator.temperature,
|
||||
responseMimeType: "application/json",
|
||||
responseSchema: responseSchema
|
||||
}
|
||||
});
|
||||
const result = await model.generateContent(prompt);
|
||||
const response = await result.response;
|
||||
const usage = response.usageMetadata;
|
||||
const json = JSON.parse(response.text()) as EvaluationResponse;
|
||||
const json = JSON.parse(response.text());
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
console.log('Evaluation:', {
|
||||
definitive: json.is_definitive,
|
||||
reason: json.reasoning
|
||||
|
||||
@ -1,9 +1,8 @@
|
||||
import { GoogleGenerativeAI, SchemaType } from "@google/generative-ai";
|
||||
import { GEMINI_API_KEY, modelConfigs } from "../config";
|
||||
import { GoogleGenerativeAI, SchemaType, ResponseSchema } from '@google/generative-ai';
|
||||
import { modelConfigs } from "../config";
|
||||
import { TokenTracker } from "../utils/token-tracker";
|
||||
import { SearchAction } from "../types";
|
||||
|
||||
import { KeywordsResponse } from '../types';
|
||||
import { SearchAction } from '../types';
|
||||
|
||||
const responseSchema = {
|
||||
type: SchemaType.OBJECT,
|
||||
@ -26,16 +25,6 @@ const responseSchema = {
|
||||
required: ["think", "queries"]
|
||||
};
|
||||
|
||||
const genAI = new GoogleGenerativeAI(GEMINI_API_KEY);
|
||||
const model = genAI.getGenerativeModel({
|
||||
model: modelConfigs.queryRewriter.model,
|
||||
generationConfig: {
|
||||
temperature: modelConfigs.queryRewriter.temperature,
|
||||
responseMimeType: "application/json",
|
||||
responseSchema: responseSchema
|
||||
}
|
||||
});
|
||||
|
||||
function getPrompt(action: SearchAction): string {
|
||||
return `You are an expert Information Retrieval Assistant. Transform user queries into precise keyword combinations with strategic reasoning and appropriate search operators.
|
||||
|
||||
@ -115,18 +104,30 @@ Intention: ${action.think}
|
||||
export async function rewriteQuery(action: SearchAction, tracker?: TokenTracker): Promise<{ queries: string[], tokens: number }> {
|
||||
try {
|
||||
const prompt = getPrompt(action);
|
||||
const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY || '');
|
||||
const model = genAI.getGenerativeModel({
|
||||
model: modelConfigs.queryRewriter.model,
|
||||
generationConfig: {
|
||||
temperature: modelConfigs.queryRewriter.temperature,
|
||||
responseMimeType: "application/json",
|
||||
responseSchema: responseSchema
|
||||
}
|
||||
});
|
||||
const result = await model.generateContent(prompt);
|
||||
const response = await result.response;
|
||||
const usage = response.usageMetadata;
|
||||
const json = JSON.parse(response.text()) as KeywordsResponse;
|
||||
const json = JSON.parse(response.text());
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
console.log('Query rewriter:', json.queries);
|
||||
const tokens = usage?.totalTokenCount || 0;
|
||||
(tracker || new TokenTracker()).trackUsage('query-rewriter', tokens);
|
||||
|
||||
return { queries: json.queries, tokens };
|
||||
} catch (error) {
|
||||
console.error('Error in query rewriting:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user