mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
style: format imports and add language_code to request
This commit is contained in:
parent
6f41539587
commit
16f01546f5
127
src/agent.ts
127
src/agent.ts
@ -1,15 +1,15 @@
|
||||
import {ZodObject} from 'zod';
|
||||
import {CoreMessage} from 'ai';
|
||||
import {SEARCH_PROVIDER, STEP_SLEEP} from "./config";
|
||||
import { ZodObject } from 'zod';
|
||||
import { CoreMessage } from 'ai';
|
||||
import { SEARCH_PROVIDER, STEP_SLEEP } from "./config";
|
||||
import fs from 'fs/promises';
|
||||
import {SafeSearchType, search as duckSearch} from "duck-duck-scrape";
|
||||
import {braveSearch} from "./tools/brave-search";
|
||||
import {rewriteQuery} from "./tools/query-rewriter";
|
||||
import {dedupQueries} from "./tools/jina-dedup";
|
||||
import {evaluateAnswer, evaluateQuestion} from "./tools/evaluator";
|
||||
import {analyzeSteps} from "./tools/error-analyzer";
|
||||
import {TokenTracker} from "./utils/token-tracker";
|
||||
import {ActionTracker} from "./utils/action-tracker";
|
||||
import { SafeSearchType, search as duckSearch } from "duck-duck-scrape";
|
||||
import { braveSearch } from "./tools/brave-search";
|
||||
import { rewriteQuery } from "./tools/query-rewriter";
|
||||
import { dedupQueries } from "./tools/jina-dedup";
|
||||
import { evaluateAnswer, evaluateQuestion } from "./tools/evaluator";
|
||||
import { analyzeSteps } from "./tools/error-analyzer";
|
||||
import { TokenTracker } from "./utils/token-tracker";
|
||||
import { ActionTracker } from "./utils/action-tracker";
|
||||
import {
|
||||
StepAction,
|
||||
AnswerAction,
|
||||
@ -18,13 +18,13 @@ import {
|
||||
BoostedSearchSnippet,
|
||||
SearchSnippet, EvaluationResponse, Reference, SERPQuery, RepeatEvaluationType, UnNormalizedSearchSnippet, WebContent
|
||||
} from "./types";
|
||||
import {TrackerContext} from "./types";
|
||||
import {search} from "./tools/jina-search";
|
||||
import { TrackerContext } from "./types";
|
||||
import { search } from "./tools/jina-search";
|
||||
// import {grounding} from "./tools/grounding";
|
||||
import {zodToJsonSchema} from "zod-to-json-schema";
|
||||
import {ObjectGeneratorSafe} from "./utils/safe-generator";
|
||||
import {CodeSandbox} from "./tools/code-sandbox";
|
||||
import {serperSearch} from './tools/serper-search';
|
||||
import { zodToJsonSchema } from "zod-to-json-schema";
|
||||
import { ObjectGeneratorSafe } from "./utils/safe-generator";
|
||||
import { CodeSandbox } from "./tools/code-sandbox";
|
||||
import { serperSearch } from './tools/serper-search';
|
||||
import {
|
||||
addToAllURLs,
|
||||
rankURLs,
|
||||
@ -38,11 +38,11 @@ import {
|
||||
removeExtraLineBreaks,
|
||||
removeHTMLtags, repairMarkdownFinal, repairMarkdownFootnotesOuter
|
||||
} from "./utils/text-tools";
|
||||
import {MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas} from "./utils/schemas";
|
||||
import {formatDateBasedOnType, formatDateRange} from "./utils/date-tools";
|
||||
import {repairUnknownChars} from "./tools/broken-ch-fixer";
|
||||
import {reviseAnswer} from "./tools/md-fixer";
|
||||
import {buildReferences} from "./tools/build-ref";
|
||||
import { MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas } from "./utils/schemas";
|
||||
import { formatDateBasedOnType, formatDateRange } from "./utils/date-tools";
|
||||
import { repairUnknownChars } from "./tools/broken-ch-fixer";
|
||||
import { reviseAnswer } from "./tools/md-fixer";
|
||||
import { buildReferences } from "./tools/build-ref";
|
||||
|
||||
async function sleep(ms: number) {
|
||||
const seconds = Math.ceil(ms / 1000);
|
||||
@ -54,7 +54,7 @@ function BuildMsgsFromKnowledge(knowledge: KnowledgeItem[]): CoreMessage[] {
|
||||
// build user, assistant pair messages from knowledge
|
||||
const messages: CoreMessage[] = [];
|
||||
knowledge.forEach(k => {
|
||||
messages.push({role: 'user', content: k.question.trim()});
|
||||
messages.push({ role: 'user', content: k.question.trim() });
|
||||
const aMsg = `
|
||||
${k.updated && (k.type === 'url' || k.type === 'side-info') ? `
|
||||
<answer-datetime>
|
||||
@ -71,7 +71,7 @@ ${k.references[0]}
|
||||
|
||||
${k.answer}
|
||||
`.trim();
|
||||
messages.push({role: 'assistant', content: removeExtraLineBreaks(aMsg)});
|
||||
messages.push({ role: 'assistant', content: removeExtraLineBreaks(aMsg) });
|
||||
});
|
||||
return messages;
|
||||
}
|
||||
@ -96,7 +96,7 @@ ${p}
|
||||
</answer-requirements>` : ''}
|
||||
`.trim();
|
||||
|
||||
msgs.push({role: 'user', content: removeExtraLineBreaks(userContent)});
|
||||
msgs.push({ role: 'user', content: removeExtraLineBreaks(userContent) });
|
||||
return msgs;
|
||||
}
|
||||
|
||||
@ -289,7 +289,7 @@ async function executeSearchQueries(
|
||||
const uniqQOnly = keywordsQueries.map(q => q.q);
|
||||
const newKnowledge: KnowledgeItem[] = [];
|
||||
const searchedQueries: string[] = [];
|
||||
context.actionTracker.trackThink('search_for', SchemaGen.languageCode, {keywords: uniqQOnly.join(', ')});
|
||||
context.actionTracker.trackThink('search_for', SchemaGen.languageCode, { keywords: uniqQOnly.join(', ') });
|
||||
let utilityScore = 0;
|
||||
for (const query of keywordsQueries) {
|
||||
let results: UnNormalizedSearchSnippet[] = [];
|
||||
@ -305,7 +305,7 @@ async function executeSearchQueries(
|
||||
results = (await search(query, context.tokenTracker)).response?.data || [];
|
||||
break;
|
||||
case 'duck':
|
||||
results = (await duckSearch(query.q, {safeSearch: SafeSearchType.STRICT})).results;
|
||||
results = (await duckSearch(query.q, { safeSearch: SafeSearchType.STRICT })).results;
|
||||
break;
|
||||
case 'brave':
|
||||
results = (await braveSearch(query.q)).response.web?.results || [];
|
||||
@ -364,7 +364,7 @@ async function executeSearchQueries(
|
||||
if (searchedQueries.length === 0) {
|
||||
if (onlyHostnames && onlyHostnames.length > 0) {
|
||||
console.log(`No results found for queries: ${uniqQOnly.join(', ')} on hostnames: ${onlyHostnames.join(', ')}`);
|
||||
context.actionTracker.trackThink('hostnames_no_results', SchemaGen.languageCode, {hostnames: onlyHostnames.join(', ')});
|
||||
context.actionTracker.trackThink('hostnames_no_results', SchemaGen.languageCode, { hostnames: onlyHostnames.join(', ') });
|
||||
}
|
||||
} else {
|
||||
console.log(`Utility/Queries: ${utilityScore}/${searchedQueries.length}`);
|
||||
@ -383,17 +383,18 @@ function includesEval(allChecks: RepeatEvaluationType[], evalType: EvaluationTyp
|
||||
}
|
||||
|
||||
export async function getResponse(question?: string,
|
||||
tokenBudget: number = 1_000_000,
|
||||
maxBadAttempts: number = 2,
|
||||
existingContext?: Partial<TrackerContext>,
|
||||
messages?: Array<CoreMessage>,
|
||||
numReturnedURLs: number = 100,
|
||||
noDirectAnswer: boolean = false,
|
||||
boostHostnames: string[] = [],
|
||||
badHostnames: string[] = [],
|
||||
onlyHostnames: string[] = [],
|
||||
maxRef: number = 10,
|
||||
minRelScore: number = 0.75
|
||||
tokenBudget: number = 1_000_000,
|
||||
maxBadAttempts: number = 2,
|
||||
existingContext?: Partial<TrackerContext>,
|
||||
messages?: Array<CoreMessage>,
|
||||
numReturnedURLs: number = 100,
|
||||
noDirectAnswer: boolean = false,
|
||||
boostHostnames: string[] = [],
|
||||
badHostnames: string[] = [],
|
||||
onlyHostnames: string[] = [],
|
||||
maxRef: number = 10,
|
||||
minRelScore: number = 0.75,
|
||||
languageCode: string | undefined = 'zh-CN'
|
||||
): Promise<{ result: StepAction; context: TrackerContext; visitedURLs: string[], readURLs: string[], allURLs: string[] }> {
|
||||
|
||||
let step = 0;
|
||||
@ -413,11 +414,11 @@ export async function getResponse(question?: string,
|
||||
question = lastContent.filter(c => c.type === 'text').pop()?.text || '';
|
||||
}
|
||||
} else {
|
||||
messages = [{role: 'user', content: question.trim()}]
|
||||
messages = [{ role: 'user', content: question.trim() }]
|
||||
}
|
||||
|
||||
const SchemaGen = new Schemas();
|
||||
await SchemaGen.setLanguage(question)
|
||||
await SchemaGen.setLanguage(languageCode || question)
|
||||
const context: TrackerContext = {
|
||||
tokenTracker: existingContext?.tokenTracker || new TokenTracker(tokenBudget),
|
||||
actionTracker: existingContext?.actionTracker || new ActionTracker()
|
||||
@ -439,7 +440,7 @@ export async function getResponse(question?: string,
|
||||
let allowReflect = true;
|
||||
let allowCoding = false;
|
||||
let msgWithKnowledge: CoreMessage[] = [];
|
||||
let thisStep: StepAction = {action: 'answer', answer: '', references: [], think: '', isFinal: false};
|
||||
let thisStep: StepAction = { action: 'answer', answer: '', references: [], think: '', isFinal: false };
|
||||
|
||||
const allURLs: Record<string, SearchSnippet> = {};
|
||||
const allWebContents: Record<string, WebContent> = {};
|
||||
@ -491,7 +492,7 @@ export async function getResponse(question?: string,
|
||||
} as RepeatEvaluationType
|
||||
})
|
||||
// force strict eval for the original question, at last, only once.
|
||||
evaluationMetrics[currentQuestion].push({type: 'strict', numEvalsRequired: maxBadAttempts});
|
||||
evaluationMetrics[currentQuestion].push({ type: 'strict', numEvalsRequired: maxBadAttempts });
|
||||
} else if (currentQuestion.trim() !== question) {
|
||||
evaluationMetrics[currentQuestion] = []
|
||||
}
|
||||
@ -520,7 +521,7 @@ export async function getResponse(question?: string,
|
||||
allowSearch = allowSearch && (weightedURLs.length < 50); // disable search when too many urls already
|
||||
|
||||
// generate prompt for this step
|
||||
const {system, urlList} = getPrompt(
|
||||
const { system, urlList } = getPrompt(
|
||||
diaryContext,
|
||||
allQuestions,
|
||||
allKeywords,
|
||||
@ -552,7 +553,7 @@ export async function getResponse(question?: string,
|
||||
console.log(`${currentQuestion}: ${thisStep.action} <- [${actionsStr}]`);
|
||||
console.log(thisStep)
|
||||
|
||||
context.actionTracker.trackAction({totalStep, thisStep, gaps});
|
||||
context.actionTracker.trackAction({ totalStep, thisStep, gaps });
|
||||
|
||||
// reset allow* to true
|
||||
allowAnswer = true;
|
||||
@ -599,7 +600,7 @@ export async function getResponse(question?: string,
|
||||
});
|
||||
|
||||
console.log(currentQuestion, evaluationMetrics[currentQuestion])
|
||||
let evaluation: EvaluationResponse = {pass: true, think: ''};
|
||||
let evaluation: EvaluationResponse = { pass: true, think: '' };
|
||||
if (evaluationMetrics[currentQuestion].length > 0) {
|
||||
context.actionTracker.trackThink('eval_first', SchemaGen.languageCode)
|
||||
evaluation = await evaluateAnswer(
|
||||
@ -756,8 +757,8 @@ But then you realized you have asked them before. You decided to to think out of
|
||||
thisStep.searchRequests = chooseK((await dedupQueries(thisStep.searchRequests, [], context.tokenTracker)).unique_queries, MAX_QUERIES_PER_STEP);
|
||||
|
||||
// do first search
|
||||
const {searchedQueries, newKnowledge} = await executeSearchQueries(
|
||||
thisStep.searchRequests.map(q => ({q})),
|
||||
const { searchedQueries, newKnowledge } = await executeSearchQueries(
|
||||
thisStep.searchRequests.map(q => ({ q })),
|
||||
context,
|
||||
allURLs,
|
||||
SchemaGen,
|
||||
@ -777,13 +778,13 @@ But then you realized you have asked them before. You decided to to think out of
|
||||
keywordsQueries = keywordsQueries = uniqQOnly.map(q => {
|
||||
const matches = keywordsQueries.filter(kq => kq.q === q);
|
||||
// if there are multiple matches, keep the original query as the wider search
|
||||
return matches.length > 1 ? {q} : matches[0];
|
||||
return matches.length > 1 ? { q } : matches[0];
|
||||
}) as SERPQuery[];
|
||||
|
||||
let anyResult = false;
|
||||
|
||||
if (keywordsQueries.length > 0) {
|
||||
const {searchedQueries, newKnowledge} =
|
||||
const { searchedQueries, newKnowledge } =
|
||||
await executeSearchQueries(
|
||||
keywordsQueries,
|
||||
context,
|
||||
@ -842,7 +843,7 @@ You decided to think out of the box or cut from a completely different angle.
|
||||
console.log(uniqueURLs)
|
||||
|
||||
if (uniqueURLs.length > 0) {
|
||||
const {urlResults, success} = await processURLs(
|
||||
const { urlResults, success } = await processURLs(
|
||||
uniqueURLs,
|
||||
context,
|
||||
allKnowledge,
|
||||
@ -885,7 +886,7 @@ You decided to think out of the box or cut from a completely different angle.`);
|
||||
}
|
||||
allowRead = false;
|
||||
} else if (thisStep.action === 'coding' && thisStep.codingIssue) {
|
||||
const sandbox = new CodeSandbox({allContext, URLs: weightedURLs.slice(0, 20), allKnowledge}, context, SchemaGen);
|
||||
const sandbox = new CodeSandbox({ allContext, URLs: weightedURLs.slice(0, 20), allKnowledge }, context, SchemaGen);
|
||||
try {
|
||||
const result = await sandbox.solve(thisStep.codingIssue);
|
||||
allKnowledge.push({
|
||||
@ -936,7 +937,7 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
|
||||
// any answer is better than no answer, humanity last resort
|
||||
step++;
|
||||
totalStep++;
|
||||
const {system} = getPrompt(
|
||||
const { system } = getPrompt(
|
||||
diaryContext,
|
||||
allQuestions,
|
||||
allKeywords,
|
||||
@ -966,7 +967,7 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
|
||||
} as AnswerAction;
|
||||
// await updateReferences(thisStep, allURLs);
|
||||
(thisStep as AnswerAction).isFinal = true;
|
||||
context.actionTracker.trackAction({totalStep, thisStep, gaps});
|
||||
context.actionTracker.trackAction({ totalStep, thisStep, gaps });
|
||||
}
|
||||
|
||||
const answerStep = thisStep as AnswerAction;
|
||||
@ -988,7 +989,7 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
|
||||
),
|
||||
allURLs)));
|
||||
|
||||
const {answer, references} = await buildReferences(
|
||||
const { answer, references } = await buildReferences(
|
||||
answerStep.answer,
|
||||
allWebContents,
|
||||
context,
|
||||
@ -1021,16 +1022,16 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
|
||||
}
|
||||
|
||||
async function storeContext(prompt: string, schema: any, memory: {
|
||||
allContext: StepAction[];
|
||||
allKeywords: string[];
|
||||
allQuestions: string[];
|
||||
allKnowledge: KnowledgeItem[];
|
||||
weightedURLs: BoostedSearchSnippet[];
|
||||
msgWithKnowledge: CoreMessage[];
|
||||
}
|
||||
allContext: StepAction[];
|
||||
allKeywords: string[];
|
||||
allQuestions: string[];
|
||||
allKnowledge: KnowledgeItem[];
|
||||
weightedURLs: BoostedSearchSnippet[];
|
||||
msgWithKnowledge: CoreMessage[];
|
||||
}
|
||||
, step: number) {
|
||||
|
||||
const {allContext, allKeywords, allQuestions, allKnowledge, weightedURLs, msgWithKnowledge} = memory;
|
||||
const { allContext, allKeywords, allQuestions, allKnowledge, weightedURLs, msgWithKnowledge } = memory;
|
||||
if ((process as any).asyncLocalContext?.available?.()) {
|
||||
|
||||
(process as any).asyncLocalContext.ctx.promptContext = {
|
||||
|
||||
@ -566,7 +566,8 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
|
||||
body.bad_hostnames?.map(i => normalizeHostName(i)),
|
||||
body.only_hostnames?.map(i => normalizeHostName(i)),
|
||||
body.max_annotations,
|
||||
body.min_annotation_relevance
|
||||
body.min_annotation_relevance,
|
||||
body.language_code
|
||||
)
|
||||
let finalAnswer = (finalStep as AnswerAction).mdAnswer;
|
||||
|
||||
|
||||
@ -26,9 +26,9 @@ program
|
||||
const { result } = await getResponse(
|
||||
query,
|
||||
parseInt(options.tokenBudget),
|
||||
parseInt(options.maxAttempts)
|
||||
parseInt(options.maxAttempts),
|
||||
);
|
||||
|
||||
|
||||
if (result.action === 'answer') {
|
||||
console.log('\nAnswer:', result.answer);
|
||||
if (result.references?.length) {
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// Action Types
|
||||
import {CoreMessage, LanguageModelUsage} from "ai";
|
||||
import { CoreMessage, LanguageModelUsage } from "ai";
|
||||
|
||||
type BaseAction = {
|
||||
action: "search" | "answer" | "reflect" | "visit" | "coding";
|
||||
@ -245,6 +245,7 @@ export interface ChatCompletionRequest {
|
||||
|
||||
max_annotations?: number;
|
||||
min_annotation_relevance?: number;
|
||||
language_code?: string;
|
||||
}
|
||||
|
||||
export interface URLAnnotation {
|
||||
@ -304,8 +305,8 @@ export interface ChatCompletionChunk {
|
||||
}
|
||||
|
||||
// Tracker Types
|
||||
import {TokenTracker} from './utils/token-tracker';
|
||||
import {ActionTracker} from './utils/action-tracker';
|
||||
import { TokenTracker } from './utils/token-tracker';
|
||||
import { ActionTracker } from './utils/action-tracker';
|
||||
|
||||
export interface TrackerContext {
|
||||
tokenTracker: TokenTracker;
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import {z} from "zod";
|
||||
import {ObjectGeneratorSafe} from "./safe-generator";
|
||||
import {EvaluationType, PromptPair} from "../types";
|
||||
import { z } from "zod";
|
||||
import { ObjectGeneratorSafe } from "./safe-generator";
|
||||
import { EvaluationType, PromptPair } from "../types";
|
||||
|
||||
export const MAX_URLS_PER_STEP = 5
|
||||
export const MAX_QUERIES_PER_STEP = 5
|
||||
@ -59,12 +59,51 @@ Evaluation: {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Lookup table from a language code to the English name of that language.
 *
 * Most keys are two-letter ISO 639-1 codes; `zh-CN` and `zh-TW` are
 * region-qualified tags that separate Simplified from Traditional Chinese.
 *
 * `Schemas.setLanguage` indexes this table with its raw argument and treats a
 * truthy hit as "the argument is already a language code". That argument may
 * be free-form user text, so the table is built on a null-prototype object:
 * with an ordinary object literal, inherited members such as `constructor`,
 * `toString` or `valueOf` would also produce truthy hits and be mistaken for
 * language codes. Here, only the codes listed below resolve; everything else
 * yields `undefined`.
 */
const languageISO6391Map: Record<string, string> = Object.assign(
  // No prototype chain => no inherited keys can leak into lookups.
  Object.create(null) as Record<string, string>,
  {
    'en': 'English',
    'zh': 'Chinese',
    'zh-CN': 'Simplified Chinese',
    'zh-TW': 'Traditional Chinese',
    'de': 'German',
    'fr': 'French',
    'es': 'Spanish',
    'it': 'Italian',
    'ja': 'Japanese',
    'ko': 'Korean',
    'pt': 'Portuguese',
    'ru': 'Russian',
    'ar': 'Arabic',
    'hi': 'Hindi',
    'bn': 'Bengali',
    'tr': 'Turkish',
    'nl': 'Dutch',
    'pl': 'Polish',
    'sv': 'Swedish',
    'no': 'Norwegian',
    'da': 'Danish',
    'fi': 'Finnish',
    'el': 'Greek',
    'he': 'Hebrew',
    'hu': 'Hungarian',
    'id': 'Indonesian',
    'ms': 'Malay',
    'th': 'Thai',
    'vi': 'Vietnamese',
    'ro': 'Romanian',
    'bg': 'Bulgarian',
  },
);
|
||||
|
||||
export class Schemas {
|
||||
public languageStyle: string = 'formal English';
|
||||
public languageCode: string = 'en';
|
||||
|
||||
|
||||
async setLanguage(query: string) {
|
||||
if (languageISO6391Map[query]) {
|
||||
this.languageCode = query;
|
||||
this.languageStyle = `formal ${languageISO6391Map[query]}`;
|
||||
return;
|
||||
}
|
||||
const generator = new ObjectGeneratorSafe();
|
||||
const prompt = getLanguagePrompt(query.slice(0, 100))
|
||||
|
||||
@ -194,7 +233,7 @@ export class Schemas {
|
||||
}
|
||||
|
||||
getAgentSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean, allowCoding: boolean,
|
||||
currentQuestion?: string): z.ZodObject<any> {
|
||||
currentQuestion?: string): z.ZodObject<any> {
|
||||
const actionSchemas: Record<string, z.ZodOptional<any>> = {};
|
||||
|
||||
if (allowSearch) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user