mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
fix: improve prompting
This commit is contained in:
parent
9f871ef37e
commit
fd40c7b020
@ -1,6 +1,6 @@
|
||||
# DeepResearch
|
||||
|
||||
Keep searching and reading webpages until finding the answer (or exceeding the token budget).
|
||||
Keep searching, reading webpages, and reasoning until it finds the answer (or exceeds the token budget).
|
||||
|
||||
Query: `"who is the biggest? cohere, jina ai, voyage?"` - 13 steps
|
||||

|
||||
@ -81,11 +81,11 @@ The server will emit the following event types:
|
||||
|
||||
Example events:
|
||||
```
|
||||
data: {"type":"progress","trackers":{"tokenUsage":74950,"tokenBreakdown":{"agent":64631,"read":10319},"actionState":{"action":"search","thoughts":"The provided text mentions several investors in Jina AI but doesn't specify ownership percentages. A direct search for ownership percentages is needed to answer the question definitively.","URLTargets":[],"answer":"","questionsToAnswer":[],"references":[],"searchQuery":"Jina AI investor ownership percentages"},"step":7,"badAttempts":0,"gaps":[]}}
|
||||
data: {"type":"progress","trackers":{"tokenUsage":74950,"tokenBreakdown":{"agent":64631,"read":10319},"actionState":{"action":"search","think":"The provided text mentions several investors in Jina AI but doesn't specify ownership percentages. A direct search for ownership percentages is needed to answer the question definitively.","URLTargets":[],"answer":"","questionsToAnswer":[],"references":[],"searchQuery":"Jina AI investor ownership percentages"},"step":7,"badAttempts":0,"gaps":[]}}
|
||||
|
||||
data: {"type":"progress","trackers":{"tokenUsage":74950,"tokenBreakdown":{"agent":64631,"read":10319},"actionState":{"action":"search","thoughts":"The provided text mentions several investors in Jina AI's funding rounds but doesn't specify ownership percentages. A search focusing on equity stakes and ownership percentages held by each investor will provide the necessary information to answer the main question.","URLTargets":[],"answer":"","questionsToAnswer":[],"references":[],"searchQuery":"Jina AI investor equity percentage ownership stake"},"step":8,"badAttempts":0,"gaps":[]}}
|
||||
data: {"type":"progress","trackers":{"tokenUsage":74950,"tokenBreakdown":{"agent":64631,"read":10319},"actionState":{"action":"search","think":"The provided text mentions several investors in Jina AI's funding rounds but doesn't specify ownership percentages. A search focusing on equity stakes and ownership percentages held by each investor will provide the necessary information to answer the main question.","URLTargets":[],"answer":"","questionsToAnswer":[],"references":[],"searchQuery":"Jina AI investor equity percentage ownership stake"},"step":8,"badAttempts":0,"gaps":[]}}
|
||||
|
||||
data: {"type":"progress","trackers":{"tokenUsage":88096,"tokenBreakdown":{"agent":77777,"read":10319},"actionState":{"action":"search","thoughts":"The provided text mentions several investors in Jina AI's funding rounds but doesn't specify ownership percentages. A search focusing on equity stakes and ownership percentages held by each investor will provide the necessary information to answer the main question.","URLTargets":[],"answer":"","questionsToAnswer":[],"references":[],"searchQuery":"Jina AI investor equity percentage ownership stake"},"step":8,"badAttempts":0,"gaps":[]}}
|
||||
data: {"type":"progress","trackers":{"tokenUsage":88096,"tokenBreakdown":{"agent":77777,"read":10319},"actionState":{"action":"search","think":"The provided text mentions several investors in Jina AI's funding rounds but doesn't specify ownership percentages. A search focusing on equity stakes and ownership percentages held by each investor will provide the necessary information to answer the main question.","URLTargets":[],"answer":"","questionsToAnswer":[],"references":[],"searchQuery":"Jina AI investor equity percentage ownership stake"},"step":8,"badAttempts":0,"gaps":[]}}
|
||||
```
|
||||
|
||||
## Docker
|
||||
|
||||
32
src/agent.ts
32
src/agent.ts
@ -27,7 +27,7 @@ function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boole
|
||||
enum: actions,
|
||||
description: "Must match exactly one action type"
|
||||
},
|
||||
thoughts: {
|
||||
think: {
|
||||
type: SchemaType.STRING,
|
||||
description: "Explain why choose this action, what's the thought process behind choosing this action"
|
||||
}
|
||||
@ -98,7 +98,7 @@ function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boole
|
||||
return {
|
||||
type: SchemaType.OBJECT,
|
||||
properties,
|
||||
required: ["action", "thoughts"]
|
||||
required: ["action", "think"]
|
||||
};
|
||||
}
|
||||
|
||||
@ -130,9 +130,8 @@ ${question}
|
||||
// Add context section if exists
|
||||
if (context?.length) {
|
||||
sections.push(`
|
||||
<context>
|
||||
You have conducted the following actions:
|
||||
|
||||
<context>
|
||||
${context.join('\n')}
|
||||
|
||||
</context>
|
||||
@ -158,8 +157,8 @@ ${JSON.stringify(k.references)}
|
||||
.join('\n\n');
|
||||
|
||||
sections.push(`
|
||||
You have successfully gathered some knowledge which might be useful for answering the original question. Here is the knowledge you have gathered so far:
|
||||
<knowledge>
|
||||
You have successfully gathered some knowledge which might be useful for answering the original question. Here is the knowledge you have gathered so far
|
||||
|
||||
${knowledgeItems}
|
||||
|
||||
@ -184,13 +183,14 @@ ${knowledgeItems}
|
||||
const learnedStrategy = badContext.map(c => c.improvement).join('\n');
|
||||
|
||||
sections.push(`
|
||||
Your have tried the following actions but failed to find the answer to the question:
|
||||
<bad-attempts>
|
||||
Your have tried the following actions but failed to find the answer to the question.
|
||||
|
||||
${attempts}
|
||||
|
||||
</bad-attempts>
|
||||
|
||||
Based on the failed attempts, you have learned the following strategy:
|
||||
<learned-strategy>
|
||||
${learnedStrategy}
|
||||
</learned-strategy>
|
||||
@ -263,8 +263,8 @@ ${urlList}
|
||||
}
|
||||
|
||||
sections.push(`
|
||||
<actions>
|
||||
Based on the current context, you must choose one of the following actions:
|
||||
<actions>
|
||||
${actions.join('\n\n')}
|
||||
</actions>
|
||||
`);
|
||||
@ -317,7 +317,7 @@ export async function getResponse(question: string, tokenBudget: number = 1_000_
|
||||
let allowRead = true;
|
||||
let allowReflect = true;
|
||||
let prompt = '';
|
||||
let thisStep: StepAction = {action: 'answer', answer: '', references: [], thoughts: ''};
|
||||
let thisStep: StepAction = {action: 'answer', answer: '', references: [], think: ''};
|
||||
let isAnswered = false;
|
||||
|
||||
const allURLs: Record<string, string> = {};
|
||||
@ -559,19 +559,21 @@ But then you realized you have asked them before. You decided to to think out of
|
||||
description: r.description,
|
||||
}));
|
||||
|
||||
allKnowledge.push({
|
||||
question: `What do Internet say about ${query}?`,
|
||||
answer: removeHTMLtags(minResults.map(r => `${r.description}`).join('; ')),
|
||||
references: minResults.map(r => r.url),
|
||||
type: 'side-info'
|
||||
});
|
||||
|
||||
for (const r of minResults) {
|
||||
allURLs[r.url] = r.title;
|
||||
}
|
||||
searchResults.push({query, results: minResults});
|
||||
allKeywords.push(query);
|
||||
}
|
||||
|
||||
allKnowledge.push({
|
||||
question: `What do Internet say about ${thisStep.searchQuery}?`,
|
||||
answer: removeHTMLtags(searchResults.map(r => r.results.map(r => r.description).join('; ')).join('; ')),
|
||||
// flatten into one url list, and take unique urls
|
||||
references: searchResults.map(r => r.results.map(r => r.url)).flat().filter((v, i, a) => a.indexOf(v) === i),
|
||||
type: 'side-info'
|
||||
});
|
||||
|
||||
diaryContext.push(`
|
||||
At step ${step}, you took the **search** action and look for external information for the question: "${currentQuestion}".
|
||||
In particular, you tried to search for the following keywords: "${keywordsQueries.join(', ')}".
|
||||
|
||||
@ -5,7 +5,7 @@ describe('rewriteQuery', () => {
|
||||
const { queries } = await rewriteQuery({
|
||||
action: 'search',
|
||||
searchQuery: 'how does typescript work',
|
||||
thoughts: 'Understanding TypeScript basics'
|
||||
think: 'Understanding TypeScript basics'
|
||||
});
|
||||
expect(Array.isArray(queries)).toBe(true);
|
||||
expect(queries.length).toBeGreaterThan(0);
|
||||
|
||||
@ -8,7 +8,7 @@ import { KeywordsResponse } from '../types';
|
||||
const responseSchema = {
|
||||
type: SchemaType.OBJECT,
|
||||
properties: {
|
||||
thought: {
|
||||
think: {
|
||||
type: SchemaType.STRING,
|
||||
description: "Strategic reasoning about query complexity and search approach"
|
||||
},
|
||||
@ -23,7 +23,7 @@ const responseSchema = {
|
||||
maxItems: 3
|
||||
}
|
||||
},
|
||||
required: ["thought", "queries"]
|
||||
required: ["think", "queries"]
|
||||
};
|
||||
|
||||
const genAI = new GoogleGenerativeAI(GEMINI_API_KEY);
|
||||
@ -67,7 +67,9 @@ A query can't only have operators; and operators can't be at the start a query;
|
||||
|
||||
<examples>
|
||||
Input Query: What's the difference between ReactJS and Vue.js for building web applications?
|
||||
Thought: This is a comparison query. User is likely looking for technical evaluation and objective feature comparisons, possibly for framework selection decisions. We'll split this into separate queries to capture both high-level differences and specific technical aspects.
|
||||
<think>
|
||||
This is a comparison query. User is likely looking for technical evaluation and objective feature comparisons, possibly for framework selection decisions. We'll split this into separate queries to capture both high-level differences and specific technical aspects.
|
||||
</think>
|
||||
Queries: [
|
||||
"react performance",
|
||||
"vue performance",
|
||||
@ -75,7 +77,9 @@ Queries: [
|
||||
]
|
||||
|
||||
Input Query: How to fix a leaking kitchen faucet?
|
||||
Thought: This is a how-to query seeking practical solutions. User likely wants step-by-step guidance and visual demonstrations for DIY repair. We'll target both video tutorials and written guides.
|
||||
<think>
|
||||
This is a how-to query seeking practical solutions. User likely wants step-by-step guidance and visual demonstrations for DIY repair. We'll target both video tutorials and written guides.
|
||||
</think>
|
||||
Queries: [
|
||||
"kitchen faucet leak repair",
|
||||
"faucet drip fix site:youtube.com",
|
||||
@ -83,7 +87,9 @@ Queries: [
|
||||
]
|
||||
|
||||
Input Query: What are healthy breakfast options for type 2 diabetes?
|
||||
Thought: This is a health-specific informational query. User needs authoritative medical advice combined with practical meal suggestions. Splitting into medical guidelines and recipes will provide comprehensive coverage.
|
||||
<think>
|
||||
This is a health-specific informational query. User needs authoritative medical advice combined with practical meal suggestions. Splitting into medical guidelines and recipes will provide comprehensive coverage.
|
||||
</think>
|
||||
Queries: [
|
||||
"what to eat for type 2 diabetes",
|
||||
"type 2 diabetes breakfast guidelines",
|
||||
@ -91,7 +97,9 @@ Queries: [
|
||||
]
|
||||
|
||||
Input Query: Latest AWS Lambda features for serverless applications
|
||||
Thought: This is a product research query focused on recent updates. User wants current information about specific technology features, likely for implementation purposes. We'll target official docs and community insights.
|
||||
<think>
|
||||
This is a product research query focused on recent updates. User wants current information about specific technology features, likely for implementation purposes. We'll target official docs and community insights.
|
||||
</think>
|
||||
Queries: [
|
||||
"aws lambda features site:aws.amazon.com intitle:2025",
|
||||
"new features lambda serverless"
|
||||
@ -100,7 +108,7 @@ Queries: [
|
||||
|
||||
Now, process this query:
|
||||
Input Query: ${action.searchQuery}
|
||||
Intention: ${action.thoughts}
|
||||
Intention: ${action.think}
|
||||
`;
|
||||
}
|
||||
|
||||
|
||||
@ -27,7 +27,7 @@ export function readUrl(url: string, token: string, tracker?: TokenTracker): Pro
|
||||
res.on('data', (chunk) => responseData += chunk);
|
||||
res.on('end', () => {
|
||||
const response = JSON.parse(responseData) as ReadResponse;
|
||||
console.log('Raw read response:', response);
|
||||
// console.log('Raw read response:', response);
|
||||
|
||||
if (response.code === 402) {
|
||||
reject(new Error(response.readableMessage || 'Insufficient balance'));
|
||||
|
||||
@ -3,7 +3,7 @@ import { SchemaType } from "@google/generative-ai";
|
||||
// Action Types
|
||||
type BaseAction = {
|
||||
action: "search" | "answer" | "reflect" | "visit";
|
||||
thoughts: string;
|
||||
think: string;
|
||||
};
|
||||
|
||||
export type SearchAction = BaseAction & {
|
||||
@ -64,7 +64,7 @@ export interface BraveSearchResponse {
|
||||
}
|
||||
|
||||
export type DedupResponse = {
|
||||
thought: string;
|
||||
think: string;
|
||||
unique_queries: string[];
|
||||
};
|
||||
|
||||
@ -115,7 +115,7 @@ export interface StepData {
|
||||
}
|
||||
|
||||
export type KeywordsResponse = {
|
||||
thought: string;
|
||||
think: string;
|
||||
queries: string[];
|
||||
};
|
||||
|
||||
|
||||
@ -10,7 +10,7 @@ interface ActionState {
|
||||
|
||||
export class ActionTracker extends EventEmitter {
|
||||
private state: ActionState = {
|
||||
thisStep: {action: 'answer', answer: '', references: [], thoughts: ''},
|
||||
thisStep: {action: 'answer', answer: '', references: [], think: ''},
|
||||
gaps: [],
|
||||
badAttempts: 0,
|
||||
totalStep: 0
|
||||
@ -27,7 +27,7 @@ export class ActionTracker extends EventEmitter {
|
||||
|
||||
reset() {
|
||||
this.state = {
|
||||
thisStep: {action: 'answer', answer: '', references: [], thoughts: ''},
|
||||
thisStep: {action: 'answer', answer: '', references: [], think: ''},
|
||||
gaps: [],
|
||||
badAttempts: 0,
|
||||
totalStep: 0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user