Mirror of https://github.com/dzhng/deep-research.git (synced 2025-12-25 20:36:48 +08:00)

added working files

commit 3d1c1e7658 (parent 7d9998f65a)

.gitignore (vendored, new file, +38)
@@ -0,0 +1,38 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# Dependencies
node_modules
.pnp
.pnp.js

# Local env files
.env
.env.local
.env.development.local
.env.test.local
.env.production.local

# Testing
coverage

# Turbo
.turbo

# Vercel
.vercel

# Build Outputs
.next/
out/
build
dist

# Debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Misc
.DS_Store
*.pem

.prettierignore (new file, +1)
@@ -0,0 +1 @@
*.hbs

README.md (new file, +131)
@@ -0,0 +1,131 @@
# Open Deep Research

An AI-powered research assistant that performs iterative, deep research on any topic by combining search engines, web scraping, and large language models. If you like this project, please consider starring it and giving me a follow on [X](https://x.com/dzhng).

## How It Works

```mermaid
flowchart TB
    subgraph Input
        Q[User Query]
        B[Breadth Parameter]
        D[Depth Parameter]
    end

    DR[Deep Research] -->
    SQ[SERP Queries] -->
    PR[Process Results] -->
    DP{depth > 0?}

    RD["Next Direction:
    - Prior Goals
    - New Questions
    - Learnings"]

    MR[Markdown Report]

    %% Main Flow
    Q & B & D --> DR

    %% Circular Flow
    DP -->|Yes| RD
    RD -->|New Context| DR

    %% Final Output
    DP -->|No| MR

    %% Styling
    classDef input fill:#7bed9f,stroke:#2ed573,color:black
    classDef process fill:#70a1ff,stroke:#1e90ff,color:black
    classDef recursive fill:#ffa502,stroke:#ff7f50,color:black
    classDef output fill:#ff4757,stroke:#ff6b81,color:black

    class Q,B,D input
    class DR,SQ,PR process
    class DP,RD recursive
    class MR output
```

## Features

- **Iterative Research**: Performs deep research by iteratively generating search queries, processing results, and diving deeper based on findings
- **Intelligent Query Generation**: Uses LLMs to generate targeted search queries based on research goals and previous findings
- **Depth & Breadth Control**: Configurable parameters to control how wide (breadth) and deep (depth) the research goes
- **Smart Follow-up**: Generates follow-up questions to better understand research needs
- **Comprehensive Reports**: Produces detailed markdown reports with findings and sources
- **Concurrent Processing**: Handles multiple searches and result processing in parallel for efficiency

## Requirements

- Node.js environment
- API keys for:
  - Firecrawl API (for web search and content extraction)
  - OpenAI API (for the o3-mini model)

## Setup

1. Clone the repository
2. Install dependencies:

   ```bash
   npm install
   ```

3. Set up environment variables in a `.env.local` file:

   ```bash
   FIRECRAWL_KEY="your_firecrawl_key"
   OPENAI_KEY="your_openai_key"
   ```

## Usage

Run the research assistant:

```bash
npm start
```

You'll be prompted to:

1. Enter your research query
2. Specify research breadth (recommended: 3-10, default: 6)
3. Specify research depth (recommended: 1-5, default: 3)
4. Answer follow-up questions to refine the research direction

The system will then:

1. Generate and execute search queries
2. Process and analyze search results
3. Recursively explore deeper based on findings
4. Generate a comprehensive markdown report

The final report will be saved as `report.md` in your working directory.

## How It Works in Detail

1. **Initial Setup**

   - Takes user query and research parameters (breadth & depth)
   - Generates follow-up questions to understand research needs better

2. **Deep Research Process**

   - Generates multiple SERP queries based on research goals
   - Processes search results to extract key learnings
   - Generates follow-up research directions

3. **Recursive Exploration**

   - If depth > 0, takes new research directions and continues exploration
   - Each iteration builds on previous learnings
   - Maintains context of research goals and findings

4. **Report Generation**

   - Compiles all findings into a comprehensive markdown report
   - Includes all sources and references
   - Organizes information in a clear, readable format

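The same flow can also be driven programmatically via the exported functions. A minimal sketch with a hypothetical query (assuming the environment variables from Setup are loaded):

```typescript
import { deepResearch, writeFinalReport } from './src/deep-research';

async function main() {
  // Run the recursive research loop: 2 queries wide, 2 levels deep
  const { learnings, visitedUrls } = await deepResearch({
    query: 'What are the tradeoffs between solid-state and lithium-ion batteries?',
    breadth: 2,
    depth: 2,
  });

  // Compile the accumulated learnings and sources into a markdown report
  const report = await writeFinalReport({
    prompt: 'Tradeoffs between solid-state and lithium-ion batteries',
    learnings,
    visitedUrls,
  });
  console.log(report);
}

main().catch(console.error);
```

`deepResearch` returns the deduplicated `learnings` and `visitedUrls` accumulated across all recursion levels, and `writeFinalReport` appends a `## Sources` section listing the visited URLs.
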
## License

MIT License - feel free to use and modify as needed.

package-lock.json (generated, new file, +1376)
File diff suppressed because it is too large.

package.json (new file, +34)
@@ -0,0 +1,34 @@
{
  "name": "open-deep-research",
  "version": "0.0.1",
  "main": "index.ts",
  "scripts": {
    "format": "prettier --write \"src/**/*.{ts,tsx}\"",
    "tsx": "tsx --env-file=.env.local",
    "start": "tsx --env-file=.env.local src/run.ts",
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "",
  "license": "ISC",
  "description": "",
  "devDependencies": {
    "@ianvs/prettier-plugin-sort-imports": "^4.4.1",
    "@types/lodash-es": "^4.17.12",
    "@types/node": "^22.13.0",
    "prettier": "^3.4.2",
    "tsx": "^4.19.2",
    "typescript": "^5.7.3"
  },
  "dependencies": {
    "@ai-sdk/openai": "^1.1.9",
    "@mendable/firecrawl-js": "^1.16.0",
    "ai": "^4.1.17",
    "js-tiktoken": "^1.0.17",
    "lodash-es": "^4.17.21",
    "p-limit": "^6.2.0",
    "zod": "^3.24.1"
  },
  "engines": {
    "node": "22.x"
  }
}

prettier.config.mjs (new file, +24)
@@ -0,0 +1,24 @@
/** @type {import('prettier').Config} */
export default {
  endOfLine: 'lf',
  semi: true,
  useTabs: false,
  singleQuote: true,
  arrowParens: 'avoid',
  tabWidth: 2,
  trailingComma: 'all',
  importOrder: [
    '^(react/(.*)$)|^(react$)',
    '^(next/(.*)$)|^(next$)',
    '<THIRD_PARTY_MODULES>',
    '',
    '@repo/(.*)$',
    '',
    '^@/(.*)$',
    '',
    '^[./]',
  ],
  importOrderParserPlugins: ['typescript', 'jsx'],
  importOrderTypeScriptVersion: '5.7.2',
  plugins: ['@ianvs/prettier-plugin-sort-imports'],
};

src/ai/providers.ts (new file, +59)
@@ -0,0 +1,59 @@
import { createOpenAI } from '@ai-sdk/openai';
import { getEncoding } from 'js-tiktoken';

import { RecursiveCharacterTextSplitter } from './text-splitter';

// Providers

const openai = createOpenAI({
  apiKey: process.env.OPENAI_KEY!,
});

// Models

export const gpt4Model = openai('gpt-4o', {
  structuredOutputs: true,
});
export const gpt4MiniModel = openai('gpt-4o-mini', {
  structuredOutputs: true,
});
export const o3MiniModel = openai('o3-mini', {
  reasoningEffort: 'medium',
  structuredOutputs: true,
});

const MinChunkSize = 140;
const encoder = getEncoding('o200k_base');

// Trim prompt to maximum context size
export function trimPrompt(prompt: string, contextSize = 120_000) {
  if (!prompt) {
    return '';
  }

  const length = encoder.encode(prompt).length;
  if (length <= contextSize) {
    return prompt;
  }

  const overflowTokens = length - contextSize;
  // On average a token is ~3 characters, so multiply by 3 to get a rough
  // estimate of the number of characters to drop
  const chunkSize = prompt.length - overflowTokens * 3;
  if (chunkSize < MinChunkSize) {
    return prompt.slice(0, MinChunkSize);
  }

  const splitter = new RecursiveCharacterTextSplitter({
    chunkSize,
    chunkOverlap: 0,
  });
  const trimmedPrompt = splitter.splitText(prompt)[0] ?? '';

  // Last catch: the trimmed prompt can end up the same length as the original
  // prompt, due to how tokens are split and the inner workings of the splitter;
  // handle this case by just doing a hard cut
  if (trimmedPrompt.length === prompt.length) {
    return trimPrompt(prompt.slice(0, chunkSize), contextSize);
  }

  // Recursively trim until the prompt is within the context size
  return trimPrompt(trimmedPrompt, contextSize);
}
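
A quick usage sketch for `trimPrompt` (the long input below is synthetic, and the relative import assumes a script run from the repo root, e.g. via the package.json `tsx` script):

```typescript
import { trimPrompt } from './src/ai/providers';

// A prompt under the default 120k-token budget is returned unchanged
console.log(trimPrompt('hello world') === 'hello world'); // true

// An oversized prompt is recursively trimmed until it fits the budget
const huge = 'Some scraped page content. '.repeat(50_000);
const fitted = trimPrompt(huge);
console.log(fitted.length < huge.length); // true
```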

src/ai/text-splitter.test.ts (new file, +50)
@@ -0,0 +1,50 @@
import assert from 'node:assert';
import { describe, it } from 'node:test';

import { RecursiveCharacterTextSplitter } from './text-splitter';

describe('RecursiveCharacterTextSplitter', () => {
  it('Should correctly split text by separators', () => {
    const splitter = new RecursiveCharacterTextSplitter({
      chunkSize: 50,
      chunkOverlap: 10,
    });
    assert.deepEqual(
      splitter.splitText(
        'Hello world, this is a test of the recursive text splitter.',
      ),
      ['Hello world', 'this is a test of the recursive text splitter'],
    );

    splitter.chunkSize = 100;
    assert.deepEqual(
      splitter.splitText(
        'Hello world, this is a test of the recursive text splitter. If I have a period, it should split along the period.',
      ),
      [
        'Hello world, this is a test of the recursive text splitter',
        'If I have a period, it should split along the period.',
      ],
    );

    splitter.chunkSize = 110;
    assert.deepEqual(
      splitter.splitText(
        'Hello world, this is a test of the recursive text splitter. If I have a period, it should split along the period.\nOr, if there is a new line, it should prioritize splitting on new lines instead.',
      ),
      [
        'Hello world, this is a test of the recursive text splitter',
        'If I have a period, it should split along the period.',
        'Or, if there is a new line, it should prioritize splitting on new lines instead.',
      ],
    );
  });

  it('Should handle empty string', () => {
    const splitter = new RecursiveCharacterTextSplitter({
      chunkSize: 50,
      chunkOverlap: 10,
    });
    assert.deepEqual(splitter.splitText(''), []);
  });
});

src/ai/text-splitter.ts (new file, +143)
@@ -0,0 +1,143 @@
interface TextSplitterParams {
  chunkSize: number;
  chunkOverlap: number;
}

abstract class TextSplitter implements TextSplitterParams {
  chunkSize = 1000;
  chunkOverlap = 200;

  constructor(fields?: Partial<TextSplitterParams>) {
    this.chunkSize = fields?.chunkSize ?? this.chunkSize;
    this.chunkOverlap = fields?.chunkOverlap ?? this.chunkOverlap;
    if (this.chunkOverlap >= this.chunkSize) {
      throw new Error('Cannot have chunkOverlap >= chunkSize');
    }
  }

  abstract splitText(text: string): string[];

  createDocuments(texts: string[]): string[] {
    const documents: string[] = [];
    for (let i = 0; i < texts.length; i += 1) {
      const text = texts[i];
      for (const chunk of this.splitText(text!)) {
        documents.push(chunk);
      }
    }
    return documents;
  }

  splitDocuments(documents: string[]): string[] {
    return this.createDocuments(documents);
  }

  private joinDocs(docs: string[], separator: string): string | null {
    const text = docs.join(separator).trim();
    return text === '' ? null : text;
  }

  mergeSplits(splits: string[], separator: string): string[] {
    const docs: string[] = [];
    const currentDoc: string[] = [];
    let total = 0;
    for (const d of splits) {
      const _len = d.length;
      if (total + _len >= this.chunkSize) {
        if (total > this.chunkSize) {
          console.warn(
            `Created a chunk of size ${total}, which is longer than the specified ${this.chunkSize}`,
          );
        }
        if (currentDoc.length > 0) {
          const doc = this.joinDocs(currentDoc, separator);
          if (doc !== null) {
            docs.push(doc);
          }
          // Keep on popping if:
          // - we have a larger chunk than the chunk overlap
          // - or if we still have any chunks and the length is long
          while (
            total > this.chunkOverlap ||
            (total + _len > this.chunkSize && total > 0)
          ) {
            total -= currentDoc[0]!.length;
            currentDoc.shift();
          }
        }
      }
      currentDoc.push(d);
      total += _len;
    }
    const doc = this.joinDocs(currentDoc, separator);
    if (doc !== null) {
      docs.push(doc);
    }
    return docs;
  }
}

export interface RecursiveCharacterTextSplitterParams
  extends TextSplitterParams {
  separators: string[];
}

export class RecursiveCharacterTextSplitter
  extends TextSplitter
  implements RecursiveCharacterTextSplitterParams
{
  separators: string[] = ['\n\n', '\n', '.', ',', '>', '<', ' ', ''];

  constructor(fields?: Partial<RecursiveCharacterTextSplitterParams>) {
    super(fields);
    this.separators = fields?.separators ?? this.separators;
  }

  splitText(text: string): string[] {
    const finalChunks: string[] = [];

    // Get appropriate separator to use
    let separator: string = this.separators[this.separators.length - 1]!;
    for (const s of this.separators) {
      if (s === '') {
        separator = s;
        break;
      }
      if (text.includes(s)) {
        separator = s;
        break;
      }
    }

    // Now that we have the separator, split the text
    let splits: string[];
    if (separator) {
      splits = text.split(separator);
    } else {
      splits = text.split('');
    }

    // Now go merging things, recursively splitting longer texts.
    let goodSplits: string[] = [];
    for (const s of splits) {
      if (s.length < this.chunkSize) {
        goodSplits.push(s);
      } else {
        if (goodSplits.length) {
          const mergedText = this.mergeSplits(goodSplits, separator);
          finalChunks.push(...mergedText);
          goodSplits = [];
        }
        const otherInfo = this.splitText(s);
        finalChunks.push(...otherInfo);
      }
    }
    if (goodSplits.length) {
      const mergedText = this.mergeSplits(goodSplits, separator);
      finalChunks.push(...mergedText);
    }
    return finalChunks;
  }
}

src/deep-research.ts (new file, +205)
@@ -0,0 +1,205 @@
import FirecrawlApp, { SearchResponse } from '@mendable/firecrawl-js';
import { generateObject } from 'ai';
import { compact } from 'lodash-es';
import pLimit from 'p-limit';
import { z } from 'zod';

import { o3MiniModel } from './ai/providers';
import { systemPrompt } from './prompt';

type ResearchResult = {
  learnings: string[];
  visitedUrls: string[];
};

// increase this if you have higher API rate limits
const ConcurrencyLimit = 2;

const firecrawl = new FirecrawlApp({
  apiKey: process.env.FIRECRAWL_KEY!,
});

// take in user query, return a list of SERP queries
async function generateSerpQueries({
  query,
  numQueries = 3,
  learnings,
}: {
  query: string;
  numQueries?: number;

  // optional; if provided, the research will continue from the last learnings
  learnings?: string[];
}) {
  const res = await generateObject({
    model: o3MiniModel,
    system: systemPrompt(),
    prompt: `Given the following prompt from the user, generate a list of SERP queries to research the topic. Return a maximum of ${numQueries} queries, but feel free to return less if the original prompt is clear. Make sure each query is unique and not similar to each other: <prompt>${query}</prompt>\n\n${
      learnings
        ? `Here are some learnings from previous research, use them to generate more specific queries: ${learnings.join(
            '\n',
          )}`
        : ''
    }`,
    schema: z.object({
      queries: z
        .array(
          z.object({
            query: z.string().describe('The SERP query'),
            researchGoal: z
              .string()
              .describe(
                'First talk about the goal of the research that this query is meant to accomplish, then go deeper into how to advance the research once the results are found, mention additional research directions. Be as specific as possible, especially for additional research directions.',
              ),
          }),
        )
        .describe(`List of SERP queries, max of ${numQueries}`),
    }),
  });
  console.log(
    `Created ${res.object.queries.length} queries`,
    res.object.queries,
  );

  return res.object.queries.slice(0, numQueries);
}

async function processSerpResult({
  query,
  result,
  numLearnings = 3,
  numFollowUpQuestions = 3,
}: {
  query: string;
  result: SearchResponse;
  numLearnings?: number;
  numFollowUpQuestions?: number;
}) {
  const contents = compact(result.data.map(item => item.markdown));
  console.log(`Ran ${query}, found ${contents.length} contents`, contents);

  const res = await generateObject({
    model: o3MiniModel,
    system: systemPrompt(),
    prompt: `Given the following contents from a SERP search for the query <query>${query}</query>, generate a list of learnings from the contents. Return a maximum of ${numLearnings} learnings, but feel free to return less if the contents are clear. Make sure each learning is unique and not similar to each other. The learnings should be concise and to the point, as information dense as possible. Make sure to include any entities like people, places, companies, products, things, etc in the learnings, as well as any exact metrics, numbers, or dates. The learnings will be used to research the topic further.\n\n<contents>${contents
      .map(content => `<content>\n${content}\n</content>`)
      .join('\n')}</contents>`,
    schema: z.object({
      learnings: z
        .array(z.string())
        .describe(`List of learnings, max of ${numLearnings}`),
      followUpQuestions: z
        .array(z.string())
        .describe(
          `List of follow-up questions to research the topic further, max of ${numFollowUpQuestions}`,
        ),
    }),
  });
  console.log(
    `Created ${res.object.learnings.length} learnings`,
    res.object.learnings,
  );

  return res.object;
}

export async function writeFinalReport({
  prompt,
  learnings,
  visitedUrls,
}: {
  prompt: string;
  learnings: string[];
  visitedUrls: string[];
}) {
  const res = await generateObject({
    model: o3MiniModel,
    system: systemPrompt(),
    prompt: `Given the following prompt from the user, write a final report on the topic using the learnings from research:\n\n<prompt>${prompt}</prompt>\n\nHere are all the learnings from previous research:\n\n<learnings>${learnings
      .map(learning => `<learning>\n${learning}\n</learning>`)
      .join('\n')}</learnings>`,
    schema: z.object({
      reportMarkdown: z
        .string()
        .describe('Final report on the topic in Markdown'),
    }),
  });

  // Append the visited URLs section to the report
  const urlsSection = `\n\n## Sources\n\n${visitedUrls.map(url => `- ${url}`).join('\n')}`;
  return res.object.reportMarkdown + urlsSection;
}

export async function deepResearch({
  query,
  breadth,
  depth,
  learnings = [],
  visitedUrls = [],
}: {
  query: string;
  breadth: number;
  depth: number;
  learnings?: string[];
  visitedUrls?: string[];
}): Promise<ResearchResult> {
  const serpQueries = await generateSerpQueries({
    query,
    learnings,
    numQueries: breadth,
  });
  const limit = pLimit(ConcurrencyLimit);

  const results = await Promise.all(
    serpQueries.map(serpQuery =>
      limit(async () => {
        const result = await firecrawl.search(serpQuery.query, {
          scrapeOptions: { formats: ['markdown'] },
        });

        // Collect URLs from this search
        const newUrls = compact(result.data.map(item => item.url));
        const newBreadth = Math.ceil(breadth / 2);
        const newDepth = depth - 1;

        const newLearnings = await processSerpResult({
          query: serpQuery.query,
          result,
          numFollowUpQuestions: newBreadth,
        });
        const allLearnings = [...learnings, ...newLearnings.learnings];
        const allUrls = [...visitedUrls, ...newUrls];

        if (newDepth > 0) {
          console.log(
            `Researching deeper, breadth: ${newBreadth}, depth: ${newDepth}`,
          );

          const nextQuery = `
            Previous research goal: ${serpQuery.researchGoal}
            Follow-up research directions: ${newLearnings.followUpQuestions.map(q => `\n${q}`).join('')}
          `.trim();

          return deepResearch({
            query: nextQuery,
            breadth: newBreadth,
            depth: newDepth,
            learnings: allLearnings,
            visitedUrls: allUrls,
          });
        } else {
          return {
            learnings: allLearnings,
            visitedUrls: allUrls,
          };
        }
      }),
    ),
  );

  // Simpler way to combine and deduplicate results
  return {
    learnings: [...new Set(results.flatMap(r => r.learnings))],
    visitedUrls: [...new Set(results.flatMap(r => r.visitedUrls))],
  };
}
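
A note on fan-out in `deepResearch` above: each level issues `breadth` SERP queries, and every query spawns one deeper call with `Math.ceil(breadth / 2)` queries and `depth - 1`, so the total query count stays bounded. A hypothetical helper (not part of the repo) that works out the arithmetic:

```typescript
// Total SERP queries issued by deepResearch(breadth, depth),
// mirroring the recursion: breadth halves, depth decrements.
function estimateQueries(breadth: number, depth: number): number {
  if (depth <= 0) return 0;
  // this level runs `breadth` queries; each one recurses once
  return breadth + breadth * estimateQueries(Math.ceil(breadth / 2), depth - 1);
}

console.log(estimateQueries(6, 3)); // 6 + 6 * (3 + 3 * 2) = 60 with the defaults
```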

src/feedback.ts (new file, +28)
@@ -0,0 +1,28 @@
import { generateObject } from 'ai';
import { z } from 'zod';

import { o3MiniModel } from './ai/providers';
import { systemPrompt } from './prompt';

export async function generateFeedback({
  query,
  numQuestions = 3,
}: {
  query: string;
  numQuestions?: number;
}) {
  const userFeedback = await generateObject({
    model: o3MiniModel,
    system: systemPrompt(),
    prompt: `Given the following query from the user, ask some follow up questions to clarify the research direction. Return a maximum of ${numQuestions} questions, but feel free to return less if the original query is clear: <query>${query}</query>`,
    schema: z.object({
      questions: z
        .array(z.string())
        .describe(
          `Follow up questions to clarify the research direction, max of ${numQuestions}`,
        ),
    }),
  });

  return userFeedback.object.questions.slice(0, numQuestions);
}

src/prompt.ts (new file, +15)
@@ -0,0 +1,15 @@
export const systemPrompt = () => {
  const now = new Date().toISOString();
  return `You are an expert researcher. Today is ${now}. Follow these instructions when responding:
  - You may be asked to research subjects that are after your knowledge cutoff; assume the user is right when presented with news.
  - The user is a highly experienced analyst, no need to simplify it, be as detailed as possible and make sure your response is correct.
  - Be highly organized.
  - Suggest solutions that I didn't think about.
  - Be proactive and anticipate my needs.
  - Treat me as an expert in all subject matter.
  - Mistakes erode my trust, so be accurate and thorough.
  - Provide detailed explanations, I'm comfortable with lots of detail.
  - Value good arguments over authorities, the source is irrelevant.
  - Consider new technologies and contrarian ideas, not just the conventional wisdom.
  - You may use high levels of speculation or prediction, just flag it for me.`;
};

src/run.ts (new file, +93)
@@ -0,0 +1,93 @@
import * as fs from 'fs/promises';
import * as readline from 'readline';

import { deepResearch, writeFinalReport } from './deep-research';
import { generateFeedback } from './feedback';

const rl = readline.createInterface({
  input: process.stdin,
  output: process.stdout,
});

// Helper function to get user input
function askQuestion(query: string): Promise<string> {
  return new Promise(resolve => {
    rl.question(query, answer => {
      resolve(answer);
    });
  });
}

// run the agent
async function run() {
  // Get initial query
  const initialQuery = await askQuestion('What would you like to research? ');

  // Get breadth and depth parameters
  const breadth =
    parseInt(
      await askQuestion(
        'Enter research breadth (recommended 3-10, default 6): ',
      ),
      10,
    ) || 6;
  const depth =
    parseInt(
      await askQuestion('Enter research depth (recommended 1-5, default 3): '),
      10,
    ) || 3;

  console.log(`Creating research plan...`);

  // Generate follow-up questions
  const followUpQuestions = await generateFeedback({
    query: initialQuery,
  });

  console.log(
    '\nTo better understand your research needs, please answer these follow-up questions:',
  );

  // Collect answers to follow-up questions
  const answers: string[] = [];
  for (const question of followUpQuestions) {
    const answer = await askQuestion(`\n${question}\nYour answer: `);
    answers.push(answer);
  }

  // Combine all information for deep research
  const combinedQuery = `
Initial Query: ${initialQuery}
Follow-up Questions and Answers:
${followUpQuestions.map((q, i) => `Q: ${q}\nA: ${answers[i]}`).join('\n')}
`;

  console.log('\nResearching your topic...');

  const { learnings, visitedUrls } = await deepResearch({
    query: combinedQuery,
    breadth,
    depth,
  });

  console.log(`\n\nLearnings:\n\n${learnings.join('\n')}`);
  console.log(
    `\n\nVisited URLs (${visitedUrls.length}):\n\n${visitedUrls.join('\n')}`,
  );
  console.log('Writing final report...');

  const report = await writeFinalReport({
    prompt: combinedQuery,
    learnings,
    visitedUrls,
  });

  // Save report to file
  await fs.writeFile('report.md', report, 'utf-8');

  console.log(`\n\nFinal Report:\n\n${report}`);
  console.log('\nReport has been saved to report.md');
  rl.close();
}

run().catch(console.error);

tsconfig.json (new file, +19)
@@ -0,0 +1,19 @@
{
  "$schema": "https://json.schemastore.org/tsconfig",
  "compilerOptions": {
    "declaration": true,
    "declarationMap": true,
    "esModuleInterop": true,
    "incremental": false,
    "isolatedModules": true,
    "lib": ["es2022", "DOM", "DOM.Iterable"],
    "module": "ESNext",
    "moduleDetection": "force",
    "moduleResolution": "Bundler",
    "noUncheckedIndexedAccess": true,
    "resolveJsonModule": true,
    "skipLibCheck": true,
    "strict": true,
    "target": "ES2022"
  }
}