node-DeepResearch/src/tools/query-rewriter.ts
Devin AI 4c0093deb0 refactor: add multi-provider support with Vercel AI SDK
- Add support for Gemini, OpenAI, and Ollama providers
- Set default models (gemini-flash-1.5 for Gemini, gpt4o-mini for OpenAI)
- Implement provider factory pattern
- Update schema handling for each provider
- Add environment variable configuration
- Maintain token tracking across providers

Co-Authored-By: Han Xiao <han.xiao@jina.ai>
2025-02-05 12:28:21 +00:00

161 lines
6.2 KiB
TypeScript

import { ProviderFactory, AIProvider, isGeminiProvider, isOpenAIProvider } from '../utils/provider-factory';
import { aiConfig, modelConfigs } from "../config";
import { TokenTracker } from "../utils/token-tracker";
import { SearchAction, KeywordsResponse, ProviderType, OpenAIFunctionParameter } from "../types";
import { z } from 'zod';
import { getProviderSchema } from '../utils/schema';
/**
 * Schema for one rewritten search query.
 * The 30-character limit is only stated in the description text; no length
 * constraint is attached to the string itself.
 */
const searchQuerySchema = z
  .string()
  .describe("Search query, must be less than 30 characters");

/**
 * Structured output expected from the model: free-form reasoning (`think`)
 * plus between one and three search queries (`queries`).
 */
const responseSchema = z.object({
  think: z
    .string()
    .describe("Strategic reasoning about query complexity and search approach"),
  queries: z
    .array(searchQuerySchema)
    .describe("Array of search queries, orthogonal to each other")
    .min(1)
    .max(3)
});
/**
 * Builds the query-rewriting prompt for a search action.
 *
 * The prompt is assembled from three sections joined by single newlines:
 * the role description with its rules and operator reference, a set of
 * worked examples, and the task itself, which interpolates the action's
 * `searchQuery` and `think` fields.
 *
 * @param action - Search action supplying the query and the stated intention.
 * @returns The complete prompt text, ending with a trailing newline.
 */
function getPrompt(action: SearchAction): string {
  const { searchQuery, think } = action;

  // Role, rewriting rules and the list of supported search operators.
  const instructions = `You are an expert Information Retrieval Assistant. Transform user queries into precise keyword combinations with strategic reasoning and appropriate search operators.
<rules>
1. Generate search queries that directly include appropriate operators
2. Keep base keywords minimal: 2-3 words preferred
3. Use exact match quotes for specific phrases that must stay together
4. Split queries only when necessary for distinctly different aspects
5. Preserve crucial qualifiers while removing fluff words
6. Make the query resistant to SEO manipulation
7. When necessary, append <query-operators> at the end only when must needed
<query-operators>
A query can't only have operators; and operators can't be at the start a query;
- "phrase" : exact match for phrases
- +term : must include term; for critical terms that must appear
- -term : exclude term; exclude irrelevant or ambiguous terms
- filetype:pdf/doc : specific file type
- site:example.com : limit to specific site
- lang:xx : language filter (ISO 639-1 code)
- loc:xx : location filter (ISO 3166-1 code)
- intitle:term : term must be in title
- inbody:term : term must be in body text
</query-operators>
</rules>`;

  // Few-shot examples showing the expected reasoning and query lists.
  const examples = `<examples>
Input Query: What's the difference between ReactJS and Vue.js for building web applications?
<think>
This is a comparison query. User is likely looking for technical evaluation and objective feature comparisons, possibly for framework selection decisions. We'll split this into separate queries to capture both high-level differences and specific technical aspects.
</think>
Queries: [
"react performance",
"vue performance",
"react vue comparison",
]
Input Query: How to fix a leaking kitchen faucet?
<think>
This is a how-to query seeking practical solutions. User likely wants step-by-step guidance and visual demonstrations for DIY repair. We'll target both video tutorials and written guides.
</think>
Queries: [
"kitchen faucet leak repair",
"faucet drip fix site:youtube.com",
"how to repair faucet "
]
Input Query: What are healthy breakfast options for type 2 diabetes?
<think>
This is a health-specific informational query. User needs authoritative medical advice combined with practical meal suggestions. Splitting into medical guidelines and recipes will provide comprehensive coverage.
</think>
Queries: [
"what to eat for type 2 diabetes",
"type 2 diabetes breakfast guidelines",
"diabetic breakfast recipes"
]
Input Query: Latest AWS Lambda features for serverless applications
<think>
This is a product research query focused on recent updates. User wants current information about specific technology features, likely for implementation purposes. We'll target official docs and community insights.
</think>
Queries: [
"aws lambda features site:aws.amazon.com intitle:2025",
"new features lambda serverless"
]
</examples>`;

  // The actual request; the template ends with a newline.
  const task = `Now, process this query:
Input Query: ${searchQuery}
Intention: ${think}
`;

  return [instructions, examples, task].join('\n');
}
/**
 * Sends the prompt to the selected LLM provider and returns the raw response
 * text together with the total token count reported by that provider.
 *
 * - `gemini`: sends the prompt as a single user message via `generateContent`,
 *   using the query-rewriter temperature and a 1000-token output cap.
 * - `openai`: forces a call to the `generate` function whose parameters are
 *   derived from `responseSchema`, and returns the function-call arguments
 *   string (empty string when the response carries none).
 *
 * There is deliberately no blanket provider check before the switch: each
 * supported case validates its own provider, which lets the `ollama` and
 * unknown-provider branches report their own, more specific errors.
 *
 * NOTE(review): the Gemini request does not pass a response schema or JSON
 * MIME type, so the returned text is not guaranteed to be bare JSON — confirm
 * whether the provider instance is pre-configured for structured output.
 *
 * @param provider - Provider client; must match `providerType`.
 * @param prompt - Full prompt text to send.
 * @param providerType - Which provider API to use.
 * @returns The raw response text and the token total (0 when not reported).
 * @throws Error 'Invalid provider type' when `provider` does not match
 *   `providerType`; 'Ollama support coming soon' for `ollama`;
 *   'Unsupported provider type: …' for anything else.
 */
async function generateResponse(
  provider: AIProvider,
  prompt: string,
  providerType: ProviderType
): Promise<{ text: string; tokens: number }> {
  switch (providerType) {
    case 'gemini': {
      if (!isGeminiProvider(provider)) throw new Error('Invalid provider type');
      const result = await provider.generateContent({
        contents: [{ role: 'user', parts: [{ text: prompt }] }],
        generationConfig: {
          temperature: modelConfigs.queryRewriter.temperature,
          maxOutputTokens: 1000
        }
      });
      const response = await result.response;
      return {
        text: response.text(),
        tokens: response.usageMetadata?.totalTokenCount ?? 0
      };
    }
    case 'openai': {
      if (!isOpenAIProvider(provider)) throw new Error('Invalid provider type');
      const result = await provider.chat.completions.create({
        messages: [{ role: 'user', content: prompt }],
        model: modelConfigs.queryRewriter.model,
        temperature: modelConfigs.queryRewriter.temperature,
        max_tokens: 1000,
        functions: [{
          name: 'generate',
          parameters: getProviderSchema('openai', responseSchema) as OpenAIFunctionParameter
        }],
        function_call: { name: 'generate' }
      });
      // `choices` may be empty; fall back to empty text rather than crash on
      // an undefined first element, so the caller can still account for tokens.
      const functionCall = result.choices[0]?.message.function_call;
      return {
        text: functionCall?.arguments ?? '',
        tokens: result.usage?.total_tokens ?? 0
      };
    }
    case 'ollama':
      throw new Error('Ollama support coming soon');
    default:
      throw new Error(`Unsupported provider type: ${providerType}`);
  }
}
/**
 * Parses the raw model output into a keywords response and checks its shape.
 *
 * @param text - Raw text returned by the provider.
 * @returns The parsed response, guaranteed to carry a `queries` string array.
 * @throws Error 'No valid response generated' when the text is empty or the
 *   parsed value lacks a `queries` array of strings; a `SyntaxError` from
 *   `JSON.parse` when the text is not valid JSON.
 */
function parseKeywordsResponse(text: string): KeywordsResponse {
  // Models that are not in a strict JSON mode often wrap the payload in a
  // Markdown code fence; strip it so JSON.parse sees bare JSON.
  const cleaned = text
    .trim()
    .replace(/^```(?:json)?\s*/i, '')
    .replace(/\s*```$/, '');
  if (cleaned === '') throw new Error('No valid response generated');

  const parsed: unknown = JSON.parse(cleaned);
  if (typeof parsed !== 'object' || parsed === null) {
    throw new Error('No valid response generated');
  }

  const queries: unknown = (parsed as { queries?: unknown }).queries;
  if (!Array.isArray(queries) || !queries.every((q) => typeof q === 'string')) {
    throw new Error('No valid response generated');
  }
  return parsed as KeywordsResponse;
}

/**
 * Rewrites a search action into a small set of search-engine queries by
 * prompting the default LLM provider.
 *
 * Token usage is recorded as soon as the provider responds, before the
 * response is parsed, so tokens spent on an unusable response are still
 * accounted for.
 *
 * @param action - Search action containing the query and its intention.
 * @param tracker - Optional tracker to record usage on; a throwaway
 *   `TokenTracker` is used when omitted.
 * @returns The rewritten queries and the number of tokens consumed.
 * @throws Error when the provider is unsupported (Ollama gets a dedicated
 *   message), the provider call fails, or the response cannot be parsed into
 *   a valid list of queries.
 */
export async function rewriteQuery(action: SearchAction, tracker?: TokenTracker): Promise<{ queries: string[], tokens: number }> {
  try {
    const provider = ProviderFactory.createProvider();
    const providerType = aiConfig.defaultProvider;
    const prompt = getPrompt(action);
    const { text, tokens } = await generateResponse(provider, prompt, providerType);

    // Track before parsing: the tokens were spent even if parsing fails below.
    (tracker ?? new TokenTracker()).trackUsage('query-rewriter', tokens, providerType);

    const { queries } = parseKeywordsResponse(text);
    console.log('Query rewriter:', queries);
    return { queries, tokens };
  } catch (error) {
    console.error('Error in query rewriting:', error);
    if (error instanceof Error && error.message.includes('Ollama support')) {
      throw new Error('Ollama provider is not yet supported for query rewriting');
    }
    throw error;
  }
}