Merge branch 'main' of github.com:jina-ai/node-DeepResearch

This commit is contained in:
Sha Zhou
2025-04-22 19:36:27 +08:00
7 changed files with 12 additions and 40 deletions

View File

@@ -10,7 +10,7 @@
"DEFAULT_MODEL_NAME": "" "DEFAULT_MODEL_NAME": ""
}, },
"defaults": { "defaults": {
"search_provider": "serper", "search_provider": "jina",
"llm_provider": "vertex", "llm_provider": "vertex",
"step_sleep": 500 "step_sleep": 500
}, },

View File

@@ -110,7 +110,7 @@ export const jinaAiMiddleware = (req: Request, res: Response, next: NextFunction
} else { } else {
rateLimitPolicy = [ rateLimitPolicy = [
RateLimitDesc.from({ RateLimitDesc.from({
occurrence: 2, occurrence: 1,
periodSeconds: 60 periodSeconds: 60
}) })
] ]

View File

@@ -302,7 +302,7 @@ async function executeSearchQueries(
console.log('Search query:', query); console.log('Search query:', query);
switch (SEARCH_PROVIDER) { switch (SEARCH_PROVIDER) {
case 'jina': case 'jina':
results = (await search(query.q, context.tokenTracker)).response?.data || []; results = (await search(query, context.tokenTracker)).response?.data || [];
break; break;
case 'duck': case 'duck':
results = (await duckSearch(query.q, {safeSearch: SafeSearchType.STRICT})).results; results = (await duckSearch(query.q, {safeSearch: SafeSearchType.STRICT})).results;
@@ -1003,11 +1003,7 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
await updateReferences(answerStep, allURLs) await updateReferences(answerStep, allURLs)
answerStep.mdAnswer = repairMarkdownFootnotesOuter(buildMdFromAnswer(answerStep)); answerStep.mdAnswer = repairMarkdownFootnotesOuter(buildMdFromAnswer(answerStep));
} else { } else {
answerStep.mdAnswer = answerStep.mdAnswer = buildMdFromAnswer(answerStep);
convertHtmlTablesToMd(
fixCodeBlockIndentation(
buildMdFromAnswer(answerStep))
);
} }
console.log(thisStep) console.log(thisStep)

View File

@@ -1,26 +1,23 @@
import axios from 'axios'; import axios from 'axios';
import { TokenTracker } from "../utils/token-tracker"; import { TokenTracker } from "../utils/token-tracker";
import { SearchResponse } from '../types'; import { SearchResponse, SERPQuery } from '../types';
import { JINA_API_KEY } from "../config"; import { JINA_API_KEY } from "../config";
export async function search( export async function search(
query: string, query: SERPQuery,
tracker?: TokenTracker tracker?: TokenTracker
): Promise<{ response: SearchResponse }> { ): Promise<{ response: SearchResponse }> {
if (!query.trim()) {
throw new Error('Query cannot be empty');
}
try { try {
const { data } = await axios.get<SearchResponse>( const { data } = await axios.post<SearchResponse>(
`https://s.jina.ai/?q=${encodeURIComponent(query)}`, `https://s.jina.ai/`,
query,
{ {
headers: { headers: {
'Accept': 'application/json', 'Accept': 'application/json',
'Authorization': `Bearer ${JINA_API_KEY}`, 'Authorization': `Bearer ${JINA_API_KEY}`,
'X-Respond-With': 'no-content', 'X-Respond-With': 'no-content',
}, },
timeout: 30000, timeout: 10000,
responseType: 'json' responseType: 'json'
} }
); );
@@ -39,7 +36,7 @@ export async function search(
const tokenTracker = tracker || new TokenTracker(); const tokenTracker = tracker || new TokenTracker();
tokenTracker.trackUsage('search', { tokenTracker.trackUsage('search', {
totalTokens, totalTokens,
promptTokens: query.length, promptTokens: query.q.length,
completionTokens: totalTokens completionTokens: totalTokens
}); });

View File

@@ -56,11 +56,9 @@ Leverage the soundbites from the context user provides to generate queries that
2. Schema usage rules: 2. Schema usage rules:
- Always include the 'q' field in every query object (should be the last field listed) - Always include the 'q' field in every query object (should be the last field listed)
- Use 'tbs' for time-sensitive queries (remove time constraints from 'q' field) - Use 'tbs' for time-sensitive queries (remove time constraints from 'q' field)
- Use 'gl' and 'hl' for region/language-specific queries (remove region/language from 'q' field)
- Use appropriate language code in 'hl' when using non-English queries
- Include 'location' only when geographically relevant - Include 'location' only when geographically relevant
- Never duplicate information in 'q' that is already specified in other fields - Never duplicate information in 'q' that is already specified in other fields
- List fields in this order: tbs, gl, hl, location, q - List fields in this order: tbs, location, q
<query-operators> <query-operators>
For the 'q' field content: For the 'q' field content:
@@ -98,8 +96,6 @@ queries: [
"q": "宝马行情" "q": "宝马行情"
}, },
{ {
"gl": "de",
"hl": "de",
"q": "BMW Gebrauchtwagen Probleme" "q": "BMW Gebrauchtwagen Probleme"
}, },
{ {
@@ -127,13 +123,10 @@ queries: [
}, },
{ {
"tbs": "qdr:y", "tbs": "qdr:y",
"gl": "au",
"location": "Perth", "location": "Perth",
"q": "aboriginal firestick farming soil restoration" "q": "aboriginal firestick farming soil restoration"
}, },
{ {
"gl": "uk",
"hl": "en",
"location": "Totnes", "location": "Totnes",
"q": "comparison no-till vs biochar vs compost tea" "q": "comparison no-till vs biochar vs compost tea"
}, },
@@ -143,14 +136,11 @@ queries: [
"q": "soil microbial inoculants research trials" "q": "soil microbial inoculants research trials"
}, },
{ {
"gl": "at",
"hl": "de",
"location": "Graz", "location": "Graz",
"q": "Humusaufbau Alpenregion Techniken" "q": "Humusaufbau Alpenregion Techniken"
}, },
{ {
"tbs": "qdr:m", "tbs": "qdr:m",
"gl": "ca",
"location": "Guelph", "location": "Guelph",
"q": "regenerative agriculture exaggerated claims evidence" "q": "regenerative agriculture exaggerated claims evidence"
} }
@@ -166,33 +156,26 @@ AIリテラシー向上させる方法か...なるほど。最近AIがどんど
</think> </think>
queries: [ queries: [
{ {
"hl": "ja",
"q": "AI技術 限界 誇大宣伝" "q": "AI技術 限界 誇大宣伝"
}, },
{ {
"gl": "jp",
"hl": "ja",
"q": "AIリテラシー 学習ステップ 体系化" "q": "AIリテラシー 学習ステップ 体系化"
}, },
{ {
"tbs": "qdr:y", "tbs": "qdr:y",
"hl": "ja",
"q": "AI歴史 失敗事例 教訓" "q": "AI歴史 失敗事例 教訓"
}, },
{ {
"hl": "ja",
"q": "AIリテラシー vs プログラミング vs 批判思考" "q": "AIリテラシー vs プログラミング vs 批判思考"
}, },
{ {
"tbs": "qdr:m", "tbs": "qdr:m",
"hl": "ja",
"q": "AI最新トレンド 必須スキル" "q": "AI最新トレンド 必須スキル"
}, },
{ {
"q": "artificial intelligence literacy fundamentals" "q": "artificial intelligence literacy fundamentals"
}, },
{ {
"hl": "ja",
"q": "AIリテラシー向上 無意味 理由" "q": "AIリテラシー向上 無意味 理由"
} }
] ]

View File

@@ -8,8 +8,6 @@ type BaseAction = {
export type SERPQuery = { export type SERPQuery = {
q: string, q: string,
hl?: string,
gl?: string,
location?: string, location?: string,
tbs?: string, tbs?: string,
} }

View File

@@ -122,8 +122,6 @@ export class Schemas {
queries: z.array( queries: z.array(
z.object({ z.object({
tbs: z.enum(['qdr:h', 'qdr:d', 'qdr:w', 'qdr:m', 'qdr:y']).describe('time-based search filter, must use this field if the search request asks for latest info. qdr:h for past hour, qdr:d for past 24 hours, qdr:w for past week, qdr:m for past month, qdr:y for past year. Choose exactly one.'), tbs: z.enum(['qdr:h', 'qdr:d', 'qdr:w', 'qdr:m', 'qdr:y']).describe('time-based search filter, must use this field if the search request asks for latest info. qdr:h for past hour, qdr:d for past 24 hours, qdr:w for past week, qdr:m for past month, qdr:y for past year. Choose exactly one.'),
gl: z.string().describe('defines the country to use for the search. a two-letter country code. e.g., us for the United States, uk for United Kingdom, or fr for France.'),
hl: z.string().describe('the language to use for the search. a two-letter language code. e.g., en for English, es for Spanish, or fr for French.'),
location: z.string().describe('defines from where you want the search to originate. It is recommended to specify location at the city level in order to simulate a real users search.').optional(), location: z.string().describe('defines from where you want the search to originate. It is recommended to specify location at the city level in order to simulate a real users search.').optional(),
q: z.string().describe('keyword-based search query, 2-3 words preferred, total length < 30 characters').max(50), q: z.string().describe('keyword-based search query, 2-3 words preferred, total length < 30 characters').max(50),
})) }))