mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2026-03-22 07:29:35 +08:00
fix: url datetime guessing
This commit is contained in:
12
src/agent.ts
12
src/agent.ts
@@ -1,7 +1,7 @@
|
||||
import {ZodObject} from 'zod';
|
||||
import {CoreMessage} from 'ai';
|
||||
import {SEARCH_PROVIDER, STEP_SLEEP} from "./config";
|
||||
import {readUrl, removeAllLineBreaks} from "./tools/read";
|
||||
import {readUrl} from "./tools/read";
|
||||
import fs from 'fs/promises';
|
||||
import {SafeSearchType, search as duckSearch} from "duck-duck-scrape";
|
||||
import {braveSearch} from "./tools/brave-search";
|
||||
@@ -35,7 +35,13 @@ import {
|
||||
normalizeUrl, sampleMultinomial,
|
||||
weightedURLToString, getLastModified
|
||||
} from "./utils/url-tools";
|
||||
import {buildMdFromAnswer, chooseK, removeExtraLineBreaks, removeHTMLtags} from "./utils/text-tools";
|
||||
import {
|
||||
buildMdFromAnswer,
|
||||
chooseK,
|
||||
removeAllLineBreaks,
|
||||
removeExtraLineBreaks,
|
||||
removeHTMLtags
|
||||
} from "./utils/text-tools";
|
||||
import {MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas} from "./utils/schemas";
|
||||
|
||||
async function sleep(ms: number) {
|
||||
@@ -687,7 +693,7 @@ You decided to think out of the box or cut from a completely different angle.
|
||||
const urlResults = await Promise.all(
|
||||
uniqueURLs.map(async url => {
|
||||
try {
|
||||
const {response} = await readUrl(url, context.tokenTracker);
|
||||
const {response} = await readUrl(url, true, context.tokenTracker);
|
||||
const {data} = response;
|
||||
const guessedTime = await getLastModified(url);
|
||||
console.log('Guessed time for', url, guessedTime)
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
import {GenerateObjectResult} from 'ai';
|
||||
import {AnswerAction, EvaluationResponse, EvaluationType, PromptPair, TrackerContext} from '../types';
|
||||
import {readUrl, removeAllLineBreaks} from "./read";
|
||||
import {readUrl} from "./read";
|
||||
import {ObjectGeneratorSafe} from "../utils/safe-generator";
|
||||
import {Schemas} from "../utils/schemas";
|
||||
import {removeAllLineBreaks} from "../utils/text-tools";
|
||||
|
||||
const TOOL_NAME = 'evaluator';
|
||||
|
||||
@@ -696,7 +697,7 @@ async function fetchSourceContent(urls: string[], trackers: TrackerContext, sche
|
||||
const results = await Promise.all(
|
||||
urls.map(async (url) => {
|
||||
try {
|
||||
const {response} = await readUrl(url, trackers.tokenTracker);
|
||||
const {response} = await readUrl(url, false, trackers.tokenTracker);
|
||||
const content = response?.data?.content || '';
|
||||
return removeAllLineBreaks(content);
|
||||
} catch (error) {
|
||||
|
||||
@@ -3,7 +3,7 @@ import { TokenTracker } from "../utils/token-tracker";
|
||||
import { ReadResponse } from '../types';
|
||||
import { JINA_API_KEY } from "../config";
|
||||
|
||||
export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response: ReadResponse }> {
|
||||
export function readUrl(url: string, withAllLinks?: boolean, tracker?: TokenTracker): Promise<{ response: ReadResponse }> {
|
||||
return new Promise((resolve, reject) => {
|
||||
if (!url.trim()) {
|
||||
reject(new Error('URL cannot be empty'));
|
||||
@@ -11,21 +11,22 @@ export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response
|
||||
}
|
||||
|
||||
const data = JSON.stringify({ url });
|
||||
const headers: Record<string, any> = {
|
||||
'Accept': 'application/json',
|
||||
'Authorization': `Bearer ${JINA_API_KEY}`,
|
||||
'Content-Type': 'application/json',
|
||||
'X-Retain-Images': 'none',
|
||||
};
|
||||
if (withAllLinks) {
|
||||
headers['X-With-Links-Summary'] = 'all'
|
||||
}
|
||||
|
||||
const options = {
|
||||
hostname: 'r.jina.ai',
|
||||
port: 443,
|
||||
path: '/',
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Accept': 'application/json',
|
||||
'Authorization': `Bearer ${JINA_API_KEY}`,
|
||||
'Content-Type': 'application/json',
|
||||
'Content-Length': data.length,
|
||||
'X-Retain-Images': 'none',
|
||||
'X-With-Links-Summary': 'all',
|
||||
'X-Timeout': '30'
|
||||
}
|
||||
headers
|
||||
};
|
||||
|
||||
const req = https.request(options, (res) => {
|
||||
@@ -97,7 +98,3 @@ export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response
|
||||
req.end();
|
||||
});
|
||||
}
|
||||
|
||||
export function removeAllLineBreaks(text: string) {
|
||||
return text.replace(/(\r\n|\n|\r)/gm, " ");
|
||||
}
|
||||
@@ -149,6 +149,9 @@ export function removeHTMLtags(text: string) {
|
||||
return text.replace(/<[^>]*>?/gm, '');
|
||||
}
|
||||
|
||||
export function removeAllLineBreaks(text: string) {
|
||||
return text.replace(/(\r\n|\n|\r)/gm, " ");
|
||||
}
|
||||
|
||||
export function getI18nText(key: string, lang = 'en', params: Record<string, string> = {}) {
|
||||
// 获取i18n数据
|
||||
|
||||
Reference in New Issue
Block a user