mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2026-03-22 07:29:35 +08:00
fix: url datetime guessing
This commit is contained in:
12
src/agent.ts
12
src/agent.ts
@@ -1,7 +1,7 @@
|
|||||||
import {ZodObject} from 'zod';
|
import {ZodObject} from 'zod';
|
||||||
import {CoreMessage} from 'ai';
|
import {CoreMessage} from 'ai';
|
||||||
import {SEARCH_PROVIDER, STEP_SLEEP} from "./config";
|
import {SEARCH_PROVIDER, STEP_SLEEP} from "./config";
|
||||||
import {readUrl, removeAllLineBreaks} from "./tools/read";
|
import {readUrl} from "./tools/read";
|
||||||
import fs from 'fs/promises';
|
import fs from 'fs/promises';
|
||||||
import {SafeSearchType, search as duckSearch} from "duck-duck-scrape";
|
import {SafeSearchType, search as duckSearch} from "duck-duck-scrape";
|
||||||
import {braveSearch} from "./tools/brave-search";
|
import {braveSearch} from "./tools/brave-search";
|
||||||
@@ -35,7 +35,13 @@ import {
|
|||||||
normalizeUrl, sampleMultinomial,
|
normalizeUrl, sampleMultinomial,
|
||||||
weightedURLToString, getLastModified
|
weightedURLToString, getLastModified
|
||||||
} from "./utils/url-tools";
|
} from "./utils/url-tools";
|
||||||
import {buildMdFromAnswer, chooseK, removeExtraLineBreaks, removeHTMLtags} from "./utils/text-tools";
|
import {
|
||||||
|
buildMdFromAnswer,
|
||||||
|
chooseK,
|
||||||
|
removeAllLineBreaks,
|
||||||
|
removeExtraLineBreaks,
|
||||||
|
removeHTMLtags
|
||||||
|
} from "./utils/text-tools";
|
||||||
import {MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas} from "./utils/schemas";
|
import {MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas} from "./utils/schemas";
|
||||||
|
|
||||||
async function sleep(ms: number) {
|
async function sleep(ms: number) {
|
||||||
@@ -687,7 +693,7 @@ You decided to think out of the box or cut from a completely different angle.
|
|||||||
const urlResults = await Promise.all(
|
const urlResults = await Promise.all(
|
||||||
uniqueURLs.map(async url => {
|
uniqueURLs.map(async url => {
|
||||||
try {
|
try {
|
||||||
const {response} = await readUrl(url, context.tokenTracker);
|
const {response} = await readUrl(url, true, context.tokenTracker);
|
||||||
const {data} = response;
|
const {data} = response;
|
||||||
const guessedTime = await getLastModified(url);
|
const guessedTime = await getLastModified(url);
|
||||||
console.log('Guessed time for', url, guessedTime)
|
console.log('Guessed time for', url, guessedTime)
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
import {GenerateObjectResult} from 'ai';
|
import {GenerateObjectResult} from 'ai';
|
||||||
import {AnswerAction, EvaluationResponse, EvaluationType, PromptPair, TrackerContext} from '../types';
|
import {AnswerAction, EvaluationResponse, EvaluationType, PromptPair, TrackerContext} from '../types';
|
||||||
import {readUrl, removeAllLineBreaks} from "./read";
|
import {readUrl} from "./read";
|
||||||
import {ObjectGeneratorSafe} from "../utils/safe-generator";
|
import {ObjectGeneratorSafe} from "../utils/safe-generator";
|
||||||
import {Schemas} from "../utils/schemas";
|
import {Schemas} from "../utils/schemas";
|
||||||
|
import {removeAllLineBreaks} from "../utils/text-tools";
|
||||||
|
|
||||||
const TOOL_NAME = 'evaluator';
|
const TOOL_NAME = 'evaluator';
|
||||||
|
|
||||||
@@ -696,7 +697,7 @@ async function fetchSourceContent(urls: string[], trackers: TrackerContext, sche
|
|||||||
const results = await Promise.all(
|
const results = await Promise.all(
|
||||||
urls.map(async (url) => {
|
urls.map(async (url) => {
|
||||||
try {
|
try {
|
||||||
const {response} = await readUrl(url, trackers.tokenTracker);
|
const {response} = await readUrl(url, false, trackers.tokenTracker);
|
||||||
const content = response?.data?.content || '';
|
const content = response?.data?.content || '';
|
||||||
return removeAllLineBreaks(content);
|
return removeAllLineBreaks(content);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import { TokenTracker } from "../utils/token-tracker";
|
|||||||
import { ReadResponse } from '../types';
|
import { ReadResponse } from '../types';
|
||||||
import { JINA_API_KEY } from "../config";
|
import { JINA_API_KEY } from "../config";
|
||||||
|
|
||||||
export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response: ReadResponse }> {
|
export function readUrl(url: string, withAllLinks?: boolean, tracker?: TokenTracker): Promise<{ response: ReadResponse }> {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
if (!url.trim()) {
|
if (!url.trim()) {
|
||||||
reject(new Error('URL cannot be empty'));
|
reject(new Error('URL cannot be empty'));
|
||||||
@@ -11,21 +11,22 @@ export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response
|
|||||||
}
|
}
|
||||||
|
|
||||||
const data = JSON.stringify({ url });
|
const data = JSON.stringify({ url });
|
||||||
|
const headers: Record<string, any> = {
|
||||||
|
'Accept': 'application/json',
|
||||||
|
'Authorization': `Bearer ${JINA_API_KEY}`,
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'X-Retain-Images': 'none',
|
||||||
|
};
|
||||||
|
if (withAllLinks) {
|
||||||
|
headers['X-With-Links-Summary'] = 'all'
|
||||||
|
}
|
||||||
|
|
||||||
const options = {
|
const options = {
|
||||||
hostname: 'r.jina.ai',
|
hostname: 'r.jina.ai',
|
||||||
port: 443,
|
port: 443,
|
||||||
path: '/',
|
path: '/',
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: {
|
headers
|
||||||
'Accept': 'application/json',
|
|
||||||
'Authorization': `Bearer ${JINA_API_KEY}`,
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
'Content-Length': data.length,
|
|
||||||
'X-Retain-Images': 'none',
|
|
||||||
'X-With-Links-Summary': 'all',
|
|
||||||
'X-Timeout': '30'
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const req = https.request(options, (res) => {
|
const req = https.request(options, (res) => {
|
||||||
@@ -97,7 +98,3 @@ export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response
|
|||||||
req.end();
|
req.end();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
export function removeAllLineBreaks(text: string) {
|
|
||||||
return text.replace(/(\r\n|\n|\r)/gm, " ");
|
|
||||||
}
|
|
||||||
@@ -149,6 +149,9 @@ export function removeHTMLtags(text: string) {
|
|||||||
return text.replace(/<[^>]*>?/gm, '');
|
return text.replace(/<[^>]*>?/gm, '');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function removeAllLineBreaks(text: string) {
|
||||||
|
return text.replace(/(\r\n|\n|\r)/gm, " ");
|
||||||
|
}
|
||||||
|
|
||||||
export function getI18nText(key: string, lang = 'en', params: Record<string, string> = {}) {
|
export function getI18nText(key: string, lang = 'en', params: Record<string, string> = {}) {
|
||||||
// 获取i18n数据
|
// 获取i18n数据
|
||||||
|
|||||||
Reference in New Issue
Block a user