From f264fc01a3cf5f6815db2c5ae50a6d6ff4fdde50 Mon Sep 17 00:00:00 2001 From: Han Xiao Date: Mon, 10 Mar 2025 17:35:34 +0800 Subject: [PATCH] fix: url datetime guessing --- src/agent.ts | 12 +++++++++--- src/tools/evaluator.ts | 5 +++-- src/tools/read.ts | 25 +++++++++++-------------- src/utils/text-tools.ts | 3 +++ 4 files changed, 26 insertions(+), 19 deletions(-) diff --git a/src/agent.ts b/src/agent.ts index 88f50b0..ad31d42 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -1,7 +1,7 @@ import {ZodObject} from 'zod'; import {CoreMessage} from 'ai'; import {SEARCH_PROVIDER, STEP_SLEEP} from "./config"; -import {readUrl, removeAllLineBreaks} from "./tools/read"; +import {readUrl} from "./tools/read"; import fs from 'fs/promises'; import {SafeSearchType, search as duckSearch} from "duck-duck-scrape"; import {braveSearch} from "./tools/brave-search"; @@ -35,7 +35,13 @@ import { normalizeUrl, sampleMultinomial, weightedURLToString, getLastModified } from "./utils/url-tools"; -import {buildMdFromAnswer, chooseK, removeExtraLineBreaks, removeHTMLtags} from "./utils/text-tools"; +import { + buildMdFromAnswer, + chooseK, + removeAllLineBreaks, + removeExtraLineBreaks, + removeHTMLtags +} from "./utils/text-tools"; import {MAX_QUERIES_PER_STEP, MAX_REFLECT_PER_STEP, MAX_URLS_PER_STEP, Schemas} from "./utils/schemas"; async function sleep(ms: number) { @@ -687,7 +693,7 @@ You decided to think out of the box or cut from a completely different angle. const urlResults = await Promise.all( uniqueURLs.map(async url => { try { - const {response} = await readUrl(url, context.tokenTracker); + const {response} = await readUrl(url, true, context.tokenTracker); const {data} = response; const guessedTime = await getLastModified(url); console.log('Guessed time for', url, guessedTime) diff --git a/src/tools/evaluator.ts b/src/tools/evaluator.ts index 91f8779..0fe41f3 100644 --- a/src/tools/evaluator.ts +++ b/src/tools/evaluator.ts @@ -1,8 +1,9 @@ import {GenerateObjectResult} from 'ai'; import {AnswerAction, EvaluationResponse, EvaluationType, PromptPair, TrackerContext} from '../types'; -import {readUrl, removeAllLineBreaks} from "./read"; +import {readUrl} from "./read"; import {ObjectGeneratorSafe} from "../utils/safe-generator"; import {Schemas} from "../utils/schemas"; +import {removeAllLineBreaks} from "../utils/text-tools"; const TOOL_NAME = 'evaluator'; @@ -696,7 +697,7 @@ async function fetchSourceContent(urls: string[], trackers: TrackerContext, sche const results = await Promise.all( urls.map(async (url) => { try { - const {response} = await readUrl(url, trackers.tokenTracker); + const {response} = await readUrl(url, false, trackers.tokenTracker); const content = response?.data?.content || ''; return removeAllLineBreaks(content); } catch (error) { diff --git a/src/tools/read.ts b/src/tools/read.ts index d64f60f..c03f218 100644 --- a/src/tools/read.ts +++ b/src/tools/read.ts @@ -3,7 +3,7 @@ import { TokenTracker } from "../utils/token-tracker"; import { ReadResponse } from '../types'; import { JINA_API_KEY } from "../config"; -export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response: ReadResponse }> { +export function readUrl(url: string, withAllLinks?: boolean, tracker?: TokenTracker): Promise<{ response: ReadResponse }> { return new Promise((resolve, reject) => { if (!url.trim()) { reject(new Error('URL cannot be empty')); @@ -11,21 +11,22 @@ export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response } const data = JSON.stringify({ url }); + const headers: Record = { + 'Accept': 'application/json', + 'Authorization': `Bearer ${JINA_API_KEY}`, + 'Content-Type': 'application/json', + 'X-Retain-Images': 'none', + }; + if (withAllLinks) { + headers['X-With-Links-Summary'] = 'all' + } const options = { hostname: 'r.jina.ai', port: 443, path: '/', method: 'POST', - headers: { - 'Accept': 'application/json', - 'Authorization': `Bearer ${JINA_API_KEY}`, - 'Content-Type': 'application/json', - 'Content-Length': data.length, - 'X-Retain-Images': 'none', - 'X-With-Links-Summary': 'all', - 'X-Timeout': '30' - } + headers }; const req = https.request(options, (res) => { @@ -96,8 +97,4 @@ export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response req.write(data); req.end(); }); -} - -export function removeAllLineBreaks(text: string) { - return text.replace(/(\r\n|\n|\r)/gm, " "); } \ No newline at end of file diff --git a/src/utils/text-tools.ts b/src/utils/text-tools.ts index 74347f8..fcd6d7c 100644 --- a/src/utils/text-tools.ts +++ b/src/utils/text-tools.ts @@ -149,6 +149,9 @@ export function removeHTMLtags(text: string) { return text.replace(/<[^>]*>?/gm, ''); } +export function removeAllLineBreaks(text: string) { + return text.replace(/(\r\n|\n|\r)/gm, " "); +} export function getI18nText(key: string, lang = 'en', params: Record = {}) { // 获取i18n数据