mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
fix: normalize url
This commit is contained in:
parent
1067a0c256
commit
5c36410b54
11
src/agent.ts
11
src/agent.ts
@ -403,6 +403,7 @@ export async function getResponse(question?: string,
|
||||
|
||||
const allURLs: Record<string, SearchSnippet> = {};
|
||||
const visitedURLs: string[] = [];
|
||||
const badURLs: string[] = [];
|
||||
const evaluationMetrics: Record<string, EvaluationType[]> = {};
|
||||
// reserve the 10% final budget for the beast mode
|
||||
const regularBudget = tokenBudget * 0.9;
|
||||
@ -515,14 +516,13 @@ export async function getResponse(question?: string,
|
||||
allKnowledge,
|
||||
allURLs,
|
||||
visitedURLs,
|
||||
badURLs,
|
||||
SchemaGen,
|
||||
currentQuestion
|
||||
);
|
||||
|
||||
// is this really required???
|
||||
// if (!evaluationMetrics[currentQuestion].includes('attribution')) {
|
||||
// evaluationMetrics[currentQuestion].push('attribution')
|
||||
// }
|
||||
// remove references whose urls are in badURLs
|
||||
thisStep.references = thisStep.references.filter(ref => !badURLs.includes(ref.url));
|
||||
}
|
||||
|
||||
updateContext({
|
||||
@ -764,6 +764,7 @@ You decided to think out of the box or cut from a completely different angle.
|
||||
allKnowledge,
|
||||
allURLs,
|
||||
visitedURLs,
|
||||
badURLs,
|
||||
SchemaGen,
|
||||
currentQuestion
|
||||
);
|
||||
@ -908,7 +909,7 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
|
||||
result: thisStep,
|
||||
context,
|
||||
visitedURLs: returnedURLs,
|
||||
readURLs: visitedURLs,
|
||||
readURLs: visitedURLs.filter(url => !badURLs.includes(url)),
|
||||
allURLs: weightedURLs.map(r => r.url)
|
||||
};
|
||||
}
|
||||
|
||||
@ -390,14 +390,17 @@ export async function processURLs(
|
||||
allKnowledge: KnowledgeItem[],
|
||||
allURLs: Record<string, SearchSnippet>,
|
||||
visitedURLs: string[],
|
||||
badURLs: string[],
|
||||
schemaGen: Schemas,
|
||||
question: string
|
||||
): Promise<{ urlResults: any[], success: boolean }> {
|
||||
): Promise<{ urlResults: any[], success: boolean, badURLs: string[] }> {
|
||||
// Skip if no URLs to process
|
||||
if (urls.length === 0) {
|
||||
return {urlResults: [], success: false};
|
||||
return {urlResults: [], success: false, badURLs: []};
|
||||
}
|
||||
|
||||
const badHostnames: string[] = [];
|
||||
|
||||
// Track the reading action
|
||||
const thisStep: VisitAction = {
|
||||
action: 'visit',
|
||||
@ -455,12 +458,29 @@ export async function processURLs(
|
||||
});
|
||||
|
||||
return {url, result: response};
|
||||
} catch (error) {
|
||||
} catch (error: any) {
|
||||
console.error('Error reading URL:', url, error);
|
||||
badURLs.push(url);
|
||||
// Extract hostname from the URL
|
||||
if (
|
||||
(error?.name === 'ParamValidationError' && error.message?.includes('Domain')) ||
|
||||
(error?.name === 'AssertionFailureError' && error.message?.includes('resolve host name')) ||
|
||||
error?.message?.includes("Couldn't resolve host name") ||
|
||||
error?.message?.includes("could not be resolved")
|
||||
) {
|
||||
let hostname = '';
|
||||
try {
|
||||
hostname = extractUrlParts(url).hostname;
|
||||
} catch (e) {
|
||||
console.error('Error parsing URL for hostname:', url, e);
|
||||
}
|
||||
badHostnames.push(hostname);
|
||||
console.log(`Added ${hostname} to bad hostnames list`);
|
||||
}
|
||||
return null;
|
||||
} finally {
|
||||
// Only add valid URLs to visitedURLs list
|
||||
if (url && typeof url === 'string') {
|
||||
if (url) {
|
||||
visitedURLs.push(url);
|
||||
}
|
||||
}
|
||||
@ -470,8 +490,20 @@ export async function processURLs(
|
||||
// Filter out null results without changing the original array
|
||||
const validResults = urlResults.filter(Boolean);
|
||||
|
||||
// remove any URL with bad hostnames from allURLs
|
||||
if (badHostnames.length > 0) {
|
||||
Object.keys(allURLs).forEach(url => {
|
||||
if (badHostnames.includes(extractUrlParts(url).hostname)) {
|
||||
delete allURLs[url];
|
||||
console.log(`Removed ${url} from allURLs`);
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
return {
|
||||
urlResults: validResults,
|
||||
success: validResults.length > 0
|
||||
success: validResults.length > 0,
|
||||
badURLs
|
||||
};
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user