mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
revert: no spam filter
This commit is contained in:
parent
d824957d29
commit
d5cb62f7ea
@ -488,12 +488,12 @@ export async function processURLs(
|
||||
|
||||
// check if content is likely a blocked msg from paywall, bot detection, etc.
|
||||
// only run the classifier on content no longer than spamDetectLength chars, as most blocking messages are short
|
||||
// const spamDetectLength = 1000;
|
||||
// const isGoodContent = data.content.length > spamDetectLength || await classifyText(data.content);
|
||||
// if (!isGoodContent) {
|
||||
// console.error(`Blocked content ${data.content.length}:`, url, data.content.slice(0, spamDetectLength));
|
||||
// throw new Error(`Blocked content ${url}`);
|
||||
// }
|
||||
const spamDetectLength = 300;
|
||||
const isGoodContent = data.content.length > spamDetectLength || await classifyText(data.content);
|
||||
if (!isGoodContent) {
|
||||
console.error(`Blocked content ${data.content.length}:`, url, data.content.slice(0, spamDetectLength));
|
||||
throw new Error(`Blocked content ${url}`);
|
||||
}
|
||||
|
||||
// Add to knowledge base
|
||||
allKnowledge.push({
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user