mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2025-12-26 06:28:56 +08:00
normalize host name
This commit is contained in:
parent
ad5af8bdd0
commit
dd6ee81baa
@ -13,6 +13,7 @@ import {TokenTracker} from "./utils/token-tracker";
|
||||
import {ActionTracker} from "./utils/action-tracker";
|
||||
import {ObjectGeneratorSafe} from "./utils/safe-generator";
|
||||
import {jsonSchema} from "ai"; // or another converter library
|
||||
import {normalizeHostName} from "./utils/url-tools";
|
||||
|
||||
const app = express();
|
||||
|
||||
@ -555,9 +556,9 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
|
||||
body.messages,
|
||||
body.max_returned_urls,
|
||||
body.no_direct_answer,
|
||||
body.boost_hostnames,
|
||||
body.bad_hostnames,
|
||||
body.only_hostnames,
|
||||
body.boost_hostnames?.map(i => normalizeHostName(i)),
|
||||
body.bad_hostnames?.map(i => normalizeHostName(i)),
|
||||
body.only_hostnames?.map(i => normalizeHostName(i)),
|
||||
body.max_annotations,
|
||||
body.min_annotation_relevance
|
||||
)
|
||||
|
||||
@ -173,7 +173,7 @@ const extractUrlParts = (urlStr: string) => {
|
||||
try {
|
||||
const url = new URL(urlStr);
|
||||
return {
|
||||
hostname: url.hostname,
|
||||
hostname: url.hostname.startsWith('www.') ? url.hostname.slice(4) : url.hostname,
|
||||
path: url.pathname
|
||||
};
|
||||
} catch (e) {
|
||||
@ -182,6 +182,15 @@ const extractUrlParts = (urlStr: string) => {
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Normalizes a host filter value (for example an entry of `boost_hostnames`,
 * `bad_hostnames` or `only_hostnames`) into a canonical, comparable form.
 *
 * The input may be a full URL or a bare host name. It is first handed to
 * `extractUrlParts`; when that helper yields a non-empty hostname, that
 * hostname is used. Otherwise the input itself is treated as the host name.
 *
 * @param hostStr - Full URL or bare host name supplied by the caller.
 * @returns The lowercased host name without a leading `www.` prefix in the
 *          bare-host case; the helper's hostname (lowercased) otherwise.
 */
export const normalizeHostName = (hostStr: string): string => {
  const parsedHost = extractUrlParts(hostStr).hostname;
  if (parsedHost) {
    // Lowercase defensively so both branches return the same letter case.
    return parsedHost.toLowerCase();
  }

  // The helper produced no hostname, so fall back to the raw input.
  // Lowercase (and trim) BEFORE testing the prefix: `startsWith` is
  // case-sensitive, so 'WWW.Example.com' would otherwise keep its prefix.
  const bareHost = hostStr.trim().toLowerCase();
  return bareHost.startsWith('www.') ? bareHost.slice(4) : bareHost;
};
|
||||
|
||||
// Function to count occurrences of hostnames and paths
|
||||
export const countUrlParts = (urlItems: SearchSnippet[]) => {
|
||||
const hostnameCount: Record<string, number> = {};
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user