normalize host name

This commit is contained in:
Sha Zhou 2025-04-28 12:20:22 +08:00
parent ad5af8bdd0
commit dd6ee81baa
2 changed files with 14 additions and 4 deletions

View File

@ -13,6 +13,7 @@ import {TokenTracker} from "./utils/token-tracker";
import {ActionTracker} from "./utils/action-tracker";
import {ObjectGeneratorSafe} from "./utils/safe-generator";
import {jsonSchema} from "ai"; // or another converter library
import {normalizeHostName} from "./utils/url-tools";
const app = express();
@ -555,9 +556,9 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
body.messages,
body.max_returned_urls,
body.no_direct_answer,
body.boost_hostnames,
body.bad_hostnames,
body.only_hostnames,
body.boost_hostnames?.map(i => normalizeHostName(i)),
body.bad_hostnames?.map(i => normalizeHostName(i)),
body.only_hostnames?.map(i => normalizeHostName(i)),
body.max_annotations,
body.min_annotation_relevance
)

View File

@ -173,7 +173,7 @@ const extractUrlParts = (urlStr: string) => {
try {
const url = new URL(urlStr);
return {
hostname: url.hostname,
hostname: url.hostname.startsWith('www.') ? url.hostname.slice(4) : url.hostname,
path: url.pathname
};
} catch (e) {
@ -182,6 +182,15 @@ const extractUrlParts = (urlStr: string) => {
}
};
/**
 * Normalizes a caller-supplied host string so that equivalent spellings
 * compare equal.
 *
 * The input is first handed to `extractUrlParts`; when that yields a
 * non-empty hostname (i.e. the input was a parseable URL), that hostname is
 * returned unchanged. Otherwise the input is treated as a bare host name:
 * it is trimmed, lower-cased, and a leading `www.` is removed.
 *
 * @param hostStr - A full URL or a bare host name.
 * @returns The normalized host name.
 */
export const normalizeHostName = (hostStr: string): string => {
  // NOTE(review): relies on extractUrlParts reporting a falsy hostname for
  // input it cannot parse as a URL — confirm against its catch branch.
  const { hostname } = extractUrlParts(hostStr);
  if (hostname) {
    return hostname;
  }

  // Lower-case BEFORE testing the prefix so "WWW.Example.com" and
  // "www.example.com" normalize to the same value.
  const lowered = hostStr.trim().toLowerCase();
  return lowered.startsWith('www.') ? lowered.slice(4) : lowered;
};
// Function to count occurrences of hostnames and paths
export const countUrlParts = (urlItems: SearchSnippet[]) => {
const hostnameCount: Record<string, number> = {};