mirror of
https://github.com/jina-ai/node-DeepResearch.git
synced 2026-03-22 07:29:35 +08:00
normalize host name
This commit is contained in:
@@ -13,6 +13,7 @@ import {TokenTracker} from "./utils/token-tracker";
|
|||||||
import {ActionTracker} from "./utils/action-tracker";
|
import {ActionTracker} from "./utils/action-tracker";
|
||||||
import {ObjectGeneratorSafe} from "./utils/safe-generator";
|
import {ObjectGeneratorSafe} from "./utils/safe-generator";
|
||||||
import {jsonSchema} from "ai"; // or another converter library
|
import {jsonSchema} from "ai"; // or another converter library
|
||||||
|
import {normalizeHostName} from "./utils/url-tools";
|
||||||
|
|
||||||
const app = express();
|
const app = express();
|
||||||
|
|
||||||
@@ -555,9 +556,9 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
|
|||||||
body.messages,
|
body.messages,
|
||||||
body.max_returned_urls,
|
body.max_returned_urls,
|
||||||
body.no_direct_answer,
|
body.no_direct_answer,
|
||||||
body.boost_hostnames,
|
body.boost_hostnames?.map(i => normalizeHostName(i)),
|
||||||
body.bad_hostnames,
|
body.bad_hostnames?.map(i => normalizeHostName(i)),
|
||||||
body.only_hostnames,
|
body.only_hostnames?.map(i => normalizeHostName(i)),
|
||||||
body.max_annotations,
|
body.max_annotations,
|
||||||
body.min_annotation_relevance
|
body.min_annotation_relevance
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -173,7 +173,7 @@ const extractUrlParts = (urlStr: string) => {
|
|||||||
try {
|
try {
|
||||||
const url = new URL(urlStr);
|
const url = new URL(urlStr);
|
||||||
return {
|
return {
|
||||||
hostname: url.hostname,
|
hostname: url.hostname.startsWith('www.') ? url.hostname.slice(4) : url.hostname,
|
||||||
path: url.pathname
|
path: url.pathname
|
||||||
};
|
};
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
@@ -182,6 +182,15 @@ const extractUrlParts = (urlStr: string) => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Normalizes a host filter value (a full URL or a bare host name) into a
 * lowercase host name without a leading `www.` prefix.
 *
 * The request handler applies this to each entry of `boost_hostnames`,
 * `bad_hostnames` and `only_hostnames`, so differently written forms of the
 * same host ("https://www.Example.com/docs", "WWW.example.com",
 * " example.com ") should all collapse to one comparable key.
 *
 * @param hostStr - A URL or host name as supplied by the caller.
 * @returns The normalized host name. When the input cannot be resolved to a
 *          host by `extractUrlParts`, the trimmed input itself is normalized
 *          and returned.
 */
export const normalizeHostName = (hostStr: string): string => {
  const trimmed = hostStr.trim();

  // Delegate URL parsing to the shared helper so host keys are derived the
  // same way everywhere in this module.
  // NOTE(review): this relies on extractUrlParts yielding a falsy hostname
  // for input that is not a parseable URL (e.g. a bare "example.com") — its
  // catch branch is not visible here; confirm.
  const parsedHost = extractUrlParts(trimmed).hostname;

  // Lowercase BEFORE checking the prefix: `startsWith` is case-sensitive, so
  // "WWW.Example.com" would otherwise keep its "www." after lowercasing.
  // Applying the same steps to the parsed host keeps both paths consistent
  // and is a no-op when the helper already returned a normalized value.
  const candidate = (parsedHost || trimmed).toLowerCase();
  return candidate.startsWith('www.') ? candidate.slice(4) : candidate;
};
|
||||||
|
|
||||||
// Function to count occurrences of hostnames and paths
|
// Function to count occurrences of hostnames and paths
|
||||||
export const countUrlParts = (urlItems: SearchSnippet[]) => {
|
export const countUrlParts = (urlItems: SearchSnippet[]) => {
|
||||||
const hostnameCount: Record<string, number> = {};
|
const hostnameCount: Record<string, number> = {};
|
||||||
|
|||||||
Reference in New Issue
Block a user