From 25606299ba4f87e2867c3796d94269d4b850a266 Mon Sep 17 00:00:00 2001 From: Han Xiao Date: Thu, 20 Mar 2025 14:15:14 +0800 Subject: [PATCH] fix: md table render --- package-lock.json | 478 +++++++++++++++++++++++++++++++++++++++- package.json | 2 + src/agent.ts | 33 +-- src/utils/text-tools.ts | 217 ++++++++++++++++++ 4 files changed, 709 insertions(+), 21 deletions(-) diff --git a/package-lock.json b/package-lock.json index d184bd1..80f440b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "dependencies": { "@ai-sdk/google": "^1.0.0", "@ai-sdk/openai": "^1.1.9", + "@types/jsdom": "^21.1.7", "ai": "^4.1.26", "axios": "^1.7.9", "commander": "^13.1.0", @@ -19,6 +20,7 @@ "duck-duck-scrape": "^2.2.7", "express": "^4.21.2", "hjson": "^3.2.2", + "jsdom": "^26.0.0", "node-fetch": "^3.3.2", "undici": "^7.3.0", "zod": "^3.22.4", @@ -224,6 +226,25 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@asamuzakjp/css-color": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/@asamuzakjp/css-color/-/css-color-3.1.1.tgz", + "integrity": "sha512-hpRD68SV2OMcZCsrbdkccTw5FXjNDLo5OuqSHyHZfwweGsDWZwDJ2+gONyNAbazZclobMirACLw0lk8WVxIqxA==", + "license": "MIT", + "dependencies": { + "@csstools/css-calc": "^2.1.2", + "@csstools/css-color-parser": "^3.0.8", + "@csstools/css-parser-algorithms": "^3.0.4", + "@csstools/css-tokenizer": "^3.0.3", + "lru-cache": "^10.4.3" + } + }, + "node_modules/@asamuzakjp/css-color/node_modules/lru-cache": { + "version": "10.4.3", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", + "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", + "license": "ISC" + }, "node_modules/@babel/code-frame": { "version": "7.26.2", "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.26.2.tgz", @@ -764,6 +785,116 @@ "node": ">=12" } }, + "node_modules/@csstools/color-helpers": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/@csstools/color-helpers/-/color-helpers-5.0.2.tgz", + "integrity": "sha512-JqWH1vsgdGcw2RR6VliXXdA0/59LttzlU8UlRT/iUUsEeWfYq8I+K0yhihEUTTHLRm1EXvpsCx3083EU15ecsA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT-0", + "engines": { + "node": ">=18" + } + }, + "node_modules/@csstools/css-calc": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/@csstools/css-calc/-/css-calc-2.1.2.tgz", + "integrity": "sha512-TklMyb3uBB28b5uQdxjReG4L80NxAqgrECqLZFQbyLekwwlcDDS8r3f07DKqeo8C4926Br0gf/ZDe17Zv4wIuw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@csstools/css-parser-algorithms": "^3.0.4", + "@csstools/css-tokenizer": "^3.0.3" + } + }, + "node_modules/@csstools/css-color-parser": { + "version": "3.0.8", + "resolved": "https://registry.npmjs.org/@csstools/css-color-parser/-/css-color-parser-3.0.8.tgz", + "integrity": "sha512-pdwotQjCCnRPuNi06jFuP68cykU1f3ZWExLe/8MQ1LOs8Xq+fTkYgd+2V8mWUWMrOn9iS2HftPVaMZDaXzGbhQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "dependencies": { + "@csstools/color-helpers": "^5.0.2", + "@csstools/css-calc": "^2.1.2" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@csstools/css-parser-algorithms": "^3.0.4", + "@csstools/css-tokenizer": "^3.0.3" + } + }, + "node_modules/@csstools/css-parser-algorithms": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@csstools/css-parser-algorithms/-/css-parser-algorithms-3.0.4.tgz", + "integrity": "sha512-Up7rBoV77rv29d3uKHUIVubz1BTcgyUK72IvCQAbfbMv584xHcGKCKbWh7i8hPrRJ7qU4Y8IO3IY9m+iTB7P3A==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@csstools/css-tokenizer": "^3.0.3" + } + }, + "node_modules/@csstools/css-tokenizer": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/@csstools/css-tokenizer/-/css-tokenizer-3.0.3.tgz", + "integrity": "sha512-UJnjoFsmxfKUdNYdWgOB0mWUypuLvAfQPH1+pyvRJs6euowbFkFC6P13w1l8mJyi3vxYMxc9kld5jZEGRQs6bw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "engines": { + "node": ">=18" + } + }, "node_modules/@eslint-community/eslint-utils": { "version": "4.4.1", "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.4.1.tgz", @@ -1708,6 +1839,17 @@ "pretty-format": "^29.0.0" } }, + "node_modules/@types/jsdom": { + "version": "21.1.7", + "resolved": "https://registry.npmjs.org/@types/jsdom/-/jsdom-21.1.7.tgz", + "integrity": "sha512-yOriVnggzrnQ3a9OKOCxaVuSug3w3/SbOj5i7VwXWZEyUNl3bLF9V3MfxGbZKuwqJOQyRfqXyROBB1CoZLFWzA==", + "license": "MIT", + "dependencies": { + "@types/node": "*", + "@types/tough-cookie": "*", + "parse5": "^7.0.0" + } + }, "node_modules/@types/methods": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/@types/methods/-/methods-1.1.4.tgz", @@ -1726,7 +1868,6 @@ "version": "22.10.10", "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.10.tgz", "integrity": "sha512-X47y/mPNzxviAGY5TcYPtYL8JsY3kAq2n8fMmKoRCxq/c4v4pyGNCzM2R6+M5/umG4ZfHuT+sgqDYqWc9rJ6ww==", - "dev": true, "license": "MIT", "dependencies": { "undici-types": "~6.20.0" @@ -1811,6 +1952,12 @@ "@types/superagent": "^8.1.0" } }, + "node_modules/@types/tough-cookie": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.5.tgz", + "integrity": "sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA==", + "license": "MIT" + }, "node_modules/@types/yargs": { "version": "17.0.33", "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.33.tgz", @@ -2082,7 +2229,6 @@ "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.3.tgz", "integrity": "sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==", "license": "MIT", - "optional": true, "engines": { "node": ">= 14" } @@ -2898,6 +3044,19 @@ "node": ">= 8" } }, + "node_modules/cssstyle": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-4.3.0.tgz", + "integrity": "sha512-6r0NiY0xizYqfBvWp1G7WXJ06/bZyrk7Dc6PHql82C/pKGUTKu4yAX4Y8JPamb1ob9nBKuxWzCGTRuGwU3yxJQ==", + "license": "MIT", + "dependencies": { + "@asamuzakjp/css-color": "^3.1.1", + "rrweb-cssom": "^0.8.0" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/data-uri-to-buffer": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz", @@ -2907,11 +3066,57 @@ "node": ">= 12" } }, + "node_modules/data-urls": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-5.0.0.tgz", + "integrity": "sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==", + "license": "MIT", + "dependencies": { + "whatwg-mimetype": "^4.0.0", + "whatwg-url": "^14.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/data-urls/node_modules/tr46": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-5.1.0.tgz", + "integrity": "sha512-IUWnUK7ADYR5Sl1fZlO1INDUhVhatWl7BtJWsIhwJ0UAK7ilzzIa8uIqOO/aYVWHZPJkKbEL+362wrzoeRF7bw==", + "license": "MIT", + "dependencies": { + "punycode": "^2.3.1" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/data-urls/node_modules/webidl-conversions": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-7.0.0.tgz", + "integrity": "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + } + }, + "node_modules/data-urls/node_modules/whatwg-url": { + "version": "14.2.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-14.2.0.tgz", + "integrity": "sha512-De72GdQZzNTUBBChsXueQUnPKDkg/5A5zp7pFDuQAj5UFoENpiACU0wlCvzpAGnTkj++ihpKwKyYewn/XNUbKw==", + "license": "MIT", + "dependencies": { + "tr46": "^5.1.0", + "webidl-conversions": "^7.0.0" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/debug": { "version": "4.4.0", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.0.tgz", "integrity": "sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==", - "devOptional": true, "license": "MIT", "dependencies": { "ms": "^2.1.3" @@ -2925,6 +3130,12 @@ } } }, + "node_modules/decimal.js": { + "version": "10.5.0", + "resolved": "https://registry.npmjs.org/decimal.js/-/decimal.js-10.5.0.tgz", + "integrity": "sha512-8vDa8Qxvr/+d94hSh5P3IJwI5t8/c0KsMp+g8bNw9cY2icONa5aPfvKeieW1WlG0WQYwwhJ7mjui2xtiePQSXw==", + "license": "MIT" + }, "node_modules/dedent": { "version": "1.5.3", "resolved": "https://registry.npmjs.org/dedent/-/dedent-1.5.3.tgz", @@ -3174,6 +3385,18 @@ "node": ">= 0.8" } }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/error-ex": { "version": "1.3.2", "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz", @@ -4250,6 +4473,18 @@ "hjson": "bin/hjson" } }, + "node_modules/html-encoding-sniffer": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-4.0.0.tgz", + "integrity": "sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==", + "license": "MIT", + "dependencies": { + "whatwg-encoding": "^3.1.1" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/html-entities": { "version": "2.5.2", "resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.5.2.tgz", @@ -4289,12 +4524,24 @@ "node": ">= 0.8" } }, + "node_modules/http-proxy-agent": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", + "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/https-proxy-agent": { "version": "7.0.6", "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", "license": "MIT", - "optional": true, "dependencies": { "agent-base": "^7.1.2", "debug": "4" @@ -4495,6 +4742,12 @@ "node": ">=8" } }, + "node_modules/is-potential-custom-element-name": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz", + "integrity": "sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==", + "license": "MIT" + }, "node_modules/is-stream": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", @@ -5232,6 +5485,80 @@ "js-yaml": "bin/js-yaml.js" } }, + "node_modules/jsdom": { + "version": "26.0.0", + "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-26.0.0.tgz", + "integrity": "sha512-BZYDGVAIriBWTpIxYzrXjv3E/4u8+/pSG5bQdIYCbNCGOvsPkDQfTVLAIXAf9ETdCpduCVTkDe2NNZ8NIwUVzw==", + "license": "MIT", + "dependencies": { + "cssstyle": "^4.2.1", + "data-urls": "^5.0.0", + "decimal.js": "^10.4.3", + "form-data": "^4.0.1", + "html-encoding-sniffer": "^4.0.0", + "http-proxy-agent": "^7.0.2", + "https-proxy-agent": "^7.0.6", + "is-potential-custom-element-name": "^1.0.1", + "nwsapi": "^2.2.16", + "parse5": "^7.2.1", + "rrweb-cssom": "^0.8.0", + "saxes": "^6.0.0", + "symbol-tree": "^3.2.4", + "tough-cookie": "^5.0.0", + "w3c-xmlserializer": "^5.0.0", + "webidl-conversions": "^7.0.0", + "whatwg-encoding": "^3.1.1", + "whatwg-mimetype": "^4.0.0", + "whatwg-url": "^14.1.0", + "ws": "^8.18.0", + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "canvas": "^3.0.0" + }, + "peerDependenciesMeta": { + "canvas": { + "optional": true + } + } + }, + "node_modules/jsdom/node_modules/tr46": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-5.1.0.tgz", + "integrity": "sha512-IUWnUK7ADYR5Sl1fZlO1INDUhVhatWl7BtJWsIhwJ0UAK7ilzzIa8uIqOO/aYVWHZPJkKbEL+362wrzoeRF7bw==", + "license": "MIT", + "dependencies": { + "punycode": "^2.3.1" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/jsdom/node_modules/webidl-conversions": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-7.0.0.tgz", + "integrity": "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + } + }, + "node_modules/jsdom/node_modules/whatwg-url": { + "version": "14.2.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-14.2.0.tgz", + "integrity": "sha512-De72GdQZzNTUBBChsXueQUnPKDkg/5A5zp7pFDuQAj5UFoENpiACU0wlCvzpAGnTkj++ihpKwKyYewn/XNUbKw==", + "license": "MIT", + "dependencies": { + "tr46": "^5.1.0", + "webidl-conversions": "^7.0.0" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/jsesc": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-3.1.0.tgz", @@ -5734,6 +6061,12 @@ "node": ">=8" } }, + "node_modules/nwsapi": { + "version": "2.2.19", + "resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.2.19.tgz", + "integrity": "sha512-94bcyI3RsqiZufXjkr3ltkI86iEl+I7uiHVDtcq9wJUTwYQJ5odHDeSzkkrRzi80jJ8MaeZgqKjH1bAWAFw9bA==", + "license": "MIT" + }, "node_modules/object-assign": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", @@ -5885,6 +6218,18 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/parse5": { + "version": "7.2.1", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.2.1.tgz", + "integrity": "sha512-BuBYQYlv1ckiPdQi/ohiivi9Sagc9JG+Ozs0r7b/0iK3sKmrb0b9FdWdBbOdx6hBCM/F9Ir82ofnBhtZOjCRPQ==", + "license": "MIT", + "dependencies": { + "entities": "^4.5.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, "node_modules/parseurl": { "version": "1.3.3", "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", @@ -6121,7 +6466,6 @@ "version": "2.3.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", - "dev": true, "license": "MIT", "engines": { "node": ">=6" @@ -6335,6 +6679,12 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/rrweb-cssom": { + "version": "0.8.0", + "resolved": "https://registry.npmjs.org/rrweb-cssom/-/rrweb-cssom-0.8.0.tgz", + "integrity": "sha512-guoltQEx+9aMf2gDZ0s62EcV8lsXR+0w8915TC3ITdn2YueuNjdAYh/levpU9nFaoChh9RUS5ZdQMrKfVEN9tw==", + "license": "MIT" + }, "node_modules/run-parallel": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", @@ -6391,6 +6741,18 @@ "integrity": "sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg==", "license": "ISC" }, + "node_modules/saxes": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz", + "integrity": "sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==", + "license": "ISC", + "dependencies": { + "xmlchars": "^2.2.0" + }, + "engines": { + "node": ">=v12.22.7" + } + }, "node_modules/secure-json-parse": { "version": "2.7.0", "resolved": "https://registry.npmjs.org/secure-json-parse/-/secure-json-parse-2.7.0.tgz", @@ -6820,6 +7182,12 @@ "react": "^16.11.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" } }, + "node_modules/symbol-tree": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/symbol-tree/-/symbol-tree-3.2.4.tgz", + "integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==", + "license": "MIT" + }, "node_modules/test-exclude": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/test-exclude/-/test-exclude-6.0.0.tgz", @@ -6878,6 +7246,24 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/tldts": { + "version": "6.1.84", + "resolved": "https://registry.npmjs.org/tldts/-/tldts-6.1.84.tgz", + "integrity": "sha512-aRGIbCIF3teodtUFAYSdQONVmDRy21REM3o6JnqWn5ZkQBJJ4gHxhw6OfwQ+WkSAi3ASamrS4N4nyazWx6uTYg==", + "license": "MIT", + "dependencies": { + "tldts-core": "^6.1.84" + }, + "bin": { + "tldts": "bin/cli.js" + } + }, + "node_modules/tldts-core": { + "version": "6.1.84", + "resolved": "https://registry.npmjs.org/tldts-core/-/tldts-core-6.1.84.tgz", + "integrity": "sha512-NaQa1W76W2aCGjXybvnMYzGSM4x8fvG2AN/pla7qxcg0ZHbooOPhA8kctmOZUDfZyhDL27OGNbwAeig8P4p1vg==", + "license": "MIT" + }, "node_modules/tmpl": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz", @@ -6907,6 +7293,18 @@ "node": ">=0.6" } }, + "node_modules/tough-cookie": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-5.1.2.tgz", + "integrity": "sha512-FVDYdxtnj0G6Qm/DhNPSb8Ju59ULcup3tuJxkFb5K8Bv2pUXILbf0xZWU8PX8Ov19OXljbUyveOFwRMwkXzO+A==", + "license": "BSD-3-Clause", + "dependencies": { + "tldts": "^6.1.32" + }, + "engines": { + "node": ">=16" + } + }, "node_modules/tr46": { "version": "0.0.3", "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", @@ -7096,7 +7494,6 @@ "version": "6.20.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", - "dev": true, "license": "MIT" }, "node_modules/unpipe": { @@ -7223,6 +7620,18 @@ "node": ">= 0.8" } }, + "node_modules/w3c-xmlserializer": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-5.0.0.tgz", + "integrity": "sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==", + "license": "MIT", + "dependencies": { + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/walker": { "version": "1.0.8", "resolved": "https://registry.npmjs.org/walker/-/walker-1.0.8.tgz", @@ -7249,6 +7658,27 @@ "license": "BSD-2-Clause", "optional": true }, + "node_modules/whatwg-encoding": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", + "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", + "license": "MIT", + "dependencies": { + "iconv-lite": "0.6.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-mimetype": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", + "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, "node_modules/whatwg-url": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", @@ -7325,6 +7755,42 @@ "node": "^12.13.0 || ^14.15.0 || >=16.0.0" } }, + "node_modules/ws": { + "version": "8.18.1", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.1.tgz", + "integrity": "sha512-RKW2aJZMXeMxVpnZ6bck+RswznaxmzdULiBr6KY7XkTnW8uvt0iT9H5DkHUChXrc+uurzwa0rVI16n/Xzjdz1w==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, + "node_modules/xml-name-validator": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz", + "integrity": "sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==", + "license": "Apache-2.0", + "engines": { + "node": ">=18" + } + }, + "node_modules/xmlchars": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz", + "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==", + "license": "MIT" + }, "node_modules/y18n": { "version": "5.0.8", "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", diff --git a/package.json b/package.json index b74ca70..649b528 100644 --- a/package.json +++ b/package.json @@ -28,6 +28,7 @@ "dependencies": { "@ai-sdk/google": "^1.0.0", "@ai-sdk/openai": "^1.1.9", + "@types/jsdom": "^21.1.7", "ai": "^4.1.26", "axios": "^1.7.9", "commander": "^13.1.0", @@ -36,6 +37,7 @@ "duck-duck-scrape": "^2.2.7", "express": "^4.21.2", "hjson": "^3.2.2", + "jsdom": "^26.0.0", "node-fetch": "^3.3.2", "undici": "^7.3.0", "zod": "^3.22.4", diff --git a/src/agent.ts b/src/agent.ts index 243f6fa..0231f37 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -35,7 +35,7 @@ import { } from "./utils/url-tools"; import { buildMdFromAnswer, - chooseK, fixCodeBlockIndentation, + chooseK, convertHtmlTablesToMd, fixCodeBlockIndentation, removeExtraLineBreaks, removeHTMLtags, repairMarkdownFootnotesOuter } from "./utils/text-tools"; @@ -927,21 +927,24 @@ But unfortunately, you failed to solve the issue. You need to think out of the b if (!trivialQuestion) { (thisStep as AnswerAction).mdAnswer = - fixBadURLMdLinks( - fixCodeBlockIndentation( - repairMarkdownFootnotesOuter( - await fixMarkdown( - buildMdFromAnswer((thisStep as AnswerAction)), - allKnowledge, - context, - SchemaGen - )) - ), - allURLs); + convertHtmlTablesToMd( + fixBadURLMdLinks( + fixCodeBlockIndentation( + repairMarkdownFootnotesOuter( + await fixMarkdown( + buildMdFromAnswer((thisStep as AnswerAction)), + allKnowledge, + context, + SchemaGen + )) + ), + allURLs)); } else { - (thisStep as AnswerAction).mdAnswer = fixCodeBlockIndentation( - buildMdFromAnswer((thisStep as AnswerAction)) - ); + (thisStep as AnswerAction).mdAnswer = + convertHtmlTablesToMd( + fixCodeBlockIndentation( + buildMdFromAnswer((thisStep as AnswerAction))) + ); } console.log(thisStep) diff --git a/src/utils/text-tools.ts b/src/utils/text-tools.ts index 1297ce9..8b31f4b 100644 --- a/src/utils/text-tools.ts +++ b/src/utils/text-tools.ts @@ -1,5 +1,7 @@ import {AnswerAction, KnowledgeItem, Reference} from "../types"; import i18nJSON from './i18n.json'; +import { JSDOM } from 'jsdom'; + export function buildMdFromAnswer(answer: AnswerAction) { return repairMarkdownFootnotes(answer.answer, answer.references); @@ -429,3 +431,218 @@ ${k.answer} }) } + + +/** + * Converts HTML tables in a markdown string to markdown tables + * @param mdString The markdown string containing potential HTML tables + * @returns The markdown string with HTML tables converted to markdown tables, or the original string if no conversions were made + */ +export function convertHtmlTablesToMd(mdString: string): string { + // Check if there are any HTML tables in the string + if (!mdString.includes('')) { + return mdString; + } + + try { + // Regular expression to find HTML tables + const tableRegex = /
([\s\S]*?)<\/table>/g; + let result = mdString; + let match; + + // Process each table found + while ((match = tableRegex.exec(mdString)) !== null) { + const htmlTable = match[0]; + const convertedTable = convertSingleHtmlTableToMd(htmlTable); + + if (convertedTable) { + result = result.replace(htmlTable, convertedTable); + } + } + + return result; + } catch (error) { + console.error('Error converting HTML tables to Markdown:', error); + return mdString; // Return original string if conversion fails + } +} + +/** + * Converts a single HTML table to a markdown table + * @param htmlTable The HTML table string + * @returns The markdown table string or null if conversion fails + */ +function convertSingleHtmlTableToMd(htmlTable: string): string | null { + try { + // Create a DOM parser to parse the HTML + const parser = new DOMParser(); + const doc = parser.parseFromString(htmlTable, 'text/html'); + const table = doc.querySelector('table'); + + if (!table) { + return null; + } + + // Extract headers + const headers = Array.from(table.querySelectorAll('thead th')) + .map(th => sanitizeCell(th.textContent || '')); + + // Check if headers were found + if (headers.length === 0) { + // Try to find headers in the first row of tbody + const firstRow = table.querySelector('tbody tr'); + if (firstRow) { + headers.push(...Array.from(firstRow.querySelectorAll('td, th')) + .map(cell => sanitizeCell(cell.textContent || ''))); + } + } + + if (headers.length === 0) { + return null; // No headers found, can't create a valid markdown table + } + + // Start building the markdown table + let mdTable = ''; + + // Add the header row + mdTable += '| ' + headers.join(' | ') + ' |\n'; + + // Add the separator row + mdTable += '| ' + headers.map(() => '---').join(' | ') + ' |\n'; + + // Add the data rows + const rows = Array.from(table.querySelectorAll('tbody tr')); + + for (const row of rows) { + // Skip the first row if it was used for headers + if (table.querySelector('thead') === null && row === rows[0]) { + continue; + } + + const cells = Array.from(row.querySelectorAll('td')) + .map(td => { + // Check for markdown content in the cell + const cellContent = td.innerHTML; + let processedContent = cellContent; + + // Detect if the cell contains markdown formatting + const containsMarkdown = + cellContent.includes('**') || + cellContent.includes('* ') || + cellContent.includes('* ') || + cellContent.includes('* '); + + if (containsMarkdown) { + // Handle mixed HTML and Markdown + + // Handle lists inside cells (both ordered and unordered) + if (cellContent.includes('* ') || cellContent.includes('* ') || cellContent.includes('* ')) { + // Extract list items, handling both HTML list structures or markdown-style lists + let listItems = []; + + if (td.querySelectorAll('li').length > 0) { + // Handle HTML lists + listItems = Array.from(td.querySelectorAll('li')) + .map(li => li.innerHTML.trim()); + } else { + // Handle markdown-style lists with asterisks + const lines = cellContent.split('\n'); + for (const line of lines) { + const trimmedLine = line.trim(); + if (trimmedLine.match(/^\s*\*\s+/)) { + listItems.push(trimmedLine.replace(/^\s*\*\s+/, '')); + } + } + } + + // Format as bullet points with line breaks + processedContent = listItems.map(item => `• ${item}`).join('
'); + + // Preserve markdown formatting like bold and italic within list items + processedContent = processedContent + .replace(/\*\*(.*?)\*\*/g, '**$1**') // Preserve bold + .replace(/_(.*?)_/g, '_$1_'); // Preserve italic + } else { + // For cells without lists but with markdown, preserve the markdown formatting + processedContent = cellContent + .replace(/<\/?strong>/g, '**') // Convert HTML bold to markdown + .replace(/<\/?em>/g, '_') // Convert HTML italic to markdown + .replace(/<\/?b>/g, '**') // Convert HTML bold to markdown + .replace(/<\/?i>/g, '_') // Convert HTML italic to markdown + .replace(//g, '
') // Preserve line breaks as
tags + .replace(//g, '') // Remove opening paragraph tags + .replace(/<\/p>/g, '
'); // Convert closing paragraph tags to line breaks + } + } else { + // For regular HTML cells without markdown + processedContent = processedContent + .replace(/<\/?strong>/g, '**') // Bold + .replace(/<\/?em>/g, '_') // Italic + .replace(/<\/?b>/g, '**') // Bold + .replace(/<\/?i>/g, '_') // Italic + .replace(//g, '
') // Preserve line breaks as
tags + .replace(//g, '') // Opening paragraph tags + .replace(/<\/p>/g, '
'); // Convert closing paragraph tags to line breaks + } + + // Strip any remaining HTML tags, but preserve markdown syntax and
tags + processedContent = processedContent + .replace(/<(?!\/?br\b)[^>]*>/g, '') // Remove all HTML tags except
+ .trim(); + + return sanitizeCell(processedContent); + }); + + // Ensure each row has the same number of cells as headers + while (cells.length < headers.length) { + cells.push(''); + } + + mdTable += '| ' + cells.join(' | ') + ' |\n'; + } + + return mdTable; + } catch (error) { + console.error('Error converting single HTML table:', error); + return null; + } +} + +/** + * Sanitizes a cell's content for use in a markdown table + * @param content The cell content + * @returns Sanitized content + */ +function sanitizeCell(content: string): string { + // Trim whitespace + let sanitized = content.trim(); + + // Normalize pipe characters in content (escape them) + sanitized = sanitized.replace(/\|/g, '\\|'); + + // Preserve line breaks + sanitized = sanitized.replace(/\n/g, '
'); + + // Keep existing
tags intact (don't escape them) + sanitized = sanitized.replace(/<br>/g, '
'); + + // Preserve markdown formatting + sanitized = sanitized + .replace(/\\\*\\\*/g, '**') // Fix escaped bold markers + .replace(/\\\*/g, '*') // Fix escaped list markers + .replace(/\\_/g, '_'); // Fix escaped italic markers + + return sanitized; +} + + + + +if (typeof window === 'undefined') { + global.DOMParser = class DOMParser { + parseFromString(htmlString: string, mimeType: string) { + const dom = new JSDOM(htmlString, { contentType: mimeType }); + return dom.window.document; + } + }; +} \ No newline at end of file