${node.children?.map((child) => child.htmlValue || '').join('')}
`; + case 'heading': + return `${node.children?.map((child) => child.htmlValue || '').join('')}`; + case 'code': + return `
${node.value}
`;
+ case 'inlineCode':
+ return `${node.value}
`;
+ case 'thematicBreak':
+ return ''; + default: + return ''; + } + } + + return processNode(tree); +} + function markdownToHtmlTable(markdownTable) { // Split the markdown table into lines const lines = markdownTable.trim().split('\n'); @@ -74,29 +143,23 @@ app.get('/convert', async (req, res) => { // Then read and process the Markdown const markdown = readFileSync(markdownPath, 'utf-8'); const mdast = fromMarkdown(markdown); - const md = readFileSync(markdownPath, 'utf-8'); const ast = parse(md); + // This is to correct some types mdast.children.forEach(async (element, index) => { const hast = toHast(element, { allowDangerousHtml: true }); const html = toHtml(hast, { allowDangerousHtml: true }); - element.htmlValue = html; element.type = ast.children[index].type; element.raw = ast.children[index].raw; - if (element.type == 'Table') { - element.htmlValue = markdownToHtmlTable(html); - } - - if (ast.children[index].children[0].type == 'Image') { - element.type = 'Image'; - element.src = `${outputDir}/${ast.children[index].children[0].url}`; - } + element.htmlValue = html; }); + const enhanced = await enhanceMdastNodesRecursive(mdast, outputDir); + // Return the processed content along with conversion info res.json({ - content: mdast.children, + content: enhanced.children, outputDirectory: outputDir, warnings: warnings, }); @@ -136,22 +199,15 @@ app.get('/html-convert', async (req, res) => { mdast.children.forEach(async (element, index) => { const hast = toHast(element, { allowDangerousHtml: true }); const html = toHtml(hast, { allowDangerousHtml: true }); - element.htmlValue = html; element.type = ast.children[index].type; element.raw = ast.children[index].raw; - if (element.type == 'Table') { - element.htmlValue = markdownToHtmlTable(html); - } - - if (ast.children[index].children[0].type == 'Image') { - element.type = 'Image'; - element.src = `${outputDir}/${ast.children[index].children[0].url}`; - } + element.htmlValue = html; }); + const enhanced = await enhanceMdastNodesRecursive(mdast, outputDir); // Return the processed content along with conversion info res.json({ - content: mdast.children, + content: enhanced.children, outputDirectory: outputDir, warnings: warnings, }); @@ -167,6 +223,54 @@ app.get('/html-convert', async (req, res) => { } }); +app.get('/pdf-convert', async (req, res) => { + const filePath = req.query.path; + + if (!filePath) { + return res.status(400).json({ + error: "Please provide a URLas 'path' query parameter", + }); + } + + try { + // First convert Word to Markdown + const { markdownPath, outputDir } = await pdfToMarkdown(filePath); + + // Then read and process the Markdown + const markdown = readFileSync(markdownPath, 'utf-8'); + const mdast = fromMarkdown(markdown); + + const md = readFileSync(markdownPath, 'utf-8'); + const ast = parse(md); + + mdast.children.forEach(async (element, index) => { + const hast = toHast(element, { allowDangerousHtml: true }); + const html = toHtml(hast, { allowDangerousHtml: true }); + element.type = ast.children[index].type; + element.raw = ast.children[index].raw; + element.htmlValue = html; + }); + + const enhanced = await enhanceMdastNodesRecursive(mdast, outputDir); + + // Return the processed content along with conversion info + res.json({ + content: enhanced.children, + outputDirectory: outputDir, + // warnings: warnings, + }); + } catch (error) { + if (error.code === 'ENOENT') { + res.status(404).json({ error: `File not found: ${filePath}` }); + } else { + res.status(500).json({ + error: 'Error processing document', + details: error.message, + }); + } + } +}); + app.listen(PORT, () => { console.log(`Server running on http://localhost:${PORT}`); }); diff --git a/apps/converter/package-lock.json b/apps/converter/package-lock.json index a51bdd697..fbc3bb1b4 100644 --- a/apps/converter/package-lock.json +++ b/apps/converter/package-lock.json @@ -9,16 +9,100 @@ "version": "1.0.0", "license": "ISC", "dependencies": { + "@extractus/article-extractor": "^8.0.16", "@textlint/markdown-to-ast": "^14.3.0", + "dotenv": "^16.4.7", "express": "^4.21.1", "fs-extra": "^11.2.0", "hast-util-to-html": "^9.0.3", "image-type": "^5.2.0", + "jsdom": "^25.0.1", + "langchain": "^0.3.6", "mammoth": "^1.8.0", "mdast-util-from-markdown": "^2.0.2", "mdast-util-to-hast": "^13.2.0", + "node-fetch": "^3.3.2", + "openai": "^4.76.1", + "pdf-parse": "github:iamh2o/pdf-parse#1.1.3", + "pdf2json": "^3.1.4", "sanitize-filename": "^1.6.3", - "turndown": "^7.2.0" + "turndown": "^7.2.0", + "unist-util-visit": "^5.0.0" + } + }, + "node_modules/@cfworker/json-schema": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/@cfworker/json-schema/-/json-schema-4.0.3.tgz", + "integrity": "sha512-ZykIcDTVv5UNmKWSTLAs3VukO6NDJkkSKxrgUTDPBkAlORVT3H9n5DbRjRl8xIotklscHdbLIa0b9+y3mQq73g==", + "peer": true + }, + "node_modules/@extractus/article-extractor": { + "version": "8.0.16", + "resolved": "https://registry.npmjs.org/@extractus/article-extractor/-/article-extractor-8.0.16.tgz", + "integrity": "sha512-amxCKO2uerY0UPxDVSoTDdcTny0otpKsAIGC2q2CUDEhUX6EfxmpURttlKLx9uWFT9DRlNX9LSyMSP/2p7kFLg==", + "dependencies": { + "@mozilla/readability": "^0.5.0", + "bellajs": "^11.2.0", + "cross-fetch": "^4.0.0", + "linkedom": "^0.18.5", + "sanitize-html": "2.13.1" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/@langchain/core": { + "version": "0.3.23", + "resolved": "https://registry.npmjs.org/@langchain/core/-/core-0.3.23.tgz", + "integrity": "sha512-Aut43dEJYH/ibccSErFOLQzymkBG4emlN16P0OHWwx02bDosOR9ilZly4JJiCSYcprn2X2H8nee6P/4VMg1oQA==", + "peer": true, + "dependencies": { + "@cfworker/json-schema": "^4.0.2", + "ansi-styles": "^5.0.0", + "camelcase": "6", + "decamelize": "1.2.0", + "js-tiktoken": "^1.0.12", + "langsmith": "^0.2.8", + "mustache": "^4.2.0", + "p-queue": "^6.6.2", + "p-retry": "4", + "uuid": "^10.0.0", + "zod": "^3.22.4", + "zod-to-json-schema": "^3.22.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@langchain/openai": { + "version": "0.3.14", + "resolved": "https://registry.npmjs.org/@langchain/openai/-/openai-0.3.14.tgz", + "integrity": "sha512-lNWjUo1tbvsss45IF7UQtMu1NJ6oUKvhgPYWXnX9f/d6OmuLu7D99HQ3Y88vLcUo9XjjOy417olYHignMduMjA==", + "dependencies": { + "js-tiktoken": "^1.0.12", + "openai": "^4.71.0", + "zod": "^3.22.4", + "zod-to-json-schema": "^3.22.3" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@langchain/core": ">=0.2.26 <0.4.0" + } + }, + "node_modules/@langchain/textsplitters": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/@langchain/textsplitters/-/textsplitters-0.1.0.tgz", + "integrity": "sha512-djI4uw9rlkAb5iMhtLED+xJebDdAG935AdP4eRTB02R7OB/act55Bj9wsskhZsvuyQRpO4O1wQOp85s6T6GWmw==", + "dependencies": { + "js-tiktoken": "^1.0.12" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@langchain/core": ">=0.2.21 <0.4.0" } }, "node_modules/@mixmark-io/domino": { @@ -26,6 +110,14 @@ "resolved": "https://registry.npmjs.org/@mixmark-io/domino/-/domino-2.2.0.tgz", "integrity": "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==" }, + "node_modules/@mozilla/readability": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/@mozilla/readability/-/readability-0.5.0.tgz", + "integrity": "sha512-Z+CZ3QaosfFaTqvhQsIktyGrjFjSC0Fa4EMph4mqKnWhmyoGICsV/8QK+8HpXut6zV7zwfWwqDmEjtk1Qf6EgQ==", + "engines": { + "node": ">=14.0.0" + } + }, "node_modules/@textlint/ast-node-types": { "version": "14.3.0", "resolved": "https://registry.npmjs.org/@textlint/ast-node-types/-/ast-node-types-14.3.0.tgz", @@ -81,11 +173,38 @@ "resolved": "https://registry.npmjs.org/@types/ms/-/ms-0.7.34.tgz", "integrity": "sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g==" }, + "node_modules/@types/node": { + "version": "18.19.67", + "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.67.tgz", + "integrity": "sha512-wI8uHusga+0ZugNp0Ol/3BqQfEcCCNfojtO6Oou9iVNGPTL6QNSdnUdqq85fRgIorLhLMuPIKpsN98QE9Nh+KQ==", + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/@types/node-fetch": { + "version": "2.6.12", + "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.12.tgz", + "integrity": "sha512-8nneRWKCg3rMtF69nLQJnOYUcbafYeFSjqkw3jCRLsqkWFlHaoQrr5mXmofFGOx3DKn7UfmBMyov8ySvLRVldA==", + "dependencies": { + "@types/node": "*", + "form-data": "^4.0.0" + } + }, + "node_modules/@types/retry": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz", + "integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==" + }, "node_modules/@types/unist": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz", "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==" }, + "node_modules/@types/uuid": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-10.0.0.tgz", + "integrity": "sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==" + }, "node_modules/@ungap/structured-clone": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.2.0.tgz", @@ -99,6 +218,17 @@ "node": ">=10.0.0" } }, + "node_modules/abort-controller": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", + "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", + "dependencies": { + "event-target-shim": "^5.0.0" + }, + "engines": { + "node": ">=6.5" + } + }, "node_modules/accepts": { "version": "1.3.8", "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", @@ -111,6 +241,37 @@ "node": ">= 0.6" } }, + "node_modules/agent-base": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.3.tgz", + "integrity": "sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==", + "engines": { + "node": ">= 14" + } + }, + "node_modules/agentkeepalive": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.5.0.tgz", + "integrity": "sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==", + "dependencies": { + "humanize-ms": "^1.2.1" + }, + "engines": { + "node": ">= 8.0.0" + } + }, + "node_modules/ansi-styles": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", + "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", + "peer": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, "node_modules/argparse": { "version": "1.0.10", "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", @@ -124,6 +285,11 @@ "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", "integrity": "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==" }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + }, "node_modules/bail": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/bail/-/bail-1.0.5.tgz", @@ -152,6 +318,14 @@ } ] }, + "node_modules/bellajs": { + "version": "11.2.0", + "resolved": "https://registry.npmjs.org/bellajs/-/bellajs-11.2.0.tgz", + "integrity": "sha512-Wjss+Bc674ZABPr+SCKWTqA4V1pyYFhzDTjNBJy4jdmgOv0oGIGXeKBRJyINwP5tIy+iIZD9SfgZpztduzQ5QA==", + "engines": { + "node": ">= 18.4" + } + }, "node_modules/bluebird": { "version": "3.4.7", "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.4.7.tgz", @@ -193,6 +367,11 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==" }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==" + }, "node_modules/bytes": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", @@ -219,6 +398,18 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/camelcase": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz", + "integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==", + "peer": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/ccount": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/ccount/-/ccount-2.0.1.tgz", @@ -264,6 +455,17 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/comma-separated-tokens": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz", @@ -273,6 +475,14 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/commander": { + "version": "10.0.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-10.0.1.tgz", + "integrity": "sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==", + "engines": { + "node": ">=14" + } + }, "node_modules/content-disposition": { "version": "0.5.4", "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz", @@ -310,6 +520,114 @@ "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz", "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==" }, + "node_modules/cross-fetch": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-4.0.0.tgz", + "integrity": "sha512-e4a5N8lVvuLgAWgnCrLr2PP0YyDOTHa9H/Rj54dirp61qXnNq46m82bRhNqIA5VccJtWBvPTFRV3TtvHUKPB1g==", + "dependencies": { + "node-fetch": "^2.6.12" + } + }, + "node_modules/cross-fetch/node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/cross-fetch/node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" + }, + "node_modules/cross-fetch/node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" + }, + "node_modules/cross-fetch/node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + "node_modules/css-select": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz", + "integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz", + "integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/cssom": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/cssom/-/cssom-0.5.0.tgz", + "integrity": "sha512-iKuQcq+NdHqlAcwUY0o/HL69XQrUaQdMjmStJ8JFmUaiiQErlhrmuigkg/CU4E2J0IyUKUrMAgl36TvN67MqTw==" + }, + "node_modules/cssstyle": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-4.1.0.tgz", + "integrity": "sha512-h66W1URKpBS5YMI/V8PyXvTMFT8SupJ1IzoIV8IeBC/ji8WVmrO8dGlTi+2dh6whmdk6BiKJLD/ZBkhWbcg6nA==", + "dependencies": { + "rrweb-cssom": "^0.7.1" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/data-uri-to-buffer": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz", + "integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==", + "engines": { + "node": ">= 12" + } + }, + "node_modules/data-urls": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-5.0.0.tgz", + "integrity": "sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==", + "dependencies": { + "whatwg-mimetype": "^4.0.0", + "whatwg-url": "^14.0.0" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/debug": { "version": "4.3.7", "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", @@ -326,6 +644,20 @@ } } }, + "node_modules/decamelize": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz", + "integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==", + "peer": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/decimal.js": { + "version": "10.4.3", + "resolved": "https://registry.npmjs.org/decimal.js/-/decimal.js-10.4.3.tgz", + "integrity": "sha512-VBBaLc1MgL5XpzgIP7ny5Z6Nx3UrRkIViUkPUdtl9aya5amy3De1gsUUSB1g3+3sExYNjCAsAznmukyxCb1GRA==" + }, "node_modules/decode-named-character-reference": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/decode-named-character-reference/-/decode-named-character-reference-1.0.2.tgz", @@ -338,6 +670,14 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/deepmerge": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz", + "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/define-data-property": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", @@ -354,6 +694,14 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/depd": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", @@ -396,6 +744,68 @@ "resolved": "https://registry.npmjs.org/dingbat-to-unicode/-/dingbat-to-unicode-1.0.1.tgz", "integrity": "sha512-98l0sW87ZT58pU4i61wa2OHwxbiYSbuxsCBozaVnYX2iCnr3bLM3fIes1/ej7h1YdOKuKt/MLs706TVnALA65w==" }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ] + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz", + "integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, + "node_modules/dotenv": { + "version": "16.4.7", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.7.tgz", + "integrity": "sha512-47qPchRCykZC03FhkYAhrvwU4xDBFIj1QPqaarj6mdM/hgUzfPHcpkHJOn3mJAufFeeAxAzeGsr5X0M4k6fLZQ==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, "node_modules/duck": { "version": "0.1.12", "resolved": "https://registry.npmjs.org/duck/-/duck-0.1.12.tgz", @@ -417,6 +827,17 @@ "node": ">= 0.8" } }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/es-define-property": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.0.tgz", @@ -460,6 +881,19 @@ "node": ">= 0.6" } }, + "node_modules/event-target-shim": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", + "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", + "engines": { + "node": ">=6" + } + }, + "node_modules/eventemitter3": { + "version": "4.0.7", + "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz", + "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==" + }, "node_modules/express": { "version": "4.21.1", "resolved": "https://registry.npmjs.org/express/-/express-4.21.1.tgz", @@ -531,6 +965,28 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/fetch-blob": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz", + "integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/jimmywarting" + }, + { + "type": "paypal", + "url": "https://paypal.me/jimmywarting" + } + ], + "dependencies": { + "node-domexception": "^1.0.0", + "web-streams-polyfill": "^3.0.3" + }, + "engines": { + "node": "^12.20 || >= 14.13" + } + }, "node_modules/file-type": { "version": "18.7.0", "resolved": "https://registry.npmjs.org/file-type/-/file-type-18.7.0.tgz", @@ -577,6 +1033,24 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==" }, + "node_modules/form-data": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.1.tgz", + "integrity": "sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/form-data-encoder": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", + "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==" + }, "node_modules/format": { "version": "0.2.2", "resolved": "https://registry.npmjs.org/format/-/format-0.2.2.tgz", @@ -585,6 +1059,37 @@ "node": ">=0.4.x" } }, + "node_modules/formdata-node": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", + "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", + "dependencies": { + "node-domexception": "1.0.0", + "web-streams-polyfill": "4.0.0-beta.3" + }, + "engines": { + "node": ">= 12.20" + } + }, + "node_modules/formdata-node/node_modules/web-streams-polyfill": { + "version": "4.0.0-beta.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", + "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", + "engines": { + "node": ">= 14" + } + }, + "node_modules/formdata-polyfill": { + "version": "4.0.10", + "resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz", + "integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==", + "dependencies": { + "fetch-blob": "^3.1.2" + }, + "engines": { + "node": ">=12.20.0" + } + }, "node_modules/forwarded": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", @@ -734,6 +1239,22 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/html-encoding-sniffer": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-4.0.0.tgz", + "integrity": "sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==", + "dependencies": { + "whatwg-encoding": "^3.1.1" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/html-escaper": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-3.0.3.tgz", + "integrity": "sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ==" + }, "node_modules/html-void-elements": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/html-void-elements/-/html-void-elements-3.0.0.tgz", @@ -743,6 +1264,24 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/htmlparser2": { + "version": "9.1.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-9.1.0.tgz", + "integrity": "sha512-5zfg6mHUoaer/97TxnGpxmbR7zJtPwIYFMZ/H5ucTlPZhKvtum05yiPK3Mgai3a0DyVxv7qYqoweaEd2nrYQzQ==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.1.0", + "entities": "^4.5.0" + } + }, "node_modules/http-errors": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz", @@ -758,6 +1297,38 @@ "node": ">= 0.8" } }, + "node_modules/http-proxy-agent": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", + "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/humanize-ms": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", + "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", + "dependencies": { + "ms": "^2.0.0" + } + }, "node_modules/iconv-lite": { "version": "0.4.24", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", @@ -890,15 +1461,91 @@ "node": ">=8" } }, + "node_modules/is-plain-object": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-5.0.0.tgz", + "integrity": "sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-potential-custom-element-name": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz", + "integrity": "sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==" + }, "node_modules/isarray": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==" }, - "node_modules/jsonfile": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz", - "integrity": "sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==", + "node_modules/js-tiktoken": { + "version": "1.0.15", + "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.15.tgz", + "integrity": "sha512-65ruOWWXDEZHHbAo7EjOcNxOGasQKbL4Fq3jEr2xsCqSsoOo6VVSqzWQb6PRIqypFSDcma4jO90YP0w5X8qVXQ==", + "dependencies": { + "base64-js": "^1.5.1" + } + }, + "node_modules/js-yaml": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", + "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/js-yaml/node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" + }, + "node_modules/jsdom": { + "version": "25.0.1", + "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-25.0.1.tgz", + "integrity": "sha512-8i7LzZj7BF8uplX+ZyOlIz86V6TAsSs+np6m1kpW9u0JWi4z/1t+FzcK1aek+ybTnAC4KhBL4uXCNT0wcUIeCw==", + "dependencies": { + "cssstyle": "^4.1.0", + "data-urls": "^5.0.0", + "decimal.js": "^10.4.3", + "form-data": "^4.0.0", + "html-encoding-sniffer": "^4.0.0", + "http-proxy-agent": "^7.0.2", + "https-proxy-agent": "^7.0.5", + "is-potential-custom-element-name": "^1.0.1", + "nwsapi": "^2.2.12", + "parse5": "^7.1.2", + "rrweb-cssom": "^0.7.1", + "saxes": "^6.0.0", + "symbol-tree": "^3.2.4", + "tough-cookie": "^5.0.0", + "w3c-xmlserializer": "^5.0.0", + "webidl-conversions": "^7.0.0", + "whatwg-encoding": "^3.1.1", + "whatwg-mimetype": "^4.0.0", + "whatwg-url": "^14.0.0", + "ws": "^8.18.0", + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "canvas": "^2.11.2" + }, + "peerDependenciesMeta": { + "canvas": { + "optional": true + } + } + }, + "node_modules/jsonfile": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz", + "integrity": "sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==", "dependencies": { "universalify": "^2.0.0" }, @@ -906,6 +1553,14 @@ "graceful-fs": "^4.1.6" } }, + "node_modules/jsonpointer": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/jsonpointer/-/jsonpointer-5.0.1.tgz", + "integrity": "sha512-p/nXbhSEcu3pZRdkW1OfJhpsVtW1gd4Wa1fnQc9YLiTfAjn0312eMKimbdIQzuZl9aa9xUGaRlP9T/CJE/ditQ==", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/jszip": { "version": "3.10.1", "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz", @@ -917,6 +1572,106 @@ "setimmediate": "^1.0.5" } }, + "node_modules/langchain": { + "version": "0.3.6", + "resolved": "https://registry.npmjs.org/langchain/-/langchain-0.3.6.tgz", + "integrity": "sha512-erZOIKXzwCOrQHqY9AyjkQmaX62zUap1Sigw1KrwMUOnVoLKkVNRmAyxFlNZDZ9jLs/58MaQcaT9ReJtbj3x6w==", + "dependencies": { + "@langchain/openai": ">=0.1.0 <0.4.0", + "@langchain/textsplitters": ">=0.0.0 <0.2.0", + "js-tiktoken": "^1.0.12", + "js-yaml": "^4.1.0", + "jsonpointer": "^5.0.1", + "langsmith": "^0.2.0", + "openapi-types": "^12.1.3", + "p-retry": "4", + "uuid": "^10.0.0", + "yaml": "^2.2.1", + "zod": "^3.22.4", + "zod-to-json-schema": "^3.22.3" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@langchain/anthropic": "*", + "@langchain/aws": "*", + "@langchain/cohere": "*", + "@langchain/core": ">=0.2.21 <0.4.0", + "@langchain/google-genai": "*", + "@langchain/google-vertexai": "*", + "@langchain/groq": "*", + "@langchain/mistralai": "*", + "@langchain/ollama": "*", + "axios": "*", + "cheerio": "*", + "handlebars": "^4.7.8", + "peggy": "^3.0.2", + "typeorm": "*" + }, + "peerDependenciesMeta": { + "@langchain/anthropic": { + "optional": true + }, + "@langchain/aws": { + "optional": true + }, + "@langchain/cohere": { + "optional": true + }, + "@langchain/google-genai": { + "optional": true + }, + "@langchain/google-vertexai": { + "optional": true + }, + "@langchain/groq": { + "optional": true + }, + "@langchain/mistralai": { + "optional": true + }, + "@langchain/ollama": { + "optional": true + }, + "axios": { + "optional": true + }, + "cheerio": { + "optional": true + }, + "handlebars": { + "optional": true + }, + "peggy": { + "optional": true + }, + "typeorm": { + "optional": true + } + } + }, + "node_modules/langsmith": { + "version": "0.2.11", + "resolved": "https://registry.npmjs.org/langsmith/-/langsmith-0.2.11.tgz", + "integrity": "sha512-rVPUN/jQEHjTuYaoVKGjfb3NsYNLGTQT9LXcgJvka5M0EDcXciC598A+DsAQrl6McdfSJCFJDelgRPqVoF2xNA==", + "dependencies": { + "@types/uuid": "^10.0.0", + "commander": "^10.0.1", + "p-queue": "^6.6.2", + "p-retry": "4", + "semver": "^7.6.3", + "uuid": "^10.0.0" + }, + "peerDependencies": { + "openai": "*" + }, + "peerDependenciesMeta": { + "openai": { + "optional": true + } + } + }, "node_modules/lie": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz", @@ -925,6 +1680,18 @@ "immediate": "~3.0.5" } }, + "node_modules/linkedom": { + "version": "0.18.5", + "resolved": "https://registry.npmjs.org/linkedom/-/linkedom-0.18.5.tgz", + "integrity": "sha512-JGLaGGtqtu+eOhYrC1wkWYTBcpVWL4AsnwAtMtgO1Q0gI0PuPJKI0zBBE+a/1BrhOE3Uw8JI/ycByAv5cLrAuQ==", + "dependencies": { + "css-select": "^5.1.0", + "cssom": "^0.5.0", + "html-escaper": "^3.0.3", + "htmlparser2": "^9.1.0", + "uhyphen": "^0.2.0" + } + }, "node_modules/longest-streak": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/longest-streak/-/longest-streak-2.0.4.tgz", @@ -1919,6 +2686,32 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" }, + "node_modules/mustache": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/mustache/-/mustache-4.2.0.tgz", + "integrity": "sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ==", + "peer": true, + "bin": { + "mustache": "bin/mustache" + } + }, + "node_modules/nanoid": { + "version": "3.3.8", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.8.tgz", + "integrity": "sha512-WNLf5Sd8oZxOm+TzppcYk8gVOgP+l58xNy58D0nbUnOxOWRWvlcCV4kUF7ltmI6PsrLl/BgKEyS4mqsGChFN0w==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "bin": { + "nanoid": "bin/nanoid.cjs" + }, + "engines": { + "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" + } + }, "node_modules/negotiator": { "version": "0.6.3", "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz", @@ -1935,6 +2728,62 @@ "node": ">= 10" } }, + "node_modules/node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/jimmywarting" + }, + { + "type": "github", + "url": "https://paypal.me/jimmywarting" + } + ], + "engines": { + "node": ">=10.5.0" + } + }, + "node_modules/node-ensure": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/node-ensure/-/node-ensure-0.0.0.tgz", + "integrity": "sha512-DRI60hzo2oKN1ma0ckc6nQWlHU69RH6xN0sjQTjMpChPfTYvKZdcQFfdYK2RWbJcKyUizSIy/l8OTGxMAM1QDw==" + }, + "node_modules/node-fetch": { + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz", + "integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==", + "dependencies": { + "data-uri-to-buffer": "^4.0.0", + "fetch-blob": "^3.1.4", + "formdata-polyfill": "^4.0.10" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/node-fetch" + } + }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, + "node_modules/nwsapi": { + "version": "2.2.16", + "resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.2.16.tgz", + "integrity": "sha512-F1I/bimDpj3ncaNDhfyMWuFqmQDBwDB0Fogc2qpL3BWvkQteFD/8BzWuIRl83rq0DXfm8SGt/HFhLXZyljTXcQ==" + }, "node_modules/object-inspect": { "version": "1.13.3", "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.3.tgz", @@ -1957,11 +2806,125 @@ "node": ">= 0.8" } }, + "node_modules/openai": { + "version": "4.76.1", + "resolved": "https://registry.npmjs.org/openai/-/openai-4.76.1.tgz", + "integrity": "sha512-ci63/WFEMd6QjjEVeH0pV7hnFS6CCqhgJydSti4Aak/8uo2SpgzKjteUDaY+OkwziVj11mi6j+0mRUIiGKUzWw==", + "dependencies": { + "@types/node": "^18.11.18", + "@types/node-fetch": "^2.6.4", + "abort-controller": "^3.0.0", + "agentkeepalive": "^4.2.1", + "form-data-encoder": "1.7.2", + "formdata-node": "^4.3.2", + "node-fetch": "^2.6.7" + }, + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "zod": "^3.23.8" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } + } + }, + "node_modules/openai/node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/openai/node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" + }, + "node_modules/openai/node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" + }, + "node_modules/openai/node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + "node_modules/openapi-types": { + "version": "12.1.3", + "resolved": "https://registry.npmjs.org/openapi-types/-/openapi-types-12.1.3.tgz", + "integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==" + }, "node_modules/option": { "version": "0.2.4", "resolved": "https://registry.npmjs.org/option/-/option-0.2.4.tgz", "integrity": "sha512-pkEqbDyl8ou5cpq+VsnQbe/WlEy5qS7xPzMS1U55OCG9KPvwFD46zDbxQIj3egJSFc3D+XhYOPUzz49zQAVy7A==" }, + "node_modules/p-finally": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz", + "integrity": "sha512-LICb2p9CB7FS+0eR1oqWnHhp0FljGLZCWBE9aix0Uye9W8LTQPwMTYVGWQWIw9RdQiDg4+epXQODwIYJtSJaow==", + "engines": { + "node": ">=4" + } + }, + "node_modules/p-queue": { + "version": "6.6.2", + "resolved": "https://registry.npmjs.org/p-queue/-/p-queue-6.6.2.tgz", + "integrity": "sha512-RwFpb72c/BhQLEXIZ5K2e+AhgNVmIejGlTgiB9MzZ0e93GRvqZ7uSi0dvRF7/XIXDeNkra2fNHBxTyPDGySpjQ==", + "dependencies": { + "eventemitter3": "^4.0.4", + "p-timeout": "^3.2.0" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-retry": { + "version": "4.6.2", + "resolved": "https://registry.npmjs.org/p-retry/-/p-retry-4.6.2.tgz", + "integrity": "sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==", + "dependencies": { + "@types/retry": "0.12.0", + "retry": "^0.13.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/p-timeout": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/p-timeout/-/p-timeout-3.2.0.tgz", + "integrity": "sha512-rhIwUycgwwKcP9yTOOFK/AKsAopjjCakVqLHePO3CC6Mir1Z99xT+R63jZxAT5lFZLa2inS5h+ZS2GvR99/FBg==", + "dependencies": { + "p-finally": "^1.0.0" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/pako": { "version": "1.0.11", "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", @@ -1993,6 +2956,22 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/parse-srcset": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/parse-srcset/-/parse-srcset-1.0.2.tgz", + "integrity": "sha512-/2qh0lav6CmI15FzA3i/2Bzk2zCgQhGMkvhOhKNcBVQ1ldgpbfiNTVslmooUmWJcADi1f1kIeynbDRVzNlfR6Q==" + }, + "node_modules/parse5": { + "version": "7.2.1", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.2.1.tgz", + "integrity": "sha512-BuBYQYlv1ckiPdQi/ohiivi9Sagc9JG+Ozs0r7b/0iK3sKmrb0b9FdWdBbOdx6hBCM/F9Ir82ofnBhtZOjCRPQ==", + "dependencies": { + "entities": "^4.5.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, "node_modules/parseurl": { "version": "1.3.3", "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", @@ -2014,6 +2993,52 @@ "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.10.tgz", "integrity": "sha512-7lf7qcQidTku0Gu3YDPc8DJ1q7OOucfa/BSsIwjuh56VU7katFvuM8hULfkwB3Fns/rsVF7PwPKVw1sl5KQS9w==" }, + "node_modules/pdf-parse": { + "version": "1.1.3", + "resolved": "git+ssh://git@github.com/iamh2o/pdf-parse.git#d7a41d5aaed1503bee2d7ea50bf89588d3b2d2cf", + "license": "MIT", + "dependencies": { + "debug": "^3.1.0", + "node-ensure": "^0.0.0" + }, + "engines": { + "node": ">=6.8.1" + } + }, + "node_modules/pdf-parse/node_modules/debug": { + "version": "3.2.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", + "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", + "dependencies": { + "ms": "^2.1.1" + } + }, + "node_modules/pdf2json": { + "version": "3.1.4", + "resolved": "https://registry.npmjs.org/pdf2json/-/pdf2json-3.1.4.tgz", + "integrity": "sha512-rS+VapXpXZr+5lUpHmRh3ugXdFXp24p1RyG24yP1DMpqP4t0mrYNGpLtpSbWD42PnQ59GIXofxF+yWb7M+3THg==", + "bundleDependencies": [ + "@xmldom/xmldom" + ], + "dependencies": { + "@xmldom/xmldom": "^0.8.10" + }, + "bin": { + "pdf2json": "bin/pdf2json.js" + }, + "engines": { + "node": ">=18.12.1", + "npm": ">=8.19.2" + } + }, + "node_modules/pdf2json/node_modules/@xmldom/xmldom": { + "version": "0.8.10", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/peek-readable": { "version": "5.3.1", "resolved": "https://registry.npmjs.org/peek-readable/-/peek-readable-5.3.1.tgz", @@ -2026,6 +3051,38 @@ "url": "https://github.com/sponsors/Borewit" } }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==" + }, + "node_modules/postcss": { + "version": "8.4.49", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.49.tgz", + "integrity": "sha512-OCVPnIObs4N29kxTjzLfUryOkvZEq+pf8jTF0lg8E7uETuWHA+v7j3c/xJmiqpX450191LlmZfUKkXxkTry7nA==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/postcss" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "dependencies": { + "nanoid": "^3.3.7", + "picocolors": "^1.1.1", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12 || >=14" + } + }, "node_modules/process-nextick-args": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", @@ -2052,6 +3109,14 @@ "node": ">= 0.10" } }, + "node_modules/punycode": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", + "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", + "engines": { + "node": ">=6" + } + }, "node_modules/qs": { "version": "6.13.0", "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz", @@ -2263,6 +3328,19 @@ "node": ">=0.10" } }, + "node_modules/retry": { + "version": "0.13.1", + "resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz", + "integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==", + "engines": { + "node": ">= 4" + } + }, + "node_modules/rrweb-cssom": { + "version": "0.7.1", + "resolved": "https://registry.npmjs.org/rrweb-cssom/-/rrweb-cssom-0.7.1.tgz", + "integrity": "sha512-TrEMa7JGdVm0UThDJSx7ddw5nVm3UJS9o9CCIZ72B1vSyEZoziDqBYP3XIoi/12lKrJR8rE3jeFHMok2F/Mnsg==" + }, "node_modules/safe-buffer": { "version": "5.2.1", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", @@ -2295,6 +3373,59 @@ "truncate-utf8-bytes": "^1.0.0" } }, + "node_modules/sanitize-html": { + "version": "2.13.1", + "resolved": "https://registry.npmjs.org/sanitize-html/-/sanitize-html-2.13.1.tgz", + "integrity": "sha512-ZXtKq89oue4RP7abL9wp/9URJcqQNABB5GGJ2acW1sdO8JTVl92f4ygD7Yc9Ze09VAZhnt2zegeU0tbNsdcLYg==", + "dependencies": { + "deepmerge": "^4.2.2", + "escape-string-regexp": "^4.0.0", + "htmlparser2": "^8.0.0", + "is-plain-object": "^5.0.0", + "parse-srcset": "^1.0.2", + "postcss": "^8.3.11" + } + }, + "node_modules/sanitize-html/node_modules/htmlparser2": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz", + "integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1", + "entities": "^4.4.0" + } + }, + "node_modules/saxes": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz", + "integrity": "sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==", + "dependencies": { + "xmlchars": "^2.2.0" + }, + "engines": { + "node": ">=v12.22.7" + } + }, + "node_modules/semver": { + "version": "7.6.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz", + "integrity": "sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/send": { "version": "0.19.0", "resolved": "https://registry.npmjs.org/send/-/send-0.19.0.tgz", @@ -2396,6 +3527,14 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/source-map-js": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", + "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/space-separated-tokens": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/space-separated-tokens/-/space-separated-tokens-2.0.2.tgz", @@ -2469,6 +3608,27 @@ "url": "https://github.com/sponsors/Borewit" } }, + "node_modules/symbol-tree": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/symbol-tree/-/symbol-tree-3.2.4.tgz", + "integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==" + }, + "node_modules/tldts": { + "version": "6.1.66", + "resolved": "https://registry.npmjs.org/tldts/-/tldts-6.1.66.tgz", + "integrity": "sha512-l3ciXsYFel/jSRfESbyKYud1nOw7WfhrBEF9I3UiarYk/qEaOOwu3qXNECHw4fHGHGTEOuhf/VdKgoDX5M/dhQ==", + "dependencies": { + "tldts-core": "^6.1.66" + }, + "bin": { + "tldts": "bin/cli.js" + } + }, + "node_modules/tldts-core": { + "version": "6.1.66", + "resolved": "https://registry.npmjs.org/tldts-core/-/tldts-core-6.1.66.tgz", + "integrity": "sha512-s07jJruSwndD2X8bVjwioPfqpIc1pDTzszPe9pL1Skbh4bjytL85KNQ3tolqLbCvpQHawIsGfFi9dgerWjqW4g==" + }, "node_modules/toidentifier": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", @@ -2493,6 +3653,28 @@ "url": "https://github.com/sponsors/Borewit" } }, + "node_modules/tough-cookie": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-5.0.0.tgz", + "integrity": "sha512-FRKsF7cz96xIIeMZ82ehjC3xW2E+O2+v11udrDYewUbszngYhsGa8z6YUMMzO9QJZzzyd0nGGXnML/TReX6W8Q==", + "dependencies": { + "tldts": "^6.1.32" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/tr46": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-5.0.0.tgz", + "integrity": "sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==", + "dependencies": { + "punycode": "^2.3.1" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/trim-lines": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/trim-lines/-/trim-lines-3.0.1.tgz", @@ -2539,11 +3721,21 @@ "node": ">= 0.6" } }, + "node_modules/uhyphen": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/uhyphen/-/uhyphen-0.2.0.tgz", + "integrity": "sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA==" + }, "node_modules/underscore": { "version": "1.13.7", "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.13.7.tgz", "integrity": "sha512-GMXzWtsc57XAtguZgaQViUOzs0KTkk8ojr3/xAxXLITqf/3EMwxC0inyETfDFjH/Krbhuep0HNbbjI9i/q3F3g==" }, + "node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==" + }, "node_modules/unified": { "version": "9.2.2", "resolved": "https://registry.npmjs.org/unified/-/unified-9.2.2.tgz", @@ -2730,6 +3922,18 @@ "node": ">= 0.4.0" } }, + "node_modules/uuid": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-10.0.0.tgz", + "integrity": "sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/vary": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", @@ -2764,6 +3968,103 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/w3c-xmlserializer": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-5.0.0.tgz", + "integrity": "sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==", + "dependencies": { + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/web-streams-polyfill": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz", + "integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==", + "engines": { + "node": ">= 8" + } + }, + "node_modules/webidl-conversions": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-7.0.0.tgz", + "integrity": "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==", + "engines": { + "node": ">=12" + } + }, + "node_modules/whatwg-encoding": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", + "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", + "dependencies": { + "iconv-lite": "0.6.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-encoding/node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/whatwg-mimetype": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", + "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-url": { + "version": "14.1.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-14.1.0.tgz", + "integrity": "sha512-jlf/foYIKywAt3x/XWKZ/3rz8OSJPiWktjmk891alJUEjiVxKX9LEO92qH3hv4aJ0mN3MWPvGMCy8jQi95xK4w==", + "dependencies": { + "tr46": "^5.0.0", + "webidl-conversions": "^7.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/ws": { + "version": "8.18.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.0.tgz", + "integrity": "sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, + "node_modules/xml-name-validator": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz", + "integrity": "sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==", + "engines": { + "node": ">=18" + } + }, "node_modules/xmlbuilder": { "version": "10.1.1", "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-10.1.1.tgz", @@ -2772,6 +4073,38 @@ "node": ">=4.0" } }, + "node_modules/xmlchars": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz", + "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==" + }, + "node_modules/yaml": { + "version": "2.6.1", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.6.1.tgz", + "integrity": "sha512-7r0XPzioN/Q9kXBro/XPnA6kznR73DHq+GXh5ON7ZozRO6aMjbmiBuKste2wslTFkC5d1dw0GooOCepZXJ2SAg==", + "bin": { + "yaml": "bin.mjs" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/zod": { + "version": "3.24.1", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.1.tgz", + "integrity": "sha512-muH7gBL9sI1nciMZV67X5fTKKBLtwpZ5VBp1vsOQzj1MhrBZ4wlVCm3gedKZWLp0Oyel8sIGfeiz54Su+OVT+A==", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/zod-to-json-schema": { + "version": "3.23.5", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.5.tgz", + "integrity": "sha512-5wlSS0bXfF/BrL4jPAbz9da5hDlDptdEppYfe+x4eIJ7jioqKG9uUxOwPzqof09u/XeVdrgFu29lZi+8XNDJtA==", + "peerDependencies": { + "zod": "^3.23.3" + } + }, "node_modules/zwitch": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz", diff --git a/apps/converter/package.json b/apps/converter/package.json index 186f32b56..f109e1fcc 100644 --- a/apps/converter/package.json +++ b/apps/converter/package.json @@ -8,20 +8,28 @@ "author": "", "license": "ISC", "scripts": { - "start": "node index.js" + "start": "node index.js", + "dev": "node --watch index.js" }, "dependencies": { + "@extractus/article-extractor": "^8.0.16", "@textlint/markdown-to-ast": "^14.3.0", + "dotenv": "^16.4.7", "express": "^4.21.1", "fs-extra": "^11.2.0", "hast-util-to-html": "^9.0.3", "image-type": "^5.2.0", "jsdom": "^25.0.1", "mammoth": "^1.8.0", + "markdownlint": "^0.37.1", "mdast-util-from-markdown": "^2.0.2", "mdast-util-to-hast": "^13.2.0", "node-fetch": "^3.3.2", + "openai": "^4.76.1", + "pdf-parse": "github:iamh2o/pdf-parse#1.1.3", + "pdf2json": "^3.1.4", "sanitize-filename": "^1.6.3", - "turndown": "^7.2.0" + "turndown": "^7.2.0", + "unist-util-visit": "^5.0.0" } } diff --git a/apps/converter/pdfToMarkdown.js b/apps/converter/pdfToMarkdown.js new file mode 100644 index 000000000..fcab436bb --- /dev/null +++ b/apps/converter/pdfToMarkdown.js @@ -0,0 +1,108 @@ +import crypto from 'crypto'; +import fs from 'fs'; +import path from 'path'; +import pdf from 'pdf-parse'; +import PDFParser from 'pdf2json'; + +export function generateFolderName(filePath) { + const fileContent = fs.readFileSync(filePath); + const hash = crypto.createHash('md5').update(fileContent).digest('hex'); + return hash.substring(0, 12); +} + +export async function pdfToMarkdown(pdfPath) { + try { + // Validate input file exists and is a PDF + if (!fs.existsSync(pdfPath) || !pdfPath.toLowerCase().endsWith('.pdf')) { + throw new Error('Invalid PDF file path'); + } + + // Generate output folder name + const folderName = generateFolderName(pdfPath); + const outputDir = path.join(path.dirname(pdfPath), folderName); + const imagesDir = path.join(outputDir, 'images'); + + // Create output directories + if (!fs.existsSync(outputDir)) { + fs.mkdirSync(outputDir); + } + if (!fs.existsSync(imagesDir)) { + fs.mkdirSync(imagesDir); + } + + // Extract text content from all pages + const dataBuffer = fs.readFileSync(pdfPath); + const data = await pdf(dataBuffer); + // Combine text from all pages + const markdownContent = data.text; + + // Save markdown content + fs.writeFileSync(path.join(outputDir, 'content.md'), markdownContent); + + // Extract images + const pdfParser = new PDFParser(null, 1); // Added parameter to preserve images + + return new Promise((resolve, reject) => { + pdfParser.on('pdfParser_dataReady', (pdfData) => { + try { + // Extract and save images + let imageCount = 0; + + if (pdfData.Pages) { + pdfData.Pages.forEach((page, pageIndex) => { + console.log("🚀 ~ pdfData.Pages.forEach ~ page:1"); + // Handle both Images and Bg (background) images + const images = [...(page.Images || []), ...(page.Bg || [])]; + + images.forEach((image) => { + try { + // Check if image data exists and is valid + if (image.data) { + let imageBuffer; + + // Handle different image data formats + if (Buffer.isBuffer(image.data)) { + imageBuffer = image.data; + } else if (typeof image.data === 'string') { + imageBuffer = Buffer.from(image.data, 'base64'); + } else { + console.warn(`Skipping invalid image data at page ${pageIndex + 1}`); + return; + } + + const imagePath = path.join( + imagesDir, + `image_${pageIndex + 1}_${++imageCount}.png` + ); + + fs.writeFileSync(imagePath, imageBuffer); + } + } catch (imageError) { + console.warn(`Error processing image at page ${pageIndex + 1}:`, imageError); + } + }); + }); + } + + resolve({ + outputDir, + markdownPath: path.join(outputDir, 'content.md'), + imagesDir, + totalPages: pdfData.Pages.length, + totalImages: imageCount + }); + } catch (extractError) { + reject(extractError); + } + }); + + pdfParser.on('pdfParser_dataError', (error) => { + reject(error); + }); + + pdfParser.loadPDF(pdfPath); + }); + } catch (error) { + throw new Error(`PDF conversion failed: ${error.message}`); + } +} diff --git a/packages/drupal/silverback_ai/modules/silverback_ai_import/silverback_ai_import.install b/packages/drupal/silverback_ai/modules/silverback_ai_import/silverback_ai_import.install index f4a635f72..3e3acebf5 100644 --- a/packages/drupal/silverback_ai/modules/silverback_ai_import/silverback_ai_import.install +++ b/packages/drupal/silverback_ai/modules/silverback_ai_import/silverback_ai_import.install @@ -2,5 +2,84 @@ /** * @file - * Install, update and uninstall functions for the silverback_ai_import module. + * Install, update and uninstall functions for the Silverback AI module. */ + +use Drupal\Core\Entity\EntityTypeInterface; + +/** + * Implements hook_schema(). + */ +function silverback_ai_import_schema() { + + $db_schema = \Drupal::database()->schema(); + if ($db_schema->tableExists('silverback_ai_import')) { + $db_schema->dropTable('silverback_ai_import'); + } + + $schema['silverback_ai_import'] = [ + 'description' => 'Import log for the Silverback AI import module.', + 'fields' => [ + 'id' => [ + 'type' => 'serial', + 'not null' => TRUE, + 'description' => 'Primary Key.', + ], + 'uid' => [ + 'description' => 'Foreign key to {users}.uid; uniquely identifies a Drupal user executed the ai fetch action.', + 'type' => 'int', + 'unsigned' => TRUE, + 'not null' => TRUE, + ], + 'timestamp' => [ + 'description' => 'Date/time if the import, as Unix timestamp.', + 'type' => 'int', + 'unsigned' => TRUE, + 'not null' => TRUE, + ], + 'target_entity_type_id' => [ + 'type' => 'varchar_ascii', + 'length' => EntityTypeInterface::ID_MAX_LENGTH, + 'not null' => FALSE, + 'default' => '', + 'description' => 'The ID of the associated entity type.', + ], + 'target_entity_id' => [ + 'type' => 'int', + 'unsigned' => TRUE, + 'not null' => FALSE, + 'description' => 'The ID of the associated entity.', + ], + 'target_entity_revision_id' => [ + 'type' => 'int', + 'unsigned' => TRUE, + 'not null' => FALSE, + 'description' => 'The revision ID of the associated entity.', + ], + 'source' => [ + 'type' => 'text', + 'not null' => TRUE, + 'size' => 'normal', + 'description' => 'The source of the import.', + ], + 'output_folder' => [ + 'type' => 'text', + 'not null' => TRUE, + 'size' => 'small', + 'description' => 'The name of the folder exported.', + ], + 'data' => [ + 'type' => 'blob', + 'not null' => TRUE, + 'size' => 'big', + 'description' => 'The import response *usually a text series of gutenberg formatted blocks.', + ], + 'primary key' => ['id'], + 'indexes' => [ + 'uid' => ['uid'], + 'timestamp' => ['timestamp'], + ], + ]; + + return $schema; +} diff --git a/packages/drupal/silverback_ai/modules/silverback_ai_import/silverback_ai_import.module b/packages/drupal/silverback_ai/modules/silverback_ai_import/silverback_ai_import.module index 0a059ba92..4c92de170 100644 --- a/packages/drupal/silverback_ai/modules/silverback_ai_import/silverback_ai_import.module +++ b/packages/drupal/silverback_ai/modules/silverback_ai_import/silverback_ai_import.module @@ -98,10 +98,14 @@ function _silverback_ai_import_form_submit(array $form, FormStateInterface $form if ($fid && $type == 'docx') { $file = \Drupal::entityTypeManager()->getStorage('file')->load($fid); $ast = $service->getAstFromFilePath($file); - $content->create($ast->content, $entity); + // $content->create($ast->content, $entity); + $flatten = $service->flattenAst($ast->content); + $content->create($flatten, $entity); } elseif (!empty($url_value) && $type == 'url') { $ast = $service->getAstFromUrl($url_value); - $content->create($ast->content, $entity); + // $content->create($ast->content, $entity); + $flatten = $service->flattenAst($ast->content); + $content->create($flatten, $entity); } } diff --git a/packages/drupal/silverback_ai/modules/silverback_ai_import/src/AiImportPluginManagerInterface.php b/packages/drupal/silverback_ai/modules/silverback_ai_import/src/AiImportPluginManagerInterface.php index 8d98b1841..1776daa06 100644 --- a/packages/drupal/silverback_ai/modules/silverback_ai_import/src/AiImportPluginManagerInterface.php +++ b/packages/drupal/silverback_ai/modules/silverback_ai_import/src/AiImportPluginManagerInterface.php @@ -42,12 +42,13 @@ public function matches(array $chunk); public function convert(array $chunk); /** - * Generates a block string by replacing template placeholders with parsed header data. + * Generates a block string by replacing template placeholders. * - * This function takes a chunk of markdown content and a template string. It uses - * the parsed data from the markdown header to replace placeholders in the template - * with actual values corresponding to the markdown header's level, text, and HTML tag. - * Before processing, it ensures that all required keys are available in the parsed data. + * This function takes a chunk of markdown content and a template string. + * It uses the parsed data from the markdown header to replace placeholders + * in the template with actual values corresponding to the markdown header's + * level, text, and HTML tag. Before processing, it ensures that all required + * keys are available in the parsed data. * * @param array $data * The template string containing placeholders for replacement. diff --git a/packages/drupal/silverback_ai/modules/silverback_ai_import/src/ContentImportAiService.php b/packages/drupal/silverback_ai/modules/silverback_ai_import/src/ContentImportAiService.php index ecc7d700c..84bcd1704 100644 --- a/packages/drupal/silverback_ai/modules/silverback_ai_import/src/ContentImportAiService.php +++ b/packages/drupal/silverback_ai/modules/silverback_ai_import/src/ContentImportAiService.php @@ -264,7 +264,7 @@ public function getPlugin(array $chunk) { $definitions = $this->pluginManager->getDefinitions(); // @todo Order by weight. foreach ($definitions as $definition) { - $plugin = $this->pluginManager->createInstance($definition['id']); + $plugin = $this->pluginManager->createInstance($definition['id'], ['chunk' => $chunk]); if ($plugin->matches($chunk)) { $default_plugin = $plugin; break; @@ -273,4 +273,178 @@ public function getPlugin(array $chunk) { return $default_plugin; } + /** + * Flattens a hierarchical AST (Abstract Syntax Tree) into a linear array of nodes. + * + * This function converts a nested AST structure into a flat array where each node + * is assigned a unique ID and maintains a reference to its parent. It processes + * specific node types differently and handles recursive traversal of child nodes. + * + * @param array|null $ast + * The AST structure to flatten. + * @param int|null $parent + * The ID of the parent node (used in recursion) + * + * @return array An array of flattened nodes, where each node contains: + * - type: The capitalized node type + * - id: A unique identifier + * - parent: Reference to the parent node's ID + * - Additional properties specific to each node type + */ + public function flattenAst($ast, $parent = NULL) { + + $ast = json_decode(json_encode($ast), TRUE); + static $flatNodes = []; + static $id; + + if ($ast === NULL) { + return $flatNodes; + } + + foreach ($ast as $chunk) { + if (isset($chunk['type']) + && in_array($chunk['type'], [ + 'Strong', + 'Text', + 'ListItem', + 'Emphasis', + ])) { + continue; + } + + if (isset($chunk['type']) + && $chunk['type'] == 'Link' + && isset($chunk['children']) + && count($chunk['children']) == 1 + && $chunk['children'][0]['type'] !== 'Image' + ) { + continue; + } + + $children = $chunk['children'] ?? []; + // unset($chunk['children']); + // Chunk preprocessing. + $chunk['type'] = ucfirst($chunk['type']); + $chunk['id'] = ++$id; + $chunk['parent'] = $parent; + + $flatNodes[] = $chunk; + // Recursively process children. + foreach ($children as $child) { + $this->flattenAst([$child], $id); + } + } + + return $flatNodes; + } + + /** + * + */ + public function iterateArray(array &$data, int $depth = 0): void { + foreach ($data as &$item) { + // Process item here. + if (isset($item['type'])) { + if ($item['type'] == 'Image') { + $item['gutenberg'] = $this->processChunk($item); + } + } + if (isset($item['children']) && is_array($item['children'])) { + $this->iterateArray($item['children'], $depth + 1); + } + } + + } + + /** + * Extracts various metadata from a given URL by fetching and parsing its HTML content. + * + * This function attempts to retrieve the HTML content of a URL and extract key information + * including title, path, meta tags, and language settings. It includes error handling for + * various failure scenarios. + * + * @param string $url + * The URL to extract data from. + * + * @return array An associative array containing: + * - title: string|null The page title if found + * - path: string The URL path component, defaults to "/" if not found + * - metatags: array Meta tag name-content pairs + * - language: string|null The page language if specified + * - error: string|null Error message if any error occurred, null otherwise + * + * @throws \Exception Caught internally and returned as error in result array + */ + public function extractPageDataFromUrl($url) { + $data = [ + 'title' => NULL, + 'path' => NULL, + 'metatags' => [], + 'language' => NULL, + // Add an error key. + 'error' => NULL, + ]; + + // Validate URL. + if (!filter_var($url, FILTER_VALIDATE_URL)) { + $data['error'] = "Invalid URL"; + return $data; + } + + try { + // Use file_get_contents with a user agent to avoid being blocked by some servers. + $options = [ + 'http' => [ + 'method' => 'GET', + // Example user agent. + 'user_agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', + // Timeout in seconds. + 'timeout' => 10, + ], + ]; + $context = stream_context_create($options); + // Use @ to suppress warnings for invalid URLs or network issues. + $html = @file_get_contents($url, FALSE, $context); + + if ($html === FALSE) { + $error = error_get_last(); + $data['error'] = "Failed to fetch URL: " . ($error ? $error['message'] : "Unknown error"); + return $data; + } + + // Extract Title. + if (preg_match('/
- ', '
- ', $html);
- $html = str_replace('