From b58e7028f6cc618acaafe8ffd740c7f372ffdaa6 Mon Sep 17 00:00:00 2001 From: Futa Arai Date: Sun, 8 Sep 2024 20:02:41 +0900 Subject: [PATCH 1/2] add pdf convert code --- .devcontainer/docker-compose.yml | 12 + apps/app/package.json | 2 + .../components/PageBulkExportSelectModal.tsx | 3 +- .../server/routes/apiv3/page-bulk-export.ts | 2 +- .../server/service/page-bulk-export/index.ts | 38 ++- yarn.lock | 230 +++++++++++++++++- 6 files changed, 272 insertions(+), 15 deletions(-) diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 123d2837ab2..9bae6dc858b 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -24,6 +24,7 @@ services: - node_modules_slackbot-proxy:/workspace/growi/apps/slackbot-proxy/node_modules - buildcache_app:/workspace/growi/apps/app/.next - ../../growi-docker-compose:/workspace/growi-docker-compose:delegated + - page_bulk_export_tmp:/tmp/page-bulk-export tty: true @@ -58,8 +59,19 @@ services: - /usr/share/elasticsearch/data - ../../growi-docker-compose/elasticsearch/v8/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml + growi-pdf-converter: + build: + context: ../../growi-pdf-converter + dockerfile: ./Dockerfile + restart: unless-stopped + ports: + - 3004 + volumes: + - page_bulk_export_tmp:/tmp/page-bulk-export + volumes: node_modules: node_modules_app: node_modules_slackbot-proxy: buildcache_app: + page_bulk_export_tmp: diff --git a/apps/app/package.json b/apps/app/package.json index 56fe48a18b2..97f9d8a3fde 100644 --- a/apps/app/package.json +++ b/apps/app/package.json @@ -186,10 +186,12 @@ "rehype-sanitize": "^5.0.1", "rehype-slug": "^5.0.1", "rehype-toc": "^3.0.2", + "remark": "^13.0.0", "remark-breaks": "^3.0.2", "remark-emoji": "^3.0.2", "remark-frontmatter": "^4.0.1", "remark-gfm": "^3.0.1", + "remark-html": "^11.0.0", "remark-math": "^5.1.1", "remark-toc": "^8.0.1", "remark-wiki-link": "^1.0.4", diff --git a/apps/app/src/features/page-bulk-export/client/components/PageBulkExportSelectModal.tsx b/apps/app/src/features/page-bulk-export/client/components/PageBulkExportSelectModal.tsx index 7676736e1dc..2bf735a9480 100644 --- a/apps/app/src/features/page-bulk-export/client/components/PageBulkExportSelectModal.tsx +++ b/apps/app/src/features/page-bulk-export/client/components/PageBulkExportSelectModal.tsx @@ -69,8 +69,7 @@ const PageBulkExportSelectModal = (): JSX.Element => { - {/* TODO: enable in https://redmine.weseek.co.jp/issues/135772 */} - {/* */} + diff --git a/apps/app/src/features/page-bulk-export/server/routes/apiv3/page-bulk-export.ts b/apps/app/src/features/page-bulk-export/server/routes/apiv3/page-bulk-export.ts index aac5d13eaa5..6657d92dd82 100644 --- a/apps/app/src/features/page-bulk-export/server/routes/apiv3/page-bulk-export.ts +++ b/apps/app/src/features/page-bulk-export/server/routes/apiv3/page-bulk-export.ts @@ -42,7 +42,7 @@ module.exports = (crowi: Crowi): Router => { }; try { - await pageBulkExportService?.createAndExecuteOrRestartBulkExportJob(path, req.user, activityParameters, restartJob); + await pageBulkExportService?.createAndExecuteOrRestartBulkExportJob(path, format, req.user, activityParameters, restartJob); return res.apiv3({}, 204); } catch (err) { diff --git a/apps/app/src/features/page-bulk-export/server/service/page-bulk-export/index.ts b/apps/app/src/features/page-bulk-export/server/service/page-bulk-export/index.ts index 110fb44af6a..bcf6ddb1967 100644 --- a/apps/app/src/features/page-bulk-export/server/service/page-bulk-export/index.ts +++ b/apps/app/src/features/page-bulk-export/server/service/page-bulk-export/index.ts @@ -11,9 +11,13 @@ import { import { getParentPath, normalizePath } from '@growi/core/dist/utils/path-utils'; import type { Archiver } from 'archiver'; import archiver from 'archiver'; +// eslint-disable-next-line no-restricted-imports +import axios from 'axios'; import gc from 'expose-gc/function'; import type { HydratedDocument } from 'mongoose'; import mongoose from 'mongoose'; +import remark from 'remark'; +import html from 'remark-html'; import type { SupportedActionType } from '~/interfaces/activity'; import { SupportedAction, SupportedTargetModel } from '~/interfaces/activity'; @@ -81,14 +85,15 @@ class PageBulkExportService implements IPageBulkExportService { /** * Create a new page bulk export job and execute it */ - async createAndExecuteOrRestartBulkExportJob(basePagePath: string, currentUser, activityParameters: ActivityParameters, restartJob = false): Promise { + async createAndExecuteOrRestartBulkExportJob( + basePagePath: string, format: PageBulkExportFormat, currentUser, activityParameters: ActivityParameters, restartJob = false, + ): Promise { const basePage = await this.pageModel.findByPathAndViewer(basePagePath, currentUser, null, true); if (basePage == null) { throw new Error('Base page not found or not accessible'); } - const format = PageBulkExportFormat.md; const duplicatePageBulkExportJobInProgress: HydratedDocument | null = await PageBulkExportJob.findOne({ user: currentUser, page: basePage, @@ -292,12 +297,22 @@ class PageBulkExportService implements IPageBulkExportService { if (revision != null && isPopulated(revision)) { const markdownBody = revision.body; - const pathNormalized = `${normalizePath(page.path)}.${PageBulkExportFormat.md}`; - const fileOutputPath = path.join(outputDir, pathNormalized); - const fileOutputParentPath = getParentPath(fileOutputPath); - await fs.promises.mkdir(fileOutputParentPath, { recursive: true }); - await fs.promises.writeFile(fileOutputPath, markdownBody); + if (pageBulkExportJob.format === PageBulkExportFormat.md) { + const pathNormalized = `${normalizePath(page.path)}.${PageBulkExportFormat.md}`; + const fileOutputPath = path.join(outputDir, pathNormalized); + const fileOutputParentPath = getParentPath(fileOutputPath); + + await fs.promises.mkdir(fileOutputParentPath, { recursive: true }); + await fs.promises.writeFile(fileOutputPath, markdownBody); + } + else { + const htmlString = await this.convertMdToHtml(markdownBody); + const pathNormalized = `${normalizePath(page.path)}.${PageBulkExportFormat.pdf}`; + + const url = 'http://growi-pdf-converter:3004/pdf/html-to-pdf'; + await axios.post(url, { htmlString, fileName: pathNormalized, jobId: pageBulkExportJob._id.toString() }); + } pageBulkExportJob.lastExportedPagePath = page.path; await pageBulkExportJob.save(); } @@ -311,6 +326,15 @@ class PageBulkExportService implements IPageBulkExportService { }); } + private async convertMdToHtml(md: string): Promise { + const htmlString = (await remark() + .use(html) + .process(md)) + .toString(); + + return htmlString; + } + /** * Execute a pipeline that reads the page files from the temporal fs directory, compresses them, and uploads to the cloud storage */ diff --git a/yarn.lock b/yarn.lock index b68a86ddeb2..e558b803ce8 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4806,6 +4806,11 @@ resolved "https://registry.yarnpkg.com/@types/unist/-/unist-2.0.3.tgz#9c088679876f374eb5983f150d4787aa6fb32d7e" integrity sha512-FvUupuM3rlRsRtCN+fDudtmytGO6iHJuuRKS1Ss0pG5z8oX0diNEw94UEL7hgDbpN94rgaK5R7sWm6RrSkZuAQ== +"@types/unist@^2.0.2": + version "2.0.11" + resolved "https://registry.yarnpkg.com/@types/unist/-/unist-2.0.11.tgz#11af57b127e32487774841f7a4e54eab166d03c4" + integrity sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA== + "@types/unzip-stream@^0.3.4": version "0.3.4" resolved "https://registry.yarnpkg.com/@types/unzip-stream/-/unzip-stream-0.3.4.tgz#6e762ef8b8fcf902ba7d7999a149a3af84064144" @@ -5827,6 +5832,11 @@ backoff@^2.5.0: dependencies: precond "0.2" +bail@^1.0.0: + version "1.0.5" + resolved "https://registry.yarnpkg.com/bail/-/bail-1.0.5.tgz#b6fa133404a392cbc1f8c4bf63f5953351e7a776" + integrity sha512-xFbRxM1tahm08yHBP16MMjVUAvDaBMD38zsM9EMAUN61omwLmKlOpB/Zku5QkjZ8TZ4vn53pj+t518cH0S03RQ== + bail@^2.0.0: version "2.0.2" resolved "https://registry.yarnpkg.com/bail/-/bail-2.0.2.tgz#d26f5cd8fe5d6f832a31517b9f7c356040ba6d5d" @@ -6336,6 +6346,11 @@ caseless@~0.12.0: resolved "https://registry.yarnpkg.com/caseless/-/caseless-0.12.0.tgz#1b681c21ff84033c826543090689420d187151dc" integrity sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw= +ccount@^1.0.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/ccount/-/ccount-1.1.0.tgz#246687debb6014735131be8abab2d93898f8d043" + integrity sha512-vlNK021QdI7PNeiUh/lKkC/mNHHfV0m/Ad5JoI0TYtlBnJAslM/JIkm/tGC88bkLIwO6OQ5uV6ztS6kVAtCDlg== + ccount@^2.0.0: version "2.0.1" resolved "https://registry.yarnpkg.com/ccount/-/ccount-2.0.1.tgz#17a3bf82302e0870d6da43a01311a8bc02a3ecf5" @@ -6426,6 +6441,11 @@ char-regex@^1.0.2: resolved "https://registry.yarnpkg.com/char-regex/-/char-regex-1.0.2.tgz#d744358226217f981ed58f479b1d6bcc29545dcf" integrity sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw== +character-entities-html4@^1.0.0: + version "1.1.4" + resolved "https://registry.yarnpkg.com/character-entities-html4/-/character-entities-html4-1.1.4.tgz#0e64b0a3753ddbf1fdc044c5fd01d0199a02e125" + integrity sha512-HRcDxZuZqMx3/a+qrzxdBKBPUpxWEq9xw2OPZ3a/174ihfrQKVsFhqtthBInFy1zZ9GgZyFXOatNujm8M+El3g== + character-entities-html4@^2.0.0: version "2.1.0" resolved "https://registry.yarnpkg.com/character-entities-html4/-/character-entities-html4-2.1.0.tgz#1f1adb940c971a4b22ba39ddca6b618dc6e56b2b" @@ -6687,6 +6707,11 @@ codemirror@^6.0.0, codemirror@^6.0.1: "@codemirror/state" "^6.0.0" "@codemirror/view" "^6.0.0" +collapse-white-space@^1.0.0: + version "1.0.6" + resolved "https://registry.yarnpkg.com/collapse-white-space/-/collapse-white-space-1.0.6.tgz#e63629c0016665792060dbbeb79c42239d2c5287" + integrity sha512-jEovNnrhMuqyCcjfEJA56v0Xq8SkIoPKDyaHahwo3POf4qcSXqMYuwNcOTzp74vTsR9Tn08z4MxWqAhcekogkQ== + collect-v8-coverage@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/collect-v8-coverage/-/collect-v8-coverage-1.0.0.tgz#150ee634ac3650b71d9c985eb7f608942334feb1" @@ -7877,6 +7902,13 @@ destroy@1.2.0: resolved "https://registry.yarnpkg.com/destroy/-/destroy-1.2.0.tgz#4803735509ad8be552934c67df614f94e66fa015" integrity sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg== +detab@^2.0.0: + version "2.0.4" + resolved "https://registry.yarnpkg.com/detab/-/detab-2.0.4.tgz#b927892069aff405fbb9a186fe97a44a92a94b43" + integrity sha512-8zdsQA5bIkoRECvCrNKPla84lyoR7DSAyf7p0YgXzBO9PDJx8KntPUay7NS6yp+KdxdVtiE5SpHKtbp2ZQyA9g== + dependencies: + repeat-string "^1.5.4" + detect-indent@^6.0.0: version "6.1.0" resolved "https://registry.yarnpkg.com/detect-indent/-/detect-indent-6.1.0.tgz#592485ebbbf6b3b1ab2be175c8393d04ca0d57e6" @@ -10012,6 +10044,11 @@ hast-util-heading-rank@^2.0.0: dependencies: "@types/hast" "^2.0.0" +hast-util-is-element@^1.0.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/hast-util-is-element/-/hast-util-is-element-1.1.0.tgz#3b3ed5159a2707c6137b48637fbfe068e175a425" + integrity sha512-oUmNua0bFbdrD/ELDSSEadRVtWZOf3iF6Lbv81naqsIV99RnSCieTbWuWCY8BAeEfKJTKl0gRdokv+dELutHGQ== + hast-util-is-element@^2.0.0: version "2.1.2" resolved "https://registry.yarnpkg.com/hast-util-is-element/-/hast-util-is-element-2.1.2.tgz#fc0b0dc7cef3895e839b8d66979d57b0338c68f3" @@ -10049,6 +10086,13 @@ hast-util-raw@^7.2.0: web-namespaces "^2.0.0" zwitch "^2.0.0" +hast-util-sanitize@^2.0.0: + version "2.0.3" + resolved "https://registry.yarnpkg.com/hast-util-sanitize/-/hast-util-sanitize-2.0.3.tgz#3cf4a1f5adb7d3c0b1fbb5dc1b1930fab6574856" + integrity sha512-RILqWHmzU0Anmfw1KEP41LbCsJuJUVM0lQWAbTDk9+0bWqzRFXDaMdqIoRocLlOfR5NfcWyhFfZw/mGsuftwYA== + dependencies: + xtend "^4.0.0" + hast-util-sanitize@^4.0.0, hast-util-sanitize@^4.1.0: version "4.1.0" resolved "https://registry.yarnpkg.com/hast-util-sanitize/-/hast-util-sanitize-4.1.0.tgz#d90f8521f5083547095c5c63a7e03150303e0286" @@ -10077,6 +10121,22 @@ hast-util-select@^5.0.5, hast-util-select@~5.0.1: unist-util-visit "^4.0.0" zwitch "^2.0.0" +hast-util-to-html@^7.0.0: + version "7.1.3" + resolved "https://registry.yarnpkg.com/hast-util-to-html/-/hast-util-to-html-7.1.3.tgz#9f339ca9bea71246e565fc79ff7dbfe98bb50f5e" + integrity sha512-yk2+1p3EJTEE9ZEUkgHsUSVhIpCsL/bvT8E5GzmWc+N1Po5gBw+0F8bo7dpxXR0nu0bQVxVZGX2lBGF21CmeDw== + dependencies: + ccount "^1.0.0" + comma-separated-tokens "^1.0.0" + hast-util-is-element "^1.0.0" + hast-util-whitespace "^1.0.0" + html-void-elements "^1.0.0" + property-information "^5.0.0" + space-separated-tokens "^1.0.0" + stringify-entities "^3.0.1" + unist-util-is "^4.0.0" + xtend "^4.0.0" + hast-util-to-parse5@^7.0.0: version "7.0.0" resolved "https://registry.yarnpkg.com/hast-util-to-parse5/-/hast-util-to-parse5-7.0.0.tgz#a39808e69005d10afeed1866029a1fb137df3f7c" @@ -10105,6 +10165,11 @@ hast-util-to-text@^3.1.0: hast-util-is-element "^2.0.0" unist-util-find-after "^4.0.0" +hast-util-whitespace@^1.0.0: + version "1.0.4" + resolved "https://registry.yarnpkg.com/hast-util-whitespace/-/hast-util-whitespace-1.0.4.tgz#e4fe77c4a9ae1cb2e6c25e02df0043d0164f6e41" + integrity sha512-I5GTdSfhYfAPNztx2xJRQpG8cuDSNt599/7YUn7Gx/WxNMsG+a835k97TDkFgk123cwjfwINaZknkKkphx/f2A== + hast-util-whitespace@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/hast-util-whitespace/-/hast-util-whitespace-2.0.0.tgz#4fc1086467cc1ef5ba20673cb6b03cec3a970f1c" @@ -10217,6 +10282,11 @@ html-tags@^3.3.1: resolved "https://registry.yarnpkg.com/html-tags/-/html-tags-3.3.1.tgz#a04026a18c882e4bba8a01a3d39cfe465d40b5ce" integrity sha512-ztqyC3kLto0e9WbNp0aeP+M3kTt+nbaIveGmUxAtZa+8iFgKLUOD4YKM5j+f3QD89bra7UeumolZHKuOXnTmeQ== +html-void-elements@^1.0.0: + version "1.0.5" + resolved "https://registry.yarnpkg.com/html-void-elements/-/html-void-elements-1.0.5.tgz#ce9159494e86d95e45795b166c2021c2cfca4483" + integrity sha512-uE/TxKuyNIcx44cIWnjr/rfIATDH7ZaOMmstu0CwhFG1Dunhlp4OC6/NMbhiwoq5BpW0ubi303qnEk/PZj614w== + html-void-elements@^2.0.0: version "2.0.1" resolved "https://registry.yarnpkg.com/html-void-elements/-/html-void-elements-2.0.1.tgz#29459b8b05c200b6c5ee98743c41b979d577549f" @@ -10848,6 +10918,11 @@ is-plain-obj@^1.1.0: resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-1.1.0.tgz#71a50c8429dfca773c92a390a4a03b39fcd51d3e" integrity sha512-yvkRyxmFKEOQ4pNXCmJG5AEQNlXJS5LaONXo5/cLdTZdWvsZ1ioJEonLGAosKlMWE8lwUy/bJzMjcw8az73+Fg== +is-plain-obj@^2.0.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-2.1.0.tgz#45e42e37fccf1f40da8e5f76ee21515840c09287" + integrity sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA== + is-plain-obj@^4.0.0: version "4.1.0" resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-4.1.0.tgz#d65025edec3657ce032fd7db63c97883eaed71f0" @@ -12462,6 +12537,13 @@ md5@^2.2.1: crypt "~0.0.1" is-buffer "~1.1.1" +mdast-util-definitions@^2.0.0: + version "2.0.1" + resolved "https://registry.yarnpkg.com/mdast-util-definitions/-/mdast-util-definitions-2.0.1.tgz#2c931d8665a96670639f17f98e32c3afcfee25f3" + integrity sha512-Co+DQ6oZlUzvUR7JCpP249PcexxygiaKk9axJh+eRzHDZJk2julbIdKB4PXHVxdBuLzvJ1Izb+YDpj2deGMOuA== + dependencies: + unist-util-visit "^2.0.0" + mdast-util-definitions@^5.0.0: version "5.1.1" resolved "https://registry.yarnpkg.com/mdast-util-definitions/-/mdast-util-definitions-5.1.1.tgz#2c1d684b28e53f84938bb06317944bee8efa79db" @@ -12480,6 +12562,17 @@ mdast-util-find-and-replace@^2.0.0: unist-util-is "^5.0.0" unist-util-visit-parents "^5.0.0" +mdast-util-from-markdown@^0.8.0: + version "0.8.5" + resolved "https://registry.yarnpkg.com/mdast-util-from-markdown/-/mdast-util-from-markdown-0.8.5.tgz#d1ef2ca42bc377ecb0463a987910dae89bd9a28c" + integrity sha512-2hkTXtYYnr+NubD/g6KGBS/0mFmBcifAsI0yIWRiRo0PjVs6SSOSOdtzbp6kSGnShDN6G5aWZpKQ2lWRy27mWQ== + dependencies: + "@types/mdast" "^3.0.0" + mdast-util-to-string "^2.0.0" + micromark "~2.11.0" + parse-entities "^2.0.0" + unist-util-stringify-position "^2.0.0" + mdast-util-from-markdown@^1.0.0: version "1.2.0" resolved "https://registry.yarnpkg.com/mdast-util-from-markdown/-/mdast-util-from-markdown-1.2.0.tgz#84df2924ccc6c995dec1e2368b2b208ad0a76268" @@ -12587,7 +12680,22 @@ mdast-util-to-hast@^12.1.0: unist-util-position "^4.0.0" unist-util-visit "^4.0.0" -mdast-util-to-markdown@^0.6.5: +mdast-util-to-hast@^8.2.0: + version "8.2.0" + resolved "https://registry.yarnpkg.com/mdast-util-to-hast/-/mdast-util-to-hast-8.2.0.tgz#adf9f824defcd382e53dd7bace4282a45602ac67" + integrity sha512-WjH/KXtqU66XyTJQ7tg7sjvTw1OQcVV0hKdFh3BgHPwZ96fSBCQ/NitEHsN70Mmnggt+5eUUC7pCnK+2qGQnCA== + dependencies: + collapse-white-space "^1.0.0" + detab "^2.0.0" + mdast-util-definitions "^2.0.0" + mdurl "^1.0.0" + trim-lines "^1.0.0" + unist-builder "^2.0.0" + unist-util-generated "^1.0.0" + unist-util-position "^3.0.0" + unist-util-visit "^2.0.0" + +mdast-util-to-markdown@^0.6.0, mdast-util-to-markdown@^0.6.5: version "0.6.5" resolved "https://registry.yarnpkg.com/mdast-util-to-markdown/-/mdast-util-to-markdown-0.6.5.tgz#b33f67ca820d69e6cc527a93d4039249b504bebe" integrity sha512-XeV9sDE7ZlOQvs45C9UKMtfTcctcaj/pGwH8YLbMHoMOXNNCn2LsqVQOqrF1+/NU8lKDAqozme9SCXWyo9oAcQ== @@ -12654,7 +12762,7 @@ mdn-data@2.0.30: resolved "https://registry.yarnpkg.com/mdn-data/-/mdn-data-2.0.30.tgz#ce4df6f80af6cfbe218ecd5c552ba13c4dfa08cc" integrity sha512-GaqWWShW4kv/G9IEucWScBx9G1/vsFZZJUO+tD26M8J8z3Kw5RDQjaoZe03YAClgeS/SWPOcb4nkFBTEi5DUEA== -mdurl@^1.0.1: +mdurl@^1.0.0, mdurl@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/mdurl/-/mdurl-1.0.1.tgz#fe85b2ec75a59037f2adfec100fd6c601761152e" integrity sha512-/sKlQJCBYVY9Ers9hqzKou4H6V5UWc/M59TH2dvkt+84itfnq7uFOMLpOiOS4ujvHP4etln18fmIxA5R5fll0g== @@ -13074,6 +13182,14 @@ micromark@^3.0.0: micromark-util-types "^1.0.1" uvu "^0.5.0" +micromark@~2.11.0: + version "2.11.4" + resolved "https://registry.yarnpkg.com/micromark/-/micromark-2.11.4.tgz#d13436138eea826383e822449c9a5c50ee44665a" + integrity sha512-+WoovN/ppKolQOFIAajxi7Lu9kInbPxFuTBVEavFcL8eAfVstoc5MocPmqBeAdBOJV00uaVjegzH4+MA0DN/uA== + dependencies: + debug "^4.0.0" + parse-entities "^2.0.0" + micromatch@4.0.2: version "4.0.2" resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-4.0.2.tgz#4fcb0999bf9fbc2fcbdd212f6d629b9a56c39259" @@ -15798,6 +15914,16 @@ remark-gfm@^3.0.1: micromark-extension-gfm "^2.0.0" unified "^10.0.0" +remark-html@^11.0.0: + version "11.0.2" + resolved "https://registry.yarnpkg.com/remark-html/-/remark-html-11.0.2.tgz#76f6f7c8981c736f01cb65f8853dbe5c2e546dfa" + integrity sha512-U7qPKZq6Aai+UTpH5YrblLvqvdSUCRA4YmZYRTtbtknm/WUGmNUI0dvThbSuTNSf6TtC8btmbbScWi1wtUIxnw== + dependencies: + hast-util-sanitize "^2.0.0" + hast-util-to-html "^7.0.0" + mdast-util-to-hast "^8.2.0" + xtend "^4.0.1" + remark-math@^5.1.1: version "5.1.1" resolved "https://registry.yarnpkg.com/remark-math/-/remark-math-5.1.1.tgz#459e798d978d4ca032e745af0bac81ddcdf94964" @@ -15817,6 +15943,13 @@ remark-parse@^10.0.0: mdast-util-from-markdown "^1.0.0" unified "^10.0.0" +remark-parse@^9.0.0: + version "9.0.0" + resolved "https://registry.yarnpkg.com/remark-parse/-/remark-parse-9.0.0.tgz#4d20a299665880e4f4af5d90b7c7b8a935853640" + integrity sha512-geKatMwSzEXKHuzBNU1z676sGcDcFoChMK38TgdHJNAYfFtsfHDQG7MoJAjs6sgYMqyLduCYWDIWZIxiPeafEw== + dependencies: + mdast-util-from-markdown "^0.8.0" + remark-rehype@^10.0.0: version "10.1.0" resolved "https://registry.yarnpkg.com/remark-rehype/-/remark-rehype-10.1.0.tgz#32dc99d2034c27ecaf2e0150d22a6dcccd9a6279" @@ -15836,6 +15969,13 @@ remark-stringify@^10.0.0: mdast-util-to-markdown "^1.0.0" unified "^10.0.0" +remark-stringify@^9.0.0: + version "9.0.1" + resolved "https://registry.yarnpkg.com/remark-stringify/-/remark-stringify-9.0.1.tgz#576d06e910548b0a7191a71f27b33f1218862894" + integrity sha512-mWmNg3ZtESvZS8fv5PTvaPckdL4iNlCHTt8/e/8oN08nArHRHjNZMKzA/YW3+p7/lYqIw4nx1XsjCBo/AxNChg== + dependencies: + mdast-util-to-markdown "^0.6.0" + remark-toc@^8.0.1: version "8.0.1" resolved "https://registry.yarnpkg.com/remark-toc/-/remark-toc-8.0.1.tgz#f3e07ea13734f1c531e3d3460e58babe31d17cd7" @@ -15854,6 +15994,15 @@ remark-wiki-link@^1.0.4: mdast-util-wiki-link "^0.0.2" micromark-extension-wiki-link "^0.0.4" +remark@^13.0.0: + version "13.0.0" + resolved "https://registry.yarnpkg.com/remark/-/remark-13.0.0.tgz#d15d9bf71a402f40287ebe36067b66d54868e425" + integrity sha512-HDz1+IKGtOyWN+QgBiAT0kn+2s6ovOxHyPAFGKVE81VSzJ+mq7RwHFledEvB5F1p4iJvOah/LOKdFuzvRnNLCA== + dependencies: + remark-parse "^9.0.0" + remark-stringify "^9.0.0" + unified "^9.1.0" + remark@^14.0.0: version "14.0.2" resolved "https://registry.yarnpkg.com/remark/-/remark-14.0.2.tgz#4a1833f7441a5c29e44b37bb1843fb820797b40f" @@ -15873,7 +16022,7 @@ repeat-element@^1.1.2: resolved "https://registry.yarnpkg.com/repeat-element/-/repeat-element-1.1.4.tgz#be681520847ab58c7568ac75fbfad28ed42d39e9" integrity sha512-LFiNfRcSu7KK3evMyYOuCzv3L10TW7yC1G2/+StMjK8Y6Vqd2MG7r/Qjw4ghtuCOjFvlnms/iMmLqpvW/ES/WQ== -repeat-string@^1.0.0, repeat-string@^1.6.1: +repeat-string@^1.0.0, repeat-string@^1.5.4, repeat-string@^1.6.1: version "1.6.1" resolved "https://registry.yarnpkg.com/repeat-string/-/repeat-string-1.6.1.tgz#8dcae470e1c88abc2d600fff4a776286da75e637" @@ -17087,6 +17236,15 @@ string_decoder@~1.1.1: dependencies: safe-buffer "~5.1.0" +stringify-entities@^3.0.1: + version "3.1.0" + resolved "https://registry.yarnpkg.com/stringify-entities/-/stringify-entities-3.1.0.tgz#b8d3feac256d9ffcc9fa1fefdcf3ca70576ee903" + integrity sha512-3FP+jGMmMV/ffZs86MoghGqAoqXAdxLrJP4GUdrDN1aIScYih5tuIO3eF4To5AJZ79KDZ8Fpdy7QJnK8SsL1Vg== + dependencies: + character-entities-html4 "^1.0.0" + character-entities-legacy "^1.0.0" + xtend "^4.0.0" + stringify-entities@^4.0.0: version "4.0.3" resolved "https://registry.yarnpkg.com/stringify-entities/-/stringify-entities-4.0.3.tgz#cfabd7039d22ad30f3cc435b0ca2c1574fc88ef8" @@ -17802,6 +17960,11 @@ tr46@~0.0.3: resolved "https://registry.yarnpkg.com/traverse/-/traverse-0.3.9.tgz#717b8f220cc0bb7b44e40514c22b2e8bbc70d8b9" integrity sha1-cXuPIgzAu3tE5AUUwisui7xw2Lk= +trim-lines@^1.0.0: + version "1.1.3" + resolved "https://registry.yarnpkg.com/trim-lines/-/trim-lines-1.1.3.tgz#839514be82428fd9e7ec89e35081afe8f6f93115" + integrity sha512-E0ZosSWYK2mkSu+KEtQ9/KqarVjA9HztOSX+9FDdNacRAq29RRV6ZQNgob3iuW8Htar9vAfEa6yyt5qBAHZDBA== + trim-lines@^3.0.0: version "3.0.1" resolved "https://registry.yarnpkg.com/trim-lines/-/trim-lines-3.0.1.tgz#d802e332a07df861c48802c04321017b1bd87338" @@ -17817,6 +17980,11 @@ trim-newlines@^3.0.0: resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-3.0.1.tgz#260a5d962d8b752425b32f3a7db0dcacd176c144" integrity sha512-c1PTsA3tYrIsLGkJkzHF+w9F2EyxfXGo4UyJc4pFL++FMjnq0HJS69T3M7d//gKrFKwy429bouPescbjecU+Zw== +trough@^1.0.0: + version "1.0.5" + resolved "https://registry.yarnpkg.com/trough/-/trough-1.0.5.tgz#b8b639cefad7d0bb2abd37d433ff8293efa5f406" + integrity sha512-rvuRbTarPXmMb79SmzEp8aqXNKcK+y0XaB298IXueQ8I2PsrATcPBCSPyK/dDNa2iWOhKlfNnOjdAOTBU/nkFA== + trough@^2.0.0: version "2.1.0" resolved "https://registry.yarnpkg.com/trough/-/trough-2.1.0.tgz#0f7b511a4fde65a46f18477ab38849b22c554876" @@ -18235,6 +18403,18 @@ unified@^10.0.0, unified@^10.1.2, unified@~10.1.1: trough "^2.0.0" vfile "^5.0.0" +unified@^9.1.0: + version "9.2.2" + resolved "https://registry.yarnpkg.com/unified/-/unified-9.2.2.tgz#67649a1abfc3ab85d2969502902775eb03146975" + integrity sha512-Sg7j110mtefBD+qunSLO1lqOEKdrwBFBrR6Qd8f4uwkhWNlbkaqwHse6e7QvD3AP/MNoJdEDLaf8OxYyoWgorQ== + dependencies: + bail "^1.0.0" + extend "^3.0.0" + is-buffer "^2.0.0" + is-plain-obj "^2.0.0" + trough "^1.0.0" + vfile "^4.0.0" + union-value@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/union-value/-/union-value-1.0.1.tgz#0b6fe7b835aecda61c6ea4d4f02c14221e109847" @@ -18266,6 +18446,11 @@ unique-string@^2.0.0: dependencies: crypto-random-string "^2.0.0" +unist-builder@^2.0.0: + version "2.0.3" + resolved "https://registry.yarnpkg.com/unist-builder/-/unist-builder-2.0.3.tgz#77648711b5d86af0942f334397a33c5e91516436" + integrity sha512-f98yt5pnlMWlzP539tPc4grGMsFaQQlP/vM396b00jngsiINumNmsY8rkXjfoi1c6QaM8nQ3vaGDuoKWbe/1Uw== + unist-util-find-after@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/unist-util-find-after/-/unist-util-find-after-4.0.0.tgz#1101cebf5fed88ae3c6f3fa676e86fd5772a4f32" @@ -18274,6 +18459,11 @@ unist-util-find-after@^4.0.0: "@types/unist" "^2.0.0" unist-util-is "^5.0.0" +unist-util-generated@^1.0.0: + version "1.1.6" + resolved "https://registry.yarnpkg.com/unist-util-generated/-/unist-util-generated-1.1.6.tgz#5ab51f689e2992a472beb1b35f2ce7ff2f324d4b" + integrity sha512-cln2Mm1/CZzN5ttGK7vkoGw+RZ8VcUH6BtGbq98DDtRGquAAOXig1mrBQYelOwMXYS8rK+vZDyyojSjp7JX+Lg== + unist-util-generated@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/unist-util-generated/-/unist-util-generated-2.0.0.tgz#86fafb77eb6ce9bfa6b663c3f5ad4f8e56a60113" @@ -18289,6 +18479,11 @@ unist-util-is@^5.0.0: resolved "https://registry.yarnpkg.com/unist-util-is/-/unist-util-is-5.1.1.tgz#e8aece0b102fa9bc097b0fef8f870c496d4a6236" integrity sha512-F5CZ68eYzuSvJjGhCLPL3cYx45IxkqXSetCcRgUXtbcm50X2L9oOWQlfUfDdAf+6Pd27YDblBfdtmsThXmwpbQ== +unist-util-position@^3.0.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/unist-util-position/-/unist-util-position-3.1.0.tgz#1c42ee6301f8d52f47d14f62bbdb796571fa2d47" + integrity sha512-w+PkwCbYSFw8vpgWD0v7zRCl1FpY3fjDSQ3/N/wNd9Ffa4gPi8+4keqt99N3XW6F99t/mUzp2xAhNmfKWp95QA== + unist-util-position@^4.0.0: version "4.0.3" resolved "https://registry.yarnpkg.com/unist-util-position/-/unist-util-position-4.0.3.tgz#5290547b014f6222dff95c48d5c3c13a88fadd07" @@ -18304,6 +18499,13 @@ unist-util-remove-position@^4.0.0: "@types/unist" "^2.0.0" unist-util-visit "^4.0.0" +unist-util-stringify-position@^2.0.0: + version "2.0.3" + resolved "https://registry.yarnpkg.com/unist-util-stringify-position/-/unist-util-stringify-position-2.0.3.tgz#cce3bfa1cdf85ba7375d1d5b17bdc4cada9bd9da" + integrity sha512-3faScn5I+hy9VleOq/qNbAd6pAx7iH5jYBMS9I1HgQVijz/4mv5Bvw5iw1sC/90CODiKo81G/ps8AJrISn687g== + dependencies: + "@types/unist" "^2.0.2" + unist-util-stringify-position@^3.0.0: version "3.0.2" resolved "https://registry.yarnpkg.com/unist-util-stringify-position/-/unist-util-stringify-position-3.0.2.tgz#5c6aa07c90b1deffd9153be170dce628a869a447" @@ -18335,7 +18537,7 @@ unist-util-visit-parents@^5.0.0, unist-util-visit-parents@^5.1.1: "@types/unist" "^2.0.0" unist-util-is "^5.0.0" -unist-util-visit@^2.0.2: +unist-util-visit@^2.0.0, unist-util-visit@^2.0.2: version "2.0.3" resolved "https://registry.yarnpkg.com/unist-util-visit/-/unist-util-visit-2.0.3.tgz#c3703893146df47203bb8a9795af47d7b971208c" integrity sha512-iJ4/RczbJMkD0712mGktuGpm/U4By4FfDonL7N/9tATGIF4imikjOuagyMY53tnZq3NP6BcmlrHhEKAfGWjh7Q== @@ -18595,6 +18797,14 @@ vfile-location@^4.0.0: "@types/unist" "^2.0.0" vfile "^5.0.0" +vfile-message@^2.0.0: + version "2.0.4" + resolved "https://registry.yarnpkg.com/vfile-message/-/vfile-message-2.0.4.tgz#5b43b88171d409eae58477d13f23dd41d52c371a" + integrity sha512-DjssxRGkMvifUOJre00juHoP9DPWuzjxKuMDrhNbk2TdaYYBNMStsNhEOt3idrtI12VQYM/1+iM0KOzXi4pxwQ== + dependencies: + "@types/unist" "^2.0.0" + unist-util-stringify-position "^2.0.0" + vfile-message@^3.0.0: version "3.1.2" resolved "https://registry.yarnpkg.com/vfile-message/-/vfile-message-3.1.2.tgz#a2908f64d9e557315ec9d7ea3a910f658ac05f7d" @@ -18603,6 +18813,16 @@ vfile-message@^3.0.0: "@types/unist" "^2.0.0" unist-util-stringify-position "^3.0.0" +vfile@^4.0.0: + version "4.2.1" + resolved "https://registry.yarnpkg.com/vfile/-/vfile-4.2.1.tgz#03f1dce28fc625c625bc6514350fbdb00fa9e624" + integrity sha512-O6AE4OskCG5S1emQ/4gl8zK586RqA3srz3nfK/Viy0UPToBc5Trp9BVFb1u0CjsKrAWwnpr4ifM/KBXPWwJbCA== + dependencies: + "@types/unist" "^2.0.0" + is-buffer "^2.0.0" + unist-util-stringify-position "^2.0.0" + vfile-message "^2.0.0" + vfile@^5.0.0, vfile@^5.1.0: version "5.3.4" resolved "https://registry.yarnpkg.com/vfile/-/vfile-5.3.4.tgz#bbb8c96b956693bbf70b2c67fdb5781dff769b93" @@ -19053,7 +19273,7 @@ xss@^1.0.14, xss@^1.0.15: commander "^2.20.3" cssfilter "0.0.10" -xtend@^4.0.0, xtend@^4.0.2, xtend@~4.0.0, xtend@~4.0.1: +xtend@^4.0.0, xtend@^4.0.1, xtend@^4.0.2, xtend@~4.0.0, xtend@~4.0.1: version "4.0.2" resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.2.tgz#bb72779f5fa465186b1f438f674fa347fdb5db54" integrity sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ== From 4822a81e26e405f64d91bf7e7310c6446adc95f1 Mon Sep 17 00:00:00 2001 From: Futa Arai Date: Sun, 29 Sep 2024 13:29:03 +0900 Subject: [PATCH 2/2] pdf convert --- .../server/service/page-bulk-export/index.ts | 80 ++++++++++++++++--- apps/app/src/server/service/config-loader.ts | 6 ++ 2 files changed, 73 insertions(+), 13 deletions(-) diff --git a/apps/app/src/features/page-bulk-export/server/service/page-bulk-export/index.ts b/apps/app/src/features/page-bulk-export/server/service/page-bulk-export/index.ts index bcf6ddb1967..d0de79f8ed8 100644 --- a/apps/app/src/features/page-bulk-export/server/service/page-bulk-export/index.ts +++ b/apps/app/src/features/page-bulk-export/server/service/page-bulk-export/index.ts @@ -4,6 +4,7 @@ import path from 'path'; import { Writable } from 'stream'; import { pipeline as pipelinePromise } from 'stream/promises'; + import type { IUser } from '@growi/core'; import { getIdForRef, getIdStringForRef, type IPage, isPopulated, SubscriptionStatusType, @@ -27,6 +28,7 @@ import type { IAttachmentDocument } from '~/server/models/attachment'; import { Attachment } from '~/server/models/attachment'; import type { PageModel, PageDocument } from '~/server/models/page'; import Subscription from '~/server/models/subscription'; +import { configManager } from '~/server/service/config-manager'; import type { FileUploader } from '~/server/service/file-uploader'; import type { IMultipartUploader } from '~/server/service/file-uploader/multipart-uploader'; import { preNotifyService } from '~/server/service/pre-notify'; @@ -279,16 +281,31 @@ class PageBulkExportService implements IPageBulkExportService { const pagesWritable = this.getPageWritable(pageBulkExportJob); + if (pageBulkExportJob.format === PageBulkExportFormat.pdf) { + // start pdf convert + const url = `${configManager.getConfig('crowi', 'app:pageBulkExportPdfConverterUrl')}/pdf/start-pdf-convert`; + await axios.post(url, { jobId: pageBulkExportJob._id.toString() }); + } + this.pageBulkExportJobManager.updateJobStream(pageBulkExportJob._id, pageSnapshotsReadable); - return pipelinePromise(pageSnapshotsReadable, pagesWritable); + await pipelinePromise(pageSnapshotsReadable, pagesWritable); + + if (pageBulkExportJob.format === PageBulkExportFormat.pdf) { + // notify pdf converter of the completion of html export + const url = `${configManager.getConfig('crowi', 'app:pageBulkExportPdfConverterUrl')}/pdf/html-export-done`; + await axios.patch(url, { jobId: pageBulkExportJob._id.toString() }); + + await this.waitPdfExportFinish(pageBulkExportJob); + } } /** * Get a Writable that writes the page body temporarily to fs */ private getPageWritable(pageBulkExportJob: PageBulkExportJobDocument): Writable { - const outputDir = this.getTmpOutputDir(pageBulkExportJob); + const isHtmlPath = pageBulkExportJob.format === PageBulkExportFormat.pdf; + const outputDir = this.getTmpOutputDir(pageBulkExportJob, isHtmlPath); return new Writable({ objectMode: true, write: async(page: PageBulkExportPageSnapshotDocument, encoding, callback) => { @@ -297,21 +314,18 @@ class PageBulkExportService implements IPageBulkExportService { if (revision != null && isPopulated(revision)) { const markdownBody = revision.body; + const format = pageBulkExportJob.format === PageBulkExportFormat.pdf ? 'html' : pageBulkExportJob.format; + const pathNormalized = `${normalizePath(page.path)}.${format}`; + const fileOutputPath = path.join(outputDir, pathNormalized); + const fileOutputParentPath = getParentPath(fileOutputPath); + await fs.promises.mkdir(fileOutputParentPath, { recursive: true }); if (pageBulkExportJob.format === PageBulkExportFormat.md) { - const pathNormalized = `${normalizePath(page.path)}.${PageBulkExportFormat.md}`; - const fileOutputPath = path.join(outputDir, pathNormalized); - const fileOutputParentPath = getParentPath(fileOutputPath); - - await fs.promises.mkdir(fileOutputParentPath, { recursive: true }); await fs.promises.writeFile(fileOutputPath, markdownBody); } else { const htmlString = await this.convertMdToHtml(markdownBody); - const pathNormalized = `${normalizePath(page.path)}.${PageBulkExportFormat.pdf}`; - - const url = 'http://growi-pdf-converter:3004/pdf/html-to-pdf'; - await axios.post(url, { htmlString, fileName: pathNormalized, jobId: pageBulkExportJob._id.toString() }); + await fs.promises.writeFile(fileOutputPath, htmlString); } pageBulkExportJob.lastExportedPagePath = page.path; await pageBulkExportJob.save(); @@ -335,6 +349,37 @@ class PageBulkExportService implements IPageBulkExportService { return htmlString; } + private async waitPdfExportFinish(pageBulkExportJob: PageBulkExportJobDocument): Promise { + const jobCreatedAt = pageBulkExportJob.createdAt; + if (jobCreatedAt == null) throw new Error('createdAt is not set'); + + const exportJobExpirationSeconds = configManager.getConfig('crowi', 'app:bulkExportJobExpirationSeconds'); + return new Promise((resolve, reject) => { + const interval = setInterval(async() => { + if (Date.now() - jobCreatedAt.getTime() > exportJobExpirationSeconds * 1000) { + reject(new BulkExportJobExpiredError()); + } + try { + const url = `${configManager.getConfig('crowi', 'app:pageBulkExportPdfConverterUrl')}/pdf/job-status`; + const res = await axios.get(url, { params: { jobId: pageBulkExportJob._id.toString() } }); + + if (res.data.jobStatus === 'PDF_EXPORT_DONE') { + clearInterval(interval); + resolve(); + } + else if (res.data.jobStatus === 'FAILED') { + clearInterval(interval); + reject(new Error('PDF export failed')); + } + } + catch (err) { + clearInterval(interval); + reject(err); + } + }, 60 * 1000 * 1); + }); + } + /** * Execute a pipeline that reads the page files from the temporal fs directory, compresses them, and uploads to the cloud storage */ @@ -430,8 +475,11 @@ class PageBulkExportService implements IPageBulkExportService { /** * Get the output directory on the fs to temporarily store page files before compressing and uploading */ - private getTmpOutputDir(pageBulkExportJob: PageBulkExportJobDocument): string { - return `${this.tmpOutputRootDir}/${pageBulkExportJob._id}`; + private getTmpOutputDir(pageBulkExportJob: PageBulkExportJobDocument, isHtmlPath = false): string { + if (isHtmlPath) { + return path.join(this.tmpOutputRootDir, 'html', pageBulkExportJob._id.toString()); + } + return path.join(this.tmpOutputRootDir, pageBulkExportJob._id.toString()); } async notifyExportResult( @@ -466,6 +514,12 @@ class PageBulkExportService implements IPageBulkExportService { fs.promises.rm(this.getTmpOutputDir(pageBulkExportJob), { recursive: true, force: true }), ]; + if (pageBulkExportJob.format === PageBulkExportFormat.pdf) { + promises.push( + fs.promises.rm(this.getTmpOutputDir(pageBulkExportJob, true), { recursive: true, force: true }), + ); + } + const fileUploadService: FileUploader = this.crowi.fileUploadService; if (pageBulkExportJob.uploadKey != null && pageBulkExportJob.uploadId != null) { promises.push(fileUploadService.abortPreviousMultipartUpload(pageBulkExportJob.uploadKey, pageBulkExportJob.uploadId)); diff --git a/apps/app/src/server/service/config-loader.ts b/apps/app/src/server/service/config-loader.ts index 54c8ec635c3..db68046962d 100644 --- a/apps/app/src/server/service/config-loader.ts +++ b/apps/app/src/server/service/config-loader.ts @@ -760,6 +760,12 @@ const ENV_VAR_NAME_TO_CONFIG_INFO = { type: ValueType.NUMBER, default: 5, }, + BULK_EXPORT_PDF_CONVERTER_URL: { + ns: 'crowi', + key: 'app:pageBulkExportPdfConverterUrl', + type: ValueType.STRING, + default: 'http://growi-pdf-converter:3004', + }, };