Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pdf convert #9199

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .devcontainer/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ services:
- node_modules_slackbot-proxy:/workspace/growi/apps/slackbot-proxy/node_modules
- buildcache_app:/workspace/growi/apps/app/.next
- ../../growi-docker-compose:/workspace/growi-docker-compose:delegated
- page_bulk_export_tmp:/tmp/page-bulk-export

tty: true

Expand Down Expand Up @@ -58,8 +59,19 @@ services:
- /usr/share/elasticsearch/data
- ../../growi-docker-compose/elasticsearch/v8/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml

growi-pdf-converter:
build:
context: ../../growi-pdf-converter
dockerfile: ./Dockerfile
restart: unless-stopped
ports:
- 3004
volumes:
- page_bulk_export_tmp:/tmp/page-bulk-export

volumes:
node_modules:
node_modules_app:
node_modules_slackbot-proxy:
buildcache_app:
page_bulk_export_tmp:
2 changes: 2 additions & 0 deletions apps/app/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -186,10 +186,12 @@
"rehype-sanitize": "^5.0.1",
"rehype-slug": "^5.0.1",
"rehype-toc": "^3.0.2",
"remark": "^13.0.0",
"remark-breaks": "^3.0.2",
"remark-emoji": "^3.0.2",
"remark-frontmatter": "^4.0.1",
"remark-gfm": "^3.0.1",
"remark-html": "^11.0.0",
"remark-math": "^5.1.1",
"remark-toc": "^8.0.1",
"remark-wiki-link": "^1.0.4",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,7 @@ const PageBulkExportSelectModal = (): JSX.Element => {
<button className="btn btn-primary" type="button" onClick={() => startBulkExport(PageBulkExportFormat.md)}>
{t('page_export.markdown')}
</button>
{/* TODO: enable in https://redmine.weseek.co.jp/issues/135772 */}
{/* <button className="btn btn-primary ms-2" type="button" onClick={() => startBulkExport(PageBulkExportFormat.pdf)}>PDF</button> */}
<button className="btn btn-primary ms-2" type="button" onClick={() => startBulkExport(PageBulkExportFormat.pdf)}>PDF</button>
</div>
</ModalBody>
</Modal>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ module.exports = (crowi: Crowi): Router => {
};

try {
await pageBulkExportService?.createAndExecuteOrRestartBulkExportJob(path, req.user, activityParameters, restartJob);
await pageBulkExportService?.createAndExecuteOrRestartBulkExportJob(path, format, req.user, activityParameters, restartJob);
return res.apiv3({}, 204);
}
catch (err) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,21 @@ import path from 'path';
import { Writable } from 'stream';
import { pipeline as pipelinePromise } from 'stream/promises';


import type { IUser } from '@growi/core';
import {
getIdForRef, getIdStringForRef, type IPage, isPopulated, SubscriptionStatusType,
} from '@growi/core';
import { getParentPath, normalizePath } from '@growi/core/dist/utils/path-utils';
import type { Archiver } from 'archiver';
import archiver from 'archiver';
// eslint-disable-next-line no-restricted-imports
import axios from 'axios';
import gc from 'expose-gc/function';
import type { HydratedDocument } from 'mongoose';
import mongoose from 'mongoose';
import remark from 'remark';
import html from 'remark-html';

import type { SupportedActionType } from '~/interfaces/activity';
import { SupportedAction, SupportedTargetModel } from '~/interfaces/activity';
Expand All @@ -23,6 +28,7 @@ import type { IAttachmentDocument } from '~/server/models/attachment';
import { Attachment } from '~/server/models/attachment';
import type { PageModel, PageDocument } from '~/server/models/page';
import Subscription from '~/server/models/subscription';
import { configManager } from '~/server/service/config-manager';
import type { FileUploader } from '~/server/service/file-uploader';
import type { IMultipartUploader } from '~/server/service/file-uploader/multipart-uploader';
import { preNotifyService } from '~/server/service/pre-notify';
Expand Down Expand Up @@ -81,14 +87,15 @@ class PageBulkExportService implements IPageBulkExportService {
/**
* Create a new page bulk export job and execute it
*/
async createAndExecuteOrRestartBulkExportJob(basePagePath: string, currentUser, activityParameters: ActivityParameters, restartJob = false): Promise<void> {
async createAndExecuteOrRestartBulkExportJob(
basePagePath: string, format: PageBulkExportFormat, currentUser, activityParameters: ActivityParameters, restartJob = false,
): Promise<void> {
const basePage = await this.pageModel.findByPathAndViewer(basePagePath, currentUser, null, true);

if (basePage == null) {
throw new Error('Base page not found or not accessible');
}

const format = PageBulkExportFormat.md;
const duplicatePageBulkExportJobInProgress: HydratedDocument<PageBulkExportJobDocument> | null = await PageBulkExportJob.findOne({
user: currentUser,
page: basePage,
Expand Down Expand Up @@ -274,16 +281,31 @@ class PageBulkExportService implements IPageBulkExportService {

const pagesWritable = this.getPageWritable(pageBulkExportJob);

if (pageBulkExportJob.format === PageBulkExportFormat.pdf) {
// start pdf convert
const url = `${configManager.getConfig('crowi', 'app:pageBulkExportPdfConverterUrl')}/pdf/start-pdf-convert`;
await axios.post(url, { jobId: pageBulkExportJob._id.toString() });
}

this.pageBulkExportJobManager.updateJobStream(pageBulkExportJob._id, pageSnapshotsReadable);

return pipelinePromise(pageSnapshotsReadable, pagesWritable);
await pipelinePromise(pageSnapshotsReadable, pagesWritable);

if (pageBulkExportJob.format === PageBulkExportFormat.pdf) {
// notify pdf converter of the completion of html export
const url = `${configManager.getConfig('crowi', 'app:pageBulkExportPdfConverterUrl')}/pdf/html-export-done`;
await axios.patch(url, { jobId: pageBulkExportJob._id.toString() });

await this.waitPdfExportFinish(pageBulkExportJob);
}
}

/**
* Get a Writable that writes the page body temporarily to fs
*/
private getPageWritable(pageBulkExportJob: PageBulkExportJobDocument): Writable {
const outputDir = this.getTmpOutputDir(pageBulkExportJob);
const isHtmlPath = pageBulkExportJob.format === PageBulkExportFormat.pdf;
const outputDir = this.getTmpOutputDir(pageBulkExportJob, isHtmlPath);
return new Writable({
objectMode: true,
write: async(page: PageBulkExportPageSnapshotDocument, encoding, callback) => {
Expand All @@ -292,12 +314,19 @@ class PageBulkExportService implements IPageBulkExportService {

if (revision != null && isPopulated(revision)) {
const markdownBody = revision.body;
const pathNormalized = `${normalizePath(page.path)}.${PageBulkExportFormat.md}`;
const format = pageBulkExportJob.format === PageBulkExportFormat.pdf ? 'html' : pageBulkExportJob.format;
const pathNormalized = `${normalizePath(page.path)}.${format}`;
const fileOutputPath = path.join(outputDir, pathNormalized);
const fileOutputParentPath = getParentPath(fileOutputPath);

await fs.promises.mkdir(fileOutputParentPath, { recursive: true });
await fs.promises.writeFile(fileOutputPath, markdownBody);

if (pageBulkExportJob.format === PageBulkExportFormat.md) {
await fs.promises.writeFile(fileOutputPath, markdownBody);
}
else {
const htmlString = await this.convertMdToHtml(markdownBody);
await fs.promises.writeFile(fileOutputPath, htmlString);
}
pageBulkExportJob.lastExportedPagePath = page.path;
await pageBulkExportJob.save();
}
Expand All @@ -311,6 +340,46 @@ class PageBulkExportService implements IPageBulkExportService {
});
}

/**
 * Render a markdown string to an HTML string.
 * The markdown is processed by remark with the remark-html plugin applied.
 * @param md markdown source text
 * @returns the processed output converted to a string
 */
private async convertMdToHtml(md: string): Promise<string> {
  const processor = remark().use(html);
  const processedFile = await processor.process(md);
  return processedFile.toString();
}

/**
 * Wait until the PDF converter reports that the PDF export of the given job has finished.
 *
 * The converter's `/pdf/job-status` endpoint is polled once per minute.
 * - resolves when the reported jobStatus is 'PDF_EXPORT_DONE'
 * - rejects with an Error when the reported jobStatus is 'FAILED'
 * - rejects with the thrown error when the status request fails
 * - rejects with BulkExportJobExpiredError when the time elapsed since the job's createdAt
 *   exceeds 'app:bulkExportJobExpirationSeconds'
 *
 * In every settled case the polling timer is cleared, so no further requests are made.
 *
 * @param pageBulkExportJob the job to wait for; its createdAt must be set
 * @throws Error when createdAt is not set on the job
 */
private async waitPdfExportFinish(pageBulkExportJob: PageBulkExportJobDocument): Promise<void> {
  const jobCreatedAt = pageBulkExportJob.createdAt;
  if (jobCreatedAt == null) throw new Error('createdAt is not set');

  const exportJobExpirationSeconds = configManager.getConfig('crowi', 'app:bulkExportJobExpirationSeconds');
  return new Promise<void>((resolve, reject) => {
    const interval = setInterval(async() => {
      if (Date.now() - jobCreatedAt.getTime() > exportJobExpirationSeconds * 1000) {
        // stop polling once the job has expired; otherwise the timer would keep firing
        // (and keep sending status requests) after the promise has already been rejected
        clearInterval(interval);
        reject(new BulkExportJobExpiredError());
        return;
      }
      try {
        const url = `${configManager.getConfig('crowi', 'app:pageBulkExportPdfConverterUrl')}/pdf/job-status`;
        const res = await axios.get(url, { params: { jobId: pageBulkExportJob._id.toString() } });

        if (res.data.jobStatus === 'PDF_EXPORT_DONE') {
          clearInterval(interval);
          resolve();
        }
        else if (res.data.jobStatus === 'FAILED') {
          clearInterval(interval);
          reject(new Error('PDF export failed'));
        }
        // any other status: keep polling on the next tick
      }
      catch (err) {
        clearInterval(interval);
        reject(err);
      }
    }, 60 * 1000 * 1);
  });
}

/**
* Execute a pipeline that reads the page files from the temporary fs directory, compresses them, and uploads them to the cloud storage
*/
Expand Down Expand Up @@ -406,8 +475,11 @@ class PageBulkExportService implements IPageBulkExportService {
/**
* Get the output directory on the fs to temporarily store page files before compressing and uploading
*/
private getTmpOutputDir(pageBulkExportJob: PageBulkExportJobDocument): string {
return `${this.tmpOutputRootDir}/${pageBulkExportJob._id}`;
private getTmpOutputDir(pageBulkExportJob: PageBulkExportJobDocument, isHtmlPath = false): string {
  const jobId = pageBulkExportJob._id.toString();
  // when isHtmlPath is set, the job directory is placed under an extra 'html' directory
  const subPathSegments = isHtmlPath ? ['html', jobId] : [jobId];
  return path.join(this.tmpOutputRootDir, ...subPathSegments);
}

async notifyExportResult(
Expand Down Expand Up @@ -442,6 +514,12 @@ class PageBulkExportService implements IPageBulkExportService {
fs.promises.rm(this.getTmpOutputDir(pageBulkExportJob), { recursive: true, force: true }),
];

if (pageBulkExportJob.format === PageBulkExportFormat.pdf) {
promises.push(
fs.promises.rm(this.getTmpOutputDir(pageBulkExportJob, true), { recursive: true, force: true }),
);
}

const fileUploadService: FileUploader = this.crowi.fileUploadService;
if (pageBulkExportJob.uploadKey != null && pageBulkExportJob.uploadId != null) {
promises.push(fileUploadService.abortPreviousMultipartUpload(pageBulkExportJob.uploadKey, pageBulkExportJob.uploadId));
Expand Down
6 changes: 6 additions & 0 deletions apps/app/src/server/service/config-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,12 @@ const ENV_VAR_NAME_TO_CONFIG_INFO = {
type: ValueType.NUMBER,
default: 5,
},
BULK_EXPORT_PDF_CONVERTER_URL: {
ns: 'crowi',
key: 'app:pageBulkExportPdfConverterUrl',
type: ValueType.STRING,
default: 'http://growi-pdf-converter:3004',
},
};


Expand Down
Loading
Loading