From 181bea6ec2c74f5ecdd9fee5d6ae8293c2b4f40d Mon Sep 17 00:00:00 2001 From: Romain Deltour Date: Sun, 24 Dec 2017 02:57:56 +0100 Subject: [PATCH] fix(core): don't crash if a content document has an '.xml' extension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Puppeteer won't load documents with extension `.xml` as XHTML. When a Content Document’s extension is not `.xhtml` or `.html`, we first copy the document to a temp `.xhtml` file before opening it in Puppeteer. Fixes #122 --- packages/ace-core/package.json | 1 + .../ace-core/src/checker/checker-chromium.js | 22 ++++++++++++++-- tests/__tests__/regression.test.js | 5 ++++ tests/data/issue-122/EPUB/content_001.xml | 9 +++++++ tests/data/issue-122/EPUB/content_002.ace | 9 +++++++ tests/data/issue-122/EPUB/nav.xhtml | 13 ++++++++++ tests/data/issue-122/EPUB/package.opf | 25 +++++++++++++++++++ tests/data/issue-122/META-INF/container.xml | 6 +++++ tests/data/issue-122/mimetype | 1 + yarn.lock | 4 +++ 10 files changed, 93 insertions(+), 2 deletions(-) create mode 100644 tests/data/issue-122/EPUB/content_001.xml create mode 100644 tests/data/issue-122/EPUB/content_002.ace create mode 100644 tests/data/issue-122/EPUB/nav.xhtml create mode 100644 tests/data/issue-122/EPUB/package.opf create mode 100644 tests/data/issue-122/META-INF/container.xml create mode 100644 tests/data/issue-122/mimetype diff --git a/packages/ace-core/package.json b/packages/ace-core/package.json index 7a869fe8..7dcbc2c8 100644 --- a/packages/ace-core/package.json +++ b/packages/ace-core/package.json @@ -22,6 +22,7 @@ "@daisy/epub-utils": "^0.8.0", "@daisy/puppeteer-utils": "^0.7.0", "axe-core": "~2.6.1", + "file-url": "^2.0.2", "h5o": "^0.11.3", "p-map": "^1.2.0", "puppeteer": "^1.0.0", diff --git a/packages/ace-core/src/checker/checker-chromium.js b/packages/ace-core/src/checker/checker-chromium.js index 417f0e7b..baaa3383 100644 --- a/packages/ace-core/src/checker/checker-chromium.js +++ 
b/packages/ace-core/src/checker/checker-chromium.js @@ -1,15 +1,19 @@ 'use strict'; -const fs = require('fs'); +const fileUrl = require('file-url'); +const fs = require('fs-extra'); const path = require('path'); const pMap = require('p-map'); const puppeteer = require('puppeteer'); const os = require('os'); +const tmp = require('tmp'); const winston = require('winston'); const axe2ace = require('@daisy/ace-report-axe'); const utils = require('@daisy/puppeteer-utils'); +tmp.setGracefulCleanup(); + const scripts = [ path.resolve(require.resolve('axe-core'), '../axe.min.js'), require.resolve('../scripts/vendor/outliner.min.js'), @@ -21,8 +25,22 @@ const scripts = [ async function checkSingle(spineItem, epub, browser) { winston.verbose(`- Processing ${spineItem.relpath}`); try { + let url = spineItem.url; + let ext = path.extname(spineItem.filepath); + + // File extensions other than 'xhtml' or 'html' are not propertly loaded + // by puppeteer, so we copy the file to a new `.xhtml` temp file. + if (ext !== 'xhtml' && ext !== 'html') { + winston.warn(`Copying document with extension '${ext}' to a temporary '.xhtml' file…`); + const tmpdir = tmp.dirSync({ unsafeCleanup: true }).name; + const tmpFile = path.join(tmpdir, `${path.basename(spineItem.filepath, ext)}.xhtml`) + fs.copySync(spineItem.filepath, tmpFile); + url = fileUrl(tmpFile); + winston.debug(`checking copied file at ${url}`) + } + const page = await browser.newPage(); - await page.goto(spineItem.url); + await page.goto(url); await utils.addScripts(scripts, page); const results = await page.evaluate(() => new Promise((resolve, reject) => { diff --git a/tests/__tests__/regression.test.js b/tests/__tests__/regression.test.js index 5e2dbc61..16acf060 100644 --- a/tests/__tests__/regression.test.js +++ b/tests/__tests__/regression.test.js @@ -65,3 +65,8 @@ test('issue #114: Description list item does not have a
parent element', as const report = await ace('../data/issue-114'); expect(report['earl:result']['earl:outcome']).toEqual('pass'); }); + +test('issue #122: Failed to check HTML content with `.xml` extension', async () => { + const report = await ace('../data/issue-122'); + expect(report['earl:result']['earl:outcome']).toEqual('pass'); +}); diff --git a/tests/data/issue-122/EPUB/content_001.xml b/tests/data/issue-122/EPUB/content_001.xml new file mode 100644 index 00000000..c55a9e8b --- /dev/null +++ b/tests/data/issue-122/EPUB/content_001.xml @@ -0,0 +1,9 @@ + + +Minimal EPUB + + +

Loomings

+

Call me Ishmael.

+ + diff --git a/tests/data/issue-122/EPUB/content_002.ace b/tests/data/issue-122/EPUB/content_002.ace new file mode 100644 index 00000000..c55a9e8b --- /dev/null +++ b/tests/data/issue-122/EPUB/content_002.ace @@ -0,0 +1,9 @@ + + +Minimal EPUB + + +

Loomings

+

Call me Ishmael.

+ + diff --git a/tests/data/issue-122/EPUB/nav.xhtml b/tests/data/issue-122/EPUB/nav.xhtml new file mode 100644 index 00000000..d233bae0 --- /dev/null +++ b/tests/data/issue-122/EPUB/nav.xhtml @@ -0,0 +1,13 @@ + + +Minimal Nav + + + + + diff --git a/tests/data/issue-122/EPUB/package.opf b/tests/data/issue-122/EPUB/package.opf new file mode 100644 index 00000000..24947b9c --- /dev/null +++ b/tests/data/issue-122/EPUB/package.opf @@ -0,0 +1,25 @@ + + + + Minimal EPUB 3.0 + en + NOID + 2017-01-01T00:00:01Z + structuralNavigation + everything OK! + noFlashingHazard + noSoundHazard + noMotionSimulationHazard + textual + textual + + + + + + + + + + + diff --git a/tests/data/issue-122/META-INF/container.xml b/tests/data/issue-122/META-INF/container.xml new file mode 100644 index 00000000..2cf00654 --- /dev/null +++ b/tests/data/issue-122/META-INF/container.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/tests/data/issue-122/mimetype b/tests/data/issue-122/mimetype new file mode 100644 index 00000000..57ef03f2 --- /dev/null +++ b/tests/data/issue-122/mimetype @@ -0,0 +1 @@ +application/epub+zip \ No newline at end of file diff --git a/yarn.lock b/yarn.lock index fcb82cc2..94b9de91 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2174,6 +2174,10 @@ file-exists-promise@^1.0.2: dependencies: es6-promise "^3.1.2" +file-url@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/file-url/-/file-url-2.0.2.tgz#e951784d79095127d3713029ab063f40818ca2ae" + filename-regex@^2.0.0: version "2.0.1" resolved "https://registry.yarnpkg.com/filename-regex/-/filename-regex-2.0.1.tgz#c1c4b9bee3e09725ddb106b75c1e301fe2f18b26"