diff --git a/packages/ace-axe-runner-electron/src/init.js b/packages/ace-axe-runner-electron/src/init.js index 8e618c7f..d8e12124 100644 --- a/packages/ace-axe-runner-electron/src/init.js +++ b/packages/ace-axe-runner-electron/src/init.js @@ -119,7 +119,7 @@ const streamProtocolHandler = async ( if (LOG_DEBUG) console.log(`${ACE_LOG_PREFIX} streamProtocolHandler req.url: ${req.url}`); const u = new URL(req.url); - + if (LOG_DEBUG) { Object.keys(req.headers).forEach((header) => { const val = req.headers[header]; @@ -211,7 +211,7 @@ function loadUrl(browserWindow) { clearTimeout(browserWindow.ace__timeout); } browserWindow.ace__timeout = undefined; - + const options = {}; // { extraHeaders: 'pragma: no-cache\n' }; const uareel = `${rootUrl}${browserWindow.ace__currentUrl}?${HTTP_QUERY_PARAM}=${iHttpReq++}`; if (LOG_DEBUG_URLS) { @@ -267,7 +267,7 @@ function loadUrl(browserWindow) { if (!browserWindow.ace__TIME_executeJavaScript) { if (LOG_DEBUG) console.log(`${ACE_LOG_PREFIX} axeRunner ${MILLISECONDS_TIMEOUT_INITIAL}ms timeout [[RELOAD]] (${timeElapsed1[0]} seconds + ${timeElapsed1[1]} nanoseconds) (${timeElapsed2[0]} seconds + ${timeElapsed2[1]} nanoseconds) ${browserWindow.ace__currentUrlOriginal} => ${rootUrl}${browserWindow.ace__currentUrl}`); - + browserWindow.ace__TIME_loadURL = process.hrtime(); browserWindow.ace__TIME_executeJavaScript = 0; try { @@ -286,9 +286,9 @@ function loadUrl(browserWindow) { browserWindow.ace__timeout = setTimeout(timeoutFunc, MILLISECONDS_TIMEOUT_INITIAL); return; } - + if (LOG_DEBUG) console.log(`${ACE_LOG_PREFIX} axeRunner ${MILLISECONDS_TIMEOUT_INITIAL}ms timeout [[EXTEND]] (${timeElapsed1[0]} seconds + ${timeElapsed1[1]} nanoseconds) (${timeElapsed2[0]} seconds + ${timeElapsed2[1]} nanoseconds) ${browserWindow.ace__currentUrlOriginal} => ${rootUrl}${browserWindow.ace__currentUrl}`); - + browserWindow.ace__timeoutExtended = true; browserWindow.ace__timeout = setTimeout(timeoutFunc, MILLISECONDS_TIMEOUT_EXTENSION); } @@ -588,7 +588,7 @@ function axeRunnerInit(eventEmmitter, CONCURRENT_INSTANCES) { console.log("######## URL 3"); console.log(uarelObj.pathname); } - const full = (windowsDrive + decodeURI(uarelObj.pathname)); + const full = (windowsDrive + decodeURIComponent(uarelObj.pathname)); if (LOG_DEBUG_URLS) { console.log("######## URL 4"); console.log(full); @@ -603,7 +603,7 @@ function axeRunnerInit(eventEmmitter, CONCURRENT_INSTANCES) { console.log("######## URL 6"); console.log(httpUrl); } - + if (LOG_DEBUG) console.log(`${ACE_LOG_PREFIX} axeRunner running ... ${basedir} --- ${uarel} => ${httpUrl}`); function poolPush() { @@ -718,7 +718,7 @@ new Promise((resolve, reject) => { browserWindow.webContents.executeJavaScript(js, true) .then((ok) => { const timeElapsed = process.hrtime(browserWindow.ace__TIME_executeJavaScript); - + if (LOG_DEBUG) console.log(`${ACE_LOG_PREFIX} axeRunner done. (${timeElapsed[0]} seconds + ${timeElapsed[1]} nanoseconds) ${browserWindow.ace__poolIndex} ${browserWindow.ace__currentUrlOriginal} --- ${browserWindow.ace__currentUrl}`); // if (LOG_DEBUG && ok.axe.violations.length) console.log(ok.axe.url, JSON.stringify(ok, null, 4)); if (browserWindow.ace__replySent) { @@ -870,7 +870,7 @@ function startAxeServer(basedir, scripts, scriptContents) { console.log(">>>>>>>>>> URL 2"); console.log(ptn); } - const pn = decodeURI(ptn); + const pn = decodeURIComponent(ptn); if (LOG_DEBUG_URLS) { console.log(">>>>>>>>>> URL 3"); console.log(pn); @@ -946,7 +946,7 @@ function startAxeServer(basedir, scripts, scriptContents) { console.log(">>>>>>>>>>- URL 2"); console.log(ptn); } - const pn = decodeURI(ptn); + const pn = decodeURIComponent(ptn); if (LOG_DEBUG_URLS) { console.log(">>>>>>>>>>- URL 3"); console.log(pn); @@ -1000,7 +1000,7 @@ function startAxeServer(basedir, scripts, scriptContents) { // // const url = new URL(`https://fake.org${req.url}`); // // const pathname = url.pathname; - // const pathname = decodeURI(u.pathname); + // const pathname = decodeURIComponent(u.pathname); // const filePath = path.join(basedir, pathname); // if (filePathsExpressStaticNotExist[filePath]) { @@ -1170,15 +1170,15 @@ function prepareLaunch(eventEmmitter, CONCURRENT_INSTANCES) { eventEmmitter.on('AXE_RUNNER_LAUNCH', (event, arg) => { // const payload = eventEmmitter.ace_notElectronIpcMainRenderer ? event : arg; const sender = eventEmmitter.ace_notElectronIpcMainRenderer ? eventEmmitter : event.sender; - + if (LOG_DEBUG) console.log(`${ACE_LOG_PREFIX} axeRunner AXE_RUNNER_LAUNCH ...`); - + axeRunnerInit(eventEmmitter, CONCURRENT_INSTANCES); - + if (LOG_DEBUG) console.log(`${ACE_LOG_PREFIX} axeRunner sending launched event ...`); sender.send("AXE_RUNNER_LAUNCH_", { ok: true }); }); } -module.exports = { prepareLaunch }; \ No newline at end of file +module.exports = { prepareLaunch }; diff --git a/packages/ace-core/src/checker/checker-chromium.js b/packages/ace-core/src/checker/checker-chromium.js index 224709dc..fbef9e61 100644 --- a/packages/ace-core/src/checker/checker-chromium.js +++ b/packages/ace-core/src/checker/checker-chromium.js @@ -11,6 +11,15 @@ const axe2ace = require('@daisy/ace-report-axe'); const { getRawResourcesForCurrentLanguage } = require('../l10n/localize').localizer; + +// const encodeURIComponent_RFC3986 = require('@daisy/epub-utils').encodeURIComponent_RFC3986; +function encodeURIComponent_RFC3986(str) { + return encodeURIComponent(str) + .replace(/[!'()*]/g, (c) => { + return "%" + c.charCodeAt(0).toString(16); + }); +} + tmp.setGracefulCleanup(); const scripts = [ @@ -38,7 +47,7 @@ async function checkSingle(spineItem, epub, lang, axeRunner) { } let url = spineItem.url; let ext = path.extname(spineItem.filepath); - + // File extensions other than 'xhtml' or 'html' are not propertly loaded // by puppeteer, so we copy the file to a new `.xhtml` temp file. if (!process.versions['electron'] && // The Electron-based Axe runner handles .xml files just fine @@ -132,7 +141,7 @@ async function checkSingle(spineItem, epub, lang, axeRunner) { console.log("----- ITEMs SRC 1"); console.log(srcItem.src); } - srcItem.path = path.resolve(path.dirname(spineItem.filepath), decodeURI(srcItem.src.toString())); + srcItem.path = path.resolve(path.dirname(spineItem.filepath), decodeURIComponent(srcItem.src.toString())); if (LOG_DEBUG_URLS) { console.log("----- ITEMs SRC 2"); console.log(srcItem.path); @@ -150,7 +159,7 @@ async function checkSingle(spineItem, epub, lang, axeRunner) { console.log("----- ITEM SRC 1"); console.log(item.src); } - item.path = path.resolve(path.dirname(spineItem.filepath), decodeURI(item.src.toString())); + item.path = path.resolve(path.dirname(spineItem.filepath), decodeURIComponent(item.src.toString())); if (LOG_DEBUG_URLS) { console.log("----- ITEM SRC 2"); console.log(item.path); @@ -167,6 +176,7 @@ async function checkSingle(spineItem, epub, lang, axeRunner) { console.log(spineItem.relpath); console.log(item.cfi); } + // encodeURIComponent_RFC3986 item.location = `${encodeURI(spineItem.relpath)}#epubcfi(${encodeURI(item.cfi)})`; if (LOG_DEBUG_URLS) { console.log("----- CFI 2"); diff --git a/packages/epub-utils/src/epub-parse.js b/packages/epub-utils/src/epub-parse.js index 287e965c..6823c6a0 100644 --- a/packages/epub-utils/src/epub-parse.js +++ b/packages/epub-utils/src/epub-parse.js @@ -60,15 +60,15 @@ function parseNavDoc(fullpath, epubDir) { let pageListHrefs = undefined; if (hasPageList) { const arr1 = select('descendant::html:a/@href', sPageList[0]); - pageListHrefs = arr1.map((o) => decodeURI(o.nodeValue)); + pageListHrefs = arr1.map((o) => decodeURIComponent(o.nodeValue)); // console.log(arr1.length, JSON.stringify(pageListHrefs, null, 4)); } - + let tocHrefs = undefined; const sTOC = select('//html:nav[@epub:type="toc"]/html:ol', doc); if (sTOC[0]) { const arr2 = select('descendant::html:a/@href', sTOC[0]); - tocHrefs = arr2.map((o) => decodeURI(o.nodeValue)); + tocHrefs = arr2.map((o) => decodeURIComponent(o.nodeValue)); // console.log(arr2.length, JSON.stringify(tocHrefs, null, 4)); } @@ -80,7 +80,7 @@ function parseNavDoc(fullpath, epubDir) { while (a.firstChild) a.parentNode.insertBefore(a.firstChild, a); a.parentNode.removeChild(a); } - + const tocHTML = new XMLSerializer().serializeToString(sTOC[0]); // console.log(tocHTML); @@ -216,7 +216,7 @@ function parseLinks(doc, select) { } } - addLink(rel, decodeURI(link.getAttribute('href')), result); + addLink(rel, decodeURIComponent(link.getAttribute('href')), result); }); return result; } @@ -274,7 +274,8 @@ EpubParser.prototype.parseData = function(packageDocPath, epubDir) { if (this.contentDocMediaType === contentType) { var spineItem = new SpineItem(); - spineItem.relpath = decodeURI(manifestItem[0].getAttribute('href')); + spineItem.relpath = decodeURIComponent(manifestItem[0].getAttribute('href')); + // if (manifestItem[0].getAttribute('href').includes("%")) console.log(`${manifestItem[0].getAttribute('href')} ===> ${spineItem.relpath}`); spineItem.filepath = path.join(path.dirname(packageDocPath), spineItem.relpath); const o = this.parseContentDocTitleAndIds(spineItem.filepath); @@ -298,7 +299,7 @@ EpubParser.prototype.parseData = function(packageDocPath, epubDir) { const smilManifestItem = select(`/opf:package/opf:manifest/opf:item[@id='${moAttr}']`, doc); if (smilManifestItem.length > 0) { spineItem.mediaOverlay = {}; - spineItem.mediaOverlay.smilRelPath = decodeURI(smilManifestItem[0].getAttribute('href')); + spineItem.mediaOverlay.smilRelPath = decodeURIComponent(smilManifestItem[0].getAttribute('href')); spineItem.mediaOverlay.smilFilePath = path.join(path.dirname(packageDocPath), spineItem.mediaOverlay.smilRelPath ); // spineItem.mediaOverlay.smilUrl = fileUrl(spineItem.mediaOverlay.smilFilePath); spineItem.mediaOverlay.smilRefs = this.parseSmilRefs(spineItem.mediaOverlay.smilFilePath); @@ -315,7 +316,7 @@ EpubParser.prototype.parseData = function(packageDocPath, epubDir) { + '[contains(concat(" ", normalize-space(@properties), " ")," nav ")]' + '/@href', doc); if (navDocRef.length > 0) { - const navDocPath = decodeURI(navDocRef[0].nodeValue); + const navDocPath = decodeURIComponent(navDocRef[0].nodeValue); const navDocFullPath = path.join(path.dirname(packageDocPath), navDocPath); this.navDoc = parseNavDoc(navDocFullPath, epubDir); @@ -354,7 +355,7 @@ EpubParser.prototype.parseSmilRefs = function(filepath) { const content = fs.readFileSync(filepath).toString(); const doc = new DOMParser({errorHandler}).parseFromString(content, 'application/xml'); const select = xpath.useNamespaces({smil: "http://www.w3.org/ns/SMIL", epub: "http://www.idpf.org/2007/ops"}); - + const arr = select('//smil:text[@src]', doc); let smilRefs = arr.map((o) => { let epubType = o.parentNode ? o.parentNode.getAttributeNS('http://www.idpf.org/2007/ops', 'type') : undefined; @@ -425,11 +426,19 @@ EpubParser.prototype.calculatePackageDocPath = function(epubDir) { const rootfiles = select('/ocf:container/ocf:rootfiles/ocf:rootfile[@media-type="application/oebps-package+xml"]/@full-path', doc); // just grab the first one as we're not handling the case of multiple renditions if (rootfiles.length > 0) { - return (path.join(epubDir, decodeURI(rootfiles[0].nodeValue))); + return (path.join(epubDir, decodeURIComponent(rootfiles[0].nodeValue))); } return ''; } +function encodeURIComponent_RFC3986(str) { + return encodeURIComponent(str) + .replace(/[!'()*]/g, (c) => { + return "%" + c.charCodeAt(0).toString(16); + }); +} module.exports.SpineItem = SpineItem; module.exports.EpubParser = EpubParser; + +module.exports.encodeURIComponent_RFC3986 = encodeURIComponent_RFC3986; diff --git a/packages/epub-utils/src/index.js b/packages/epub-utils/src/index.js index c8df7df2..6d043dde 100644 --- a/packages/epub-utils/src/index.js +++ b/packages/epub-utils/src/index.js @@ -1,7 +1,9 @@ 'use strict'; -const EPUB = require('./epub'); +const EPUB = require('./epub.js'); +const epubParse = require('./epub-parse.js'); module.exports = { EPUB, + encodeURIComponent_RFC3986: epubParse.encodeURIComponent_RFC3986, }; diff --git a/tests/__tests__/regression.test.js b/tests/__tests__/regression.test.js index 9ef93aec..4f93a8ba 100644 --- a/tests/__tests__/regression.test.js +++ b/tests/__tests__/regression.test.js @@ -118,6 +118,7 @@ test('issue #239: `listitem` is not reported when roles inherit from list roles' test('issue #290 (unzipped): URL percent encoding', async () => { const report = await ace('../data/issue-290'); + // console.log(JSON.stringify(report, null, 4)); expect(report['earl:result']['earl:outcome']).toEqual('pass'); }); diff --git a/tests/data/issue-290.epub b/tests/data/issue-290.epub index cc662570..0244358f 100644 Binary files a/tests/data/issue-290.epub and b/tests/data/issue-290.epub differ diff --git "a/tests/data/issue-290/E%PU B/c%on t&e%26n%2Ft_\303\250001_.xhtml" "b/tests/data/issue-290/E%PU B/deep/deeper/c%on t&e%26n%2Ft_\303\250001_.xhtml" similarity index 79% rename from "tests/data/issue-290/E%PU B/c%on t&e%26n%2Ft_\303\250001_.xhtml" rename to "tests/data/issue-290/E%PU B/deep/deeper/c%on t&e%26n%2Ft_\303\250001_.xhtml" index da98b1f8..de4eed50 100644 --- "a/tests/data/issue-290/E%PU B/c%on t&e%26n%2Ft_\303\250001_.xhtml" +++ "b/tests/data/issue-290/E%PU B/deep/deeper/c%on t&e%26n%2Ft_\303\250001_.xhtml" @@ -4,7 +4,7 @@
Call me Ishmael.
diff --git "a/tests/data/issue-290/E%PU B/i%ma g&e%26_%2F00\303\2501_.jpg" "b/tests/data/issue-290/E%PU B/deep/deeper/deepest/i%ma g&e%26_%2F00\303\2501_.jpg" similarity index 100% rename from "tests/data/issue-290/E%PU B/i%ma g&e%26_%2F00\303\2501_.jpg" rename to "tests/data/issue-290/E%PU B/deep/deeper/deepest/i%ma g&e%26_%2F00\303\2501_.jpg" diff --git "a/tests/data/issue-290/E%PU B/n%av i&g%26a%2Ftio\303\250n_.xhtml" "b/tests/data/issue-290/E%PU B/deep/n%av i&g%26a%2Ftio\303\250n_.xhtml" similarity index 62% rename from "tests/data/issue-290/E%PU B/n%av i&g%26a%2Ftio\303\250n_.xhtml" rename to "tests/data/issue-290/E%PU B/deep/n%av i&g%26a%2Ftio\303\250n_.xhtml" index 31040e3c..0d78893a 100644 --- "a/tests/data/issue-290/E%PU B/n%av i&g%26a%2Ftio\303\250n_.xhtml" +++ "b/tests/data/issue-290/E%PU B/deep/n%av i&g%26a%2Ftio\303\250n_.xhtml" @@ -5,12 +5,12 @@ diff --git "a/tests/data/issue-290/E%PU B/pa&c%26kag%2Fe\303\250_.opf" "b/tests/data/issue-290/E%PU B/pa&c%26kag%2Fe\303\250_.opf" index 58c0b152..ff0c077e 100644 --- "a/tests/data/issue-290/E%PU B/pa&c%26kag%2Fe\303\250_.opf" +++ "b/tests/data/issue-290/E%PU B/pa&c%26kag%2Fe\303\250_.opf" @@ -4,7 +4,7 @@