Skip to content

Commit

Permalink
fix: URL percent encoding / escaping, Fixes #290
Browse files Browse the repository at this point in the history
  • Loading branch information
danielweck committed Dec 1, 2024
1 parent bd1d9d1 commit d122dfa
Show file tree
Hide file tree
Showing 11 changed files with 76 additions and 37 deletions.
30 changes: 15 additions & 15 deletions packages/ace-axe-runner-electron/src/init.js
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ const streamProtocolHandler = async (

if (LOG_DEBUG) console.log(`${ACE_LOG_PREFIX} streamProtocolHandler req.url: ${req.url}`);
const u = new URL(req.url);

if (LOG_DEBUG) {
Object.keys(req.headers).forEach((header) => {
const val = req.headers[header];
Expand Down Expand Up @@ -211,7 +211,7 @@ function loadUrl(browserWindow) {
clearTimeout(browserWindow.ace__timeout);
}
browserWindow.ace__timeout = undefined;

const options = {}; // { extraHeaders: 'pragma: no-cache\n' };
const uareel = `${rootUrl}${browserWindow.ace__currentUrl}?${HTTP_QUERY_PARAM}=${iHttpReq++}`;
if (LOG_DEBUG_URLS) {
Expand Down Expand Up @@ -267,7 +267,7 @@ function loadUrl(browserWindow) {

if (!browserWindow.ace__TIME_executeJavaScript) {
if (LOG_DEBUG) console.log(`${ACE_LOG_PREFIX} axeRunner ${MILLISECONDS_TIMEOUT_INITIAL}ms timeout [[RELOAD]] (${timeElapsed1[0]} seconds + ${timeElapsed1[1]} nanoseconds) (${timeElapsed2[0]} seconds + ${timeElapsed2[1]} nanoseconds) ${browserWindow.ace__currentUrlOriginal} => ${rootUrl}${browserWindow.ace__currentUrl}`);

browserWindow.ace__TIME_loadURL = process.hrtime();
browserWindow.ace__TIME_executeJavaScript = 0;
try {
Expand All @@ -286,9 +286,9 @@ function loadUrl(browserWindow) {
browserWindow.ace__timeout = setTimeout(timeoutFunc, MILLISECONDS_TIMEOUT_INITIAL);
return;
}

if (LOG_DEBUG) console.log(`${ACE_LOG_PREFIX} axeRunner ${MILLISECONDS_TIMEOUT_INITIAL}ms timeout [[EXTEND]] (${timeElapsed1[0]} seconds + ${timeElapsed1[1]} nanoseconds) (${timeElapsed2[0]} seconds + ${timeElapsed2[1]} nanoseconds) ${browserWindow.ace__currentUrlOriginal} => ${rootUrl}${browserWindow.ace__currentUrl}`);

browserWindow.ace__timeoutExtended = true;
browserWindow.ace__timeout = setTimeout(timeoutFunc, MILLISECONDS_TIMEOUT_EXTENSION);
}
Expand Down Expand Up @@ -588,7 +588,7 @@ function axeRunnerInit(eventEmmitter, CONCURRENT_INSTANCES) {
console.log("######## URL 3");
console.log(uarelObj.pathname);
}
const full = (windowsDrive + decodeURI(uarelObj.pathname));
const full = (windowsDrive + decodeURIComponent(uarelObj.pathname));
if (LOG_DEBUG_URLS) {
console.log("######## URL 4");
console.log(full);
Expand All @@ -603,7 +603,7 @@ function axeRunnerInit(eventEmmitter, CONCURRENT_INSTANCES) {
console.log("######## URL 6");
console.log(httpUrl);
}

if (LOG_DEBUG) console.log(`${ACE_LOG_PREFIX} axeRunner running ... ${basedir} --- ${uarel} => ${httpUrl}`);

function poolPush() {
Expand Down Expand Up @@ -718,7 +718,7 @@ new Promise((resolve, reject) => {
browserWindow.webContents.executeJavaScript(js, true)
.then((ok) => {
const timeElapsed = process.hrtime(browserWindow.ace__TIME_executeJavaScript);

if (LOG_DEBUG) console.log(`${ACE_LOG_PREFIX} axeRunner done. (${timeElapsed[0]} seconds + ${timeElapsed[1]} nanoseconds) ${browserWindow.ace__poolIndex} ${browserWindow.ace__currentUrlOriginal} --- ${browserWindow.ace__currentUrl}`);
// if (LOG_DEBUG && ok.axe.violations.length) console.log(ok.axe.url, JSON.stringify(ok, null, 4));
if (browserWindow.ace__replySent) {
Expand Down Expand Up @@ -870,7 +870,7 @@ function startAxeServer(basedir, scripts, scriptContents) {
console.log(">>>>>>>>>> URL 2");
console.log(ptn);
}
const pn = decodeURI(ptn);
const pn = decodeURIComponent(ptn);
if (LOG_DEBUG_URLS) {
console.log(">>>>>>>>>> URL 3");
console.log(pn);
Expand Down Expand Up @@ -946,7 +946,7 @@ function startAxeServer(basedir, scripts, scriptContents) {
console.log(">>>>>>>>>>- URL 2");
console.log(ptn);
}
const pn = decodeURI(ptn);
const pn = decodeURIComponent(ptn);
if (LOG_DEBUG_URLS) {
console.log(">>>>>>>>>>- URL 3");
console.log(pn);
Expand Down Expand Up @@ -1000,7 +1000,7 @@ function startAxeServer(basedir, scripts, scriptContents) {

// // const url = new URL(`https://fake.org${req.url}`);
// // const pathname = url.pathname;
// const pathname = decodeURI(u.pathname);
// const pathname = decodeURIComponent(u.pathname);

// const filePath = path.join(basedir, pathname);
// if (filePathsExpressStaticNotExist[filePath]) {
Expand Down Expand Up @@ -1170,15 +1170,15 @@ function prepareLaunch(eventEmmitter, CONCURRENT_INSTANCES) {
eventEmmitter.on('AXE_RUNNER_LAUNCH', (event, arg) => {
// const payload = eventEmmitter.ace_notElectronIpcMainRenderer ? event : arg;
const sender = eventEmmitter.ace_notElectronIpcMainRenderer ? eventEmmitter : event.sender;

if (LOG_DEBUG) console.log(`${ACE_LOG_PREFIX} axeRunner AXE_RUNNER_LAUNCH ...`);

axeRunnerInit(eventEmmitter, CONCURRENT_INSTANCES);

if (LOG_DEBUG) console.log(`${ACE_LOG_PREFIX} axeRunner sending launched event ...`);
sender.send("AXE_RUNNER_LAUNCH_", {
ok: true
});
});
}
module.exports = { prepareLaunch };
module.exports = { prepareLaunch };
16 changes: 13 additions & 3 deletions packages/ace-core/src/checker/checker-chromium.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@ const axe2ace = require('@daisy/ace-report-axe');

const { getRawResourcesForCurrentLanguage } = require('../l10n/localize').localizer;


// const encodeURIComponent_RFC3986 = require('@daisy/epub-utils').encodeURIComponent_RFC3986;
/**
 * Percent-encodes a string for use as a single URI component, following
 * RFC 3986 more strictly than the built-in `encodeURIComponent()`.
 *
 * The built-in leaves the characters `!`, `'`, `(`, `)` and `*` unescaped;
 * this wrapper escapes those as well.
 *
 * @param {string} str - Raw (unencoded) text to escape.
 * @returns {string} The percent-encoded text, with uppercase hex digits.
 * @throws {URIError} Propagated from `encodeURIComponent()` when `str`
 *   contains a lone surrogate.
 */
function encodeURIComponent_RFC3986(str) {
    return encodeURIComponent(str).replace(/[!'()*]/g, (c) => {
        // RFC 3986 section 2.1: producers should emit uppercase hex digits in
        // percent-encodings. This also matches the casing encodeURIComponent()
        // itself produces, so the output is consistently cased (e.g. "*" ->
        // "%2A", not "%2a").
        return `%${c.charCodeAt(0).toString(16).toUpperCase()}`;
    });
}

tmp.setGracefulCleanup();

const scripts = [
Expand Down Expand Up @@ -38,7 +47,7 @@ async function checkSingle(spineItem, epub, lang, axeRunner) {
}
let url = spineItem.url;
let ext = path.extname(spineItem.filepath);

// File extensions other than 'xhtml' or 'html' are not properly loaded
// by puppeteer, so we copy the file to a new `.xhtml` temp file.
if (!process.versions['electron'] && // The Electron-based Axe runner handles .xml files just fine
Expand Down Expand Up @@ -132,7 +141,7 @@ async function checkSingle(spineItem, epub, lang, axeRunner) {
console.log("----- ITEMs SRC 1");
console.log(srcItem.src);
}
srcItem.path = path.resolve(path.dirname(spineItem.filepath), decodeURI(srcItem.src.toString()));
srcItem.path = path.resolve(path.dirname(spineItem.filepath), decodeURIComponent(srcItem.src.toString()));
if (LOG_DEBUG_URLS) {
console.log("----- ITEMs SRC 2");
console.log(srcItem.path);
Expand All @@ -150,7 +159,7 @@ async function checkSingle(spineItem, epub, lang, axeRunner) {
console.log("----- ITEM SRC 1");
console.log(item.src);
}
item.path = path.resolve(path.dirname(spineItem.filepath), decodeURI(item.src.toString()));
item.path = path.resolve(path.dirname(spineItem.filepath), decodeURIComponent(item.src.toString()));
if (LOG_DEBUG_URLS) {
console.log("----- ITEM SRC 2");
console.log(item.path);
Expand All @@ -167,6 +176,7 @@ async function checkSingle(spineItem, epub, lang, axeRunner) {
console.log(spineItem.relpath);
console.log(item.cfi);
}
// encodeURIComponent_RFC3986
item.location = `${encodeURI(spineItem.relpath)}#epubcfi(${encodeURI(item.cfi)})`;
if (LOG_DEBUG_URLS) {
console.log("----- CFI 2");
Expand Down
29 changes: 19 additions & 10 deletions packages/epub-utils/src/epub-parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,15 @@ function parseNavDoc(fullpath, epubDir) {
let pageListHrefs = undefined;
if (hasPageList) {
const arr1 = select('descendant::html:a/@href', sPageList[0]);
pageListHrefs = arr1.map((o) => decodeURI(o.nodeValue));
pageListHrefs = arr1.map((o) => decodeURIComponent(o.nodeValue));
// console.log(arr1.length, JSON.stringify(pageListHrefs, null, 4));
}

let tocHrefs = undefined;
const sTOC = select('//html:nav[@epub:type="toc"]/html:ol', doc);
if (sTOC[0]) {
const arr2 = select('descendant::html:a/@href', sTOC[0]);
tocHrefs = arr2.map((o) => decodeURI(o.nodeValue));
tocHrefs = arr2.map((o) => decodeURIComponent(o.nodeValue));
// console.log(arr2.length, JSON.stringify(tocHrefs, null, 4));
}

Expand All @@ -80,7 +80,7 @@ function parseNavDoc(fullpath, epubDir) {
while (a.firstChild) a.parentNode.insertBefore(a.firstChild, a);
a.parentNode.removeChild(a);
}

const tocHTML = new XMLSerializer().serializeToString(sTOC[0]);
// console.log(tocHTML);

Expand Down Expand Up @@ -216,7 +216,7 @@ function parseLinks(doc, select) {
}
}

addLink(rel, decodeURI(link.getAttribute('href')), result);
addLink(rel, decodeURIComponent(link.getAttribute('href')), result);
});
return result;
}
Expand Down Expand Up @@ -274,7 +274,8 @@ EpubParser.prototype.parseData = function(packageDocPath, epubDir) {
if (this.contentDocMediaType === contentType) {

var spineItem = new SpineItem();
spineItem.relpath = decodeURI(manifestItem[0].getAttribute('href'));
spineItem.relpath = decodeURIComponent(manifestItem[0].getAttribute('href'));
// if (manifestItem[0].getAttribute('href').includes("%")) console.log(`${manifestItem[0].getAttribute('href')} ===> ${spineItem.relpath}`);
spineItem.filepath = path.join(path.dirname(packageDocPath), spineItem.relpath);

const o = this.parseContentDocTitleAndIds(spineItem.filepath);
Expand All @@ -298,7 +299,7 @@ EpubParser.prototype.parseData = function(packageDocPath, epubDir) {
const smilManifestItem = select(`/opf:package/opf:manifest/opf:item[@id='${moAttr}']`, doc);
if (smilManifestItem.length > 0) {
spineItem.mediaOverlay = {};
spineItem.mediaOverlay.smilRelPath = decodeURI(smilManifestItem[0].getAttribute('href'));
spineItem.mediaOverlay.smilRelPath = decodeURIComponent(smilManifestItem[0].getAttribute('href'));
spineItem.mediaOverlay.smilFilePath = path.join(path.dirname(packageDocPath), spineItem.mediaOverlay.smilRelPath );
// spineItem.mediaOverlay.smilUrl = fileUrl(spineItem.mediaOverlay.smilFilePath);
spineItem.mediaOverlay.smilRefs = this.parseSmilRefs(spineItem.mediaOverlay.smilFilePath);
Expand All @@ -315,7 +316,7 @@ EpubParser.prototype.parseData = function(packageDocPath, epubDir) {
+ '[contains(concat(" ", normalize-space(@properties), " ")," nav ")]'
+ '/@href', doc);
if (navDocRef.length > 0) {
const navDocPath = decodeURI(navDocRef[0].nodeValue);
const navDocPath = decodeURIComponent(navDocRef[0].nodeValue);
const navDocFullPath = path.join(path.dirname(packageDocPath), navDocPath);
this.navDoc = parseNavDoc(navDocFullPath, epubDir);

Expand Down Expand Up @@ -354,7 +355,7 @@ EpubParser.prototype.parseSmilRefs = function(filepath) {
const content = fs.readFileSync(filepath).toString();
const doc = new DOMParser({errorHandler}).parseFromString(content, 'application/xml');
const select = xpath.useNamespaces({smil: "http://www.w3.org/ns/SMIL", epub: "http://www.idpf.org/2007/ops"});

const arr = select('//smil:text[@src]', doc);
let smilRefs = arr.map((o) => {
let epubType = o.parentNode ? o.parentNode.getAttributeNS('http://www.idpf.org/2007/ops', 'type') : undefined;
Expand Down Expand Up @@ -425,11 +426,19 @@ EpubParser.prototype.calculatePackageDocPath = function(epubDir) {
const rootfiles = select('/ocf:container/ocf:rootfiles/ocf:rootfile[@media-type="application/oebps-package+xml"]/@full-path', doc);
// just grab the first one as we're not handling the case of multiple renditions
if (rootfiles.length > 0) {
return (path.join(epubDir, decodeURI(rootfiles[0].nodeValue)));
return (path.join(epubDir, decodeURIComponent(rootfiles[0].nodeValue)));
}
return '';
}

/**
 * Percent-encodes a string for use as a single URI component, following
 * RFC 3986 more strictly than the built-in `encodeURIComponent()`.
 *
 * The built-in leaves the characters `!`, `'`, `(`, `)` and `*` unescaped;
 * this wrapper escapes those as well.
 *
 * @param {string} str - Raw (unencoded) text to escape.
 * @returns {string} The percent-encoded text, with uppercase hex digits.
 * @throws {URIError} Propagated from `encodeURIComponent()` when `str`
 *   contains a lone surrogate.
 */
function encodeURIComponent_RFC3986(str) {
    return encodeURIComponent(str).replace(/[!'()*]/g, (c) => {
        // RFC 3986 section 2.1: producers should emit uppercase hex digits in
        // percent-encodings. This also matches the casing encodeURIComponent()
        // itself produces, so the output is consistently cased (e.g. "*" ->
        // "%2A", not "%2a").
        return `%${c.charCodeAt(0).toString(16).toUpperCase()}`;
    });
}

module.exports.SpineItem = SpineItem;
module.exports.EpubParser = EpubParser;

module.exports.encodeURIComponent_RFC3986 = encodeURIComponent_RFC3986;
4 changes: 3 additions & 1 deletion packages/epub-utils/src/index.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
'use strict';

const EPUB = require('./epub');
const EPUB = require('./epub.js');
const epubParse = require('./epub-parse.js');

module.exports = {
EPUB,
encodeURIComponent_RFC3986: epubParse.encodeURIComponent_RFC3986,
};
1 change: 1 addition & 0 deletions tests/__tests__/regression.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ test('issue #239: `listitem` is not reported when roles inherit from list roles'

// Regression test for issue #290 (URL percent encoding / escaping): runs Ace
// against the unzipped fixture directory '../data/issue-290' and expects the
// report's 'earl:result' / 'earl:outcome' to equal 'pass'.
test('issue #290 (unzipped): URL percent encoding', async () => {
const report = await ace('../data/issue-290');
// Debugging aid: uncomment to dump the full report when this test fails.
// console.log(JSON.stringify(report, null, 4));
expect(report['earl:result']['earl:outcome']).toEqual('pass');
});

Expand Down
Binary file modified tests/data/issue-290.epub
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
</head>
<body>
<h1>Loomings</h1>
<img src="i%25ma%20g&amp;e%26_%2F00%C3%A81_.jpg" alt="dummy"/>
<img src="./deepest/i%25ma%20g%26e%2526_%252F00%C3%A81_.jpg" alt="dummy"/>
<p>Call me Ishmael.</p>
<span id="p1" epub:type="pagebreak" aria-label="page 1" role="doc-pagebreak"/>
</body>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
<body>
<nav epub:type="toc" role="doc-toc">
<ol>
<li><a href="c%25on%20t&amp;e%26n%2Ft_%C3%A8001_.xhtml">content 001</a></li>
<li><a href="deeper/c%25on%20t%26e%2526n%252Ft_%C3%A8001_.xhtml">content 001</a></li>
</ol>
</nav>
<nav epub:type="page-list" role="doc-pagelist">
<ol>
<li><a href="c%25on%20t&amp;e%26n%2Ft_%C3%A8001_.xhtml#p1">page 1</a></li>
<li><a href="./deeper/c%25on%20t%26e%2526n%252Ft_%C3%A8001_.xhtml#p1">page 1</a></li>
</ol>
</nav>
</body>
Expand Down
20 changes: 16 additions & 4 deletions tests/data/issue-290/E%PU B/pa&c%26kag%2Feè_.opf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<dc:title id="title">Minimal EPUB 3.0</dc:title>
<dc:language>en</dc:language>
<dc:identifier id="uid">NOID</dc:identifier>

<!-- see package@version="2.0" which is used here in the unzipped version, note that the zipped version of this test is 3.0 and activates the conformsTo below to pass the missing @refines for dc:source -->
<dc:source id="pg-src">urn:isbn:9781234567891</dc:source>
<!-- meta property="source-of" refines="#pg-src">pagination</meta -->
Expand All @@ -23,16 +23,28 @@
</metadata>
<manifest>
<!-- decodeURI("n%25a v%20i&g%26a%2Ftio%C3%A8n_.xhtml"): "n%a v i&g%26a%2Ftioèn.xhtml" -->

<!-- encodeURIComponent("n%av i&g%26a%2Ftioèn_.xhtml"): "n%25av%20i%26g%2526a%252Ftio%C3%A8n_.xhtml" -->
<!-- decodeURIComponent("n%25av%20i%26g%2526a%252Ftio%C3%A8n_.xhtml"): "n%av i&g%26a%2Ftioèn_.xhtml" -->

<!-- NOTE that "%26" is ampersand "&", and "%2F" is forward slash "/" -->
<item id="nav" href="n%25av%20i&g%26a%2Ftio%C3%A8n_.xhtml" media-type="application/xhtml+xml" properties="nav"/>
<item id="nav" href="deep/n%25av%20i%26g%2526a%252Ftio%C3%A8n_.xhtml" media-type="application/xhtml+xml" properties="nav"/>

<!-- decodeURI("c%25o n%20t&e%26n%2Ft_%C3%A8001_.xhtml"): "c%o n t&e%26n%2Ft_è001.xhtml" -->

<!-- encodeURIComponent("c%on t&e%26n%2Ft_è001_.xhtml"): "c%25on%20t%26e%2526n%252Ft_%C3%A8001_.xhtml" -->
<!-- decodeURIComponent("c%25on%20t%26e%2526n%252Ft_%C3%A8001_.xhtml"): "c%on t&e%26n%2Ft_è001_.xhtml" -->

<!-- NOTE that "%26" is ampersand "&", and "%2F" is forward slash "/" -->
<item id="content_001" href="c%25on%20t&e%26n%2Ft_%C3%A8001_.xhtml" media-type="application/xhtml+xml"/>
<item id="content_001" href="deep/deeper/c%25on%20t%26e%2526n%252Ft_%C3%A8001_.xhtml" media-type="application/xhtml+xml"/>

<!-- decodeURI("i%25m a%20g&e%26_%2F00%C3%A81_.jpg"): "i%m a g&e%26_%2F00è1.jpg" -->

<!-- encodeURIComponent("i%ma g&e%26_%2F00è1_.jpg"): "i%25ma%20g%26e%2526_%252F00%C3%A81_.jpg" -->
<!-- decodeURIComponent("i%25ma%20g%26e%2526_%252F00%C3%A81_.jpg"): "i%ma g&e%26_%2F00è1_.jpg" -->

<!-- NOTE that "%26" is ampersand "&", and "%2F" is forward slash "/" -->
<item id="image_001" href="i%25ma%20g&e%26_%2F00%C3%A81_.jpg" media-type="image/jpeg"/>
<item id="image_001" href="deep/deeper/deepest/i%25ma%20g%26e%2526_%252F00%C3%A81_.jpg" media-type="image/jpeg"/>
</manifest>
<spine>
<itemref idref="content_001" />
Expand Down
7 changes: 6 additions & 1 deletion tests/data/issue-290/META-INF/container.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<!-- decodeURI("E%25PU%20B/pa&c%26kag%2Fe%C3%A8_.opf"): "E%PU B/pa&c%26kag%2Feè_.opf" -->

<!-- encodeURIComponent("E%PU B/pa&c%26kag%2Feè_.opf"): "E%25PU%20B%2Fpa%26c%2526kag%252Fe%C3%A8_.opf" -->
<!-- decodeURIComponent("E%25PU%20B%2Fpa%26c%2526kag%252Fe%C3%A8_.opf"): "E%PU B/pa&c%26kag%2Feè_.opf" -->

<!-- NOTE that "%26" is ampersand "&", and "%2F" is forward slash "/" -->
<rootfile full-path="E%25PU%20B/pa&c%26kag%2Fe%C3%A8_.opf" media-type="application/oebps-package+xml"/>

<rootfile full-path="E%25PU%20B%2Fpa%26c%2526kag%252Fe%C3%A8_.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>

0 comments on commit d122dfa

Please sign in to comment.