From 349ca67cc41f50947a6e573288949a1ebe1bc4dd Mon Sep 17 00:00:00 2001 From: Romain Deltour Date: Sun, 24 Dec 2017 02:18:42 +0100 Subject: [PATCH] fix(unzip): try to repair ZIP archives with extra comment length Fixes #121 --- packages/epub-utils/src/epub.js | 83 +++++++++++++++------ tests/__tests__/unzip.test.js | 52 +++++++++++++ tests/data/zip-invalid-comment-length.epub | Bin 0 -> 1447 bytes tests/data/zip-invalid.epub | 1 + 4 files changed, 114 insertions(+), 22 deletions(-) create mode 100644 tests/__tests__/unzip.test.js create mode 100644 tests/data/zip-invalid-comment-length.epub create mode 100644 tests/data/zip-invalid.epub diff --git a/packages/epub-utils/src/epub.js b/packages/epub-utils/src/epub.js index 0613ec1e..21592326 100644 --- a/packages/epub-utils/src/epub.js +++ b/packages/epub-utils/src/epub.js @@ -1,7 +1,7 @@ 'use strict'; const epubParse = require('./epub-parse.js'); -const unzip = require('extract-zip'); +const extractZip = require('extract-zip'); const tmp = require('tmp'); const fs = require('fs-extra'); const path = require('path'); @@ -9,6 +9,42 @@ const winston = require('winston'); tmp.setGracefulCleanup(); +async function unzip(path) { + const tmpdir = tmp.dirSync({ unsafeCleanup: true }).name; + return new Promise((resolve, reject) => { + extractZip(path, { dir: tmpdir }, (err) => { + if (err) { + reject(err); + } else { + resolve(tmpdir); + } + }); + }) +} + +async function retryUnzip(epub, error) { + if (error.message === undefined) throw error; + winston.info('Trying to repair the archive and unzip again...'); + try { + // Detect 'invalid comment length' errors + const invalidCommentLengthMatch = error.message.match(/invalid comment length\. expected: (\d+)\. found: (\d)/); + if (invalidCommentLengthMatch) { + const tmpEPUB = tmp.fileSync({ unsafeCleanup: true }).name; + const size = fs.statSync(epub.path).size; + const truncatedSize = size - invalidCommentLengthMatch[1]; + fs.copySync(epub.path, tmpEPUB); + fs.truncateSync(tmpEPUB, truncatedSize); + return await unzip(tmpEPUB); + } else { + winston.error('The ZIP archive couldn’t be repaired.'); + } + } catch (error) { + winston.error('Unzipping failed again'); + winston.debug(error); + } + throw error; +} + class EPUB { constructor(epub, cwd = process.cwd()) { this.path = path.resolve(cwd, epub); @@ -22,30 +58,33 @@ class EPUB { return fs.statSync(this.path).isDirectory(); } - extract() { - return new Promise((resolve, reject) => { - if (this.basedir !== undefined) { - resolve(this); - } else if (this.expanded) { - winston.verbose('EPUB is already unpacked'); - this.basedir = this.path; - resolve(this); - } else { - winston.verbose('Extracting EPUB'); - const tmpdir = tmp.dirSync({ unsafeCleanup: true }); // remove even when not empty - unzip(this.path, { dir: tmpdir.name }, (err) => { - if (err) { - winston.error('Failed to unzip EPUB (the ZIP archive may be corrupt).'); - reject(err); - } else { - this.basedir = tmpdir.name; - resolve(this); - } - }); + async extract() { + if (this.basedir !== undefined) { + return this; + } else if (this.expanded) { + winston.verbose('EPUB is already unpacked'); + this.basedir = this.path; + return this; + } else { + winston.verbose('Extracting EPUB'); + let unzippedDir; + try { + unzippedDir = await unzip(this.path); + } catch (error) { + winston.error('Failed to unzip EPUB (the ZIP archive may be corrupt).'); + winston.debug(error); + try { + unzippedDir = await retryUnzip(this, error); + } catch (error) { + throw error; + } } - }); + this.basedir = unzippedDir; + return this; + } } + parse() { return new Promise((resolve, reject) => { if (this.parsed) return resolve(this); diff --git a/tests/__tests__/unzip.test.js b/tests/__tests__/unzip.test.js new file mode 100644 index 00000000..311620ab --- /dev/null +++ b/tests/__tests__/unzip.test.js @@ -0,0 +1,52 @@ +'use strict'; + +const fs = require('fs'); +const path = require('path'); +const tmp = require('tmp'); + +const runAce = require('../runAceJS'); + +tmp.setGracefulCleanup(); + +let outdir; +let tmpdir; +let reportPath; + +beforeEach(() => { + outdir = tmp.dirSync({ prefix: 'ace_out_', unsafeCleanup: true }); + tmpdir = tmp.dirSync({ prefix: 'ace_tmp_', unsafeCleanup: true }); + reportPath = path.join(outdir.name, 'report.json'); +}); + +afterEach(() => { + outdir.removeCallback(); + tmpdir.removeCallback(); +}); + + +function ace(epub, options = {}) { + return runAce(epub, Object.assign({ + outdir: outdir.name, + tmp: tmpdir.name, + }, options)) + .then(() => { + expect(fs.existsSync(reportPath)).toBeTruthy(); + return JSON.parse(fs.readFileSync(reportPath, 'utf8')); + }) + .catch(err => console.log(err)); +} + +test('well-formed EPUB archive is processed', async () => { + const report = await ace(path.join(__dirname, '../data/base-epub-30.epub')); + expect(report['earl:result']).toBeDefined(); +}); + +test('an EPUB archive with an extra comment length is repaired', async () => { +const report = await ace(path.join(__dirname, '../data/zip-invalid-comment-length.epub')); +expect(report['earl:result']).toBeDefined(); +}); + +test('an EPUB archive beyond repair is rejected', async () => { + const report = await ace(path.join(__dirname, '../data/zip-invalid.epub')); + expect(report).toBeUndefined(); +}); \ No newline at end of file diff --git a/tests/data/zip-invalid-comment-length.epub b/tests/data/zip-invalid-comment-length.epub new file mode 100644 index 0000000000000000000000000000000000000000..ff6ffc80b0d083d7f007a98701cf9ef751cb8df8 GIT binary patch literal 1447 zcmWIWW@h1HU}9ik5ZHRfD?f3yjtGzq!W=-Ho0*$hQdy9iSWu9YnVeXXnV+YhT2Pv# zU6ol70Msc0)X4$XseJuy;vArk3qULe#I6CMPWs9Dc_pcNCGiFZhI$nlCAm4F=X|-E z4Fp&p>|M@b*|7NSA`L@sZ}C>KT`w7xmZ=&1T&Ynq^Qup3=a#w3`uY^>CM6#s(()sQ?u9osfr@i1b)vCE|f9Xr2 z#f|Dv+k@>-6{NNAo>*BFBI2WcQ{AyStaEx{+N5cFo}PNJg^Nx6(yu!IM^C&y1vFO& z%vtolsCWCz2;~_nRbkA_=a#RKsay3bY@UDDX{D1%3argXmj6=bT4{FLl*v-D3&~?j z6$ajKRHiIW1bS*JFwO;F9xF&p&Q45E)ypqP>kYE@KWrdy@2BXr@GCboodc&V5n@td zlXKp^K|y=p7WJIt7Jl;oGjAfHE%tT|? z%cXBGiHx5bW->pCS!IE5{Kd83w+A7Y=VD=RMWAz@eEMT=9jA z<5br&>9zAOE)eUo*|YY&M@Qf$<5xbN6Wan-vQ2Kg{M}-|iq9_1C#PRuu4RgEP{G{4{XR=#YW84hzk*BCyow9r1+UGeGrB-bd7bLY-J7HT;tRy9B6Vzb{f zw>KJ!(d@aq(%aWw?-Jq_I(kN4HzXut^#(z^H*=SM{Ho}5EP&&$XX{_FJgdjflljkm zKKbDI?f<L;%k`^>CB?i(!2k|PIpi`V*o}8 zDBt+HhB)ea`niGgPGV+WYLQ+=Zcc2UBiA7V0oU)gt~1tdxj46FS<`}S@u~*HxmgBt zIeoRNs=K`er~m()c5l6TzugLJ-jm$X5ux4zNBpABi*ql3yf3EtmWk`z7UQaA5BcYv zvy7BrH)}ene)vYP(M`$DBWrhTR6qBucjiw{lh&YBaxO*BnU|LT$qg}Y3vfFV&b|9^ z=4$0#(s3&$-DrHZ@@vGibQNxi&puS~T(g@! literal 0 HcmV?d00001 diff --git a/tests/data/zip-invalid.epub b/tests/data/zip-invalid.epub new file mode 100644 index 00000000..ce013625 --- /dev/null +++ b/tests/data/zip-invalid.epub @@ -0,0 +1 @@ +hello