Skip to content

Commit

Permalink
fix(unzip): try to repair ZIP archives with extra comment length
Browse files Browse the repository at this point in the history
Fixes #121
  • Loading branch information
rdeltour committed Dec 24, 2017
1 parent 5e053fa commit 349ca67
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 22 deletions.
83 changes: 61 additions & 22 deletions packages/epub-utils/src/epub.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,50 @@
'use strict';

const epubParse = require('./epub-parse.js');
const unzip = require('extract-zip');
const extractZip = require('extract-zip');
const tmp = require('tmp');
const fs = require('fs-extra');
const path = require('path');
const winston = require('winston');

tmp.setGracefulCleanup();

/**
 * Extracts a ZIP archive into a freshly created temporary directory.
 *
 * @param {string} path - Location of the ZIP archive to extract.
 * @returns {Promise<string>} Resolves with the path of the temporary directory
 *   that received the extracted content; rejects with the error reported by
 *   `extractZip`.
 */
async function unzip(path) {
  // `unsafeCleanup` allows tmp to delete the directory even when it is not empty.
  const tmpdir = tmp.dirSync({ unsafeCleanup: true });
  return new Promise((resolve, reject) => {
    extractZip(path, { dir: tmpdir.name }, (err) => {
      if (err) {
        // Drop the (possibly partially populated) directory right away instead
        // of keeping it until process exit: a retry allocates a new one.
        try {
          tmpdir.removeCallback();
        } catch (cleanupError) {
          // Best effort only; the extraction failure is the error to report.
          winston.debug(cleanupError);
        }
        reject(err);
        return;
      }
      resolve(tmpdir.name);
    });
  });
}

/**
 * Tries to repair an archive that failed to unzip, then unzips the repaired copy.
 *
 * The only repair attempted is for "invalid comment length" errors: the number
 * reported as `expected` in the error message is cut off the end of a temporary
 * copy of the archive, and that copy is unzipped. The original file is never
 * modified.
 *
 * @param {{path: string}} epub - Object whose `path` points at the archive.
 * @param {Error} error - The error raised by the failed unzip attempt.
 * @returns {Promise<string>} Resolves with the directory holding the extracted
 *   content of the repaired archive.
 * @throws Rethrows the given `error` when it carries no usable message, when the
 *   failure is not a repairable one, or when the second attempt fails as well.
 */
async function retryUnzip(epub, error) {
  // Without a textual message there is nothing to analyse; this also covers
  // null/undefined rejections, which would otherwise crash on property access.
  if (!error || typeof error.message !== 'string') throw error;
  winston.info('Trying to repair the archive and unzip again...');
  try {
    // Detect 'invalid comment length' errors
    const invalidCommentLengthMatch = error.message.match(/invalid comment length\. expected: (\d+)\. found: \d+/);
    // Size of the copy once the reported number of trailing bytes is removed;
    // 0 means "no repair possible".
    const truncatedSize = invalidCommentLengthMatch
      ? fs.statSync(epub.path).size - Number.parseInt(invalidCommentLengthMatch[1], 10)
      : 0;
    if (truncatedSize > 0) {
      const tmpEPUB = tmp.fileSync({ unsafeCleanup: true }).name;
      fs.copySync(epub.path, tmpEPUB);
      fs.truncateSync(tmpEPUB, truncatedSize);
      return await unzip(tmpEPUB);
    }
    winston.error('The ZIP archive couldn’t be repaired.');
  } catch (retryError) {
    // The failure of the second attempt is only logged; callers receive the
    // error of the first attempt, thrown below.
    winston.error('Unzipping failed again');
    winston.debug(retryError);
  }
  throw error;
}

class EPUB {
constructor(epub, cwd = process.cwd()) {
this.path = path.resolve(cwd, epub);
Expand All @@ -22,30 +58,33 @@ class EPUB {
return fs.statSync(this.path).isDirectory();
}

extract() {
return new Promise((resolve, reject) => {
if (this.basedir !== undefined) {
resolve(this);
} else if (this.expanded) {
winston.verbose('EPUB is already unpacked');
this.basedir = this.path;
resolve(this);
} else {
winston.verbose('Extracting EPUB');
const tmpdir = tmp.dirSync({ unsafeCleanup: true }); // remove even when not empty
unzip(this.path, { dir: tmpdir.name }, (err) => {
if (err) {
winston.error('Failed to unzip EPUB (the ZIP archive may be corrupt).');
reject(err);
} else {
this.basedir = tmpdir.name;
resolve(this);
}
});
async extract() {
if (this.basedir !== undefined) {
return this;
} else if (this.expanded) {
winston.verbose('EPUB is already unpacked');
this.basedir = this.path;
return this;
} else {
winston.verbose('Extracting EPUB');
let unzippedDir;
try {
unzippedDir = await unzip(this.path);
} catch (error) {
winston.error('Failed to unzip EPUB (the ZIP archive may be corrupt).');
winston.debug(error);
try {
unzippedDir = await retryUnzip(this, error);
} catch (error) {
throw error;
}
}
});
this.basedir = unzippedDir;
return this;
}
}


parse() {
return new Promise((resolve, reject) => {
if (this.parsed) return resolve(this);
Expand Down
52 changes: 52 additions & 0 deletions tests/__tests__/unzip.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
'use strict';

const fs = require('fs');
const path = require('path');
const tmp = require('tmp');

const runAce = require('../runAceJS');

tmp.setGracefulCleanup();

let outdir;
let tmpdir;
let reportPath;

// Every test gets its own output and scratch directories, plus the path where
// the JSON report is expected to be written.
beforeEach(() => {
  const makeDir = (prefix) => tmp.dirSync({ prefix, unsafeCleanup: true });
  outdir = makeDir('ace_out_');
  tmpdir = makeDir('ace_tmp_');
  reportPath = path.join(outdir.name, 'report.json');
});

// Remove both directories once the test is over.
afterEach(() => {
  [outdir, tmpdir].forEach((dir) => dir.removeCallback());
});


/**
 * Runs Ace on an EPUB and returns the parsed JSON report.
 *
 * Any failure (a rejected run, a missing report, unparsable JSON) is printed
 * with `console.log` and the returned promise then resolves with `undefined`.
 *
 * @param {string} epub - Path of the EPUB handed to `runAce`.
 * @param {object} [options] - Extra options; they override the default
 *   `outdir` and `tmp` locations set up for the current test.
 * @returns {Promise<object|undefined>} The parsed report, or `undefined` on failure.
 */
function ace(epub, options = {}) {
  const defaults = {
    outdir: outdir.name,
    tmp: tmpdir.name,
  };
  const loadReport = () => {
    expect(fs.existsSync(reportPath)).toBeTruthy();
    const json = fs.readFileSync(reportPath, 'utf8');
    return JSON.parse(json);
  };
  const logFailure = (err) => console.log(err);

  return runAce(epub, Object.assign(defaults, options))
    .then(loadReport)
    .catch(logFailure);
}

// A valid archive yields a report containing an EARL result.
test('well-formed EPUB archive is processed', async () => {
  const epubPath = path.join(__dirname, '../data/base-epub-30.epub');
  const result = await ace(epubPath);
  expect(result['earl:result']).toBeDefined();
});

// An archive with an extra comment length is still processed into a report.
test('an EPUB archive with an extra comment length is repaired', async () => {
  const epubPath = path.join(__dirname, '../data/zip-invalid-comment-length.epub');
  const result = await ace(epubPath);
  expect(result['earl:result']).toBeDefined();
});

// A file that is not a ZIP archive produces no report at all.
test('an EPUB archive beyond repair is rejected', async () => {
  const epubPath = path.join(__dirname, '../data/zip-invalid.epub');
  const result = await ace(epubPath);
  expect(result).toBeUndefined();
});
Binary file added tests/data/zip-invalid-comment-length.epub
Binary file not shown.
1 change: 1 addition & 0 deletions tests/data/zip-invalid.epub
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hello

0 comments on commit 349ca67

Please sign in to comment.