Skip to content

Commit

Permalink
zlib: add support for concatenated members
Browse files Browse the repository at this point in the history
According to the spec gzipped archives can contain more than one compressed member. Previously Node's gzip implementation would only unzip the first member and throw away the rest of the compressed data. Issue nodejs#4306 is an example of this occurring in daily use.
  • Loading branch information
kthelgason committed Mar 14, 2016
1 parent 2848f84 commit 4995640
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 0 deletions.
18 changes: 18 additions & 0 deletions src/node_zlib.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ enum node_zlib_mode {
UNZIP
};

#define GZIP_HEADER_ID1 0x1f
#define GZIP_HEADER_ID2 0x8b
#define GZIP_MIN_HEADER_SIZE 10

void InitZlib(v8::Local<v8::Object> target);

Expand Down Expand Up @@ -254,6 +257,19 @@ class ZCtx : public AsyncWrap {
ctx->err_ = Z_NEED_DICT;
}
}
while (ctx->strm_.avail_in >= GZIP_MIN_HEADER_SIZE &&
ctx->mode_ == GUNZIP) {
// Bytes remain in input buffer. Perhaps this is another compressed
// member in the same archive, or just trailing garbage.
// Check the header to find out.
if (ctx->strm_.next_in[0] != GZIP_HEADER_ID1 ||
ctx->strm_.next_in[1] != GZIP_HEADER_ID2) {
// Not a valid gzip member
break;
}
Reset(ctx);
ctx->err_ = inflate(&ctx->strm_, ctx->flush_);
}
break;
default:
CHECK(0 && "wtf?");
Expand Down Expand Up @@ -524,10 +540,12 @@ class ZCtx : public AsyncWrap {
switch (ctx->mode_) {
case DEFLATE:
case DEFLATERAW:
case GZIP:
ctx->err_ = deflateReset(&ctx->strm_);
break;
case INFLATE:
case INFLATERAW:
case GUNZIP:
ctx->err_ = inflateReset(&ctx->strm_);
break;
default:
Expand Down
18 changes: 18 additions & 0 deletions test/parallel/test-zlib-from-concatenated-gzip.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
'use strict';
// Test unzipping a gzip file that contains multiple concatenated "members"

const common = require('../common');
const assert = require('assert');
const zlib = require('zlib');

const data = Buffer.concat([
zlib.gzipSync('abc'),
zlib.gzipSync('def')
]);

assert.equal(zlib.gunzipSync(data).toString(), 'abcdef');

zlib.gunzip(data, common.mustCall((err, result) => {
assert.ifError(err);
assert.equal(result, 'abcdef', 'result should match original string');
}));
50 changes: 50 additions & 0 deletions test/parallel/test-zlib-from-gzip-with-trailing-garbage.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
'use strict';
// test unzipping a gzip file that has trailing garbage

const common = require('../common');
const assert = require('assert');
const zlib = require('zlib');

// should ignore trailing null-bytes
let data = Buffer.concat([
zlib.gzipSync('abc'),
zlib.gzipSync('def'),
Buffer(10).fill(0)
]);

assert.equal(zlib.gunzipSync(data).toString(), 'abcdef');

zlib.gunzip(data, common.mustCall((err, result) => {
assert.ifError(err);
assert.equal(result, 'abcdef', 'result should match original string');
}));

// if the trailing garbage happens to look like a gzip header, it should
// throw an error.
data = Buffer.concat([
zlib.gzipSync('abc'),
zlib.gzipSync('def'),
Buffer([0x1f, 0x8b, 0xff, 0xff]),
Buffer(10).fill(0)
]);

assert.throws(() => zlib.gunzipSync(data));

zlib.gunzip(data, common.mustCall((err, result) => {
assert(err);
}));

// In this case the trailing junk is too short to be a gzip segment
// So we ignore it and decompression succeeds.
data = Buffer.concat([
zlib.gzipSync('abc'),
zlib.gzipSync('def'),
Buffer([0x1f, 0x8b, 0xff, 0xff])
]);

assert.equal(zlib.gunzipSync(data).toString(), 'abcdef');

zlib.gunzip(data, common.mustCall((err, result) => {
assert.ifError(err);
assert.equal(result, 'abcdef', 'result should match original string');
}));

0 comments on commit 4995640

Please sign in to comment.