zlib: detect gzip files when using unzip*

Detect whether a gzip file is being passed to `unzip*` by testing the first bytes for the gzip magic bytes, and setting the decompression mode to `GUNZIP` or `INFLATE` according to the result.

This enables gzip-only features like multi-member support to be used together with the `unzip*` autodetection support and thereby makes `gunzip*` and `unzip*` return identical results for gzip input again.

Add a simple test for checking that features specific to `zlib.gunzip`, notably support for multiple members, also work when using `zlib.unzip`.

PR-URL: #5884
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: James M Snell <jasnell@gmail.com>
nodejs · Apr 20, 2016 · 2c23e14 · 2c23e14
1 parent 1fb4052
commit 2c23e14
Show file tree

Hide file tree

Showing 3 changed files with 91 additions and 1 deletion.
diff --git a/src/node_zlib.cc b/src/node_zlib.cc
@@ -69,7 +69,8 @@ class ZCtx : public AsyncWrap {
         write_in_progress_(false),
         pending_close_(false),
         refs_(0),
-        first_member_ended_(false) {
+        first_member_ended_(false),
+        gzip_id_bytes_read_(0) {
     MakeWeak<ZCtx>(this);
   }
 
@@ -226,6 +227,8 @@ class ZCtx : public AsyncWrap {
   static void Process(uv_work_t* work_req) {
     ZCtx *ctx = ContainerOf(&ZCtx::work_req_, work_req);
 
+    const Bytef* next_expected_header_byte = nullptr;
+
     // If the avail_out is left at 0, then it means that it ran out
     // of room.  If there was avail_out left over, then it means
     // that all of the input was consumed.
@@ -236,6 +239,50 @@ class ZCtx : public AsyncWrap {
         ctx->err_ = deflate(&ctx->strm_, ctx->flush_);
         break;
       case UNZIP:
+        if (ctx->strm_.avail_in > 0) {
+          next_expected_header_byte = ctx->strm_.next_in;
+        }
+
+        switch (ctx->gzip_id_bytes_read_) {
+          case 0:
+            if (next_expected_header_byte == nullptr) {
+              break;
+            }
+
+            if (*next_expected_header_byte == GZIP_HEADER_ID1) {
+              ctx->gzip_id_bytes_read_ = 1;
+              next_expected_header_byte++;
+
+              if (ctx->strm_.avail_in == 1) {
+                // The only available byte was already read.
+                break;
+              }
+            } else {
+              ctx->mode_ = INFLATE;
+              break;
+            }
+
+            // fallthrough
+          case 1:
+            if (next_expected_header_byte == nullptr) {
+              break;
+            }
+
+            if (*next_expected_header_byte == GZIP_HEADER_ID2) {
+              ctx->gzip_id_bytes_read_ = 2;
+              ctx->mode_ = GUNZIP;
+            } else {
+              // There is no actual difference between INFLATE and INFLATERAW
+              // (after initialization).
+              ctx->mode_ = INFLATE;
+            }
+
+            break;
+          default:
+            CHECK(0 && "invalid number of gzip magic number bytes read");
+        }
+
+        // fallthrough
       case INFLATE:
       case GUNZIP:
       case INFLATERAW:
@@ -602,6 +649,7 @@ class ZCtx : public AsyncWrap {
   bool pending_close_;
   unsigned int refs_;
   bool first_member_ended_;
+  unsigned int gzip_id_bytes_read_;
 };
 
 

diff --git a/test/parallel/test-zlib-from-concatenated-gzip.js b/test/parallel/test-zlib-from-concatenated-gzip.js
@@ -22,6 +22,20 @@ zlib.gunzip(data, common.mustCall((err, result) => {
   assert.equal(result, 'abcdef', 'result should match original string');
 }));
 
+zlib.unzip(data, common.mustCall((err, result) => {
+  assert.ifError(err);
+  assert.equal(result, 'abcdef', 'result should match original string');
+}));
+
+// Multi-member support does not apply to zlib inflate/deflate.
+zlib.unzip(Buffer.concat([
+  zlib.deflateSync('abc'),
+  zlib.deflateSync('def')
+]), common.mustCall((err, result) => {
+  assert.ifError(err);
+  assert.equal(result, 'abc', 'result should match contents of first "member"');
+}));
+
 // files that have the "right" magic bytes for starting a new gzip member
 // in the middle of themselves, even if they are part of a single
 // regularly compressed member

diff --git a/test/parallel/test-zlib-unzip-one-byte-chunks.js b/test/parallel/test-zlib-unzip-one-byte-chunks.js
@@ -0,0 +1,28 @@
+'use strict';
+const common = require('../common');
+const assert = require('assert');
+const zlib = require('zlib');
+
+const data = Buffer.concat([
+  zlib.gzipSync('abc'),
+  zlib.gzipSync('def')
+]);
+
+const resultBuffers = [];
+
+const unzip = zlib.createUnzip()
+  .on('error', (err) => {
+    assert.ifError(err);
+  })
+  .on('data', (data) => resultBuffers.push(data))
+  .on('finish', common.mustCall(() => {
+    assert.deepStrictEqual(Buffer.concat(resultBuffers).toString(), 'abcdef',
+      'result should match original string');
+  }));
+
+for (let i = 0; i < data.length; i++) {
+  // Write each single byte individually.
+  unzip.write(Buffer.from([data[i]]));
+}
+
+unzip.end();