Merge pull request python#12 from darkk/master

Fix endless loop in lz4.frame.decompress being fed with truncated frame
tbbharaj · Jan 15, 2017 · e70527a · e70527a
2 parents 113fa52 + 186c19d
commit e70527a
Show file tree

Hide file tree

Showing 4 changed files with 50 additions and 2 deletions.
diff --git a/Makefile b/Makefile
@@ -0,0 +1,11 @@
+.PHONY: build test clean pre-commit
+build:  # runs setup.py build with python$(PY); PY is the interpreter suffix (pre-commit passes PY=2 and PY=3)
+	python$(PY) setup.py build
+test:  # runs setup.py test with the same python$(PY) interpreter
+	python$(PY) setup.py test
+clean:  # removes the build directory and every *.pyc / *.so file under the current directory
+	rm -rf build
+	find . '(' -name '*.pyc' -o -name '*.so' ')' -delete
+pre-commit:  # clean + build + test with PY=2, then the same again with PY=3; each && chain stops at the first failure
+	$(MAKE) clean && $(MAKE) PY=2 build && $(MAKE) PY=2 test
+	$(MAKE) clean && $(MAKE) PY=3 build && $(MAKE) PY=3 test
diff --git a/lz4/frame/_frame.c b/lz4/frame/_frame.c
@@ -729,15 +729,15 @@ decompress (PyObject * Py_UNUSED (self), PyObject * args, PyObject * keywds)
   destination_cursor = destination_buffer;
   destination_written = 0;
 
-  while (1)
+  while (source_cursor < source_end)
     {
       /* Decompress from the source string and write to the destination_buffer
          until there's no more source string to read.
 
          On calling LZ4F_decompress, source_read is set to the remaining length
          of source available to read. On return, source_read is set to the
          actual number of bytes read from source, which may be less than
-         available.
+         available. NB: LZ4F_decompress does not explicitly fail on empty input.
 
          On calling LZ4F_decompress, destination_write is the number of bytes in
          destination available for writing. On exit, destination_write is set to
@@ -809,6 +809,13 @@ decompress (PyObject * Py_UNUSED (self), PyObject * args, PyObject * keywds)
                     LZ4F_getErrorName (result));
       return NULL;
     }
+  else if (result != 0)
+    {
+      PyMem_Free (destination_buffer);
+      PyErr_Format (PyExc_RuntimeError,
+                    "LZ4F_freeDecompressionContext reported unclean decompressor state (truncated frame?): %zu", result);
+      return NULL;
+    }
   else if (source_cursor != source_end)
     {
       PyMem_Free (destination_buffer);

diff --git a/tests/test_block.py b/tests/test_block.py
@@ -189,6 +189,13 @@ def test_decompress_without_leak(self):
         with self.assertRaisesRegexp(ValueError, r'^Decompressor wrote 64 bytes, but 79 bytes expected from header$'):
             lz4.decompress(data[4:], uncompressed_size=79)
 
+    def test_decompress_truncated(self):
+        input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24
+        compressed = lz4.compress(input_data)
+        for i in range(len(compressed)):
+            with self.assertRaisesRegexp(ValueError, '^(Input source data size too small|Corrupt input at byte \d+|Decompressor wrote \d+ bytes, but \d+ bytes expected from header)'):
+                lz4.decompress(compressed[:i])
+
     def test_decompress_with_trailer(self):
         data = b'A' * 64
         comp = lz4.compress(data)

diff --git a/tests/test_frame.py b/tests/test_frame.py
@@ -2,6 +2,7 @@
 import unittest
 import os
 import sys
+import struct
 from multiprocessing.pool import ThreadPool
 
 class TestLZ4Frame(unittest.TestCase):
@@ -261,6 +262,28 @@ def test_LZ4FrameCompressor_reset(self):
         self.assertEqual(input_data, decompressed)
 
 class TestLZ4FrameModern(unittest.TestCase):
+    def test_decompress_truncated(self):  # every strict prefix of a complete frame must raise RuntimeError
+        input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
+        for chksum in (lz4frame.CONTENTCHECKSUM_DISABLED, lz4frame.CONTENTCHECKSUM_ENABLED):  # content checksum off, then on
+            for conlen in (0, len(input_data)):  # source_size given as 0, then as the real input length
+                context = lz4frame.create_compression_context()
+                compressed = lz4frame.compress_begin(context, content_checksum=chksum, source_size=conlen)  # first piece of the frame
+                compressed += lz4frame.compress_update(context, input_data)
+                compressed += lz4frame.compress_end(context)
+                for i in range(len(compressed)):  # i < len(compressed), so the complete frame is never passed
+                    with self.assertRaisesRegexp(RuntimeError, r'^(LZ4F_getFrameInfo failed with code: ERROR_frameHeader_incomplete|LZ4F_freeDecompressionContext reported unclean decompressor state \(truncated frame\?\): \d+)$'):
+                        lz4frame.decompress(compressed[:i])
+
+    def test_checksum_failure(self):  # a frame whose final byte was altered must fail with ERROR_contentChecksum_invalid
+        input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
+        compressed = lz4frame.compress(input_data, content_checksum=lz4frame.CONTENTCHECKSUM_ENABLED)
+        with self.assertRaisesRegexp(RuntimeError, r'^LZ4F_decompress failed with code: ERROR_contentChecksum_invalid'):
+            last = struct.unpack('B', compressed[-1:])[0]  # slice + unpack gives an int on both Python 2 and Python 3
+            lz4frame.decompress(compressed[:-1] + struct.pack('B', last ^ 0x42))  # same frame with its last byte XORed with 0x42; presumably that byte is part of the content checksum - confirm against the frame format
+        # NB: blockChecksumFlag is not supported by lz4 at the moment, so some
+        # random 1-bit modifications of input may actually trigger valid output
+        # without errors. And content checksum remains the same!
+
     def test_decompress_trailer(self):
         input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
         compressed = lz4frame.compress(input_data)