Skip to content

Commit

Permalink
Merge pull request python#30 from python-lz4/development
Browse files Browse the repository at this point in the history
Development branch fixes for 0.9.1 release
  • Loading branch information
jonathanunderwood authored May 13, 2017
2 parents b77b080 + 1b60148 commit 05df110
Show file tree
Hide file tree
Showing 11 changed files with 78 additions and 63 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ python:
- 3.3
- 3.4
- 3.5
- 3.6
install:
script: python setup.py test
1 change: 1 addition & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ environment:
- PYTHON: "C:\\Python34-x64"
DISTUTILS_USE_SDK: "1"
- PYTHON: "C:\\Python35-x64"
- PYTHON: "C:\\Python36-x64"

matrix:
# Immediately finish build once one of the jobs fails.
Expand Down
7 changes: 1 addition & 6 deletions lz4/block/_block.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,6 @@ compress (PyObject * Py_UNUSED (self), PyObject * args, PyObject * kwargs)
return NULL;
}

if (source_size <= 0) {
PyErr_Format(PyExc_ValueError, "Input source data size invalid: %d bytes", source_size);
return NULL;
}

if (!strncmp (mode, "default", sizeof ("default")))
{
comp = DEFAULT;
Expand Down Expand Up @@ -258,7 +253,7 @@ decompress (PyObject * Py_UNUSED (self), PyObject * args, PyObject * kwargs)
source_size -= hdr_size;
}

if (dest_size <= 0 || dest_size > PY_SSIZE_T_MAX)
if (dest_size < 0 || dest_size > PY_SSIZE_T_MAX)
{
PyErr_Format (PyExc_ValueError, "Invalid size in header: 0x%zu",
dest_size);
Expand Down
11 changes: 3 additions & 8 deletions lz4/frame/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,6 @@ class LZ4FrameCompressor(object):
- lz4.frame.CONTENTCHECKSUM_DISABLED or 0: disables checksumming
- lz4.frame.CONTENTCHECKSUM_ENABLED or 1: enables checksumming
The default is CONTENTCHECKSUM_DISABLED.
content_size (bool): Specifies whether to include an optional 8-byte header
field that is the uncompressed size of data included within the frame.
Including the content-size header is optional, and is enabled by default.
frame_type (int): Specifies whether user data can be injected between
frames. Options:
- lz4.frame.FRAMETYPE_FRAME or 0: disables user data injection
Expand All @@ -53,14 +50,12 @@ def __init__(self,
block_mode=BLOCKMODE_LINKED,
compression_level=COMPRESSIONLEVEL_MIN,
content_checksum=CONTENTCHECKSUM_DISABLED,
content_size=True,
frame_type=FRAMETYPE_FRAME,
auto_flush=True):
self.block_size = block_size
self.block_mode = block_mode
self.compression_level = compression_level
self.content_checksum = content_checksum
self.content_size = content_size
self.frame_type = frame_type
self.auto_flush = auto_flush
self._context = create_compression_context()
Expand All @@ -82,9 +77,10 @@ def compress_begin(self, source_size=0):
Args:
data (bytes): data to compress
source_size (int): Optionally specified the total size of the
source_size (int): Optionally specify the total size of the
uncompressed data. If specified, will be stored in the
compressed frame header for later use in decompression.
compressed frame header as an 8-byte field for later use
during decompression.
Returns:
bytes: frame header data
Expand All @@ -97,7 +93,6 @@ def compress_begin(self, source_size=0):
frame_type=self.frame_type,
compression_level=self.compression_level,
content_checksum=self.content_checksum,
content_size=self.content_size,
auto_flush=self.auto_flush,
source_size=source_size)

Expand Down
44 changes: 17 additions & 27 deletions lz4/frame/_frame.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,9 @@ create_compression_context (PyObject * Py_UNUSED (self))
return PyCapsule_New (context, capsule_name, destruct_compression_context);
}

/******************
* compress_frame *
******************/
/************
* compress *
************/
#define __COMPRESS_KWARGS_DOCSTRING \
" block_size (int): Sepcifies the maximum blocksize to use.\n" \
" Options:\n\n" \
Expand Down Expand Up @@ -152,25 +152,25 @@ create_compression_context (PyObject * Py_UNUSED (self))
" - lz4.frame.CONTENTCHECKSUM_DISABLED or 0: disables checksumming\n" \
" - lz4.frame.CONTENTCHECKSUM_ENABLED or 1: enables checksumming\n\n" \
" The default is CONTENTCHECKSUM_DISABLED.\n" \
" content_size (bool): Specifies whether to include an optional 8-byte header\n" \
" field that is the uncompressed size of data included within the frame.\n" \
" Including the content-size header is optional, and is enabled by default.\n" \
" frame_type (int): Specifies whether user data can be injected between\n" \
" frames. Options:\n\n" \
" - lz4.frame.FRAMETYPE_FRAME or 0: disables user data injection\n" \
" - lz4.frame.FRAMETYPE_SKIPPABLEFRAME or 1: enables user data injection\n\n" \
" The default is lz4.frame.FRAMETYPE_FRAME.\n" \

PyDoc_STRVAR(compress__doc,
"compress(source, compression_level=0, block_size=0, content_checksum=0, content_size=1, block_mode=0, frame_type=0)\n\n" \
"compress(source, compression_level=0, block_size=0, content_checksum=0, block_mode=0, frame_type=0, content_size_header=1)\n\n" \
"Accepts a string, and compresses the string in one go, returning the\n" \
"compressed string as a string of bytes. The compressed string includes\n" \
"a header and endmark and so is suitable for writing to a file.\n\n" \
"Args:\n" \
" source (str): String to compress\n\n" \
"Keyword Args:\n" \
__COMPRESS_KWARGS_DOCSTRING \
"\n" \
" content_size_header (bool): Specifies whether to include an optional\n" \
" 8-byte header field that is the uncompressed size of data included\n" \
" within the frame. Including the content-size header is optional\n" \
" and is enabled by default.\n\n" \
"Returns:\n" \
" str: Compressed data as a string\n"
);
Expand All @@ -192,9 +192,9 @@ compress (PyObject * Py_UNUSED (self), PyObject * args,
"compression_level",
"block_size",
"content_checksum",
"content_size",
"block_mode",
"frame_type",
"content_size_header",
NULL
};

Expand All @@ -205,11 +205,10 @@ compress (PyObject * Py_UNUSED (self), PyObject * args,
&source, &source_size,
&preferences.compressionLevel,
&preferences.frameInfo.blockSizeID,
&preferences.
frameInfo.contentChecksumFlag,
&content_size_header,
&preferences.frameInfo.contentChecksumFlag,
&preferences.frameInfo.blockMode,
&preferences.frameInfo.frameType))
&preferences.frameInfo.frameType,
&content_size_header))
{
return NULL;
}
Expand Down Expand Up @@ -295,7 +294,7 @@ PyDoc_STRVAR(compress_begin__doc,
" When autoFlush is disabled, the LZ4 library may buffer data\n" \
" until a block is full\n\n" \
" source_size (int): This optionally specifies the uncompressed size\n" \
" of the source content. This arument is optional, but can if specified\n" \
" of the source content. This arument is optional, but if specified\n" \
" will be stored in the frame header for use during decompression.\n"
"Returns:\n" \
" str (str): Frame header.\n"
Expand All @@ -309,7 +308,6 @@ compress_begin (PyObject * Py_UNUSED (self), PyObject * args,
{
PyObject *py_context = NULL;
unsigned long source_size = 0;
int content_size_header = 1;
LZ4F_preferences_t preferences;
/* Only needs to be large enough for a header, which is 15 bytes.
* Unfortunately, the lz4 library doesn't provide a #define for this.
Expand All @@ -322,7 +320,6 @@ compress_begin (PyObject * Py_UNUSED (self), PyObject * args,
"compression_level",
"block_size",
"content_checksum",
"content_size",
"block_mode",
"frame_type",
"auto_flush",
Expand All @@ -335,13 +332,12 @@ compress_begin (PyObject * Py_UNUSED (self), PyObject * args,
argument */
preferences.autoFlush = 1;

if (!PyArg_ParseTupleAndKeywords (args, keywds, "O|kiiiiiii", kwlist,
if (!PyArg_ParseTupleAndKeywords (args, keywds, "O|kiiiiii", kwlist,
&py_context,
&source_size,
&preferences.compressionLevel,
&preferences.frameInfo.blockSizeID,
&preferences.frameInfo.contentChecksumFlag,
&content_size_header,
&preferences.frameInfo.blockMode,
&preferences.frameInfo.frameType,
&preferences.autoFlush
Expand All @@ -350,14 +346,7 @@ compress_begin (PyObject * Py_UNUSED (self), PyObject * args,
return NULL;
}

if (content_size_header)
{
preferences.frameInfo.contentSize = source_size;
}
else
{
preferences.frameInfo.contentSize = 0;
}
preferences.frameInfo.contentSize = source_size;

context =
(struct compression_context *) PyCapsule_GetPointer (py_context, capsule_name);
Expand Down Expand Up @@ -798,16 +787,17 @@ decompress (PyObject * Py_UNUSED (self), PyObject * args, PyObject * keywds)
to estimate the new size of the destination buffer. */
char * destination_buffer_new;
destination_size += 3 * result;
Py_BLOCK_THREADS
destination_buffer_new = PyMem_Realloc(destination_buffer, destination_size);
if (!destination_buffer_new)
{
LZ4F_freeDecompressionContext (context);
Py_BLOCK_THREADS
PyErr_SetString (PyExc_RuntimeError,
"Failed to increase destination buffer size");
PyMem_Free (destination_buffer);
return NULL;
}
Py_UNBLOCK_THREADS
destination_buffer = destination_buffer_new;
}
/* Data still remaining to be decompressed, so increment the source and
Expand Down
2 changes: 1 addition & 1 deletion py3c/py3c.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ SOFTWARE.

#include <py3c/comparison.h>
#include <py3c/compat.h>

#include <py3c/py3shims.h>

#endif
2 changes: 1 addition & 1 deletion py3c/py3c/compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
#define PyStr_InternFromString PyString_InternFromString
#define PyStr_Decode PyString_Decode

static inline PyObject *PyStr_Concat(PyObject *left, PyObject *right) {
static PyObject *PyStr_Concat(PyObject *left, PyObject *right) {
PyObject *str = left;
Py_INCREF(left); // reference to old left will be stolen
PyString_Concat(&str, right);
Expand Down
29 changes: 29 additions & 0 deletions py3c/py3c/py3shims.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/* Copyright (c) 2016, Red Hat, Inc. and/or its affiliates
* Licensed under the MIT license; see py3c.h
*/

/*
* Shims for the PyMem_Raw* functions added in Python 3.4 (PyMem_RawCalloc in 3.5)
*
* See https://docs.python.org/3/c-api/memory.html#raw-memory-interface
*/

#ifndef _PY3C_RAWMALLOC_H_
#define _PY3C_RAWMALLOC_H_
#include <Python.h>
#include <stdlib.h>


/*
 * Fallback definitions of the raw allocator API for interpreters that do not
 * provide it (PyMem_RawMalloc/Realloc/Free before 3.4, PyMem_RawCalloc
 * before 3.5).  They map straight onto the C library allocator.
 *
 * A zero-size request is bumped to one byte so that the result is never the
 * implementation-defined NULL that malloc(0)/calloc(0, x) may return.
 *
 * The size must be selected with `?:`, not `||`: `(n) || 1` is a logical OR
 * and always evaluates to 1, which would make every allocation one byte long.
 *
 * NOTE: being macros, these evaluate their size arguments twice; do not pass
 * expressions with side effects.
 */
#if PY_MAJOR_VERSION < 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 4)
#define PyMem_RawMalloc(n) malloc((n) ? (n) : 1)
#define PyMem_RawRealloc(p, n) realloc((p), (n) ? (n) : 1)
#define PyMem_RawFree(p) free(p)
#endif /* version < 3.4 */


#if PY_MAJOR_VERSION < 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 5)
#define PyMem_RawCalloc(n, s) calloc((n) ? (n) : 1, (s) ? (s) : 1)
#endif /* version < 3.5 */


#endif /* _PY3C_RAWMALLOC_H_ */
23 changes: 12 additions & 11 deletions tests/bench.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,24 @@
import uuid
import timeit
import lz4
import snappy
import os
from timeit import Timer
import sys
import blosc

DATA = open("../src/lz4.c", "rb").read()
LZ4_DATA = lz4.compress(DATA)
SNAPPY_DATA = snappy.compress(DATA)
LOOPS = 200000
DATA = open(sys.argv[1], "rb").read()
LZ4_DATA = lz4.block.compress(DATA)
BLOSC_DATA = blosc.compress(DATA, cname='lz4', clevel=5, shuffle=True)
LOOPS = 100

print("Data Size:")
print(" Input: %d" % len(DATA))
print(" LZ4: %d (%.2f)" % (len(LZ4_DATA), len(LZ4_DATA) / float(len(DATA))))
print(" Snappy: %d (%.2f)" % (len(SNAPPY_DATA), len(SNAPPY_DATA) / float(len(DATA))))
print(" LZ4 / Snappy: %f" % (float(len(LZ4_DATA)) / float(len(SNAPPY_DATA))))
print(" Blosc: %d (%.2f)" % (len(BLOSC_DATA), len(BLOSC_DATA) / float(len(DATA))))
print(" LZ4 / Blosc: %f" % (float(len(LZ4_DATA)) / float(len(BLOSC_DATA))))

print("Benchmark: %d calls" % LOOPS)
print(" LZ4 Compression: %fs" % Timer("lz4.compress(DATA)", "from __main__ import DATA; import lz4").timeit(number=LOOPS))
print(" Snappy Compression: %fs" % Timer("snappy.compress(DATA)", "from __main__ import DATA; import snappy").timeit(number=LOOPS))
print(" LZ4 Decompression: %fs" % Timer("lz4.uncompress(LZ4_DATA)", "from __main__ import LZ4_DATA; import lz4").timeit(number=LOOPS))
print(" Snappy Decompression : %fs" % Timer("snappy.uncompress(SNAPPY_DATA)", "from __main__ import SNAPPY_DATA; import snappy").timeit(number=LOOPS))
print(" LZ4 Compression: %fs" % (Timer("lz4.block.compress(DATA)", "from __main__ import DATA; import lz4").timeit(number=LOOPS)/LOOPS))
print(" Blosc Compression: %fs" % (Timer("blosc.compress(DATA, cname='lz4', clevel=5, shuffle=True)", "from __main__ import DATA; import blosc").timeit(number=LOOPS)/LOOPS))
print(" LZ4 Decompression: %fs" % (Timer("lz4.block.decompress(LZ4_DATA)", "from __main__ import LZ4_DATA; import lz4").timeit(number=LOOPS)/LOOPS))
print(" Blosc Decompression : %fs" % (Timer("blosc.decompress(BLOSC_DATA)", "from __main__ import BLOSC_DATA; import blosc").timeit(number=LOOPS)/LOOPS))
4 changes: 4 additions & 0 deletions tests/test_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

class TestLZ4Block(unittest.TestCase):

def test_empty_string(self):
    """An empty byte string must survive a block compress/decompress round trip."""
    empty = b''
    round_tripped = lz4.block.decompress(lz4.block.compress(empty))
    self.assertEqual(empty, round_tripped)

def test_random(self):
    """128 KiB of random bytes must survive a block compress/decompress round trip."""
    payload = os.urandom(128 * 1024)
    compressed = lz4.block.compress(payload)
    self.assertEqual(payload, lz4.block.decompress(compressed))
Expand Down
17 changes: 8 additions & 9 deletions tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,6 @@ def test_compress_begin_update_end_not_defaults(self):
block_size=lz4frame.BLOCKSIZE_MAX256KB,
block_mode=lz4frame.BLOCKMODE_LINKED,
compression_level=lz4frame.COMPRESSIONLEVEL_MINHC,
content_size=False,
auto_flush=1
)
chunk_size = 128 * 1024 # 128 kb, half of block size
Expand Down Expand Up @@ -265,6 +264,14 @@ def test_LZ4FrameCompressor_reset(self):
decompressed = lz4frame.decompress(compressed)
self.assertEqual(input_data, decompressed)

def test_compress_without_content_size(self):
    """With the content-size header disabled, the frame reports size 0 and still round-trips."""
    payload = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
    framed = lz4frame.compress(payload, content_size_header=False)
    # A contentSize of 0 in the frame info means the optional header field was omitted.
    info = lz4frame.get_frame_info(framed)
    self.assertEqual(info['contentSize'], 0)
    restored = lz4frame.decompress(framed)
    self.assertEqual(payload, restored)

class TestLZ4FrameModern(unittest.TestCase):
def test_decompress_truncated(self):
input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
Expand Down Expand Up @@ -306,14 +313,6 @@ def test_LZ4FrameCompressor_fails(self):
compressed += compressor.flush()
compressed = compressor.compress(input_data)

def test_compress_without_content_size(self):
input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
compressed = lz4frame.compress(input_data, content_size=False)
frame = lz4frame.get_frame_info(compressed)
self.assertEqual(frame['contentSize'], 0)
decompressed = lz4frame.decompress(compressed)
self.assertEqual(input_data, decompressed)


if sys.version_info < (2, 7):
# Poor-man unittest.TestCase.skip for Python 2.6
Expand Down

0 comments on commit 05df110

Please sign in to comment.