From 89fa205d27819c4d4cd444abbfde76e0abe15cea Mon Sep 17 00:00:00 2001 From: Yaron de Leeuw Date: Mon, 31 Jul 2017 13:16:19 -0400 Subject: [PATCH 1/5] bpo-26253: Add compresslevel to tarfile stream `tarfile` already accepts a compresslevel argument for creating files. This patch adds the same for stream-based tarfile usage. The default is 9, the value that was previously hard-coded. --- Doc/library/tarfile.rst | 4 +-- Lib/tarfile.py | 33 +++++++++++-------- .../2017-07-31-13-35-28.bpo-26253.8v_sCs.rst | 1 + 3 files changed, 22 insertions(+), 16 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index f5c49b0ac4f738..3ae0507a6295a8 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -98,8 +98,8 @@ Some facts and figures: If *fileobj* is specified, it is used as an alternative to a :term:`file object` opened in binary mode for *name*. It is supposed to be at position 0. - For modes ``'w:gz'``, ``'r:gz'``, ``'w:bz2'``, ``'r:bz2'``, ``'x:gz'``, - ``'x:bz2'``, :func:`tarfile.open` accepts the keyword argument + For modes ``'w:gz'``, ``'x:gz'``, ``'w|gz'``, ``'w:bz2'``, ``'x:bz2'``, + ``'w|bz2'``, :func:`tarfile.open` accepts the keyword argument *compresslevel* (default ``9``) to specify the compression level of the file. For modes ``'w:xz'`` and ``'x:xz'``, :func:`tarfile.open` accepts the diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 8d43d0da7b9880..8ad0f3942c72fe 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -336,7 +336,8 @@ class _Stream: _Stream is intended to be used only internally. """ - def __init__(self, name, mode, comptype, fileobj, bufsize): + def __init__(self, name, mode, comptype, fileobj, bufsize, + compresslevel=9): """Construct a _Stream object. 
""" self._extfileobj = True @@ -350,14 +351,15 @@ def __init__(self, name, mode, comptype, fileobj, bufsize): fileobj = _StreamProxy(fileobj) comptype = fileobj.getcomptype() - self.name = name or "" - self.mode = mode + self.name = name or "" + self.mode = mode self.comptype = comptype - self.fileobj = fileobj - self.bufsize = bufsize - self.buf = b"" - self.pos = 0 - self.closed = False + self.fileobj = fileobj + self.bufsize = bufsize + self.compresslevel = compresslevel + self.buf = b"" + self.pos = 0 + self.closed = False try: if comptype == "gz": @@ -383,7 +385,7 @@ def __init__(self, name, mode, comptype, fileobj, bufsize): self.cmp = bz2.BZ2Decompressor() self.exception = OSError else: - self.cmp = bz2.BZ2Compressor() + self.cmp = bz2.BZ2Compressor(self.compresslevel) elif comptype == "xz": try: @@ -413,10 +415,11 @@ def __del__(self): def _init_write_gz(self): """Initialize for writing with gzip compression. """ - self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED, - -self.zlib.MAX_WBITS, - self.zlib.DEF_MEM_LEVEL, - 0) + self.cmp = self.zlib.compressobj(self.compresslevel, + self.zlib.DEFLATED, + -self.zlib.MAX_WBITS, + self.zlib.DEF_MEM_LEVEL, + 0) timestamp = struct.pack(" Date: Mon, 31 Jul 2017 17:19:26 -0400 Subject: [PATCH 2/5] bpo-26253: don't add compresslevel as an attribute In tarfile, compresslevel will be simply passed around. Also added 'versionadded' to stream compression level setting --- Doc/library/tarfile.rst | 3 +++ Lib/tarfile.py | 25 ++++++++++++------------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index 3ae0507a6295a8..08d3a9dbfecc49 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -152,6 +152,9 @@ Some facts and figures: .. versionchanged:: 3.6 The *name* parameter accepts a :term:`path-like object`. + .. versionchanged:: 3.7 + The *compresslevel* keyword argument also works for streams. + .. 
class:: TarFile :noindex: diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 8ad0f3942c72fe..4ce6a84e97ed46 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -337,7 +337,7 @@ class _Stream: """ def __init__(self, name, mode, comptype, fileobj, bufsize, - compresslevel=9): + compresslevel): """Construct a _Stream object. """ self._extfileobj = True @@ -351,15 +351,14 @@ def __init__(self, name, mode, comptype, fileobj, bufsize, fileobj = _StreamProxy(fileobj) comptype = fileobj.getcomptype() - self.name = name or "" - self.mode = mode + self.name = name or "" + self.mode = mode self.comptype = comptype - self.fileobj = fileobj - self.bufsize = bufsize - self.compresslevel = compresslevel - self.buf = b"" - self.pos = 0 - self.closed = False + self.fileobj = fileobj + self.bufsize = bufsize + self.buf = b"" + self.pos = 0 + self.closed = False try: if comptype == "gz": @@ -373,7 +372,7 @@ def __init__(self, name, mode, comptype, fileobj, bufsize, self._init_read_gz() self.exception = zlib.error else: - self._init_write_gz() + self._init_write_gz(compresslevel) elif comptype == "bz2": try: @@ -385,7 +384,7 @@ def __init__(self, name, mode, comptype, fileobj, bufsize, self.cmp = bz2.BZ2Decompressor() self.exception = OSError else: - self.cmp = bz2.BZ2Compressor(self.compresslevel) + self.cmp = bz2.BZ2Compressor(compresslevel) elif comptype == "xz": try: @@ -412,10 +411,10 @@ def __del__(self): if hasattr(self, "closed") and not self.closed: self.close() - def _init_write_gz(self): + def _init_write_gz(self, compresslevel): """Initialize for writing with gzip compression. 
""" - self.cmp = self.zlib.compressobj(self.compresslevel, + self.cmp = self.zlib.compressobj(compresslevel, self.zlib.DEFLATED, -self.zlib.MAX_WBITS, self.zlib.DEF_MEM_LEVEL, From c21736031a86d9c12da783a18383a4046e642fc2 Mon Sep 17 00:00:00 2001 From: Yaron de Leeuw Date: Tue, 1 Aug 2017 12:55:07 -0400 Subject: [PATCH 3/5] bpo-26253: tarfile: add compresslevel unit tests Check outcoming file headers when writing a compressed file. --- Lib/test/test_tarfile.py | 68 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 12850cd635e995..0c78709f30aef8 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1532,6 +1532,74 @@ class Bz2StreamWriteTest(Bz2Test, StreamWriteTest): class LzmaStreamWriteTest(LzmaTest, StreamWriteTest): decompressor = lzma.LZMADecompressor if lzma else None +class _CompressedWriteTest(TarTest): + # This is not actually a standalone test. + # It does not inherit WriteTest because it only makes sense with gz,bz2 + source = (b"And we move to Bristol where they have a special, " + + b"Very Silly candidate") + + def _compressed_tar(self, compresslevel): + fobj = io.BytesIO() + with tarfile.open(tmpname, self.mode, fobj, + compresslevel=compresslevel) as tarfl: + tarfl.addfile(tarfile.TarInfo("foo"), io.BytesIO(self.source)) + return fobj + + def _test_bz2_header(self, compresslevel): + fobj = self._compressed_tar(compresslevel) + self.assertEqual(fobj.getvalue()[0:10], + b"BZh%d1AY&SY" % compresslevel) + + def _test_gz_header(self, compresslevel): + fobj = self._compressed_tar(compresslevel) + self.assertEqual(fobj.getvalue()[:3], b"\x1f\x8b\x08") + +class Bz2CompressWriteTest(Bz2Test, _CompressedWriteTest, unittest.TestCase): + prefix = "w:" + def test_compression_levels(self): + self._test_bz2_header(1) + self._test_bz2_header(5) + self._test_bz2_header(9) + +class Bz2CompressStreamWriteTest(Bz2Test, _CompressedWriteTest, + 
unittest.TestCase): + prefix = "w|" + def test_compression_levels(self): + self._test_bz2_header(1) + self._test_bz2_header(5) + self._test_bz2_header(9) + +class GzCompressWriteTest(GzipTest, _CompressedWriteTest, unittest.TestCase): + prefix = "w:" + def test_compression_levels(self): + self._test_gz_header(1) + self._test_gz_header(5) + self._test_gz_header(9) + +class GzCompressStreamWriteTest(GzipTest, _CompressedWriteTest, + unittest.TestCase): + prefix = "w|" + def test_compression_levels(self): + self._test_gz_header(1) + self._test_gz_header(5) + self._test_gz_header(9) + +class CompressLevelRaises(unittest.TestCase): + def test_compresslevel_wrong_modes(self): + compresslevel = 5 + fobj = io.BytesIO() + with self.assertRaises(TypeError): + tarfile.open(tmpname, "w:", fobj, compresslevel=compresslevel) + + def test_wrong_compresslevels(self): + # BZ2 checks that the compresslevel is in [1,9]. gz does not + fobj = io.BytesIO() + with self.assertRaises(ValueError): + tarfile.open(tmpname, "w:bz2", fobj, compresslevel=0) + with self.assertRaises(ValueError): + tarfile.open(tmpname, "w:bz2", fobj, compresslevel=10) + with self.assertRaises(ValueError): + tarfile.open(tmpname, "w|bz2", fobj, compresslevel=10) class GNUWriteTest(unittest.TestCase): # This testcase checks for correct creation of GNU Longname From f48acbdc80b3dffbf9d82cfd8f26cdb354a8c1bf Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 17 Jun 2022 09:59:03 +0300 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/library/tarfile.rst | 2 +- .../next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index 08d3a9dbfecc49..f9d34def79a12b 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -152,7 +152,7 @@ Some facts and figures: .. 
versionchanged:: 3.6 The *name* parameter accepts a :term:`path-like object`. - .. versionchanged:: 3.7 + .. versionchanged:: 3.12 The *compresslevel* keyword argument also works for streams. diff --git a/Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst b/Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst index c8c051afe514c6..be604403a4a99a 100644 --- a/Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst +++ b/Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst @@ -1 +1,2 @@ -Adjustable compression level for tarfile streams. +Allow adjustable compression level for tarfile streams in +:function:`tarfile.open`. From 2b1b417e0e3fe7222088ebbeec3364096ce51b6d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 17 Jun 2022 11:46:35 +0300 Subject: [PATCH 5/5] Update Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- .../next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst b/Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst index be604403a4a99a..fa0dc95b7d62b3 100644 --- a/Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst +++ b/Misc/NEWS.d/next/Library/2017-07-31-13-35-28.bpo-26253.8v_sCs.rst @@ -1,2 +1,2 @@ -Allow adjustable compression level for tarfile streams in -:function:`tarfile.open`. +Allow adjustable compression level for tarfile streams in +:func:`tarfile.open`.