Skip to content

Commit

Permalink
Merge pull request #161 from pycompression/release_1.4.1
Browse files Browse the repository at this point in the history
Release 1.4.1
  • Loading branch information
rhpvorderman authored Oct 9, 2023
2 parents 8e0b1ea + 15a4810 commit c689da0
Show file tree
Hide file tree
Showing 8 changed files with 110 additions and 62 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ Changelog
.. This document is user facing. Please word the changes in such a way
.. that users understand how the changes affect the new version.
version 1.4.1
-----------------
+ Fix several errors related to unclosed files and buffers.

version 1.4.0
-----------------
+ Drop support for python 3.7 and PyPy 3.8 as these are no longer supported.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def build_isa_l():

setup(
name="isal",
version="1.4.0",
version="1.4.1",
description="Faster zlib and gzip compatible compression and "
"decompression by providing python bindings for the ISA-L "
"library.",
Expand Down
2 changes: 1 addition & 1 deletion src/isal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@
"__version__"
]

__version__ = "1.4.0"
__version__ = "1.4.1"
10 changes: 7 additions & 3 deletions src/isal/igzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
Library to speed up its methods."""

import argparse
import builtins
import gzip
import io
import os
Expand Down Expand Up @@ -337,13 +338,14 @@ def main():
if yes_or_no not in {"y", "Y", "yes"}:
sys.exit("not overwritten")

out_buffer = None
if args.compress:
if args.file is None:
in_file = sys.stdin.buffer
else:
in_file = io.open(args.file, mode="rb")
in_file = builtins.open(args.file, mode="rb")
if out_filepath is not None:
out_buffer = io.open(out_filepath, "wb")
out_buffer = builtins.open(out_filepath, "wb")
else:
out_buffer = sys.stdout.buffer

Expand All @@ -359,7 +361,7 @@ def main():
else:
in_file = IGzipFile(mode="rb", fileobj=sys.stdin.buffer)
if out_filepath is not None:
out_file = io.open(out_filepath, mode="wb")
out_file = builtins.open(out_filepath, mode="wb")
else:
out_file = sys.stdout.buffer

Expand All @@ -374,6 +376,8 @@ def main():
in_file.close()
if out_file is not sys.stdout.buffer:
out_file.close()
if out_buffer is not None and out_buffer is not sys.stdout.buffer:
out_buffer.close()


if __name__ == "__main__": # pragma: no cover
Expand Down
35 changes: 27 additions & 8 deletions src/isal/igzip_threaded.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# This file is part of python-isal which is distributed under the
# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2.

import builtins
import io
import multiprocessing
import os
Expand Down Expand Up @@ -54,7 +55,7 @@ def open(filename, mode="rb", compresslevel=igzip._COMPRESS_LEVEL_TRADEOFF,
threads = 1
open_mode = mode.replace("t", "b")
if isinstance(filename, (str, bytes)) or hasattr(filename, "__fspath__"):
binary_file = io.open(filename, open_mode)
binary_file = builtins.open(filename, open_mode)
elif hasattr(filename, "read") or hasattr(filename, "write"):
binary_file = filename
else:
Expand Down Expand Up @@ -83,9 +84,14 @@ def __init__(self, fp, queue_size=4, block_size=8 * 1024 * 1024):
self.buffer = io.BytesIO()
self.block_size = block_size
self.worker = threading.Thread(target=self._decompress)
self._closed = False
self.running = True
self.worker.start()

def _check_closed(self, msg=None):
if self._closed:
raise ValueError("I/O operation on closed file")

def _decompress(self):
block_size = self.block_size
block_queue = self.queue
Expand All @@ -105,6 +111,7 @@ def _decompress(self):
pass

def readinto(self, b):
self._check_closed()
result = self.buffer.readinto(b)
if result == 0:
while True:
Expand All @@ -125,16 +132,22 @@ def readinto(self, b):
def readable(self) -> bool:
return True

def writable(self) -> bool:
return False

def tell(self) -> int:
self._check_closed()
return self.pos

def close(self) -> None:
if self._closed:
return
self.running = False
self.worker.join()
self.fileobj.close()
self.raw.close()
self._closed = True

@property
def closed(self) -> bool:
return self._closed


class _ThreadedGzipWriter(io.RawIOBase):
Expand Down Expand Up @@ -199,6 +212,10 @@ def __init__(self,
self._write_gzip_header()
self.start()

def _check_closed(self, msg=None):
if self._closed:
raise ValueError("I/O operation on closed file")

def _write_gzip_header(self):
"""Simple gzip header. Only xfl flag is set according to level."""
magic1 = 0x1f
Expand All @@ -225,11 +242,10 @@ def stop(self):
self.output_worker.join()

def write(self, b) -> int:
self._check_closed()
with self.lock:
if self.exception:
raise self.exception
if self._closed:
raise IOError("Can not write closed file")
index = self.index
data = bytes(b)
zdict = memoryview(self.previous_block)[-DEFLATE_WINDOW_SIZE:]
Expand All @@ -240,8 +256,7 @@ def write(self, b) -> int:
return len(data)

def flush(self):
if self._closed:
raise IOError("Can not write closed file")
self._check_closed()
# Wait for all data to be compressed
for in_q in self.input_queues:
in_q.join()
Expand All @@ -251,9 +266,13 @@ def flush(self):
self.raw.flush()

def close(self) -> None:
if self._closed:
return
self.flush()
self.stop()
if self.exception:
self.raw.close()
self._closed = True
raise self.exception
# Write an empty deflate block with a lost block marker.
self.raw.write(isal_zlib.compress(b"", wbits=-15))
Expand Down
66 changes: 17 additions & 49 deletions tests/test_igzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import re
import shutil
import struct
import subprocess
import sys
import tempfile
import zlib
Expand All @@ -29,21 +28,6 @@
DATA = b'This is a simple test with igzip'
COMPRESSED_DATA = gzip.compress(DATA)
TEST_FILE = str((Path(__file__).parent / "data" / "test.fastq.gz"))
PYPY = sys.implementation.name == "pypy"


def run_isal_igzip(*args, stdin=None):
"""Calling isal.igzip externally seems to solve some issues on PyPy where
files would not be written properly when igzip.main() was called. This is
probably due to some out of order execution that PyPy tries to pull.
Running the process externally is detrimental to the coverage report,
so this is only done for PyPy."""
process = subprocess.Popen(["python", "-m", "isal.igzip", *args],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stdin=subprocess.PIPE)

return process.communicate(stdin)


def test_wrong_compresslevel_igzipfile():
Expand Down Expand Up @@ -128,12 +112,9 @@ def test_decompress_infile_outfile(tmp_path, capsysbinary):
def test_compress_infile_outfile(tmp_path, capsysbinary):
test_file = tmp_path / "test"
test_file.write_bytes(DATA)
if PYPY:
out, err = run_isal_igzip(str(test_file))
else:
sys.argv = ['', str(test_file)]
igzip.main()
out, err = capsysbinary.readouterr()
sys.argv = ['', str(test_file)]
igzip.main()
out, err = capsysbinary.readouterr()
out_file = test_file.with_suffix(".gz")
assert err == b''
assert out == b''
Expand Down Expand Up @@ -196,12 +177,9 @@ def test_compress_infile_out_file(tmp_path, capsysbinary):
test.write_bytes(DATA)
out_file = tmp_path / "compressed.gz"
args = ['-o', str(out_file), str(test)]
if PYPY:
out, err = run_isal_igzip(*args)
else:
sys.argv = ['', *args]
igzip.main()
out, err = capsysbinary.readouterr()
sys.argv = ['', *args]
igzip.main()
out, err = capsysbinary.readouterr()
assert gzip.decompress(out_file.read_bytes()) == DATA
assert err == b''
assert out == b''
Expand All @@ -213,12 +191,9 @@ def test_compress_infile_out_file_force(tmp_path, capsysbinary):
out_file = tmp_path / "compressed.gz"
out_file.touch()
args = ['-f', '-o', str(out_file), str(test)]
if PYPY:
out, err = run_isal_igzip(*args)
else:
sys.argv = ['', *args]
igzip.main()
out, err = capsysbinary.readouterr()
sys.argv = ['', *args]
igzip.main()
out, err = capsysbinary.readouterr()
assert gzip.decompress(out_file.read_bytes()) == DATA
assert err == b''
assert out == b''
Expand Down Expand Up @@ -261,14 +236,11 @@ def test_compress_infile_out_file_inmplicit_name_prompt_accept(
test.write_bytes(DATA)
out_file = tmp_path / "test.gz"
out_file.touch()
if PYPY:
out, err = run_isal_igzip(str(test), stdin=b"y\n")
else:
sys.argv = ['', str(test)]
mock_stdin = io.BytesIO(b"y")
sys.stdin = io.TextIOWrapper(mock_stdin)
igzip.main()
out, err = capsysbinary.readouterr()
sys.argv = ['', str(test)]
mock_stdin = io.BytesIO(b"y")
sys.stdin = io.TextIOWrapper(mock_stdin)
igzip.main()
out, err = capsysbinary.readouterr()
assert b"already exists; do you wish to overwrite" in out
assert err == b""
assert gzip.decompress(out_file.read_bytes()) == DATA
Expand All @@ -278,13 +250,9 @@ def test_compress_infile_out_file_no_name(tmp_path, capsysbinary):
test = tmp_path / "test"
test.write_bytes(DATA)
out_file = tmp_path / "compressed.gz"
args = ['-n', '-o', str(out_file), str(test)]
if PYPY:
out, err = run_isal_igzip(*args)
else:
sys.argv = ['', '-n', '-o', str(out_file), str(test)]
igzip.main()
out, err = capsysbinary.readouterr()
sys.argv = ['', '-n', '-o', str(out_file), str(test)]
igzip.main()
out, err = capsysbinary.readouterr()
output = out_file.read_bytes()
assert gzip.decompress(output) == DATA
assert err == b''
Expand Down
51 changes: 51 additions & 0 deletions tests/test_igzip_threaded.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,54 @@ def test_threaded_write_error(monkeypatch):
with igzip_threaded.open(tmp, "wb", compresslevel=3) as writer:
writer.write(b"x")
error.match("no attribute 'compressobj'")


def test_close_reader():
tmp = io.BytesIO(Path(TEST_FILE).read_bytes())
f = igzip_threaded._ThreadedGzipReader(tmp, "rb")
f.close()
assert f.closed
# Make sure double closing does not raise errors
f.close()


def test_close_writer():
f = igzip_threaded._ThreadedGzipWriter(io.BytesIO())
f.close()
assert f.closed
# Make sure double closing does not raise errors
f.close()


def test_reader_not_writable():
with igzip_threaded.open(TEST_FILE, "rb") as f:
assert not f.writable()


def test_writer_not_readable():
with igzip_threaded.open(io.BytesIO(), "wb") as f:
assert not f.readable()


def test_writer_wrong_level():
with pytest.raises(ValueError) as error:
igzip_threaded._ThreadedGzipWriter(io.BytesIO(), level=42)
error.match("Invalid compression level")
error.match("42")


def test_reader_read_after_close():
with open(TEST_FILE, "rb") as test_f:
f = igzip_threaded._ThreadedGzipReader(test_f)
f.close()
with pytest.raises(ValueError) as error:
f.read(1024)
error.match("closed")


def test_writer_write_after_close():
f = igzip_threaded._ThreadedGzipWriter(io.BytesIO())
f.close()
with pytest.raises(ValueError) as error:
f.write(b"abc")
error.match("closed")
2 changes: 2 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ deps=pytest
passenv=
PYTHON_ISAL_LINK_DYNAMIC
INCLUDE
setenv =
PYTHONDEVMODE=1
commands =
# Create HTML coverage report for humans and xml coverage report for external services.
coverage run --branch --source=isal -m pytest tests
Expand Down

0 comments on commit c689da0

Please sign in to comment.