From 16b557135d9b43ba85f4f6187ad1e6fb3cdf1584 Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Wed, 6 Nov 2024 11:35:57 -0800 Subject: [PATCH] Add decompressed OME-Zarr dataset size to iohub info (#248) * adding datastore size to info * adding uncompressed string * adding changes for readability * typo * Only show decompressed size due to zarr-python bug * add test for size formatting * add test for CLI size info --------- Co-authored-by: Ziwen Liu <67518483+ziw-liu@users.noreply.github.com> --- iohub/reader.py | 27 +++++++++++++++++++++++++++ tests/cli/test_cli.py | 2 ++ tests/test_reader.py | 10 +++++++++- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/iohub/reader.py b/iohub/reader.py index 69cb2460..d76719f0 100644 --- a/iohub/reader.py +++ b/iohub/reader.py @@ -262,11 +262,23 @@ def print_info(path: StrOrBytesPath, verbose=False): print("Zarr hierarchy:") reader.print_tree() positions = list(reader.positions()) + total_bytes_uncompressed = sum( + p["0"].nbytes for _, p in positions + ) msgs.append(f"Positions:\t\t {len(positions)}") msgs.append(f"Chunk size:\t\t {positions[0][1][0].chunks}") + msgs.append( + f"No. bytes decompressed:\t\t {total_bytes_uncompressed} " + f"[{sizeof_fmt(total_bytes_uncompressed)}]" + ) else: + total_bytes_uncompressed = reader["0"].nbytes msgs.append(f"(Z, Y, X) scale (um):\t {tuple(reader.scale[2:])}") msgs.append(f"Chunk size:\t\t {reader['0'].chunks}") + msgs.append( + f"No. 
bytes decompressed:\t\t {total_bytes_uncompressed} " + f"[{sizeof_fmt(total_bytes_uncompressed)}]" + ) if verbose: msgs.extend( [ @@ -280,3 +292,18 @@ def print_info(path: StrOrBytesPath, verbose=False): reader.print_tree() print("\n".join(msgs)) reader.close() + + +def sizeof_fmt(num: int) -> str: + """ + Human readable file size + Adapted from: + https://web.archive.org/web/20111010015624/ + http://blogmag.net/blog/read/38/Print_human_readable_file_size + """ + if num < 1024: + return f"{num} B" + for x in ["KiB", "MiB", "GiB", "TiB"]: + num /= 1024 + if num < 1024: + return f"{num:.1f} {x}" diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index 7376b159..e37d6f0b 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -90,11 +90,13 @@ def test_cli_info_ome_zarr(verbose): assert result.exit_code == 0 assert re.search(r"Wells:\s+1", result.output) assert ("Chunk size" in result.output) == bool(verbose) + assert ("No. bytes decompressed" in result.output) == bool(verbose) # Test on single position result_pos = runner.invoke(cli, ["info", str(hcs_ref / "B" / "03" / "0")]) assert "Channel names" in result_pos.output assert "scale (um)" in result_pos.output assert "Chunk size" in result_pos.output + assert "84.4 MiB" in result_pos.output @pytest.mark.parametrize("grid_layout", ["-g", None]) diff --git a/tests/test_reader.py b/tests/test_reader.py index cb04e977..10b360ef 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -3,7 +3,7 @@ from iohub._deprecated.singlepagetiff import MicromanagerSequenceReader from iohub.mmstack import MMStack from iohub.ndtiff import NDTiffDataset -from iohub.reader import read_images +from iohub.reader import read_images, sizeof_fmt from tests.conftest import ( mm2gamma_ome_tiffs, mm2gamma_singlepage_tiffs, @@ -36,3 +36,11 @@ def test_detect_ndtiff(data_path): def test_detect_single_page_tiff(data_path): reader = read_images(data_path) assert isinstance(reader, MicromanagerSequenceReader) + + 
+@pytest.mark.parametrize( + "num_bytes,expected", + [(3, "3 B"), (2.234 * 2**20, "2.2 MiB"), (3.456 * 2**40, "3.5 TiB")], +) +def test_sizeof_fmt(num_bytes, expected): + assert sizeof_fmt(num_bytes) == expected