Skip to content

Commit

Permalink
add 5 largest files to --inspect (fixes #121) (#187)
Browse files Browse the repository at this point in the history
  • Loading branch information
jameslamb authored Sep 5, 2023
1 parent 0fc1de2 commit 9002cff
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/pydistcheck/distribution_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,9 @@ def num_directories(self) -> int:
def num_files(self) -> int:
return len(self.files)

def get_largest_files(self, n: int) -> List[_FileInfo]:
    """Return up to ``n`` entries of ``self.files``, largest first.

    Files are ranked by their ``uncompressed_size_bytes`` attribute, in
    descending order. Files of equal size keep their relative order from
    ``self.files`` because ``sorted()`` is stable.

    :param n: Maximum number of files to return.
    :return: A new list of at most ``n`` files. If ``n`` exceeds the number
             of files, every file is returned. If ``n`` is zero or negative,
             the list is empty.
    """
    # Guard against negative values: without it, a slice like ``[:-1]`` would
    # return "everything except the smallest file" instead of nothing.
    if n <= 0:
        return []
    return sorted(self.files, key=lambda f: f.uncompressed_size_bytes, reverse=True)[:n]

@property
def uncompressed_size_bytes(self) -> int:
    """Sum of ``uncompressed_size_bytes`` over every entry in ``self.files``."""
    total_bytes = 0
    for file_info in self.files:
        total_bytes += file_info.uncompressed_size_bytes
    return total_bytes
Expand Down
5 changes: 5 additions & 0 deletions src/pydistcheck/inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,8 @@ def inspect_distribution(filepath: str) -> None:
for extension, size in summary.size_by_file_extension.items():
size_pct = size / summary.uncompressed_size_bytes
print(f" * {extension} - {round(size / 1024.0, 1)}K ({round(size_pct * 100, 1)}%)")

largest_files = summary.get_largest_files(n=5)
print("largest files")
for file_info in largest_files:
print(f" * ({_FileSize(file_info.uncompressed_size_bytes, 'B')}) {file_info.name}")
24 changes: 24 additions & 0 deletions tests/test_distribution_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,30 @@ def test_distribution_summary_basically_works(distro_file):
assert size_in_bytes < last_size_seen
last_size_seen = size_in_bytes

# get_largest_files() should return a non-empty list of _FileInfo objects
largest_files = ds.get_largest_files(n=2)
assert isinstance(largest_files, list)
assert len(largest_files) == 2
assert all(isinstance(x, _FileInfo) for x in largest_files)
assert all(x in ds.files for x in largest_files)

# should actually choose the 2 largest files
assert largest_files[0].name == "base-package-0.1.0/LICENSE.txt"
assert largest_files[1].name == "base-package-0.1.0/setup.cfg"


def test_distribution_summary_get_largest_files_works():
    """get_largest_files() caps its result at the file count and sorts largest-first."""
    ds = _DistributionSummary.from_file(os.path.join(TEST_DATA_DIR, BASE_PACKAGE_SDISTS[0]))
    num_files = ds.num_files

    # if you ask for more files than there are, you get however many there are
    all_files = ds.get_largest_files(n=num_files + 1)
    assert len(all_files) == num_files

    # if you ask for fewer files than there are, you get exactly that many
    assert len(ds.get_largest_files(n=num_files - 1)) == num_files - 1

    # if you ask for no files, you get an empty list
    assert ds.get_largest_files(n=0) == []

    # results should be ordered from largest to smallest
    sizes = [f.uncompressed_size_bytes for f in all_files]
    assert sizes == sorted(sizes, reverse=True)


@pytest.mark.parametrize("distro_file", BASE_WHEELS)
def test_distribution_summary_correctly_reads_contents_of_wheels(distro_file):
Expand Down

0 comments on commit 9002cff

Please sign in to comment.