From aff9fd4e5bc2ac22ac0aba4c6d2dc1b3d7d04691 Mon Sep 17 00:00:00 2001
From: "Adam J. Stewart"
Date: Sat, 17 Jun 2023 10:06:15 -0500
Subject: [PATCH] Simpler file chunking

---
 test/_utils/_common_utils_for_test.py        | 4 +---
 torchdata/datapipes/iter/util/cacheholder.py | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/test/_utils/_common_utils_for_test.py b/test/_utils/_common_utils_for_test.py
index fea9c82fc..524b6f4fc 100644
--- a/test/_utils/_common_utils_for_test.py
+++ b/test/_utils/_common_utils_for_test.py
@@ -87,9 +87,7 @@ def check_hash_fn(filepath, expected_hash, hash_type="md5"):
         raise ValueError("Invalid hash_type requested, should be one of {}".format(["sha256", "md5"]))
 
     with open(filepath, "rb") as f:
-        chunk = f.read(1024 ** 2)
-        while chunk:
+        while chunk := f.read(1024 ** 2):
             hash_fn.update(chunk)
-            chunk = f.read(1024 ** 2)
 
     return hash_fn.hexdigest() == expected_hash
diff --git a/torchdata/datapipes/iter/util/cacheholder.py b/torchdata/datapipes/iter/util/cacheholder.py
index f12a6a38c..1e2ead45e 100644
--- a/torchdata/datapipes/iter/util/cacheholder.py
+++ b/torchdata/datapipes/iter/util/cacheholder.py
@@ -150,10 +150,8 @@ def _hash_check(filepath, hash_dict, hash_type):
     # TODO(634): Line above will require all readers (Win) to obtain proper locks,
     # I'm putting it on hold as we need to modify PyTorch core codebase heavily.
     with open(filepath, "rb") as f:
-        chunk = f.read(1024 ** 2)
-        while chunk:
+        while chunk := f.read(1024 ** 2):
             hash_func.update(chunk)
-            chunk = f.read(1024 ** 2)
 
     return hash_func.hexdigest() == hash_dict[filepath]
 