Skip to content

Commit

Permalink
Compare checksums
Browse files Browse the repository at this point in the history
  • Loading branch information
tukiains committed Oct 21, 2024
1 parent ab61bbc commit 37b6962
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 24 deletions.
46 changes: 22 additions & 24 deletions cloudnetpy/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from cloudnetpy.categorize import generate_categorize
from cloudnetpy.exceptions import PlottingError
from cloudnetpy.plotting import generate_figure
from cloudnetpy.utils import md5sum

if TYPE_CHECKING:
from collections.abc import Callable
Expand Down Expand Up @@ -358,7 +359,7 @@ def _fetch_product(
meta = meta[0]
suffix = "geophysical" if "geophysical" in meta["product"]["type"] else "instrument"
folder = _create_output_folder(suffix, args)
return _download_file(meta, folder, args)
return _download_file(meta, folder)


def _fetch_model(args: argparse.Namespace) -> str | None:
Expand All @@ -375,7 +376,7 @@ def _fetch_model(args: argparse.Namespace) -> str | None:
return None
meta = meta[0]
folder = _create_output_folder("instrument", args)
return _download_file(meta, folder, args)
return _download_file(meta, folder)


def _fetch_raw(metadata: list[dict], args: argparse.Namespace) -> list[str]:
Expand All @@ -384,28 +385,32 @@ def _fetch_raw(metadata: list[dict], args: argparse.Namespace) -> list[str]:
folder = _create_input_folder(instrument, args)
filepaths = []
with ThreadPoolExecutor() as executor:
futures = [
executor.submit(_download_file, meta, folder, args) for meta in metadata
]
futures = [executor.submit(_download_file, meta, folder) for meta in metadata]
for future in as_completed(futures):
filepaths.append(future.result())
return filepaths


def _download_file(meta: dict, folder: Path, args: argparse.Namespace) -> str:
def _download_file(meta: dict, folder: Path) -> str:
    """Returns the path to a local copy of the file described by `meta`.

    An existing local file is reused when its MD5 checksum matches the one
    in the metadata; otherwise the file is downloaded. The download is
    streamed to disk in chunks so that large files are not held fully in
    memory. Gzipped files are unzipped after download.

    Args:
        meta: File metadata containing at least the "filename",
            "downloadUrl" and "checksum" keys.
        folder: Destination directory for the downloaded file.

    Returns:
        Path to the (possibly unzipped) local file as a string.

    Raises:
        requests.HTTPError: If the download request fails.
    """
    filepath = folder / meta["filename"]

    # Both the compressed file and an already-unzipped variant may exist
    # locally from a previous run.
    # NOTE(review): meta["checksum"] presumably refers to the file as served
    # (i.e. the .gz archive) — verify that comparing it against the unzipped
    # file's checksum can ever match; if not, the second candidate is dead code.
    possible_filepaths = [filepath]
    if filepath.suffix == ".gz":
        possible_filepaths.append(filepath.with_suffix(""))

    for path in possible_filepaths:
        if path.exists() and md5sum(path) == meta["checksum"]:
            logging.info("Using existing file: %s", path)
            return str(path)

    logging.info("Downloading file: %s", filepath)
    # Stream the response body to disk instead of buffering it all in memory.
    with requests.get(meta["downloadUrl"], timeout=60, stream=True) as res:
        res.raise_for_status()
        with open(filepath, "wb") as f:
            for chunk in res.iter_content(chunk_size=65536):
                f.write(chunk)

    if filepath.suffix == ".gz":
        filepath = _unzip_gz_file(filepath)

    return str(filepath)


Expand Down Expand Up @@ -504,13 +509,6 @@ def main():
default=False,
action=argparse.BooleanOptionalAction,
)
parser.add_argument(
"-f",
"--force",
help="Force download of files",
default=False,
action=argparse.BooleanOptionalAction,
)
args = parser.parse_args()

logger = logging.getLogger()
Expand Down
22 changes: 22 additions & 0 deletions cloudnetpy/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""This module contains general helper functions."""

import base64
import datetime
import hashlib
import logging
import os
import re
Expand Down Expand Up @@ -1026,3 +1028,23 @@ def remove_masked_blocks(array: ma.MaskedArray, limit: int = 50) -> np.ndarray:
mask = np.bincount(labeled_array) < limit
mask[0] = True
return mask[labeled_array]


def sha256sum(filename: str | os.PathLike) -> str:
    """Computes the SHA-256 checksum of a file.

    Args:
        filename: Path of the file to hash.

    Returns:
        Hex-encoded SHA-256 digest of the file contents.
    """
    return _calc_hash_sum(filename, "sha256", is_base64=False)


def md5sum(filename: str | os.PathLike, *, is_base64: bool = False) -> str:
    """Computes the MD5 checksum of a file.

    Args:
        filename: Path of the file to hash.
        is_base64: If True, the digest is returned base64-encoded instead
            of hex-encoded.

    Returns:
        MD5 digest of the file contents.
    """
    return _calc_hash_sum(filename, "md5", is_base64=is_base64)


def _calc_hash_sum(filename, method, *, is_base64: bool) -> str:
hash_sum = getattr(hashlib, method)()
with open(filename, "rb") as f:
for byte_block in iter(lambda: f.read(4096), b""):
hash_sum.update(byte_block)
if is_base64:
return base64.encodebytes(hash_sum.digest()).decode("utf-8").strip()
return hash_sum.hexdigest()

0 comments on commit 37b6962

Please sign in to comment.