Add resolution-based chunking to ABI L1b reader #2621

Merged
16 commits merged Nov 2, 2023
4 changes: 4 additions & 0 deletions satpy/etc/readers/abi_l1b.yaml
@@ -25,16 +25,19 @@ file_types:
# "suffix" is an arbitrary suffix that may be added during third-party testing (see PR #1380)
c01:
file_reader: !!python/name:satpy.readers.abi_l1b.NC_ABI_L1B
+resolution: 1000
file_patterns: ['{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C01_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}.nc{nc_version}',
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C01_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}-{chid:6d}_0.nc{nc_version}',
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C01_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}_{suffix}.nc{nc_version}']
c02:
file_reader: !!python/name:satpy.readers.abi_l1b.NC_ABI_L1B
+resolution: 500
file_patterns: ['{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C02_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}.nc{nc_version}',
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C02_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}-{chid:6d}_0.nc{nc_version}',
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C02_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}_{suffix}.nc{nc_version}']
c03:
file_reader: !!python/name:satpy.readers.abi_l1b.NC_ABI_L1B
+resolution: 1000
file_patterns: ['{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C03_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}.nc{nc_version}',
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C03_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}-{chid:6d}_0.nc{nc_version}',
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C03_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}_{suffix}.nc{nc_version}']
@@ -44,6 +47,7 @@ file_types:
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C04_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}_{suffix}.nc{nc_version}']
c05:
file_reader: !!python/name:satpy.readers.abi_l1b.NC_ABI_L1B
+resolution: 1000
file_patterns: ['{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C05_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}.nc{nc_version}',
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C05_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}-{chid:6d}_0.nc{nc_version}',
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C05_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}_{suffix}.nc{nc_version}']
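The new `resolution` entries travel with the rest of the file type options into each handler's `filetype_info`, which is what `_chunk_bytes_for_resolution` in `abi_base.py` later reads, falling back to 2000 m when the key is missing. A minimal sketch of that lookup with a hypothetical `filetype_info` dict:

```python
# Hypothetical filetype_info roughly as the c02 entry above would produce.
filetype_info = {"file_reader": "satpy.readers.abi_l1b.NC_ABI_L1B", "resolution": 500}

# Same default as the reader: treat files with no declared resolution as 2 km.
resolution_m = filetype_info.get("resolution", 2000)
print(resolution_m)  # -> 500
```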
52 changes: 35 additions & 17 deletions satpy/readers/abi_base.py
@@ -18,21 +18,22 @@
"""Advance Baseline Imager reader base class for the Level 1b and l2+ reader."""

import logging
+import math
from contextlib import suppress
from datetime import datetime

+import dask
import numpy as np
import xarray as xr
from pyresample import geometry

from satpy._compat import cached_property
from satpy.readers import open_file_or_filename
from satpy.readers.file_handlers import BaseFileHandler
-from satpy.utils import get_legacy_chunk_size
+from satpy.utils import get_dask_chunk_size_in_bytes

logger = logging.getLogger(__name__)

-CHUNK_SIZE = get_legacy_chunk_size()
PLATFORM_NAMES = {
"g16": "GOES-16",
"g17": "GOES-17",
@@ -62,20 +63,42 @@
    @cached_property
    def nc(self):
        """Get the xarray dataset for this file."""
-        f_obj = open_file_or_filename(self.filename)
-        try:
+        chunk_bytes = self._chunk_bytes_for_resolution()
+        with dask.config.set({"array.chunk-size": chunk_bytes}):
+            f_obj = open_file_or_filename(self.filename)
            nc = xr.open_dataset(f_obj,
                                 decode_cf=True,
                                 mask_and_scale=False,
-                                 chunks={"x": CHUNK_SIZE, "y": CHUNK_SIZE}, )
-        except ValueError:
-            nc = xr.open_dataset(f_obj,
-                                 decode_cf=True,
-                                 mask_and_scale=False,
-                                 chunks={"lon": CHUNK_SIZE, "lat": CHUNK_SIZE}, )
+                                 chunks="auto")
        nc = self._rename_dims(nc)
        return nc

+    def _chunk_bytes_for_resolution(self) -> int:
+        """Get a best-guess optimal chunk size for resolution-based chunking.
+
+        First a chunk size is chosen for the provided Dask setting `array.chunk-size`
+        and then aligned with a hardcoded on-disk chunk size of 226. This is then
+        adjusted to match the current resolution.
+
+        This should result in 500 meter data having 4 times as many pixels per
+        dask array chunk (2 in each dimension) as 1km data and 8 times as many
+        as 2km data. As data is combined or upsampled geographically the arrays
+        should not need to be rechunked. Care is taken to make sure that array
+        chunks are aligned with on-disk file chunks at all resolutions, but at
+        the cost of flexibility due to a hardcoded on-disk chunk size of 226
+        elements per dimension.
+
+        """
+        num_high_res_elems_per_dim = math.sqrt(get_dask_chunk_size_in_bytes() / 4)  # 32-bit floats
+        # assume on-disk chunk size of 226
+        # this is true for all CSPP Geo GRB output (226 for all sectors) and full disk from other sources
+        # 250 has been seen for AWS/CLASS CONUS, Mesoscale 1, and Mesoscale 2 files
+        # we align this with 4 on-disk chunks at 500m, so it will be 2 on-disk chunks for 1km, and 1 for 2km
+        high_res_elems_disk_aligned = np.round(max(num_high_res_elems_per_dim / (4 * 226), 1)) * (4 * 226)
+        low_res_factor = int(self.filetype_info.get("resolution", 2000) // 500)
+        res_elems_per_dim = int(high_res_elems_disk_aligned / low_res_factor)
+        return (res_elems_per_dim ** 2) * 4

    @staticmethod
    def _rename_dims(nc):
        if "t" in nc.dims or "t" in nc.coords:
@@ -136,19 +159,14 @@
        if is_int(data) and is_int(factor) and is_int(offset):
            new_fill = fill
        else:
-            new_fill = np.nan
+            new_fill = np.float32(np.nan)
        data = data.where(data != fill, new_fill)
        if factor != 1 and item in ("x", "y"):
            # be more precise with x/y coordinates
            # see get_area_def for more information
            data = data * np.round(float(factor), 6) + np.round(float(offset), 6)
        elif factor != 1:
-            # make sure the factor is a 64-bit float
-            # can't do this in place since data is most likely uint16
-            # and we are making it a 64-bit float
-            if not is_int(factor):
-                factor = float(factor)
-            data = data * factor + offset
+            data = data * np.float32(factor) + np.float32(offset)

Check notice on line 169 in satpy/readers/abi_base.py (CodeScene Delta Analysis, main): ✅ Getting better: Complex Method. NC_ABI_BASE._adjust_data decreases in cyclomatic complexity from 14 to 13 (threshold = 9).

Check notice on line 169 in satpy/readers/abi_base.py (CodeScene Delta Analysis, main): ✅ Getting better: Bumpy Road Ahead. NC_ABI_BASE._adjust_data decreases from 3 to 2 logical blocks with deeply nested code (threshold is one block per function).
return data

def _adjust_coords(self, data, item):
13 changes: 5 additions & 8 deletions satpy/readers/abi_l1b.py
@@ -49,6 +49,7 @@ def get_dataset(self, key, info):
        # For raw cal, don't apply scale and offset, return raw file counts
        if key["calibration"] == "counts":
            radiances = self.nc["Rad"].copy()
+            radiances = self._adjust_coords(radiances, "Rad")
        else:
            radiances = self["Rad"]

@@ -59,12 +60,8 @@
"radiance": self._rad_calibrate,
"counts": self._raw_calibrate,
}

try:
func = cal_dictionary[key["calibration"]]
res = func(radiances)
except KeyError:
raise ValueError("Unknown calibration '{}'".format(key["calibration"]))
func = cal_dictionary[key["calibration"]]
res = func(radiances)

# convert to satpy standard units
if res.attrs["units"] == "1" and key["calibration"] != "counts":
@@ -136,11 +133,11 @@ def _raw_calibrate(self, data):
    def _vis_calibrate(self, data):
        """Calibrate visible channels to reflectance."""
        solar_irradiance = self["esun"]
-        esd = self["earth_sun_distance_anomaly_in_AU"].astype(float)
+        esd = self["earth_sun_distance_anomaly_in_AU"]

        factor = np.pi * esd * esd / solar_irradiance

-        res = data * factor
+        res = data * np.float32(factor)
        res.attrs = data.attrs
        res.attrs["units"] = "1"
        res.attrs["long_name"] = "Bidirectional Reflectance"