Add resolution-based chunking to ABI L1b reader #2621

Merged
16 commits merged Nov 2, 2023
4 changes: 4 additions & 0 deletions satpy/etc/readers/abi_l1b.yaml
@@ -25,16 +25,19 @@ file_types:
# "suffix" is an arbitrary suffix that may be added during third-party testing (see PR #1380)
c01:
file_reader: !!python/name:satpy.readers.abi_l1b.NC_ABI_L1B
+resolution: 1000
file_patterns: ['{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C01_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}.nc{nc_version}',
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C01_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}-{chid:6d}_0.nc{nc_version}',
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C01_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}_{suffix}.nc{nc_version}']
c02:
file_reader: !!python/name:satpy.readers.abi_l1b.NC_ABI_L1B
+resolution: 500
file_patterns: ['{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C02_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}.nc{nc_version}',
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C02_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}-{chid:6d}_0.nc{nc_version}',
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C02_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}_{suffix}.nc{nc_version}']
c03:
file_reader: !!python/name:satpy.readers.abi_l1b.NC_ABI_L1B
+resolution: 1000
file_patterns: ['{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C03_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}.nc{nc_version}',
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C03_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}-{chid:6d}_0.nc{nc_version}',
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C03_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}_{suffix}.nc{nc_version}']
@@ -44,6 +47,7 @@ file_types:
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C04_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}_{suffix}.nc{nc_version}']
c05:
file_reader: !!python/name:satpy.readers.abi_l1b.NC_ABI_L1B
+resolution: 1000
file_patterns: ['{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C05_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}.nc{nc_version}',
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C05_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}-{chid:6d}_0.nc{nc_version}',
'{system_environment:2s}_{mission_id:3s}-L1b-{observation_type:3s}{scene_abbr:s}-{scan_mode:2s}C05_{platform_shortname:3s}_s{start_time:%Y%j%H%M%S%f}_e{end_time:%Y%j%H%M%S%f}_c{creation_time:%Y%j%H%M%S%f}_{suffix}.nc{nc_version}']
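The new `resolution` entries travel with the rest of the file type options into each handler's `filetype_info`, which is what `_chunk_bytes_for_resolution` in `abi_base.py` later reads, falling back to 2000 m when the key is missing. A minimal sketch of that lookup with a hypothetical `filetype_info` dict:

```python
# Hypothetical filetype_info roughly as the c02 entry above would produce.
filetype_info = {"file_reader": "satpy.readers.abi_l1b.NC_ABI_L1B", "resolution": 500}

# Same default as the reader: treat files with no declared resolution as 2 km.
resolution_m = filetype_info.get("resolution", 2000)
print(resolution_m)  # -> 500
```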
52 changes: 35 additions & 17 deletions satpy/readers/abi_base.py
@@ -18,21 +18,22 @@
"""Advance Baseline Imager reader base class for the Level 1b and l2+ reader."""

import logging
+import math
from contextlib import suppress
from datetime import datetime

+import dask
import numpy as np
import xarray as xr
from pyresample import geometry

from satpy._compat import cached_property
from satpy.readers import open_file_or_filename
from satpy.readers.file_handlers import BaseFileHandler
-from satpy.utils import get_legacy_chunk_size
+from satpy.utils import get_dask_chunk_size_in_bytes

logger = logging.getLogger(__name__)

-CHUNK_SIZE = get_legacy_chunk_size()
PLATFORM_NAMES = {
"g16": "GOES-16",
"g17": "GOES-17",
@@ -62,20 +63,42 @@
    @cached_property
    def nc(self):
        """Get the xarray dataset for this file."""
-        f_obj = open_file_or_filename(self.filename)
-        try:
+        chunk_bytes = self._chunk_bytes_for_resolution()
+        with dask.config.set({"array.chunk-size": chunk_bytes}):
+            f_obj = open_file_or_filename(self.filename)
            nc = xr.open_dataset(f_obj,
                                 decode_cf=True,
                                 mask_and_scale=False,
-                                 chunks={"x": CHUNK_SIZE, "y": CHUNK_SIZE}, )
-        except ValueError:
-            nc = xr.open_dataset(f_obj,
-                                 decode_cf=True,
-                                 mask_and_scale=False,
-                                 chunks={"lon": CHUNK_SIZE, "lat": CHUNK_SIZE}, )
+                                 chunks="auto")
        nc = self._rename_dims(nc)
        return nc

+    def _chunk_bytes_for_resolution(self) -> int:
+        """Get a best-guess optimal chunk size for resolution-based chunking.
+
+        First a chunk size is chosen for the provided Dask setting `array.chunk-size`
+        and then aligned with a hardcoded on-disk chunk size of 226. This is then
+        adjusted to match the current resolution.
+
+        This should result in 500 meter data having 4 times as many pixels per
+        dask array chunk (2 in each dimension) as 1km data and 8 times as many
+        as 2km data. As data is combined or upsampled geographically the arrays
+        should not need to be rechunked. Care is taken to make sure that array
+        chunks are aligned with on-disk file chunks at all resolutions, but at
+        the cost of flexibility due to a hardcoded on-disk chunk size of 226
+        elements per dimension.
+
+        """
+        num_high_res_elems_per_dim = math.sqrt(get_dask_chunk_size_in_bytes() / 4)  # 32-bit floats
+        # assume on-disk chunk size of 226
+        # this is true for all CSPP Geo GRB output (226 for all sectors) and full disk from other sources
+        # 250 has been seen for AWS/CLASS CONUS, Mesoscale 1, and Mesoscale 2 files
+        # we align this with 4 on-disk chunks at 500m, so it will be 2 on-disk chunks for 1km, and 1 for 2km
+        high_res_elems_disk_aligned = np.round(max(num_high_res_elems_per_dim / (4 * 226), 1)) * (4 * 226)
+        low_res_factor = int(self.filetype_info.get("resolution", 2000) // 500)
+        res_elems_per_dim = int(high_res_elems_disk_aligned / low_res_factor)
+        return (res_elems_per_dim ** 2) * 4

    @staticmethod
    def _rename_dims(nc):
        if "t" in nc.dims or "t" in nc.coords:
@@ -136,19 +159,14 @@
        if is_int(data) and is_int(factor) and is_int(offset):
            new_fill = fill
        else:
-            new_fill = np.nan
+            new_fill = np.float32(np.nan)
        data = data.where(data != fill, new_fill)
        if factor != 1 and item in ("x", "y"):
            # be more precise with x/y coordinates
            # see get_area_def for more information
            data = data * np.round(float(factor), 6) + np.round(float(offset), 6)
        elif factor != 1:
-            # make sure the factor is a 64-bit float
-            # can't do this in place since data is most likely uint16
-            # and we are making it a 64-bit float
-            if not is_int(factor):
-                factor = float(factor)
-            data = data * factor + offset
+            data = data * np.float32(factor) + np.float32(offset)

Check notice on line 169 in satpy/readers/abi_base.py (CodeScene Delta Analysis, main): ✅ Getting better: Complex Method. NC_ABI_BASE._adjust_data decreases in cyclomatic complexity from 14 to 13 (threshold = 9).

Check notice on line 169 in satpy/readers/abi_base.py (CodeScene Delta Analysis, main): ✅ Getting better: Bumpy Road Ahead. NC_ABI_BASE._adjust_data decreases from 3 to 2 logical blocks with deeply nested code (threshold is one block per function).
return data

def _adjust_coords(self, data, item):
13 changes: 5 additions & 8 deletions satpy/readers/abi_l1b.py
@@ -49,6 +49,7 @@ def get_dataset(self, key, info):
        # For raw cal, don't apply scale and offset, return raw file counts
        if key["calibration"] == "counts":
            radiances = self.nc["Rad"].copy()
+            radiances = self._adjust_coords(radiances, "Rad")
        else:
            radiances = self["Rad"]

@@ -59,12 +60,8 @@
"radiance": self._rad_calibrate,
"counts": self._raw_calibrate,
}

try:
func = cal_dictionary[key["calibration"]]
res = func(radiances)
except KeyError:
raise ValueError("Unknown calibration '{}'".format(key["calibration"]))
func = cal_dictionary[key["calibration"]]
res = func(radiances)

# convert to satpy standard units
if res.attrs["units"] == "1" and key["calibration"] != "counts":
@@ -136,11 +133,11 @@ def _raw_calibrate(self, data):
    def _vis_calibrate(self, data):
        """Calibrate visible channels to reflectance."""
        solar_irradiance = self["esun"]
-        esd = self["earth_sun_distance_anomaly_in_AU"].astype(float)
+        esd = self["earth_sun_distance_anomaly_in_AU"]

        factor = np.pi * esd * esd / solar_irradiance

-        res = data * factor
+        res = data * np.float32(factor)
        res.attrs = data.attrs
        res.attrs["units"] = "1"
        res.attrs["long_name"] = "Bidirectional Reflectance"