Fix AGRI L1 C07 having a valid LUT value for its fill value #2726

Merged: 4 commits, Jan 26, 2024
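The underlying issue: the C07 calibration LUT in the AGRI L1 file contains an entry at the index equal to the channel's fill value, so fill counts would look up a real brightness temperature instead of becoming NaN. The reader change below trims the LUT at the fill value before appending the NaN sentinel, and clips counts at or beyond the LUT length. A minimal standalone sketch of that idea, with a made-up 10-entry LUT, fill value, and counts rather than real AGRI values:

```python
import numpy as np

# Hypothetical example: a 10-entry LUT whose last index (9) equals the file's
# fill value, mimicking the C07 case of a 65536-entry LUT with fill value 65535.
lut = np.linspace(200.0, 320.0, 10, dtype=np.float32)
fill_value = 9
counts = np.array([0, 3, 9, 12], dtype=np.uint16)  # includes a fill count (9) and an out-of-range count (12)

# If the fill value is a valid LUT index, drop that entry and everything after it.
if fill_value <= lut.shape[0] - 1:
    lut = lut[:fill_value]

# Append NaN so fill and out-of-range counts calibrate to NaN.
lut = np.append(lut, np.nan)
counts = np.where(counts >= lut.shape[0], lut.shape[0] - 1, counts)
print(lut[counts])  # the last two values are NaN
```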
19 changes: 14 additions & 5 deletions satpy/readers/fy4_base.py
@@ -28,6 +28,7 @@

import dask.array as da
import numpy as np
import numpy.typing as npt
import xarray as xr

from satpy._compat import cached_property
@@ -86,7 +87,7 @@ def scale(dn, slope, offset):

return ref

def apply_lut(self, data, lut):
def _apply_lut(self, data: xr.DataArray, lut: npt.NDArray[np.float32]) -> xr.DataArray:
"""Calibrate digital number (DN) by applying a LUT.

Args:
@@ -96,8 +97,16 @@ def apply_lut(self, data, lut):
Calibrated quantity
"""
# append nan to the end of lut for fillvalue
fill_value = data.attrs.get("FillValue")
if fill_value is not None and fill_value.item() <= lut.shape[0] - 1:
# If LUT includes the fill_value, remove that entry and everything
# after it.
# Ex. C07 has a LUT of 65536 elements, but fill value is 65535
# This is considered a bug in the input file format
lut = lut[:fill_value.item()]

lut = np.append(lut, np.nan)
data.data = da.where(data.data > lut.shape[0], lut.shape[0] - 1, data.data)
data.data = da.where(data.data >= lut.shape[0], lut.shape[0] - 1, data.data)
res = data.data.map_blocks(self._getitem, lut, dtype=lut.dtype)
res = xr.DataArray(res, dims=data.dims,
attrs=data.attrs, coords=data.coords)
@@ -138,8 +147,8 @@ def calibrate(self, data, ds_info, ds_name, file_key):
raise NotImplementedError("Calibration to radiance is not supported.")
# Apply range limits, but not for counts or we convert to float!
if calibration != "counts":
data = data.where((data >= min(data.attrs["valid_range"])) &
(data <= max(data.attrs["valid_range"])))
data = data.where((data >= min(ds_info["valid_range"])) &
(data <= max(ds_info["valid_range"])))
else:
data.attrs["_FillValue"] = data.attrs["FillValue"].item()
return data
@@ -182,7 +191,7 @@ def calibrate_to_bt(self, data, ds_info, ds_name):
lut = self[lut_key]

# the value of dn is the index of brightness_temperature
data = self.apply_lut(data, lut)
data = self._apply_lut(data, lut.compute().data)
ds_info["valid_range"] = lut.attrs["valid_range"]
return data

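For context on the `valid_range` change above: `calibrate_to_bt` now stores the LUT's valid range in `ds_info`, and `calibrate` masks against that instead of the data's own attributes. A small sketch of that masking step with xarray; the temperatures and the 160-320 range are illustrative, not values from the reader:

```python
import numpy as np
import xarray as xr

# Hypothetical brightness temperatures and a LUT-provided valid range.
bt = xr.DataArray(np.array([[150.0, 220.0, 280.0], [330.0, np.nan, 250.0]]))
ds_info = {"valid_range": np.array([160.0, 320.0])}  # set from lut.attrs["valid_range"] in the reader

# Values outside the valid range become NaN; existing NaNs stay NaN.
bt = bt.where((bt >= min(ds_info["valid_range"])) & (bt <= max(ds_info["valid_range"])))
print(bt.values)
```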
195 changes: 88 additions & 107 deletions satpy/tests/reader_tests/test_agri_l1.py
@@ -56,71 +56,65 @@
class FakeHDF5FileHandler2(FakeHDF5FileHandler):
"""Swap-in HDF5 File Handler."""

def make_test_data(self, cwl, ch, prefix, dims, file_type):
def _make_cal_data(self, cwl, ch, dims):
"""Make test data."""
if prefix == "CAL":
data = xr.DataArray(
da.from_array((np.arange(10.) + 1.) / 10., [dims[0] * dims[1]]),
attrs={
"Slope": np.array(1.), "Intercept": np.array(0.),
"FillValue": np.array(-65535.0),
"units": "NUL",
"center_wavelength": "{}um".format(cwl).encode("utf-8"),
"band_names": "band{}(band number is range from 1 to 14)"
.format(ch).encode("utf-8"),
"long_name": "Calibration table of {}um Channel".format(cwl).encode("utf-8"),
"valid_range": np.array([0, 1.5]),
},
dims="_const")

elif prefix == "NOM":
data = xr.DataArray(
da.from_array(np.arange(10, dtype=np.uint16).reshape((2, 5)) + 1,
[dim for dim in dims]),
attrs={
"Slope": np.array(1.), "Intercept": np.array(0.),
"FillValue": np.array(65535),
"units": "DN",
"center_wavelength": "{}um".format(cwl).encode("utf-8"),
"band_names": "band{}(band number is range from 1 to 14)"
.format(ch).encode("utf-8"),
"long_name": "Calibration table of {}um Channel".format(cwl).encode("utf-8"),
"valid_range": np.array([0, 4095]),
},
dims=("_RegLength", "_RegWidth"))

elif prefix == "GEO":
data = xr.DataArray(
da.from_array(np.arange(0., 360., 36., dtype=np.float32).reshape((2, 5)),
[dim for dim in dims]),
attrs={
"Slope": np.array(1.), "Intercept": np.array(0.),
"FillValue": np.array(65535.),
"units": "NUL",
"band_names": "NUL",
"valid_range": np.array([0., 360.]),
},
dims=("_RegLength", "_RegWidth"))

elif prefix == "COEF":
if file_type == "500":
data = self._create_coeff_array(1)

elif file_type == "1000":
data = self._create_coeff_array(3)

elif file_type == "2000":
data = self._create_coeff_array(7)

elif file_type == "4000":
data = self._create_coeff_array(14)
return xr.DataArray(
da.from_array((np.arange(10.) + 1.) / 10., [dims[0] * dims[1]]),
attrs={
"Slope": np.array(1.), "Intercept": np.array(0.),
"FillValue": np.array(-65535.0),
"units": "NUL",
"center_wavelength": "{}um".format(cwl).encode("utf-8"),
"band_names": "band{}(band number is range from 1 to 14)"
.format(ch).encode("utf-8"),
"long_name": "Calibration table of {}um Channel".format(cwl).encode("utf-8"),
"valid_range": np.array([0, 1.5]),
},
dims="_const")

def _make_nom_data(self, cwl, ch, dims):
# Add +1 to check that values beyond the LUT are clipped
data_np = np.arange(10, dtype=np.uint16).reshape((2, 5)) + 1
fill_value = 65535
valid_max = 4095
if ch == 7:
# mimic C07 bug where the fill value is in the LUT
fill_value = 9 # at index [1, 3] (second to last element)
valid_max = 8
return xr.DataArray(
da.from_array(data_np, chunks=[dim for dim in dims]),
attrs={
"Slope": np.array(1.), "Intercept": np.array(0.),
"FillValue": np.array(fill_value),
"units": "DN",
"center_wavelength": "{}um".format(cwl).encode("utf-8"),
"band_names": "band{}(band number is range from 1 to 14)"
.format(ch).encode("utf-8"),
"long_name": "Calibration table of {}um Channel".format(cwl).encode("utf-8"),
"valid_range": np.array([0, valid_max]),
},
dims=("_RegLength", "_RegWidth"))

return data
def _make_geo_data(self, dims):
return xr.DataArray(
da.from_array(np.arange(0., 360., 36., dtype=np.float32).reshape((2, 5)),
[dim for dim in dims]),
attrs={
"Slope": np.array(1.), "Intercept": np.array(0.),
"FillValue": np.array(65535.),
"units": "NUL",
"band_names": "NUL",
"valid_range": np.array([0., 360.]),
},
dims=("_RegLength", "_RegWidth"))

def _create_coeff_array(self, nb_channels):
def _create_coeffs_array(self, channel_numbers: list[int]) -> xr.DataArray:
# make coefficients consistent between file types
all_possible_coeffs = (np.arange(14 * 2).reshape((14, 2)) + 1.0) / np.array([1E4, 1E2])
# get the coefficients for the specific channels this resolution has
these_coeffs = all_possible_coeffs[[chan_num - 1 for chan_num in channel_numbers]]
data = xr.DataArray(
da.from_array((np.arange(nb_channels * 2).reshape((nb_channels, 2)) + 1.) /
np.array([1E4, 1E2]), [nb_channels, 2]),
da.from_array(these_coeffs, chunks=[len(channel_numbers), 2]),
attrs={
"Slope": 1., "Intercept": 0.,
"FillValue": 0,
@@ -132,60 +126,46 @@
dims=("_num_channel", "_coefs"))
return data

def _create_channel_data(self, chs, cwls, file_type):
def _create_channel_data(self, chs, cwls):
dim_0 = 2
dim_1 = 5
data = {}
for index, _cwl in enumerate(cwls):
data["CALChannel" + "%02d" % chs[index]] = self.make_test_data(cwls[index], chs[index], "CAL",
[dim_0, dim_1], file_type)
data["Calibration/CALChannel" + "%02d" % chs[index]] = self.make_test_data(cwls[index], chs[index], "CAL",
[dim_0, dim_1], file_type)
data["NOMChannel" + "%02d" % chs[index]] = self.make_test_data(cwls[index], chs[index], "NOM",
[dim_0, dim_1], file_type)
data["Data/NOMChannel" + "%02d" % chs[index]] = self.make_test_data(cwls[index], chs[index], "NOM",
[dim_0, dim_1], file_type)
data["CALIBRATION_COEF(SCALE+OFFSET)"] = self.make_test_data(cwls[index], chs[index], "COEF",
[dim_0, dim_1], file_type)
data["Calibration/CALIBRATION_COEF(SCALE+OFFSET)"] = self.make_test_data(cwls[index], chs[index], "COEF",
[dim_0, dim_1], file_type)
for chan_num, chan_wl in zip(chs, cwls):
cal_data = self._make_cal_data(chan_wl, chan_num, [dim_0, dim_1])
data[f"CALChannel{chan_num:02d}"] = cal_data
data[f"Calibration/CALChannel{chan_num:02d}"] = cal_data
nom_data = self._make_nom_data(chan_wl, chan_num, [dim_0, dim_1])
data[f"NOMChannel{chan_num:02d}"] = nom_data
data[f"Data/NOMChannel{chan_num:02d}"] = nom_data
data["CALIBRATION_COEF(SCALE+OFFSET)"] = self._create_coeffs_array(chs)
data["Calibration/CALIBRATION_COEF(SCALE+OFFSET)"] = self._create_coeffs_array(chs)
return data

def _get_500m_data(self, file_type):
def _get_500m_data(self):
chs = [2]
cwls = [0.65]
data = self._create_channel_data(chs, cwls, file_type)
return self._create_channel_data(chs, cwls)

return data

def _get_1km_data(self, file_type):
chs = np.linspace(1, 3, 3)
def _get_1km_data(self):
chs = [1, 2, 3]
cwls = [0.47, 0.65, 0.83]
data = self._create_channel_data(chs, cwls, file_type)
return self._create_channel_data(chs, cwls)

return data

def _get_2km_data(self, file_type):
chs = np.linspace(1, 7, 7)
def _get_2km_data(self):
chs = [1, 2, 3, 4, 5, 6, 7]
cwls = [0.47, 0.65, 0.83, 1.37, 1.61, 2.22, 3.72]
data = self._create_channel_data(chs, cwls, file_type)
return self._create_channel_data(chs, cwls)

return data

def _get_4km_data(self, file_type):
chs = np.linspace(1, 14, 14)
def _get_4km_data(self):
chs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
cwls = [0.47, 0.65, 0.83, 1.37, 1.61, 2.22, 3.72, 3.72, 6.25, 7.10, 8.50, 10.8, 12, 13.5]
data = self._create_channel_data(chs, cwls, file_type)

return data
return self._create_channel_data(chs, cwls)

def _get_geo_data(self, file_type):
def _get_geo_data(self):
dim_0 = 2
dim_1 = 5
data = {"NOMSunAzimuth": self.make_test_data("NUL", "NUL", "GEO",
[dim_0, dim_1], file_type),
"Navigation/NOMSunAzimuth": self.make_test_data("NUL", "NUL", "GEO",
[dim_0, dim_1], file_type)}
data = {"NOMSunAzimuth": self._make_geo_data([dim_0, dim_1]),
"Navigation/NOMSunAzimuth": self._make_geo_data([dim_0, dim_1])}
return data

def get_test_content(self, filename, filename_info, filetype_info):
@@ -210,17 +190,17 @@

data = {}
if self.filetype_info["file_type"] == "agri_l1_0500m":
data = self._get_500m_data("500")
data = self._get_500m_data()
elif self.filetype_info["file_type"] == "agri_l1_1000m":
data = self._get_1km_data("1000")
data = self._get_1km_data()
elif self.filetype_info["file_type"] == "agri_l1_2000m":
data = self._get_2km_data("2000")
data = self._get_2km_data()
global_attrs["/attr/Observing Beginning Time"] = "00:30:01"
global_attrs["/attr/Observing Ending Time"] = "00:34:07"
elif self.filetype_info["file_type"] == "agri_l1_4000m":
data = self._get_4km_data("4000")
data = self._get_4km_data()
elif self.filetype_info["file_type"] == "agri_l1_4000m_geo":
data = self._get_geo_data("4000")
data = self._get_geo_data()

test_content = {}
test_content.update(global_attrs)
@@ -263,7 +243,7 @@
4: np.array([[8.07, 8.14, 8.21, 8.28, 8.35], [8.42, 8.49, 8.56, 8.63, 8.7]]),
5: np.array([[10.09, 10.18, 10.27, 10.36, 10.45], [10.54, 10.63, 10.72, 10.81, 10.9]]),
6: np.array([[12.11, 12.22, 12.33, 12.44, 12.55], [12.66, 12.77, 12.88, 12.99, 13.1]]),
7: np.array([[0.2, 0.3, 0.4, 0.5, 0.6], [0.7, 0.8, 0.9, 1., np.nan]]),
7: np.array([[0.2, 0.3, 0.4, 0.5, 0.6], [0.7, 0.8, 0.9, np.nan, np.nan]]),
8: np.array([[0.2, 0.3, 0.4, 0.5, 0.6], [0.7, 0.8, 0.9, 1., np.nan]]),
9: np.array([[0.2, 0.3, 0.4, 0.5, 0.6], [0.7, 0.8, 0.9, 1., np.nan]]),
10: np.array([[0.2, 0.3, 0.4, 0.5, 0.6], [0.7, 0.8, 0.9, 1., np.nan]]),
@@ -398,10 +378,11 @@
AREA_EXTENTS_BY_RESOLUTION[satname][resolution_to_test])

def _check_calibration_and_units(self, band_names, result):
for index, band_name in enumerate(band_names):
for band_name in band_names:
band_number = int(band_name[-2:])
assert result[band_name].attrs["sensor"].islower()
assert result[band_name].shape == (2, 5)
np.testing.assert_allclose(result[band_name].values, self.expected[index + 1], equal_nan=True)
np.testing.assert_allclose(result[band_name].values, self.expected[band_number], equal_nan=True)
self._check_units(band_name, result)

@staticmethod