From 5ac92a774ce9f1cf84ae9e35d81e823c82ff906f Mon Sep 17 00:00:00 2001
From: ghiggi
Date: Tue, 27 Jun 2023 22:17:22 +0200
Subject: [PATCH 01/37] Refactor area-related functions

---
 satpy/tests/writer_tests/cf_tests/__init__.py |  18 +
 .../tests/writer_tests/cf_tests/test_area.py  | 401 ++++++++++++++++++
 satpy/tests/writer_tests/test_cf.py           | 379 +----------------
 satpy/writers/cf/area.py                      | 192 +++++++++
 satpy/writers/cf_writer.py                    | 178 +-------
 5 files changed, 621 insertions(+), 547 deletions(-)
 create mode 100644 satpy/tests/writer_tests/cf_tests/__init__.py
 create mode 100644 satpy/tests/writer_tests/cf_tests/test_area.py
 create mode 100644 satpy/writers/cf/area.py

diff --git a/satpy/tests/writer_tests/cf_tests/__init__.py b/satpy/tests/writer_tests/cf_tests/__init__.py
new file mode 100644
index 0000000000..e654e26dcc
--- /dev/null
+++ b/satpy/tests/writer_tests/cf_tests/__init__.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2017-2023 Satpy developers
+#
+# This file is part of satpy.
+#
+# satpy is free software: you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# satpy. If not, see <http://www.gnu.org/licenses/>.
+"""The CF dataset tests package."""
diff --git a/satpy/tests/writer_tests/cf_tests/test_area.py b/satpy/tests/writer_tests/cf_tests/test_area.py
new file mode 100644
index 0000000000..e293ff39a6
--- /dev/null
+++ b/satpy/tests/writer_tests/cf_tests/test_area.py
@@ -0,0 +1,401 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2017-2023 Satpy developers
+#
+# This file is part of satpy.
+#
+# satpy is free software: you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# satpy. If not, see <http://www.gnu.org/licenses/>.
+"""Tests for the CF Area.""" +import dask.array as da +import numpy as np +import pytest +import xarray as xr +from pyresample import AreaDefinition, SwathDefinition + + +class TestCFArea: + """Test case for CF Area.""" + + def test_assert_xy_unique(self): + """Test that the x and y coordinates are unique.""" + from satpy.writers.cf.area import assert_xy_unique + + dummy = [[1, 2], [3, 4]] + datas = {'a': xr.DataArray(data=dummy, dims=('y', 'x'), coords={'y': [1, 2], 'x': [3, 4]}), + 'b': xr.DataArray(data=dummy, dims=('y', 'x'), coords={'y': [1, 2], 'x': [3, 4]}), + 'n': xr.DataArray(data=dummy, dims=('v', 'w'), coords={'v': [1, 2], 'w': [3, 4]})} + assert_xy_unique(datas) + + datas['c'] = xr.DataArray(data=dummy, dims=('y', 'x'), coords={'y': [1, 3], 'x': [3, 4]}) + with pytest.raises(ValueError): + assert_xy_unique(datas) + + def test_link_coords(self): + """Check that coordinates link has been established correctly.""" + from satpy.writers.cf.area import link_coords + + data = [[1, 2], [3, 4]] + lon = np.zeros((2, 2)) + lon2 = np.zeros((1, 2, 2)) + lat = np.ones((2, 2)) + datasets = { + 'var1': xr.DataArray(data=data, dims=('y', 'x'), attrs={'coordinates': 'lon lat'}), + 'var2': xr.DataArray(data=data, dims=('y', 'x')), + 'var3': xr.DataArray(data=data, dims=('y', 'x'), attrs={'coordinates': 'lon2 lat'}), + 'var4': xr.DataArray(data=data, dims=('y', 'x'), attrs={'coordinates': 'not_exist lon lat'}), + 'lon': xr.DataArray(data=lon, dims=('y', 'x')), + 'lon2': xr.DataArray(data=lon2, dims=('time', 'y', 'x')), + 'lat': xr.DataArray(data=lat, dims=('y', 'x')) + } + + link_coords(datasets) + + # Check that link has been established correctly and 'coordinate' atrribute has been dropped + assert 'lon' in datasets['var1'].coords + assert 'lat' in datasets['var1'].coords + np.testing.assert_array_equal(datasets['var1']['lon'].data, lon) + np.testing.assert_array_equal(datasets['var1']['lat'].data, lat) + assert 'coordinates' not in datasets['var1'].attrs + + # There should be no link if there was no 'coordinate' attribute + assert 'lon' not in datasets['var2'].coords + assert 'lat' not in datasets['var2'].coords + + # The non-existent dimension or coordinate should be dropped + assert 'time' not in datasets['var3'].coords + assert 'not_exist' not in datasets['var4'].coords + + def test_make_alt_coords_unique(self): + """Test that created coordinate variables are unique.""" + from satpy.writers.cf.area import make_alt_coords_unique + + data = [[1, 2], [3, 4]] + y = [1, 2] + x = [1, 2] + time1 = [1, 2] + time2 = [3, 4] + datasets = {'var1': xr.DataArray(data=data, + dims=('y', 'x'), + coords={'y': y, 'x': x, 'acq_time': ('y', time1)}), + 'var2': xr.DataArray(data=data, + dims=('y', 'x'), + coords={'y': y, 'x': x, 'acq_time': ('y', time2)})} + + # Test that dataset names are prepended to alternative coordinates + res = make_alt_coords_unique(datasets) + np.testing.assert_array_equal(res['var1']['var1_acq_time'], time1) + np.testing.assert_array_equal(res['var2']['var2_acq_time'], time2) + assert 'acq_time' not in res['var1'].coords + assert 'acq_time' not in res['var2'].coords + + # Make sure nothing else is modified + np.testing.assert_array_equal(res['var1']['x'], x) + np.testing.assert_array_equal(res['var1']['y'], y) + np.testing.assert_array_equal(res['var2']['x'], x) + np.testing.assert_array_equal(res['var2']['y'], y) + + # Coords not unique -> Dataset names must be prepended, even if pretty=True + with pytest.warns(UserWarning, match='Cannot pretty-format "acq_time"'): + res = 
make_alt_coords_unique(datasets, pretty=True)
+        np.testing.assert_array_equal(res['var1']['var1_acq_time'], time1)
+        np.testing.assert_array_equal(res['var2']['var2_acq_time'], time2)
+        assert 'acq_time' not in res['var1'].coords
+        assert 'acq_time' not in res['var2'].coords
+
+        # Coords unique and pretty=True -> Don't modify coordinate names
+        datasets['var2']['acq_time'] = ('y', time1)
+        res = make_alt_coords_unique(datasets, pretty=True)
+        np.testing.assert_array_equal(res['var1']['acq_time'], time1)
+        np.testing.assert_array_equal(res['var2']['acq_time'], time1)
+        assert 'var1_acq_time' not in res['var1'].coords
+        assert 'var2_acq_time' not in res['var2'].coords
+
+    def test_area2cf(self):
+        """Test the conversion of an area to CF standards."""
+        from satpy.writers.cf.area import area2cf
+
+        ds_base = xr.DataArray(data=[[1, 2], [3, 4]], dims=('y', 'x'), coords={'y': [1, 2], 'x': [3, 4]},
+                               attrs={'name': 'var1'})
+
+        # a) Area Definition and include_lonlats=False
+        geos = AreaDefinition(
+            area_id='geos',
+            description='geos',
+            proj_id='geos',
+            projection={'proj': 'geos', 'h': 35785831., 'a': 6378169., 'b': 6356583.8},
+            width=2, height=2,
+            area_extent=[-1, -1, 1, 1])
+        ds = ds_base.copy(deep=True)
+        ds.attrs['area'] = geos
+
+        res = area2cf(ds, include_lonlats=False)
+        assert len(res) == 2
+        assert res[0].size == 1  # grid mapping variable
+        assert res[0].name == res[1].attrs['grid_mapping']
+
+        # b) Area Definition and include_lonlats=True
+        ds = ds_base.copy(deep=True)
+        ds.attrs['area'] = geos
+        res = area2cf(ds, include_lonlats=True)
+        # same as above
+        assert len(res) == 2
+        assert res[0].size == 1  # grid mapping variable
+        assert res[0].name == res[1].attrs['grid_mapping']
+        # but now also have the lon/lats
+        assert 'longitude' in res[1].coords
+        assert 'latitude' in res[1].coords
+
+        # c) Swath Definition
+        swath = SwathDefinition(lons=[[1, 1], [2, 2]], lats=[[1, 2], [1, 2]])
+        ds = ds_base.copy(deep=True)
+        ds.attrs['area'] = swath
+
+        res = area2cf(ds, include_lonlats=False)
+        assert len(res) == 1
+        assert 'longitude' in res[0].coords
+        assert 'latitude' in res[0].coords
+        assert 'grid_mapping' not in res[0].attrs
+
+    def test__add_grid_mapping(self):
+        """Test the conversion from pyresample area object to CF grid mapping."""
+        from satpy.writers.cf.area import _add_grid_mapping
+
+        def _gm_matches(gmapping, expected):
+            """Assert that all keys in ``expected`` match the values in ``gmapping``."""
+            for attr_key, attr_val in expected.attrs.items():
+                test_val = gmapping.attrs[attr_key]
+                if attr_val is None or isinstance(attr_val, str):
+                    assert test_val == attr_val
+                else:
+                    np.testing.assert_almost_equal(test_val, attr_val, decimal=3)
+
+        ds_base = xr.DataArray(data=[[1, 2], [3, 4]], dims=('y', 'x'), coords={'y': [1, 2], 'x': [3, 4]},
+                               attrs={'name': 'var1'})
+
+        # a) Projection has a corresponding CF representation (e.g. geos)
+        a = 6378169.
+        b = 6356583.8
+        h = 35785831.
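+        # 'a'/'b' are the ellipsoid semi-major/semi-minor axes and 'h' the satellite height, in meters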
+ geos = AreaDefinition( + area_id='geos', + description='geos', + proj_id='geos', + projection={'proj': 'geos', 'h': h, 'a': a, 'b': b, + 'lat_0': 0, 'lon_0': 0}, + width=2, height=2, + area_extent=[-1, -1, 1, 1]) + geos_expected = xr.DataArray(data=0, + attrs={'perspective_point_height': h, + 'latitude_of_projection_origin': 0, + 'longitude_of_projection_origin': 0, + 'grid_mapping_name': 'geostationary', + 'semi_major_axis': a, + 'semi_minor_axis': b, + # 'sweep_angle_axis': None, + }) + + ds = ds_base.copy() + ds.attrs['area'] = geos + new_ds, grid_mapping = _add_grid_mapping(ds) + if 'sweep_angle_axis' in grid_mapping.attrs: + # older versions of pyproj might not include this + assert grid_mapping.attrs['sweep_angle_axis'] == 'y' + + assert new_ds.attrs['grid_mapping'] == 'geos' + _gm_matches(grid_mapping, geos_expected) + # should not have been modified + assert 'grid_mapping' not in ds.attrs + + # b) Projection does not have a corresponding CF representation (COSMO) + cosmo7 = AreaDefinition( + area_id='cosmo7', + description='cosmo7', + proj_id='cosmo7', + projection={'proj': 'ob_tran', 'ellps': 'WGS84', 'lat_0': 46, 'lon_0': 4.535, + 'o_proj': 'stere', 'o_lat_p': 90, 'o_lon_p': -5.465}, + width=597, height=510, + area_extent=[-1812933, -1003565, 814056, 1243448] + ) + + ds = ds_base.copy() + ds.attrs['area'] = cosmo7 + + new_ds, grid_mapping = _add_grid_mapping(ds) + assert 'crs_wkt' in grid_mapping.attrs + wkt = grid_mapping.attrs['crs_wkt'] + assert 'ELLIPSOID["WGS 84"' in wkt + assert 'PARAMETER["lat_0",46' in wkt + assert 'PARAMETER["lon_0",4.535' in wkt + assert 'PARAMETER["o_lat_p",90' in wkt + assert 'PARAMETER["o_lon_p",-5.465' in wkt + assert new_ds.attrs['grid_mapping'] == 'cosmo7' + + # c) Projection Transverse Mercator + lat_0 = 36.5 + lon_0 = 15.0 + + tmerc = AreaDefinition( + area_id='tmerc', + description='tmerc', + proj_id='tmerc', + projection={'proj': 'tmerc', 'ellps': 'WGS84', 'lat_0': 36.5, 'lon_0': 15.0}, + width=2, height=2, + area_extent=[-1, -1, 1, 1]) + + tmerc_expected = xr.DataArray(data=0, + attrs={'latitude_of_projection_origin': lat_0, + 'longitude_of_central_meridian': lon_0, + 'grid_mapping_name': 'transverse_mercator', + 'reference_ellipsoid_name': 'WGS 84', + 'false_easting': 0., + 'false_northing': 0., + }) + + ds = ds_base.copy() + ds.attrs['area'] = tmerc + new_ds, grid_mapping = _add_grid_mapping(ds) + assert new_ds.attrs['grid_mapping'] == 'tmerc' + _gm_matches(grid_mapping, tmerc_expected) + + # d) Projection that has a representation but no explicit a/b + h = 35785831. 
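+        # No explicit 'a'/'b': the ellipsoid is inferred from the 'datum'/'ellps' parameters instead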
+ geos = AreaDefinition( + area_id='geos', + description='geos', + proj_id='geos', + projection={'proj': 'geos', 'h': h, 'datum': 'WGS84', 'ellps': 'GRS80', + 'lat_0': 0, 'lon_0': 0}, + width=2, height=2, + area_extent=[-1, -1, 1, 1]) + geos_expected = xr.DataArray(data=0, + attrs={'perspective_point_height': h, + 'latitude_of_projection_origin': 0, + 'longitude_of_projection_origin': 0, + 'grid_mapping_name': 'geostationary', + # 'semi_major_axis': 6378137.0, + # 'semi_minor_axis': 6356752.314, + # 'sweep_angle_axis': None, + }) + + ds = ds_base.copy() + ds.attrs['area'] = geos + new_ds, grid_mapping = _add_grid_mapping(ds) + + assert new_ds.attrs['grid_mapping'] == 'geos' + _gm_matches(grid_mapping, geos_expected) + + # e) oblique Mercator + area = AreaDefinition( + area_id='omerc_otf', + description='On-the-fly omerc area', + proj_id='omerc', + projection={'alpha': '9.02638777018478', 'ellps': 'WGS84', 'gamma': '0', 'k': '1', + 'lat_0': '-0.256794486098476', 'lonc': '13.7888658224205', + 'proj': 'omerc', 'units': 'm'}, + width=2837, + height=5940, + area_extent=[-1460463.0893, 3455291.3877, 1538407.1158, 9615788.8787] + ) + + omerc_dict = {'azimuth_of_central_line': 9.02638777018478, + 'false_easting': 0., + 'false_northing': 0., + # 'gamma': 0, # this is not CF compliant + 'grid_mapping_name': "oblique_mercator", + 'latitude_of_projection_origin': -0.256794486098476, + 'longitude_of_projection_origin': 13.7888658224205, + # 'prime_meridian_name': "Greenwich", + 'reference_ellipsoid_name': "WGS 84"} + omerc_expected = xr.DataArray(data=0, attrs=omerc_dict) + + ds = ds_base.copy() + ds.attrs['area'] = area + new_ds, grid_mapping = _add_grid_mapping(ds) + + assert new_ds.attrs['grid_mapping'] == 'omerc_otf' + _gm_matches(grid_mapping, omerc_expected) + + # f) Projection that has a representation but no explicit a/b + h = 35785831. 
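+        # Same projection parameters as in d), but the ellipsoid is expected to be reported by name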
+ geos = AreaDefinition( + area_id='geos', + description='geos', + proj_id='geos', + projection={'proj': 'geos', 'h': h, 'datum': 'WGS84', 'ellps': 'GRS80', + 'lat_0': 0, 'lon_0': 0}, + width=2, height=2, + area_extent=[-1, -1, 1, 1]) + geos_expected = xr.DataArray(data=0, + attrs={'perspective_point_height': h, + 'latitude_of_projection_origin': 0, + 'longitude_of_projection_origin': 0, + 'grid_mapping_name': 'geostationary', + 'reference_ellipsoid_name': 'WGS 84', + }) + + ds = ds_base.copy() + ds.attrs['area'] = geos + new_ds, grid_mapping = _add_grid_mapping(ds) + + assert new_ds.attrs['grid_mapping'] == 'geos' + _gm_matches(grid_mapping, geos_expected) + + def test_add_lonlat_coords(self): + """Test the conversion from areas to lon/lat.""" + from satpy.writers.cf.area import add_lonlat_coords + + area = AreaDefinition( + 'seviri', + 'Native SEVIRI grid', + 'geos', + "+a=6378169.0 +h=35785831.0 +b=6356583.8 +lon_0=0 +proj=geos", + 2, 2, + [-5570248.686685662, -5567248.28340708, 5567248.28340708, 5570248.686685662] + ) + lons_ref, lats_ref = area.get_lonlats() + dataarray = xr.DataArray(data=[[1, 2], [3, 4]], dims=('y', 'x'), attrs={'area': area}) + + res = add_lonlat_coords(dataarray) + + # original should be unmodified + assert 'longitude' not in dataarray.coords + assert set(res.coords) == {'longitude', 'latitude'} + lat = res['latitude'] + lon = res['longitude'] + np.testing.assert_array_equal(lat.data, lats_ref) + np.testing.assert_array_equal(lon.data, lons_ref) + assert {'name': 'latitude', 'standard_name': 'latitude', 'units': 'degrees_north'}.items() <= lat.attrs.items() + assert {'name': 'longitude', 'standard_name': 'longitude', 'units': 'degrees_east'}.items() <= lon.attrs.items() + + area = AreaDefinition( + 'seviri', + 'Native SEVIRI grid', + 'geos', + "+a=6378169.0 +h=35785831.0 +b=6356583.8 +lon_0=0 +proj=geos", + 10, 10, + [-5570248.686685662, -5567248.28340708, 5567248.28340708, 5570248.686685662] + ) + lons_ref, lats_ref = area.get_lonlats() + dataarray = xr.DataArray(data=da.from_array(np.arange(3 * 10 * 10).reshape(3, 10, 10), chunks=(1, 5, 5)), + dims=('bands', 'y', 'x'), attrs={'area': area}) + res = add_lonlat_coords(dataarray) + + # original should be unmodified + assert 'longitude' not in dataarray.coords + assert set(res.coords) == {'longitude', 'latitude'} + lat = res['latitude'] + lon = res['longitude'] + np.testing.assert_array_equal(lat.data, lats_ref) + np.testing.assert_array_equal(lon.data, lons_ref) + assert {'name': 'latitude', 'standard_name': 'latitude', 'units': 'degrees_north'}.items() <= lat.attrs.items() + assert {'name': 'longitude', 'standard_name': 'longitude', 'units': 'degrees_east'}.items() <= lon.attrs.items() diff --git a/satpy/tests/writer_tests/test_cf.py b/satpy/tests/writer_tests/test_cf.py index 2b0a5dfc6c..005509f165 100644 --- a/satpy/tests/writer_tests/test_cf.py +++ b/satpy/tests/writer_tests/test_cf.py @@ -25,7 +25,6 @@ from collections import OrderedDict from datetime import datetime -import dask.array as da import numpy as np import pyresample.geometry import pytest @@ -720,380 +719,6 @@ def test_collect_cf_dataarrays(self): assert 'grid_mapping' not in da_var2.attrs assert da_var2.attrs['long_name'] == 'variable 2' - def test_assert_xy_unique(self): - """Test that the x and y coordinates are unique.""" - from satpy.writers.cf_writer import assert_xy_unique - - dummy = [[1, 2], [3, 4]] - datas = {'a': xr.DataArray(data=dummy, dims=('y', 'x'), coords={'y': [1, 2], 'x': [3, 4]}), - 'b': xr.DataArray(data=dummy, dims=('y', 'x'), 
coords={'y': [1, 2], 'x': [3, 4]}), - 'n': xr.DataArray(data=dummy, dims=('v', 'w'), coords={'v': [1, 2], 'w': [3, 4]})} - assert_xy_unique(datas) - - datas['c'] = xr.DataArray(data=dummy, dims=('y', 'x'), coords={'y': [1, 3], 'x': [3, 4]}) - with pytest.raises(ValueError): - assert_xy_unique(datas) - - def test_link_coords(self): - """Check that coordinates link has been established correctly.""" - from satpy.writers.cf_writer import link_coords - - data = [[1, 2], [3, 4]] - lon = np.zeros((2, 2)) - lon2 = np.zeros((1, 2, 2)) - lat = np.ones((2, 2)) - datasets = { - 'var1': xr.DataArray(data=data, dims=('y', 'x'), attrs={'coordinates': 'lon lat'}), - 'var2': xr.DataArray(data=data, dims=('y', 'x')), - 'var3': xr.DataArray(data=data, dims=('y', 'x'), attrs={'coordinates': 'lon2 lat'}), - 'var4': xr.DataArray(data=data, dims=('y', 'x'), attrs={'coordinates': 'not_exist lon lat'}), - 'lon': xr.DataArray(data=lon, dims=('y', 'x')), - 'lon2': xr.DataArray(data=lon2, dims=('time', 'y', 'x')), - 'lat': xr.DataArray(data=lat, dims=('y', 'x')) - } - - link_coords(datasets) - - # Check that link has been established correctly and 'coordinate' atrribute has been dropped - assert 'lon' in datasets['var1'].coords - assert 'lat' in datasets['var1'].coords - np.testing.assert_array_equal(datasets['var1']['lon'].data, lon) - np.testing.assert_array_equal(datasets['var1']['lat'].data, lat) - assert 'coordinates' not in datasets['var1'].attrs - - # There should be no link if there was no 'coordinate' attribute - assert 'lon' not in datasets['var2'].coords - assert 'lat' not in datasets['var2'].coords - - # The non-existent dimension or coordinate should be dropped - assert 'time' not in datasets['var3'].coords - assert 'not_exist' not in datasets['var4'].coords - - def test_make_alt_coords_unique(self): - """Test that created coordinate variables are unique.""" - from satpy.writers.cf_writer import make_alt_coords_unique - - data = [[1, 2], [3, 4]] - y = [1, 2] - x = [1, 2] - time1 = [1, 2] - time2 = [3, 4] - datasets = {'var1': xr.DataArray(data=data, - dims=('y', 'x'), - coords={'y': y, 'x': x, 'acq_time': ('y', time1)}), - 'var2': xr.DataArray(data=data, - dims=('y', 'x'), - coords={'y': y, 'x': x, 'acq_time': ('y', time2)})} - - # Test that dataset names are prepended to alternative coordinates - res = make_alt_coords_unique(datasets) - np.testing.assert_array_equal(res['var1']['var1_acq_time'], time1) - np.testing.assert_array_equal(res['var2']['var2_acq_time'], time2) - assert 'acq_time' not in res['var1'].coords - assert 'acq_time' not in res['var2'].coords - - # Make sure nothing else is modified - np.testing.assert_array_equal(res['var1']['x'], x) - np.testing.assert_array_equal(res['var1']['y'], y) - np.testing.assert_array_equal(res['var2']['x'], x) - np.testing.assert_array_equal(res['var2']['y'], y) - - # Coords not unique -> Dataset names must be prepended, even if pretty=True - with pytest.warns(UserWarning, match='Cannot pretty-format "acq_time"'): - res = make_alt_coords_unique(datasets, pretty=True) - np.testing.assert_array_equal(res['var1']['var1_acq_time'], time1) - np.testing.assert_array_equal(res['var2']['var2_acq_time'], time2) - assert 'acq_time' not in res['var1'].coords - assert 'acq_time' not in res['var2'].coords - - # Coords unique and pretty=True -> Don't modify coordinate names - datasets['var2']['acq_time'] = ('y', time1) - res = make_alt_coords_unique(datasets, pretty=True) - np.testing.assert_array_equal(res['var1']['acq_time'], time1) - 
np.testing.assert_array_equal(res['var2']['acq_time'], time1) - assert 'var1_acq_time' not in res['var1'].coords - assert 'var2_acq_time' not in res['var2'].coords - - def test_area2cf(self): - """Test the conversion of an area to CF standards.""" - from satpy.writers.cf_writer import area2cf - - ds_base = xr.DataArray(data=[[1, 2], [3, 4]], dims=('y', 'x'), coords={'y': [1, 2], 'x': [3, 4]}, - attrs={'name': 'var1'}) - - # a) Area Definition and strict=False - geos = pyresample.geometry.AreaDefinition( - area_id='geos', - description='geos', - proj_id='geos', - projection={'proj': 'geos', 'h': 35785831., 'a': 6378169., 'b': 6356583.8}, - width=2, height=2, - area_extent=[-1, -1, 1, 1]) - ds = ds_base.copy(deep=True) - ds.attrs['area'] = geos - - res = area2cf(ds, include_lonlats=False) - assert len(res) == 2 - assert res[0].size == 1 # grid mapping variable - assert res[0].name == res[1].attrs['grid_mapping'] - - # b) Area Definition and include_lonlats=False - ds = ds_base.copy(deep=True) - ds.attrs['area'] = geos - res = area2cf(ds, include_lonlats=True) - # same as above - assert len(res) == 2 - assert res[0].size == 1 # grid mapping variable - assert res[0].name == res[1].attrs['grid_mapping'] - # but now also have the lon/lats - assert 'longitude' in res[1].coords - assert 'latitude' in res[1].coords - - # c) Swath Definition - swath = pyresample.geometry.SwathDefinition(lons=[[1, 1], [2, 2]], lats=[[1, 2], [1, 2]]) - ds = ds_base.copy(deep=True) - ds.attrs['area'] = swath - - res = area2cf(ds, include_lonlats=False) - assert len(res) == 1 - assert 'longitude' in res[0].coords - assert 'latitude' in res[0].coords - assert 'grid_mapping' not in res[0].attrs - - def test__add_grid_mapping(self): - """Test the conversion from pyresample area object to CF grid mapping.""" - from satpy.writers.cf_writer import _add_grid_mapping - - def _gm_matches(gmapping, expected): - """Assert that all keys in ``expected`` match the values in ``gmapping``.""" - for attr_key, attr_val in expected.attrs.items(): - test_val = gmapping.attrs[attr_key] - if attr_val is None or isinstance(attr_val, str): - assert test_val == attr_val - else: - np.testing.assert_almost_equal(test_val, attr_val, decimal=3) - - ds_base = xr.DataArray(data=[[1, 2], [3, 4]], dims=('y', 'x'), coords={'y': [1, 2], 'x': [3, 4]}, - attrs={'name': 'var1'}) - - # a) Projection has a corresponding CF representation (e.g. geos) - a = 6378169. - b = 6356583.8 - h = 35785831. 
- geos = pyresample.geometry.AreaDefinition( - area_id='geos', - description='geos', - proj_id='geos', - projection={'proj': 'geos', 'h': h, 'a': a, 'b': b, - 'lat_0': 0, 'lon_0': 0}, - width=2, height=2, - area_extent=[-1, -1, 1, 1]) - geos_expected = xr.DataArray(data=0, - attrs={'perspective_point_height': h, - 'latitude_of_projection_origin': 0, - 'longitude_of_projection_origin': 0, - 'grid_mapping_name': 'geostationary', - 'semi_major_axis': a, - 'semi_minor_axis': b, - # 'sweep_angle_axis': None, - }) - - ds = ds_base.copy() - ds.attrs['area'] = geos - new_ds, grid_mapping = _add_grid_mapping(ds) - if 'sweep_angle_axis' in grid_mapping.attrs: - # older versions of pyproj might not include this - assert grid_mapping.attrs['sweep_angle_axis'] == 'y' - - assert new_ds.attrs['grid_mapping'] == 'geos' - _gm_matches(grid_mapping, geos_expected) - # should not have been modified - assert 'grid_mapping' not in ds.attrs - - # b) Projection does not have a corresponding CF representation (COSMO) - cosmo7 = pyresample.geometry.AreaDefinition( - area_id='cosmo7', - description='cosmo7', - proj_id='cosmo7', - projection={'proj': 'ob_tran', 'ellps': 'WGS84', 'lat_0': 46, 'lon_0': 4.535, - 'o_proj': 'stere', 'o_lat_p': 90, 'o_lon_p': -5.465}, - width=597, height=510, - area_extent=[-1812933, -1003565, 814056, 1243448] - ) - - ds = ds_base.copy() - ds.attrs['area'] = cosmo7 - - new_ds, grid_mapping = _add_grid_mapping(ds) - assert 'crs_wkt' in grid_mapping.attrs - wkt = grid_mapping.attrs['crs_wkt'] - assert 'ELLIPSOID["WGS 84"' in wkt - assert 'PARAMETER["lat_0",46' in wkt - assert 'PARAMETER["lon_0",4.535' in wkt - assert 'PARAMETER["o_lat_p",90' in wkt - assert 'PARAMETER["o_lon_p",-5.465' in wkt - assert new_ds.attrs['grid_mapping'] == 'cosmo7' - - # c) Projection Transverse Mercator - lat_0 = 36.5 - lon_0 = 15.0 - - tmerc = pyresample.geometry.AreaDefinition( - area_id='tmerc', - description='tmerc', - proj_id='tmerc', - projection={'proj': 'tmerc', 'ellps': 'WGS84', 'lat_0': 36.5, 'lon_0': 15.0}, - width=2, height=2, - area_extent=[-1, -1, 1, 1]) - - tmerc_expected = xr.DataArray(data=0, - attrs={'latitude_of_projection_origin': lat_0, - 'longitude_of_central_meridian': lon_0, - 'grid_mapping_name': 'transverse_mercator', - 'reference_ellipsoid_name': 'WGS 84', - 'false_easting': 0., - 'false_northing': 0., - }) - - ds = ds_base.copy() - ds.attrs['area'] = tmerc - new_ds, grid_mapping = _add_grid_mapping(ds) - assert new_ds.attrs['grid_mapping'] == 'tmerc' - _gm_matches(grid_mapping, tmerc_expected) - - # d) Projection that has a representation but no explicit a/b - h = 35785831. 
- geos = pyresample.geometry.AreaDefinition( - area_id='geos', - description='geos', - proj_id='geos', - projection={'proj': 'geos', 'h': h, 'datum': 'WGS84', 'ellps': 'GRS80', - 'lat_0': 0, 'lon_0': 0}, - width=2, height=2, - area_extent=[-1, -1, 1, 1]) - geos_expected = xr.DataArray(data=0, - attrs={'perspective_point_height': h, - 'latitude_of_projection_origin': 0, - 'longitude_of_projection_origin': 0, - 'grid_mapping_name': 'geostationary', - # 'semi_major_axis': 6378137.0, - # 'semi_minor_axis': 6356752.314, - # 'sweep_angle_axis': None, - }) - - ds = ds_base.copy() - ds.attrs['area'] = geos - new_ds, grid_mapping = _add_grid_mapping(ds) - - assert new_ds.attrs['grid_mapping'] == 'geos' - _gm_matches(grid_mapping, geos_expected) - - # e) oblique Mercator - area = pyresample.geometry.AreaDefinition( - area_id='omerc_otf', - description='On-the-fly omerc area', - proj_id='omerc', - projection={'alpha': '9.02638777018478', 'ellps': 'WGS84', 'gamma': '0', 'k': '1', - 'lat_0': '-0.256794486098476', 'lonc': '13.7888658224205', - 'proj': 'omerc', 'units': 'm'}, - width=2837, - height=5940, - area_extent=[-1460463.0893, 3455291.3877, 1538407.1158, 9615788.8787] - ) - - omerc_dict = {'azimuth_of_central_line': 9.02638777018478, - 'false_easting': 0., - 'false_northing': 0., - # 'gamma': 0, # this is not CF compliant - 'grid_mapping_name': "oblique_mercator", - 'latitude_of_projection_origin': -0.256794486098476, - 'longitude_of_projection_origin': 13.7888658224205, - # 'prime_meridian_name': "Greenwich", - 'reference_ellipsoid_name': "WGS 84"} - omerc_expected = xr.DataArray(data=0, attrs=omerc_dict) - - ds = ds_base.copy() - ds.attrs['area'] = area - new_ds, grid_mapping = _add_grid_mapping(ds) - - assert new_ds.attrs['grid_mapping'] == 'omerc_otf' - _gm_matches(grid_mapping, omerc_expected) - - # f) Projection that has a representation but no explicit a/b - h = 35785831. 
- geos = pyresample.geometry.AreaDefinition( - area_id='geos', - description='geos', - proj_id='geos', - projection={'proj': 'geos', 'h': h, 'datum': 'WGS84', 'ellps': 'GRS80', - 'lat_0': 0, 'lon_0': 0}, - width=2, height=2, - area_extent=[-1, -1, 1, 1]) - geos_expected = xr.DataArray(data=0, - attrs={'perspective_point_height': h, - 'latitude_of_projection_origin': 0, - 'longitude_of_projection_origin': 0, - 'grid_mapping_name': 'geostationary', - 'reference_ellipsoid_name': 'WGS 84', - }) - - ds = ds_base.copy() - ds.attrs['area'] = geos - new_ds, grid_mapping = _add_grid_mapping(ds) - - assert new_ds.attrs['grid_mapping'] == 'geos' - _gm_matches(grid_mapping, geos_expected) - - def test_add_lonlat_coords(self): - """Test the conversion from areas to lon/lat.""" - from satpy.writers.cf_writer import add_lonlat_coords - - area = pyresample.geometry.AreaDefinition( - 'seviri', - 'Native SEVIRI grid', - 'geos', - "+a=6378169.0 +h=35785831.0 +b=6356583.8 +lon_0=0 +proj=geos", - 2, 2, - [-5570248.686685662, -5567248.28340708, 5567248.28340708, 5570248.686685662] - ) - lons_ref, lats_ref = area.get_lonlats() - dataarray = xr.DataArray(data=[[1, 2], [3, 4]], dims=('y', 'x'), attrs={'area': area}) - - res = add_lonlat_coords(dataarray) - - # original should be unmodified - assert 'longitude' not in dataarray.coords - assert set(res.coords) == {'longitude', 'latitude'} - lat = res['latitude'] - lon = res['longitude'] - np.testing.assert_array_equal(lat.data, lats_ref) - np.testing.assert_array_equal(lon.data, lons_ref) - assert {'name': 'latitude', 'standard_name': 'latitude', 'units': 'degrees_north'}.items() <= lat.attrs.items() - assert {'name': 'longitude', 'standard_name': 'longitude', 'units': 'degrees_east'}.items() <= lon.attrs.items() - - area = pyresample.geometry.AreaDefinition( - 'seviri', - 'Native SEVIRI grid', - 'geos', - "+a=6378169.0 +h=35785831.0 +b=6356583.8 +lon_0=0 +proj=geos", - 10, 10, - [-5570248.686685662, -5567248.28340708, 5567248.28340708, 5570248.686685662] - ) - lons_ref, lats_ref = area.get_lonlats() - dataarray = xr.DataArray(data=da.from_array(np.arange(3 * 10 * 10).reshape(3, 10, 10), chunks=(1, 5, 5)), - dims=('bands', 'y', 'x'), attrs={'area': area}) - res = add_lonlat_coords(dataarray) - - # original should be unmodified - assert 'longitude' not in dataarray.coords - assert set(res.coords) == {'longitude', 'latitude'} - lat = res['latitude'] - lon = res['longitude'] - np.testing.assert_array_equal(lat.data, lats_ref) - np.testing.assert_array_equal(lon.data, lons_ref) - assert {'name': 'latitude', 'standard_name': 'latitude', 'units': 'degrees_north'}.items() <= lat.attrs.items() - assert {'name': 'longitude', 'standard_name': 'longitude', 'units': 'degrees_east'}.items() <= lon.attrs.items() - def test_load_module_with_old_pyproj(self): """Test that cf_writer can still be loaded with pyproj 1.9.6.""" import importlib @@ -1188,14 +813,14 @@ def datasets(self): def test_is_lon_or_lat_dataarray(self, datasets): """Test the is_lon_or_lat_dataarray function.""" - from satpy.writers.cf_writer import is_lon_or_lat_dataarray + from satpy.writers.cf.area import is_lon_or_lat_dataarray assert is_lon_or_lat_dataarray(datasets['lat']) assert not is_lon_or_lat_dataarray(datasets['var1']) def test_has_projection_coords(self, datasets): """Test the has_projection_coords function.""" - from satpy.writers.cf_writer import has_projection_coords + from satpy.writers.cf.area import has_projection_coords assert has_projection_coords(datasets) 
datasets['lat'].attrs['standard_name'] = 'dummy'
diff --git a/satpy/writers/cf/area.py b/satpy/writers/cf/area.py
new file mode 100644
index 0000000000..68113c1ee2
--- /dev/null
+++ b/satpy/writers/cf/area.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2017-2023 Satpy developers
+#
+# This file is part of satpy.
+#
+# satpy is free software: you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# satpy. If not, see <http://www.gnu.org/licenses/>.
+"""CF processing of pyresample area information."""
+import logging
+import warnings
+from collections import defaultdict
+
+import xarray as xr
+from dask.base import tokenize
+from packaging.version import Version
+from pyresample.geometry import AreaDefinition, SwathDefinition
+
+logger = logging.getLogger(__name__)
+
+
+def add_lonlat_coords(dataarray):
+    """Add 'longitude' and 'latitude' coordinates to DataArray."""
+    dataarray = dataarray.copy()
+    area = dataarray.attrs['area']
+    ignore_dims = {dim: 0 for dim in dataarray.dims if dim not in ['x', 'y']}
+    chunks = getattr(dataarray.isel(**ignore_dims), 'chunks', None)
+    lons, lats = area.get_lonlats(chunks=chunks)
+    dataarray['longitude'] = xr.DataArray(lons, dims=['y', 'x'],
+                                          attrs={'name': "longitude",
+                                                 'standard_name': "longitude",
+                                                 'units': 'degrees_east'},
+                                          name='longitude')
+    dataarray['latitude'] = xr.DataArray(lats, dims=['y', 'x'],
+                                         attrs={'name': "latitude",
+                                                'standard_name': "latitude",
+                                                'units': 'degrees_north'},
+                                         name='latitude')
+    return dataarray
+
+
+def _create_grid_mapping(area):
+    """Create the grid mapping instance for `area`."""
+    import pyproj
+
+    if Version(pyproj.__version__) < Version('2.4.1'):
+        # technically 2.2, but important bug fixes in 2.4.1
+        raise ImportError("'cf' writer requires pyproj 2.4.1 or greater")
+    # let pyproj do the heavy lifting (pyproj 2.0+ required)
+    grid_mapping = area.crs.to_cf()
+    return area.area_id, grid_mapping
+
+
+def _add_grid_mapping(dataarray):
+    """Convert an area to a CF grid mapping."""
+    dataarray = dataarray.copy()
+    area = dataarray.attrs['area']
+    gmapping_var_name, attrs = _create_grid_mapping(area)
+    dataarray.attrs['grid_mapping'] = gmapping_var_name
+    return dataarray, xr.DataArray(0, attrs=attrs, name=gmapping_var_name)
+
+
+def area2cf(dataarray, include_lonlats=False, got_lonlats=False):
+    """Convert an area to a CF grid mapping or lon and lats."""
+    res = []
+    if not got_lonlats and (isinstance(dataarray.attrs['area'], SwathDefinition) or include_lonlats):
+        dataarray = add_lonlat_coords(dataarray)
+    if isinstance(dataarray.attrs['area'], AreaDefinition):
+        dataarray, gmapping = _add_grid_mapping(dataarray)
+        res.append(gmapping)
+    res.append(dataarray)
+    return res
+
+
+def is_lon_or_lat_dataarray(dataarray):
+    """Check if the DataArray represents the latitude or longitude coordinate."""
+    if 'standard_name' in dataarray.attrs and dataarray.attrs['standard_name'] in ['longitude', 'latitude']:
+        return True
+    return False
+
+
+def has_projection_coords(ds_collection):
+    """Check if DataArray collection has a "longitude"
or "latitude" DataArray.""" + for dataarray in ds_collection.values(): + if is_lon_or_lat_dataarray(dataarray): + return True + return False + + +def make_alt_coords_unique(datas, pretty=False): + """Make non-dimensional coordinates unique among all datasets. + + Non-dimensional (or alternative) coordinates, such as scanline timestamps, + may occur in multiple datasets with the same name and dimension + but different values. + + In order to avoid conflicts, prepend the dataset name to the coordinate name. + If a non-dimensional coordinate is unique among all datasets and ``pretty=True``, + its name will not be modified. + + Since all datasets must have the same projection coordinates, + this is not applied to latitude and longitude. + + Args: + datas (dict): + Dictionary of (dataset name, dataset) + pretty (bool): + Don't modify coordinate names, if possible. Makes the file prettier, but possibly less consistent. + + Returns: + Dictionary holding the updated datasets + + """ + # Determine which non-dimensional coordinates are unique + tokens = defaultdict(set) + for dataset in datas.values(): + for coord_name in dataset.coords: + if not is_lon_or_lat_dataarray(dataset[coord_name]) and coord_name not in dataset.dims: + tokens[coord_name].add(tokenize(dataset[coord_name].data)) + coords_unique = dict([(coord_name, len(tokens) == 1) for coord_name, tokens in tokens.items()]) + + # Prepend dataset name, if not unique or no pretty-format desired + new_datas = datas.copy() + for coord_name, unique in coords_unique.items(): + if not pretty or not unique: + if pretty: + warnings.warn( + 'Cannot pretty-format "{}" coordinates because they are ' + 'not identical among the given datasets'.format(coord_name), + stacklevel=2 + ) + for ds_name, dataset in datas.items(): + if coord_name in dataset.coords: + rename = {coord_name: '{}_{}'.format(ds_name, coord_name)} + new_datas[ds_name] = new_datas[ds_name].rename(rename) + + return new_datas + + +def assert_xy_unique(datas): + """Check that all datasets share the same projection coordinates x/y.""" + unique_x = set() + unique_y = set() + for dataset in datas.values(): + if 'y' in dataset.dims: + token_y = tokenize(dataset['y'].data) + unique_y.add(token_y) + if 'x' in dataset.dims: + token_x = tokenize(dataset['x'].data) + unique_x.add(token_x) + if len(unique_x) > 1 or len(unique_y) > 1: + raise ValueError('Datasets to be saved in one file (or one group) must have identical projection coordinates. ' + 'Please group them by area or save them in separate files.') + + +def link_coords(datas): + """Link dataarrays and coordinates. + + If the `coordinates` attribute of a data array links to other dataarrays in the scene, for example + `coordinates='lon lat'`, add them as coordinates to the data array and drop that attribute. In the final call to + `xr.Dataset.to_netcdf()` all coordinate relations will be resolved and the `coordinates` attributes be set + automatically. 
+ + """ + for da_name, data in datas.items(): + declared_coordinates = data.attrs.get('coordinates', []) + if isinstance(declared_coordinates, str): + declared_coordinates = declared_coordinates.split(' ') + for coord in declared_coordinates: + if coord not in data.coords: + try: + dimensions_not_in_data = list(set(datas[coord].dims) - set(data.dims)) + data[coord] = datas[coord].squeeze(dimensions_not_in_data, drop=True) + except KeyError: + warnings.warn( + 'Coordinate "{}" referenced by dataarray {} does not ' + 'exist, dropping reference.'.format(coord, da_name), + stacklevel=2 + ) + continue + + # Drop 'coordinates' attribute in any case to avoid conflicts in xr.Dataset.to_netcdf() + data.attrs.pop('coordinates', None) diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index b9a24b9292..4c672b70b6 100644 --- a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -164,9 +164,7 @@ import numpy as np import xarray as xr -from dask.base import tokenize from packaging.version import Version -from pyresample.geometry import AreaDefinition, SwathDefinition from xarray.coding.times import CFDatetimeCoder from satpy.writers import Writer @@ -228,174 +226,6 @@ def get_extra_ds(dataarray, keys=None): return ds_collection -# ###--------------------------------------------------------------------------. -# ### CF-Area - - -def add_lonlat_coords(dataarray): - """Add 'longitude' and 'latitude' coordinates to DataArray.""" - dataarray = dataarray.copy() - area = dataarray.attrs['area'] - ignore_dims = {dim: 0 for dim in dataarray.dims if dim not in ['x', 'y']} - chunks = getattr(dataarray.isel(**ignore_dims), 'chunks', None) - lons, lats = area.get_lonlats(chunks=chunks) - dataarray['longitude'] = xr.DataArray(lons, dims=['y', 'x'], - attrs={'name': "longitude", - 'standard_name': "longitude", - 'units': 'degrees_east'}, - name='longitude') - dataarray['latitude'] = xr.DataArray(lats, dims=['y', 'x'], - attrs={'name': "latitude", - 'standard_name': "latitude", - 'units': 'degrees_north'}, - name='latitude') - return dataarray - - -def _create_grid_mapping(area): - """Create the grid mapping instance for `area`.""" - import pyproj - - if Version(pyproj.__version__) < Version('2.4.1'): - # technically 2.2, but important bug fixes in 2.4.1 - raise ImportError("'cf' writer requires pyproj 2.4.1 or greater") - # let pyproj do the heavily lifting (pyproj 2.0+ required) - grid_mapping = area.crs.to_cf() - return area.area_id, grid_mapping - - -def _add_grid_mapping(dataarray): - """Convert an area to at CF grid mapping.""" - dataarray = dataarray.copy() - area = dataarray.attrs['area'] - gmapping_var_name, attrs = _create_grid_mapping(area) - dataarray.attrs['grid_mapping'] = gmapping_var_name - return dataarray, xr.DataArray(0, attrs=attrs, name=gmapping_var_name) - - -def area2cf(dataarray, include_lonlats=False, got_lonlats=False): - """Convert an area to at CF grid mapping or lon and lats.""" - res = [] - if not got_lonlats and (isinstance(dataarray.attrs['area'], SwathDefinition) or include_lonlats): - dataarray = add_lonlat_coords(dataarray) - if isinstance(dataarray.attrs['area'], AreaDefinition): - dataarray, gmapping = _add_grid_mapping(dataarray) - res.append(gmapping) - res.append(dataarray) - return res - - -def is_lon_or_lat_dataarray(dataarray): - """Check if the DataArray represents the latitude or longitude coordinate.""" - if 'standard_name' in dataarray.attrs and dataarray.attrs['standard_name'] in ['longitude', 'latitude']: - return True - return False - - -def 
has_projection_coords(ds_collection): - """Check if DataArray collection has a "longitude" or "latitude" DataArray.""" - for dataarray in ds_collection.values(): - if is_lon_or_lat_dataarray(dataarray): - return True - return False - - -def make_alt_coords_unique(datas, pretty=False): - """Make non-dimensional coordinates unique among all datasets. - - Non-dimensional (or alternative) coordinates, such as scanline timestamps, - may occur in multiple datasets with the same name and dimension - but different values. - - In order to avoid conflicts, prepend the dataset name to the coordinate name. - If a non-dimensional coordinate is unique among all datasets and ``pretty=True``, - its name will not be modified. - - Since all datasets must have the same projection coordinates, - this is not applied to latitude and longitude. - - Args: - datas (dict): - Dictionary of (dataset name, dataset) - pretty (bool): - Don't modify coordinate names, if possible. Makes the file prettier, but possibly less consistent. - - Returns: - Dictionary holding the updated datasets - - """ - # Determine which non-dimensional coordinates are unique - tokens = defaultdict(set) - for dataset in datas.values(): - for coord_name in dataset.coords: - if not is_lon_or_lat_dataarray(dataset[coord_name]) and coord_name not in dataset.dims: - tokens[coord_name].add(tokenize(dataset[coord_name].data)) - coords_unique = dict([(coord_name, len(tokens) == 1) for coord_name, tokens in tokens.items()]) - - # Prepend dataset name, if not unique or no pretty-format desired - new_datas = datas.copy() - for coord_name, unique in coords_unique.items(): - if not pretty or not unique: - if pretty: - warnings.warn( - 'Cannot pretty-format "{}" coordinates because they are ' - 'not identical among the given datasets'.format(coord_name), - stacklevel=2 - ) - for ds_name, dataset in datas.items(): - if coord_name in dataset.coords: - rename = {coord_name: '{}_{}'.format(ds_name, coord_name)} - new_datas[ds_name] = new_datas[ds_name].rename(rename) - - return new_datas - - -def assert_xy_unique(datas): - """Check that all datasets share the same projection coordinates x/y.""" - unique_x = set() - unique_y = set() - for dataset in datas.values(): - if 'y' in dataset.dims: - token_y = tokenize(dataset['y'].data) - unique_y.add(token_y) - if 'x' in dataset.dims: - token_x = tokenize(dataset['x'].data) - unique_x.add(token_x) - if len(unique_x) > 1 or len(unique_y) > 1: - raise ValueError('Datasets to be saved in one file (or one group) must have identical projection coordinates. ' - 'Please group them by area or save them in separate files.') - - -def link_coords(datas): - """Link dataarrays and coordinates. - - If the `coordinates` attribute of a data array links to other dataarrays in the scene, for example - `coordinates='lon lat'`, add them as coordinates to the data array and drop that attribute. In the final call to - `xr.Dataset.to_netcdf()` all coordinate relations will be resolved and the `coordinates` attributes be set - automatically. 
-
-    """
-    for da_name, data in datas.items():
-        declared_coordinates = data.attrs.get('coordinates', [])
-        if isinstance(declared_coordinates, str):
-            declared_coordinates = declared_coordinates.split(' ')
-        for coord in declared_coordinates:
-            if coord not in data.coords:
-                try:
-                    dimensions_not_in_data = list(set(datas[coord].dims) - set(data.dims))
-                    data[coord] = datas[coord].squeeze(dimensions_not_in_data, drop=True)
-                except KeyError:
-                    warnings.warn(
-                        'Coordinate "{}" referenced by dataarray {} does not '
-                        'exist, dropping reference.'.format(coord, da_name),
-                        stacklevel=2
-                    )
-                    continue
-
-        # Drop 'coordinates' attribute in any case to avoid conflicts in xr.Dataset.to_netcdf()
-        data.attrs.pop('coordinates', None)
-
-
 # ###--------------------------------------------------------------------------.
 # ### CF-Time
 def add_time_bounds_dimension(ds, time="time"):
@@ -864,6 +694,14 @@ def _collect_cf_dataset(list_dataarrays,
     ds : xr.Dataset
         A partially CF-compliant xr.Dataset
     """
+    from satpy.writers.cf.area import (
+        area2cf,
+        assert_xy_unique,
+        has_projection_coords,
+        link_coords,
+        make_alt_coords_unique,
+    )
+
     # Create dictionary of input datarrays
     # --> Since keys=None, it doesn't never retrieve ancillary variables !!!
     ds_collection = {}

From 4b195664431d0eb4a0bcb7271afacabcdd6a5573 Mon Sep 17 00:00:00 2001
From: ghiggi
Date: Tue, 27 Jun 2023 23:10:01 +0200
Subject: [PATCH 02/37] Refactor attrs-related functions

---
 .../tests/writer_tests/cf_tests/test_attrs.py | 144 ++++++++++
 .../writer_tests/cf_tests/test_time_coords.py |  44 +++
 satpy/tests/writer_tests/test_cf.py           |  44 +--
 satpy/writers/cf/attrs.py                     | 220 +++++++++++++++
 satpy/writers/cf/time.py                      |  62 +++++
 satpy/writers/cf_writer.py                    | 254 +-----------------
 6 files changed, 479 insertions(+), 289 deletions(-)
 create mode 100644 satpy/tests/writer_tests/cf_tests/test_attrs.py
 create mode 100644 satpy/tests/writer_tests/cf_tests/test_time_coords.py
 create mode 100644 satpy/writers/cf/attrs.py
 create mode 100644 satpy/writers/cf/time.py

diff --git a/satpy/tests/writer_tests/cf_tests/test_attrs.py b/satpy/tests/writer_tests/cf_tests/test_attrs.py
new file mode 100644
index 0000000000..a7b36837b4
--- /dev/null
+++ b/satpy/tests/writer_tests/cf_tests/test_attrs.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2017-2023 Satpy developers
+#
+# This file is part of satpy.
+#
+# satpy is free software: you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# satpy. If not, see <http://www.gnu.org/licenses/>.
+"""Tests for CF-compatible attributes encoding."""
+import json
+from collections import OrderedDict
+from datetime import datetime
+
+import numpy as np
+
+
+class TestCFAttributeEncoding:
+    """Test case for CF attribute encodings."""
+
+    def get_test_attrs(self):
+        """Create some dataset attributes for testing purpose.
+ + Returns: + Attributes, encoded attributes, encoded and flattened attributes + + """ + # TODO: this is also used by test_da2cf + attrs = {'name': 'IR_108', + 'start_time': datetime(2018, 1, 1, 0), + 'end_time': datetime(2018, 1, 1, 0, 15), + 'int': 1, + 'float': 1.0, + 'none': None, # should be dropped + 'numpy_int': np.uint8(1), + 'numpy_float': np.float32(1), + 'numpy_bool': True, + 'numpy_void': np.void(0), + 'numpy_bytes': np.bytes_('test'), + 'numpy_string': np.string_('test'), + 'list': [1, 2, np.float64(3)], + 'nested_list': ["1", ["2", [3]]], + 'bool': True, + 'array': np.array([1, 2, 3], dtype='uint8'), + 'array_bool': np.array([True, False, True]), + 'array_2d': np.array([[1, 2], [3, 4]]), + 'array_3d': np.array([[[1, 2], [3, 4]], [[1, 2], [3, 4]]]), + 'dict': {'a': 1, 'b': 2}, + 'nested_dict': {'l1': {'l2': {'l3': np.array([1, 2, 3], dtype='uint8')}}}, + 'raw_metadata': OrderedDict([ + ('recarray', np.zeros(3, dtype=[('x', 'i4'), ('y', 'u1')])), + ('flag', np.bool_(True)), + ('dict', OrderedDict([('a', 1), ('b', np.array([1, 2, 3], dtype='uint8'))])) + ])} + encoded = {'name': 'IR_108', + 'start_time': '2018-01-01 00:00:00', + 'end_time': '2018-01-01 00:15:00', + 'int': 1, + 'float': 1.0, + 'numpy_int': np.uint8(1), + 'numpy_float': np.float32(1), + 'numpy_bool': 'true', + 'numpy_void': '[]', + 'numpy_bytes': 'test', + 'numpy_string': 'test', + 'list': [1, 2, np.float64(3)], + 'nested_list': '["1", ["2", [3]]]', + 'bool': 'true', + 'array': np.array([1, 2, 3], dtype='uint8'), + 'array_bool': ['true', 'false', 'true'], + 'array_2d': '[[1, 2], [3, 4]]', + 'array_3d': '[[[1, 2], [3, 4]], [[1, 2], [3, 4]]]', + 'dict': '{"a": 1, "b": 2}', + 'nested_dict': '{"l1": {"l2": {"l3": [1, 2, 3]}}}', + 'raw_metadata': '{"recarray": [[0, 0], [0, 0], [0, 0]], ' + '"flag": "true", "dict": {"a": 1, "b": [1, 2, 3]}}'} + encoded_flat = {'name': 'IR_108', + 'start_time': '2018-01-01 00:00:00', + 'end_time': '2018-01-01 00:15:00', + 'int': 1, + 'float': 1.0, + 'numpy_int': np.uint8(1), + 'numpy_float': np.float32(1), + 'numpy_bool': 'true', + 'numpy_void': '[]', + 'numpy_bytes': 'test', + 'numpy_string': 'test', + 'list': [1, 2, np.float64(3)], + 'nested_list': '["1", ["2", [3]]]', + 'bool': 'true', + 'array': np.array([1, 2, 3], dtype='uint8'), + 'array_bool': ['true', 'false', 'true'], + 'array_2d': '[[1, 2], [3, 4]]', + 'array_3d': '[[[1, 2], [3, 4]], [[1, 2], [3, 4]]]', + 'dict_a': 1, + 'dict_b': 2, + 'nested_dict_l1_l2_l3': np.array([1, 2, 3], dtype='uint8'), + 'raw_metadata_recarray': '[[0, 0], [0, 0], [0, 0]]', + 'raw_metadata_flag': 'true', + 'raw_metadata_dict_a': 1, + 'raw_metadata_dict_b': np.array([1, 2, 3], dtype='uint8')} + return attrs, encoded, encoded_flat + + def assertDictWithArraysEqual(self, d1, d2): + """Check that dicts containing arrays are equal.""" + # TODO: this is also used by test_da2cf + assert set(d1.keys()) == set(d2.keys()) + for key, val1 in d1.items(): + val2 = d2[key] + if isinstance(val1, np.ndarray): + np.testing.assert_array_equal(val1, val2) + assert val1.dtype == val2.dtype + else: + assert val1 == val2 + if isinstance(val1, (np.floating, np.integer, np.bool_)): + assert isinstance(val2, np.generic) + assert val1.dtype == val2.dtype + + def test_encode_attrs_nc(self): + """Test attributes encoding.""" + from satpy.writers.cf.attrs import encode_attrs_nc + + attrs, expected, _ = self.get_test_attrs() + + # Test encoding + encoded = encode_attrs_nc(attrs) + self.assertDictWithArraysEqual(expected, encoded) + + # Test decoding of json-encoded attributes + 
raw_md_roundtrip = {'recarray': [[0, 0], [0, 0], [0, 0]],
+                            'flag': 'true',
+                            'dict': {'a': 1, 'b': [1, 2, 3]}}
+        assert json.loads(encoded['raw_metadata']) == raw_md_roundtrip
+        assert json.loads(encoded['array_3d']) == [[[1, 2], [3, 4]], [[1, 2], [3, 4]]]
+        assert json.loads(encoded['nested_dict']) == {"l1": {"l2": {"l3": [1, 2, 3]}}}
+        assert json.loads(encoded['nested_list']) == ["1", ["2", [3]]]
diff --git a/satpy/tests/writer_tests/cf_tests/test_time_coords.py b/satpy/tests/writer_tests/cf_tests/test_time_coords.py
new file mode 100644
index 0000000000..ce7845dcca
--- /dev/null
+++ b/satpy/tests/writer_tests/cf_tests/test_time_coords.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2017-2023 Satpy developers
+#
+# This file is part of satpy.
+#
+# satpy is free software: you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# satpy. If not, see <http://www.gnu.org/licenses/>.
+"""Tests for CF time coordinates and dimensions."""
+import numpy as np
+import xarray as xr
+
+
+class TestCFtime:
+    """Test cases for CF time dimension and coordinates."""
+
+    def test_add_time_bounds_dimension(self):
+        """Test addition of CF-compliant time attributes."""
+        from satpy.writers.cf.time import add_time_bounds_dimension
+
+        test_array = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
+        times = np.array(['2018-05-30T10:05:00', '2018-05-30T10:05:01',
+                          '2018-05-30T10:05:02', '2018-05-30T10:05:03'], dtype=np.datetime64)
+        dataarray = xr.DataArray(test_array,
+                                 dims=['y', 'x'],
+                                 coords={'time': ('y', times)},
+                                 attrs=dict(start_time=times[0], end_time=times[-1]))
+        ds = dataarray.to_dataset(name='test-array')
+        ds = add_time_bounds_dimension(ds)
+
+        assert "bnds_1d" in ds.dims
+        assert ds.dims['bnds_1d'] == 2
+        assert "time_bnds" in list(ds.data_vars)
+        assert "bounds" in ds["time"].attrs
+        assert "standard_name" in ds["time"].attrs
diff --git a/satpy/tests/writer_tests/test_cf.py b/satpy/tests/writer_tests/test_cf.py
index 005509f165..ae55dcc1a2 100644
--- a/satpy/tests/writer_tests/test_cf.py
+++ b/satpy/tests/writer_tests/test_cf.py
@@ -17,7 +17,6 @@
 # satpy. If not, see <http://www.gnu.org/licenses/>.
"""Tests for the CF writer.""" -import json import logging import os import tempfile @@ -155,28 +154,6 @@ def test_preprocess_dataarray_name(): assert "original_name" not in out_da.attrs -def test_add_time_cf_attrs(): - """Test addition of CF-compliant time attributes.""" - from satpy import Scene - from satpy.writers.cf_writer import add_time_bounds_dimension - - scn = Scene() - test_array = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) - times = np.array(['2018-05-30T10:05:00', '2018-05-30T10:05:01', - '2018-05-30T10:05:02', '2018-05-30T10:05:03'], dtype=np.datetime64) - scn['test-array'] = xr.DataArray(test_array, - dims=['y', 'x'], - coords={'time': ('y', times)}, - attrs=dict(start_time=times[0], end_time=times[-1])) - ds = scn['test-array'].to_dataset(name='test-array') - ds = add_time_bounds_dimension(ds) - assert "bnds_1d" in ds.dims - assert ds.dims['bnds_1d'] == 2 - assert "time_bnds" in list(ds.data_vars) - assert "bounds" in ds["time"].attrs - assert "standard_name" in ds["time"].attrs - - def test_empty_collect_cf_datasets(): """Test that if no DataArrays, collect_cf_datasets raise error.""" from satpy.writers.cf_writer import collect_cf_datasets @@ -525,6 +502,7 @@ def get_test_attrs(self): Attributes, encoded attributes, encoded and flattened attributes """ + # TODO: also used by cf/test_attrs.py attrs = {'name': 'IR_108', 'start_time': datetime(2018, 1, 1, 0), 'end_time': datetime(2018, 1, 1, 0, 15), @@ -602,6 +580,7 @@ def get_test_attrs(self): def assertDictWithArraysEqual(self, d1, d2): """Check that dicts containing arrays are equal.""" + # TODO: also used by cf/test_attrs.py assert set(d1.keys()) == set(d2.keys()) for key, val1 in d1.items(): val2 = d2[key] @@ -614,25 +593,6 @@ def assertDictWithArraysEqual(self, d1, d2): assert isinstance(val2, np.generic) assert val1.dtype == val2.dtype - def test_encode_attrs_nc(self): - """Test attributes encoding.""" - from satpy.writers.cf_writer import encode_attrs_nc - - attrs, expected, _ = self.get_test_attrs() - - # Test encoding - encoded = encode_attrs_nc(attrs) - self.assertDictWithArraysEqual(expected, encoded) - - # Test decoding of json-encoded attributes - raw_md_roundtrip = {'recarray': [[0, 0], [0, 0], [0, 0]], - 'flag': 'true', - 'dict': {'a': 1, 'b': [1, 2, 3]}} - assert json.loads(encoded['raw_metadata']) == raw_md_roundtrip - assert json.loads(encoded['array_3d']) == [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] - assert json.loads(encoded['nested_dict']) == {"l1": {"l2": {"l3": [1, 2, 3]}}} - assert json.loads(encoded['nested_list']) == ["1", ["2", [3]]] - def test_da2cf(self): """Test the conversion of a DataArray to a CF-compatible DataArray.""" from satpy.writers.cf_writer import CFWriter diff --git a/satpy/writers/cf/attrs.py b/satpy/writers/cf/attrs.py new file mode 100644 index 0000000000..7a9ecc33c8 --- /dev/null +++ b/satpy/writers/cf/attrs.py @@ -0,0 +1,220 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2017-2023 Satpy developers +# +# This file is part of satpy. +# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+#
+# You should have received a copy of the GNU General Public License along with
+# satpy. If not, see <http://www.gnu.org/licenses/>.
+"""CF processing of attributes."""
+
+import datetime
+import json
+from collections import OrderedDict
+
+import numpy as np
+
+from satpy.writers.utils import flatten_dict
+
+
+class AttributeEncoder(json.JSONEncoder):
+    """JSON encoder for dataset attributes."""
+
+    def default(self, obj):
+        """Return a json-serializable object for *obj*.
+
+        In order to facilitate decoding, elements in dictionaries, lists/tuples and multi-dimensional arrays are
+        encoded recursively.
+        """
+        if isinstance(obj, dict):
+            serialized = {}
+            for key, val in obj.items():
+                serialized[key] = self.default(val)
+            return serialized
+        elif isinstance(obj, (list, tuple, np.ndarray)):
+            return [self.default(item) for item in obj]
+        return self._encode(obj)
+
+    def _encode(self, obj):
+        """Encode the given object as a json-serializable datatype."""
+        if isinstance(obj, (bool, np.bool_)):
+            # Bool has to be checked first, because it is a subclass of int
+            return str(obj).lower()
+        elif isinstance(obj, (int, float, str)):
+            return obj
+        elif isinstance(obj, np.integer):
+            return int(obj)
+        elif isinstance(obj, np.floating):
+            return float(obj)
+        elif isinstance(obj, np.void):
+            return tuple(obj)
+        elif isinstance(obj, np.ndarray):
+            return obj.tolist()
+
+        return str(obj)
+
+
+def _encode_nc(obj):
+    """Try to encode `obj` as a netCDF/Zarr compatible datatype which most closely resembles the object's nature.
+
+    Raises:
+        ValueError if no such datatype could be found
+    """
+    from satpy.writers.cf_writer import NC4_DTYPES
+
+    if isinstance(obj, int) and not isinstance(obj, (bool, np.bool_)):
+        return obj
+    elif isinstance(obj, (float, str, np.integer, np.floating)):
+        return obj
+    elif isinstance(obj, np.ndarray):
+        # Only plain 1-d arrays are supported. Skip record arrays and multi-dimensional arrays.
+        is_plain_1d = not obj.dtype.fields and len(obj.shape) <= 1
+        if is_plain_1d:
+            if obj.dtype in NC4_DTYPES:
+                return obj
+            elif obj.dtype == np.bool_:
+                # Boolean arrays are not supported, convert to array of strings.
+                return [s.lower() for s in obj.astype(str)]
+            return obj.tolist()
+    raise ValueError('Unable to encode')
+
+
+def encode_nc(obj):
+    """Encode the given object as a netcdf compatible datatype."""
+    try:
+        return obj.to_cf()
+    except AttributeError:
+        return _encode_python_objects(obj)
+
+
+def _encode_python_objects(obj):
+    """Try to find the datatype which most closely resembles the object's nature.
+
+    On failure, encode as a string. Plain lists are encoded recursively.
+    """
+    if isinstance(obj, (list, tuple)) and all([not isinstance(item, (list, tuple)) for item in obj]):
+        return [encode_nc(item) for item in obj]
+    try:
+        dump = _encode_nc(obj)
+    except ValueError:
+        try:
+            # Decode byte-strings
+            decoded = obj.decode()
+        except AttributeError:
+            decoded = obj
+        dump = json.dumps(decoded, cls=AttributeEncoder).strip('"')
+    return dump
+
+
+def encode_attrs_nc(attrs):
+    """Encode dataset attributes in a netcdf compatible datatype.
+
+    Args:
+        attrs (dict):
+            Attributes to be encoded
+    Returns:
+        dict: Encoded (and sorted) attributes
+
+    """
+    encoded_attrs = []
+    for key, val in sorted(attrs.items()):
+        if val is not None:
+            encoded_attrs.append((key, encode_nc(val)))
+    return OrderedDict(encoded_attrs)
+
+
+def _add_ancillary_variables_attrs(dataarray):
+    """Replace the ancillary_variables DataArrays with a list of their names."""
+    list_ancillary_variable_names = [da_ancillary.attrs['name']
+                                     for da_ancillary in dataarray.attrs.get('ancillary_variables', [])]
+    if list_ancillary_variable_names:
+        dataarray.attrs['ancillary_variables'] = ' '.join(list_ancillary_variable_names)
+    else:
+        dataarray.attrs.pop("ancillary_variables", None)
+    return dataarray
+
+
+def _drop_exclude_attrs(dataarray, exclude_attrs):
+    """Remove user-specified list of attributes."""
+    if exclude_attrs is None:
+        exclude_attrs = []
+    for key in exclude_attrs:
+        dataarray.attrs.pop(key, None)
+    return dataarray
+
+
+def _remove_satpy_attrs(new_data):
+    """Remove _satpy attribute."""
+    satpy_attrs = [key for key in new_data.attrs if key.startswith('_satpy')]
+    for satpy_attr in satpy_attrs:
+        new_data.attrs.pop(satpy_attr)
+    new_data.attrs.pop('_last_resampler', None)
+    return new_data
+
+
+def _format_prerequisites_attrs(dataarray):
+    """Reformat prerequisites attribute value to string."""
+    if 'prerequisites' in dataarray.attrs:
+        dataarray.attrs['prerequisites'] = [np.string_(str(prereq)) for prereq in dataarray.attrs['prerequisites']]
+    return dataarray
+
+
+def _remove_none_attrs(dataarray):
+    """Remove attribute keys with None value."""
+    for key, val in dataarray.attrs.copy().items():
+        if val is None:
+            dataarray.attrs.pop(key)
+    return dataarray
+
+
+def preprocess_datarray_attrs(dataarray, flatten_attrs, exclude_attrs):
+    """Preprocess DataArray attributes to be written into CF-compliant netCDF/Zarr."""
+    dataarray = _remove_satpy_attrs(dataarray)
+    dataarray = _add_ancillary_variables_attrs(dataarray)
+    dataarray = _drop_exclude_attrs(dataarray, exclude_attrs)
+    dataarray = _format_prerequisites_attrs(dataarray)
+    dataarray = _remove_none_attrs(dataarray)
+    _ = dataarray.attrs.pop("area", None)
+
+    if 'long_name' not in dataarray.attrs and 'standard_name' not in dataarray.attrs:
+        dataarray.attrs['long_name'] = dataarray.name
+
+    if flatten_attrs:
+        dataarray.attrs = flatten_dict(dataarray.attrs)
+
+    dataarray.attrs = encode_attrs_nc(dataarray.attrs)
+
+    return dataarray
+
+
+def _add_history(attrs):
+    """Add 'history' attribute to dictionary."""
+    _history_create = 'Created by pytroll/satpy on {}'.format(datetime.utcnow())
+    if 'history' in attrs:
+        if isinstance(attrs['history'], list):
+            attrs['history'] = ''.join(attrs['history'])
+        attrs['history'] += '\n' + _history_create
+    else:
+        attrs['history'] = _history_create
+    return attrs
+
+
+def preprocess_header_attrs(header_attrs, flatten_attrs=False):
+    """Prepare file header attributes."""
+    if header_attrs is not None:
+        if flatten_attrs:
+            header_attrs = flatten_dict(header_attrs)
+        header_attrs = encode_attrs_nc(header_attrs)  # OrderedDict
+    else:
+        header_attrs = {}
+    header_attrs = _add_history(header_attrs)
+    return header_attrs
diff --git a/satpy/writers/cf/time.py b/satpy/writers/cf/time.py
new file mode 100644
index 0000000000..6308f42364
--- /dev/null
+++ b/satpy/writers/cf/time.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2017-2023 Satpy developers
+#
+# This file is part of satpy.
+#
+# satpy is free software: you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# satpy. If not, see <http://www.gnu.org/licenses/>.
+"""CF processing of time dimension and coordinates."""
+import numpy as np
+import xarray as xr
+
+
+def add_time_bounds_dimension(ds, time="time"):
+    """Add time bound dimension to xr.Dataset."""
+    start_times = []
+    end_times = []
+    for _var_name, data_array in ds.items():
+        start_times.append(data_array.attrs.get("start_time", None))
+        end_times.append(data_array.attrs.get("end_time", None))
+
+    start_time = min(start_time for start_time in start_times
+                     if start_time is not None)
+    end_time = min(end_time for end_time in end_times
+                   if end_time is not None)
+    ds['time_bnds'] = xr.DataArray([[np.datetime64(start_time),
+                                     np.datetime64(end_time)]],
+                                   dims=['time', 'bnds_1d'])
+    ds[time].attrs['bounds'] = "time_bnds"
+    ds[time].attrs['standard_name'] = "time"
+    return ds
+
+
+def _process_time_coord(dataarray, epoch):
+    """Process the 'time' coordinate, if existing.
+
+    It expands the DataArray with a time dimension if it does not yet exist.
+
+    The function assumes
+
+        - that x and y dimensions have at least shape > 1
+        - the time coordinate has size 1
+
+    """
+    if 'time' in dataarray.coords:
+        dataarray['time'].encoding['units'] = epoch
+        dataarray['time'].attrs['standard_name'] = 'time'
+        dataarray['time'].attrs.pop('bounds', None)
+
+        if 'time' not in dataarray.dims and dataarray["time"].size not in dataarray.shape:
+            dataarray = dataarray.expand_dims('time')
+
+    return dataarray
diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py
index 4c672b70b6..a2edd70ab2 100644
--- a/satpy/writers/cf_writer.py
+++ b/satpy/writers/cf_writer.py
@@ -154,13 +154,10 @@
 .. _xarray encoding documentation:
     http://xarray.pydata.org/en/stable/user-guide/io.html?highlight=encoding#writing-encoded-data
 """
-
 import copy
-import json
 import logging
 import warnings
-from collections import OrderedDict, defaultdict
-from datetime import datetime
+from collections import defaultdict
 
 import numpy as np
 import xarray as xr
@@ -169,7 +166,6 @@
 
 from satpy.writers import Writer
 from satpy.writers.cf.coords_attrs import add_xy_coords_attrs
-from satpy.writers.utils import flatten_dict
 
 logger = logging.getLogger(__name__)
 
@@ -226,236 +222,6 @@ def get_extra_ds(dataarray, keys=None):
     return ds_collection
 
 
-# ###--------------------------------------------------------------------------.
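# ---------------------------------------------------------------------------
# Illustrative usage sketch for the two modules extracted above,
# satpy.writers.cf.attrs and satpy.writers.cf.time. This is an editorial
# aside, not part of the patch; the input values are made up, but the import
# paths are the new files added here.
import numpy as np
import xarray as xr

from satpy.writers.cf.attrs import encode_attrs_nc
from satpy.writers.cf.time import add_time_bounds_dimension

# Attribute values that netCDF cannot store directly are converted: booleans
# become the strings 'true'/'false' and dicts are JSON-encoded.
encoded = encode_attrs_nc({'flag': True, 'meta': {'a': 1}, 'gain': np.float32(0.5)})
assert encoded['flag'] == 'true'
assert encoded['meta'] == '{"a": 1}'

# A dataset whose variables carry start_time/end_time attributes gains a
# 'time_bnds' variable plus CF 'bounds' metadata on the 'time' coordinate.
times = np.array(['2018-05-30T10:05:00'], dtype='datetime64[ns]')
da = xr.DataArray([[1, 2]], dims=('time', 'x'), coords={'time': times},
                  attrs={'start_time': times[0], 'end_time': times[0]})
ds = add_time_bounds_dimension(da.to_dataset(name='foo'))
assert 'time_bnds' in ds.data_vars
# ---------------------------------------------------------------------------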
-# ### CF-Time -def add_time_bounds_dimension(ds, time="time"): - """Add time bound dimension to xr.Dataset.""" - start_times = [] - end_times = [] - for _var_name, data_array in ds.items(): - start_times.append(data_array.attrs.get("start_time", None)) - end_times.append(data_array.attrs.get("end_time", None)) - - start_time = min(start_time for start_time in start_times - if start_time is not None) - end_time = min(end_time for end_time in end_times - if end_time is not None) - ds['time_bnds'] = xr.DataArray([[np.datetime64(start_time), - np.datetime64(end_time)]], - dims=['time', 'bnds_1d']) - ds[time].attrs['bounds'] = "time_bnds" - ds[time].attrs['standard_name'] = "time" - return ds - - -def _process_time_coord(dataarray, epoch): - """Process the 'time' coordinate, if existing. - - If expand the DataArray with a time dimension if does not yet exists. - - The function assumes - - - that x and y dimensions have at least shape > 1 - - the time coordinate has size 1 - - """ - if 'time' in dataarray.coords: - dataarray['time'].encoding['units'] = epoch - dataarray['time'].attrs['standard_name'] = 'time' - dataarray['time'].attrs.pop('bounds', None) - - if 'time' not in dataarray.dims and dataarray["time"].size not in dataarray.shape: - dataarray = dataarray.expand_dims('time') - - return dataarray - - -# --------------------------------------------------------------------------. -# ### Attributes - - -class AttributeEncoder(json.JSONEncoder): - """JSON encoder for dataset attributes.""" - - def default(self, obj): - """Return a json-serializable object for *obj*. - - In order to facilitate decoding, elements in dictionaries, lists/tuples and multi-dimensional arrays are - encoded recursively. - """ - if isinstance(obj, dict): - serialized = {} - for key, val in obj.items(): - serialized[key] = self.default(val) - return serialized - elif isinstance(obj, (list, tuple, np.ndarray)): - return [self.default(item) for item in obj] - return self._encode(obj) - - def _encode(self, obj): - """Encode the given object as a json-serializable datatype.""" - if isinstance(obj, (bool, np.bool_)): - # Bool has to be checked first, because it is a subclass of int - return str(obj).lower() - elif isinstance(obj, (int, float, str)): - return obj - elif isinstance(obj, np.integer): - return int(obj) - elif isinstance(obj, np.floating): - return float(obj) - elif isinstance(obj, np.void): - return tuple(obj) - elif isinstance(obj, np.ndarray): - return obj.tolist() - - return str(obj) - - -def _encode_nc(obj): - """Try to encode `obj` as a netcdf compatible datatype which most closely resembles the object's nature. - - Raises: - ValueError if no such datatype could be found - - """ - if isinstance(obj, int) and not isinstance(obj, (bool, np.bool_)): - return obj - elif isinstance(obj, (float, str, np.integer, np.floating)): - return obj - elif isinstance(obj, np.ndarray): - # Only plain 1-d arrays are supported. Skip record arrays and multi-dimensional arrays. - is_plain_1d = not obj.dtype.fields and len(obj.shape) <= 1 - if is_plain_1d: - if obj.dtype in NC4_DTYPES: - return obj - elif obj.dtype == np.bool_: - # Boolean arrays are not supported, convert to array of strings. 
- return [s.lower() for s in obj.astype(str)] - return obj.tolist() - - raise ValueError('Unable to encode') - - -def encode_nc(obj): - """Encode the given object as a netcdf compatible datatype.""" - try: - return obj.to_cf() - except AttributeError: - return _encode_python_objects(obj) - - -def _encode_python_objects(obj): - """Try to find the datatype which most closely resembles the object's nature. - - If on failure, encode as a string. Plain lists are encoded recursively. - """ - if isinstance(obj, (list, tuple)) and all([not isinstance(item, (list, tuple)) for item in obj]): - return [encode_nc(item) for item in obj] - try: - dump = _encode_nc(obj) - except ValueError: - try: - # Decode byte-strings - decoded = obj.decode() - except AttributeError: - decoded = obj - dump = json.dumps(decoded, cls=AttributeEncoder).strip('"') - return dump - - -def encode_attrs_nc(attrs): - """Encode dataset attributes in a netcdf compatible datatype. - - Args: - attrs (dict): - Attributes to be encoded - Returns: - dict: Encoded (and sorted) attributes - - """ - encoded_attrs = [] - for key, val in sorted(attrs.items()): - if val is not None: - encoded_attrs.append((key, encode_nc(val))) - return OrderedDict(encoded_attrs) - - -def _add_ancillary_variables_attrs(dataarray): - """Replace ancillary_variables DataArray with a list of their name.""" - list_ancillary_variable_names = [da_ancillary.attrs['name'] - for da_ancillary in dataarray.attrs.get('ancillary_variables', [])] - if list_ancillary_variable_names: - dataarray.attrs['ancillary_variables'] = ' '.join(list_ancillary_variable_names) - else: - dataarray.attrs.pop("ancillary_variables", None) - return dataarray - - -def _drop_exclude_attrs(dataarray, exclude_attrs): - """Remove user-specified list of attributes.""" - if exclude_attrs is None: - exclude_attrs = [] - for key in exclude_attrs: - dataarray.attrs.pop(key, None) - return dataarray - - -def _remove_satpy_attrs(new_data): - """Remove _satpy attribute.""" - satpy_attrs = [key for key in new_data.attrs if key.startswith('_satpy')] - for satpy_attr in satpy_attrs: - new_data.attrs.pop(satpy_attr) - new_data.attrs.pop('_last_resampler', None) - return new_data - - -def _format_prerequisites_attrs(dataarray): - """Reformat prerequisites attribute value to string.""" - if 'prerequisites' in dataarray.attrs: - dataarray.attrs['prerequisites'] = [np.string_(str(prereq)) for prereq in dataarray.attrs['prerequisites']] - return dataarray - - -def _remove_none_attrs(dataarray): - """Remove attribute keys with None value.""" - for key, val in dataarray.attrs.copy().items(): - if val is None: - dataarray.attrs.pop(key) - return dataarray - - -def preprocess_datarray_attrs(dataarray, flatten_attrs, exclude_attrs): - """Preprocess DataArray attributes to be written into CF-compliant netCDF/Zarr.""" - dataarray = _remove_satpy_attrs(dataarray) - dataarray = _add_ancillary_variables_attrs(dataarray) - dataarray = _drop_exclude_attrs(dataarray, exclude_attrs) - dataarray = _format_prerequisites_attrs(dataarray) - dataarray = _remove_none_attrs(dataarray) - _ = dataarray.attrs.pop("area", None) - - if 'long_name' not in dataarray.attrs and 'standard_name' not in dataarray.attrs: - dataarray.attrs['long_name'] = dataarray.name - - if flatten_attrs: - dataarray.attrs = flatten_dict(dataarray.attrs) - - dataarray.attrs = encode_attrs_nc(dataarray.attrs) - - return dataarray - - -def preprocess_header_attrs(header_attrs, flatten_attrs=False): - """Prepare file header attributes.""" - if header_attrs is not 
None: - if flatten_attrs: - header_attrs = flatten_dict(header_attrs) - header_attrs = encode_attrs_nc(header_attrs) # OrderedDict - else: - header_attrs = {} - header_attrs = _add_history(header_attrs) - return header_attrs - - # ###--------------------------------------------------------------------------. # ### netCDF encodings @@ -582,18 +348,6 @@ def _preprocess_dataarray_name(dataarray, numeric_name_prefix, include_orig_name return dataarray -def _add_history(attrs): - """Add 'history' attribute to dictionary.""" - _history_create = 'Created by pytroll/satpy on {}'.format(datetime.utcnow()) - if 'history' in attrs: - if isinstance(attrs['history'], list): - attrs['history'] = ''.join(attrs['history']) - attrs['history'] += '\n' + _history_create - else: - attrs['history'] = _history_create - return attrs - - def _get_groups(groups, list_datarrays): """Return a dictionary with the list of xr.DataArray associated to each group. @@ -645,6 +399,9 @@ def make_cf_dataarray(dataarray, CF-compliant xr.DataArray. """ + from satpy.writers.cf.attrs import preprocess_datarray_attrs + from satpy.writers.cf.time import _process_time_coord + dataarray = _preprocess_dataarray_name(dataarray=dataarray, numeric_name_prefix=numeric_name_prefix, include_orig_name=include_orig_name) @@ -822,6 +579,9 @@ def collect_cf_datasets(list_dataarrays, header_attrs : dict Global attributes to be attached to the xr.Dataset / netCDF4. """ + from satpy.writers.cf.attrs import preprocess_header_attrs + from satpy.writers.cf.time import add_time_bounds_dimension + if not list_dataarrays: raise RuntimeError("None of the requested datasets have been " "generated or could not be loaded. Requested " From 6c27d86ed22ffd5a697449c57a0ee0a616081cbc Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 27 Jun 2023 23:13:10 +0200 Subject: [PATCH 03/37] Fix datetime import --- satpy/tests/writer_tests/cf_tests/test_attrs.py | 4 ++-- satpy/writers/cf/attrs.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/satpy/tests/writer_tests/cf_tests/test_attrs.py b/satpy/tests/writer_tests/cf_tests/test_attrs.py index a7b36837b4..87cdfd173d 100644 --- a/satpy/tests/writer_tests/cf_tests/test_attrs.py +++ b/satpy/tests/writer_tests/cf_tests/test_attrs.py @@ -35,8 +35,8 @@ def get_test_attrs(self): """ # TODO: this is also used by test_da2cf attrs = {'name': 'IR_108', - 'start_time': datetime(2018, 1, 1, 0), - 'end_time': datetime(2018, 1, 1, 0, 15), + 'start_time': datetime.datetime(2018, 1, 1, 0), + 'end_time': datetime.datetime(2018, 1, 1, 0, 15), 'int': 1, 'float': 1.0, 'none': None, # should be dropped diff --git a/satpy/writers/cf/attrs.py b/satpy/writers/cf/attrs.py index 7a9ecc33c8..aac0f5f289 100644 --- a/satpy/writers/cf/attrs.py +++ b/satpy/writers/cf/attrs.py @@ -198,7 +198,7 @@ def preprocess_datarray_attrs(dataarray, flatten_attrs, exclude_attrs): def _add_history(attrs): """Add 'history' attribute to dictionary.""" - _history_create = 'Created by pytroll/satpy on {}'.format(datetime.utcnow()) + _history_create = 'Created by pytroll/satpy on {}'.format(datetime.datetime.utcnow()) if 'history' in attrs: if isinstance(attrs['history'], list): attrs['history'] = ''.join(attrs['history']) From 59f73ea592231a8379788cd7999273cf9be9dae1 Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 27 Jun 2023 23:18:04 +0200 Subject: [PATCH 04/37] Replace deprecated CFWriter.da2cf with make_cf_dataarray --- satpy/tests/writer_tests/test_cf.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git 
a/satpy/tests/writer_tests/test_cf.py b/satpy/tests/writer_tests/test_cf.py index ae55dcc1a2..35b454c87f 100644 --- a/satpy/tests/writer_tests/test_cf.py +++ b/satpy/tests/writer_tests/test_cf.py @@ -87,10 +87,10 @@ def test_lonlat_storage(tmp_path): np.testing.assert_allclose(ds["mavas"].attrs["inverse_flattening"], 298.257223563) -def test_da2cf_lonlat(): - """Test correct da2cf encoding for area with lon/lat units.""" +def test_make_cf_dataarray_lonlat(): + """Test correct CF encoding for area with lon/lat units.""" from satpy.resample import add_crs_xy_coords - from satpy.writers.cf_writer import CFWriter + from satpy.writers.cf_writer import make_cf_dataarray area = create_area_def("mavas", 4326, shape=(5, 5), center=(0, 0), resolution=(1, 1)) @@ -99,7 +99,7 @@ def test_da2cf_lonlat(): dims=("y", "x"), attrs={"area": area}) da = add_crs_xy_coords(da, area) - new_da = CFWriter.da2cf(da) + new_da = make_cf_dataarray(da) assert new_da["x"].attrs["units"] == "degrees_east" assert new_da["y"].attrs["units"] == "degrees_north" @@ -593,9 +593,9 @@ def assertDictWithArraysEqual(self, d1, d2): assert isinstance(val2, np.generic) assert val1.dtype == val2.dtype - def test_da2cf(self): + def test_make_cf_dataarray(self): """Test the conversion of a DataArray to a CF-compatible DataArray.""" - from satpy.writers.cf_writer import CFWriter + from satpy.writers.cf_writer import make_cf_dataarray # Create set of test attributes attrs, attrs_expected, attrs_expected_flat = self.get_test_attrs() @@ -618,7 +618,7 @@ def test_da2cf(self): coords={'y': [0, 1], 'x': [1, 2], 'acq_time': ('y', [3, 4])}) # Test conversion to something cf-compliant - res = CFWriter.da2cf(arr) + res = make_cf_dataarray(arr) np.testing.assert_array_equal(res['x'], arr['x']) np.testing.assert_array_equal(res['y'], arr['y']) np.testing.assert_array_equal(res['acq_time'], arr['acq_time']) @@ -627,17 +627,17 @@ def test_da2cf(self): self.assertDictWithArraysEqual(res.attrs, attrs_expected) # Test attribute kwargs - res_flat = CFWriter.da2cf(arr, flatten_attrs=True, exclude_attrs=['int']) + res_flat = make_cf_dataarray(arr, flatten_attrs=True, exclude_attrs=['int']) attrs_expected_flat.pop('int') self.assertDictWithArraysEqual(res_flat.attrs, attrs_expected_flat) - def test_da2cf_one_dimensional_array(self): + def test_make_cf_dataarray_one_dimensional_array(self): """Test the conversion of an 1d DataArray to a CF-compatible DataArray.""" - from satpy.writers.cf_writer import CFWriter + from satpy.writers.cf_writer import make_cf_dataarray arr = xr.DataArray(np.array([1, 2, 3, 4]), attrs={}, dims=('y',), coords={'y': [0, 1, 2, 3], 'acq_time': ('y', [0, 1, 2, 3])}) - _ = CFWriter.da2cf(arr) + _ = make_cf_dataarray(arr) def test_collect_cf_dataarrays(self): """Test collecting CF datasets from a DataArray objects.""" From 396700f0c04aeee05b381eeb16f0480d9ee657ac Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 27 Jun 2023 23:35:24 +0200 Subject: [PATCH 05/37] Refactor Dataset encodings --- .../writer_tests/cf_tests/test_encoding.py | 123 ++++++++++++++++++ satpy/tests/writer_tests/test_cf.py | 107 +-------------- satpy/writers/cf/encoding.py | 110 ++++++++++++++++ satpy/writers/cf_writer.py | 99 +------------- 4 files changed, 241 insertions(+), 198 deletions(-) create mode 100644 satpy/tests/writer_tests/cf_tests/test_encoding.py create mode 100644 satpy/writers/cf/encoding.py diff --git a/satpy/tests/writer_tests/cf_tests/test_encoding.py b/satpy/tests/writer_tests/cf_tests/test_encoding.py new file mode 100644 index 
0000000000..66f7c72a48
--- /dev/null
+++ b/satpy/tests/writer_tests/cf_tests/test_encoding.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2017-2023 Satpy developers
+#
+# This file is part of satpy.
+#
+# satpy is free software: you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# satpy. If not, see <http://www.gnu.org/licenses/>.
+"""Tests for compatible netCDF/Zarr DataArray encodings."""
+import datetime
+
+import pytest
+import xarray as xr
+
+
+class TestUpdateDatasetEncodings:
+    """Test update of Dataset encodings."""
+
+    @pytest.fixture
+    def fake_ds(self):
+        """Create fake data for testing."""
+        ds = xr.Dataset({'foo': (('y', 'x'), [[1, 2], [3, 4]]),
+                         'bar': (('y', 'x'), [[3, 4], [5, 6]])},
+                        coords={'y': [1, 2],
+                                'x': [3, 4],
+                                'lon': (('y', 'x'), [[7, 8], [9, 10]])})
+        return ds
+
+    @pytest.fixture
+    def fake_ds_digit(self):
+        """Create fake data for testing."""
+        ds_digit = xr.Dataset({'CHANNEL_1': (('y', 'x'), [[1, 2], [3, 4]]),
+                               'CHANNEL_2': (('y', 'x'), [[3, 4], [5, 6]])},
+                              coords={'y': [1, 2],
+                                      'x': [3, 4],
+                                      'lon': (('y', 'x'), [[7, 8], [9, 10]])})
+        return ds_digit
+
+    def test_dataset_name_digit(self, fake_ds_digit):
+        """Test data with dataset name starting with a digit."""
+        from satpy.writers.cf.encoding import update_encoding
+
+        # Dataset with name starting with digit
+        ds_digit = fake_ds_digit
+        kwargs = {'encoding': {'1': {'dtype': 'float32'},
+                               '2': {'dtype': 'float32'}},
+                  'other': 'kwargs'}
+        enc, other_kwargs = update_encoding(ds_digit, kwargs, numeric_name_prefix='CHANNEL_')
+        expected_dict = {
+            'y': {'_FillValue': None},
+            'x': {'_FillValue': None},
+            'CHANNEL_1': {'dtype': 'float32'},
+            'CHANNEL_2': {'dtype': 'float32'}
+        }
+        assert enc == expected_dict
+        assert other_kwargs == {'other': 'kwargs'}
+
+    def test_without_time(self, fake_ds):
+        """Test data with no time dimension."""
+        from satpy.writers.cf.encoding import update_encoding
+
+        # Without time dimension
+        ds = fake_ds.chunk(2)
+        kwargs = {'encoding': {'bar': {'chunksizes': (1, 1)}},
+                  'other': 'kwargs'}
+        enc, other_kwargs = update_encoding(ds, kwargs)
+        expected_dict = {
+            'y': {'_FillValue': None},
+            'x': {'_FillValue': None},
+            'lon': {'chunksizes': (2, 2)},
+            'foo': {'chunksizes': (2, 2)},
+            'bar': {'chunksizes': (1, 1)}
+        }
+        assert enc == expected_dict
+        assert other_kwargs == {'other': 'kwargs'}
+
+        # Chunksize may not exceed shape
+        ds = fake_ds.chunk(8)
+        kwargs = {'encoding': {}, 'other': 'kwargs'}
+        enc, other_kwargs
= update_encoding(ds, kwargs) + expected_dict = { + 'y': {'_FillValue': None}, + 'x': {'_FillValue': None}, + 'lon': {'chunksizes': (2, 2)}, + 'foo': {'chunksizes': (1, 2, 2)}, + 'bar': {'chunksizes': (1, 1, 1)}, + 'time': {'_FillValue': None, + 'calendar': 'proleptic_gregorian', + 'units': 'days since 2009-07-01 12:15:00'}, + 'time_bnds': {'_FillValue': None, + 'calendar': 'proleptic_gregorian', + 'units': 'days since 2009-07-01 12:15:00'} + } + assert enc == expected_dict + # User-defined encoding may not be altered + assert kwargs['encoding'] == {'bar': {'chunksizes': (1, 1, 1)}} diff --git a/satpy/tests/writer_tests/test_cf.py b/satpy/tests/writer_tests/test_cf.py index 35b454c87f..1d9e9c7650 100644 --- a/satpy/tests/writer_tests/test_cf.py +++ b/satpy/tests/writer_tests/test_cf.py @@ -808,109 +808,8 @@ def test_collect_cf_dataarrays_with_latitude_named_lat(self, datasets): assert ds2['var1']['longitude'].attrs['name'] == 'longitude' -class EncodingUpdateTest: - """Test update of netCDF encoding.""" - - @pytest.fixture - def fake_ds(self): - """Create fake data for testing.""" - ds = xr.Dataset({'foo': (('y', 'x'), [[1, 2], [3, 4]]), - 'bar': (('y', 'x'), [[3, 4], [5, 6]])}, - coords={'y': [1, 2], - 'x': [3, 4], - 'lon': (('y', 'x'), [[7, 8], [9, 10]])}) - return ds - - @pytest.fixture - def fake_ds_digit(self): - """Create fake data for testing.""" - ds_digit = xr.Dataset({'CHANNEL_1': (('y', 'x'), [[1, 2], [3, 4]]), - 'CHANNEL_2': (('y', 'x'), [[3, 4], [5, 6]])}, - coords={'y': [1, 2], - 'x': [3, 4], - 'lon': (('y', 'x'), [[7, 8], [9, 10]])}) - return ds_digit - - def test_dataset_name_digit(self, fake_ds_digit): - """Test data with dataset name staring with a digit.""" - from satpy.writers.cf_writer import update_encoding - - # Dataset with name staring with digit - ds_digit = fake_ds_digit - kwargs = {'encoding': {'1': {'dtype': 'float32'}, - '2': {'dtype': 'float32'}}, - 'other': 'kwargs'} - enc, other_kwargs = update_encoding(ds_digit, kwargs, numeric_name_prefix='CHANNEL_') - expected_dict = { - 'y': {'_FillValue': None}, - 'x': {'_FillValue': None}, - 'CHANNEL_1': {'dtype': 'float32'}, - 'CHANNEL_2': {'dtype': 'float32'} - } - assert enc == expected_dict - assert other_kwargs == {'other': 'kwargs'} - - def test_without_time(self, fake_ds): - """Test data with no time dimension.""" - from satpy.writers.cf_writer import update_encoding - - # Without time dimension - ds = fake_ds.chunk(2) - kwargs = {'encoding': {'bar': {'chunksizes': (1, 1)}}, - 'other': 'kwargs'} - enc, other_kwargs = update_encoding(ds, kwargs) - expected_dict = { - 'y': {'_FillValue': None}, - 'x': {'_FillValue': None}, - 'lon': {'chunksizes': (2, 2)}, - 'foo': {'chunksizes': (2, 2)}, - 'bar': {'chunksizes': (1, 1)} - } - assert enc == expected_dict - assert other_kwargs == {'other': 'kwargs'} - - # Chunksize may not exceed shape - ds = fake_ds.chunk(8) - kwargs = {'encoding': {}, 'other': 'kwargs'} - enc, other_kwargs = update_encoding(ds, kwargs) - expected_dict = { - 'y': {'_FillValue': None}, - 'x': {'_FillValue': None}, - 'lon': {'chunksizes': (2, 2)}, - 'foo': {'chunksizes': (2, 2)}, - 'bar': {'chunksizes': (2, 2)} - } - assert enc == expected_dict - - def test_with_time(self, fake_ds): - """Test data with a time dimension.""" - from satpy.writers.cf_writer import update_encoding - - # With time dimension - ds = fake_ds.chunk(8).expand_dims({'time': [datetime(2009, 7, 1, 12, 15)]}) - kwargs = {'encoding': {'bar': {'chunksizes': (1, 1, 1)}}, - 'other': 'kwargs'} - enc, other_kwargs = update_encoding(ds, 
kwargs) - expected_dict = { - 'y': {'_FillValue': None}, - 'x': {'_FillValue': None}, - 'lon': {'chunksizes': (2, 2)}, - 'foo': {'chunksizes': (1, 2, 2)}, - 'bar': {'chunksizes': (1, 1, 1)}, - 'time': {'_FillValue': None, - 'calendar': 'proleptic_gregorian', - 'units': 'days since 2009-07-01 12:15:00'}, - 'time_bnds': {'_FillValue': None, - 'calendar': 'proleptic_gregorian', - 'units': 'days since 2009-07-01 12:15:00'} - } - assert enc == expected_dict - # User-defined encoding may not be altered - assert kwargs['encoding'] == {'bar': {'chunksizes': (1, 1, 1)}} - - -class TestEncodingKwarg: - """Test CF writer with 'encoding' keyword argument.""" +class TestNETCDFEncodingKwargs: + """Test netCDF compression encodings.""" @pytest.fixture def scene(self): @@ -1001,7 +900,7 @@ def test_no_warning_if_backends_match(self, scene, filename, monkeypatch): warnings.simplefilter("error") -class TestEncodingAttribute(TestEncodingKwarg): +class TestEncodingAttribute(TestNETCDFEncodingKwargs): """Test CF writer with 'encoding' dataset attribute.""" @pytest.fixture diff --git a/satpy/writers/cf/encoding.py b/satpy/writers/cf/encoding.py new file mode 100644 index 0000000000..c8ea0f25f4 --- /dev/null +++ b/satpy/writers/cf/encoding.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2017-2023 Satpy developers +# +# This file is part of satpy. +# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# satpy. If not, see . +"""CF encoding.""" + +import numpy as np +import xarray as xr +from xarray.coding.times import CFDatetimeCoder + + +def _set_default_chunks(encoding, dataset): + """Update encoding to preserve current dask chunks. + + Existing user-defined chunks take precedence. + """ + for var_name, variable in dataset.variables.items(): + if variable.chunks: + chunks = tuple( + np.stack([variable.data.chunksize, + variable.shape]).min(axis=0) + ) # Chunksize may not exceed shape + encoding.setdefault(var_name, {}) + encoding[var_name].setdefault('chunksizes', chunks) + return encoding + + +def _set_default_fill_value(encoding, dataset): + """Set default fill values. + + Avoid _FillValue attribute being added to coordinate variables + (https://github.com/pydata/xarray/issues/1865). + """ + coord_vars = [] + for data_array in dataset.values(): + coord_vars.extend(set(data_array.dims).intersection(data_array.coords)) + for coord_var in coord_vars: + encoding.setdefault(coord_var, {}) + encoding[coord_var].update({'_FillValue': None}) + return encoding + + +def _set_default_time_encoding(encoding, dataset): + """Set default time encoding. + + Make sure time coordinates and bounds have the same units. + Default is xarray's CF datetime encoding, which can be overridden + by user-defined encoding. 
+ """ + if 'time' in dataset: + try: + dtnp64 = dataset['time'].data[0] + except IndexError: + dtnp64 = dataset['time'].data + + default = CFDatetimeCoder().encode(xr.DataArray(dtnp64)) + time_enc = {'units': default.attrs['units'], 'calendar': default.attrs['calendar']} + time_enc.update(encoding.get('time', {})) + bounds_enc = {'units': time_enc['units'], + 'calendar': time_enc['calendar'], + '_FillValue': None} + encoding['time'] = time_enc + encoding['time_bnds'] = bounds_enc # FUTURE: Not required anymore with xarray-0.14+ + return encoding + + +def _update_encoding_dataset_names(encoding, dataset, numeric_name_prefix): + """Ensure variable names of the encoding dictionary account for numeric_name_prefix. + + A lot of channel names in satpy starts with a digit. + When preparing CF-compliant datasets, these channels are prefixed with numeric_name_prefix. + + If variables names in the encoding dictionary are numeric digits, their name is prefixed + with numeric_name_prefix + """ + for var_name in list(dataset.variables): + if not numeric_name_prefix or not var_name.startswith(numeric_name_prefix): + continue + orig_var_name = var_name.replace(numeric_name_prefix, '') + if orig_var_name in encoding: + encoding[var_name] = encoding.pop(orig_var_name) + return encoding + + +def update_encoding(dataset, to_netcdf_kwargs, numeric_name_prefix='CHANNEL_'): + """Update encoding. + + Preserve dask chunks, avoid fill values in coordinate variables and make sure that + time & time bounds have the same units. + """ + other_to_netcdf_kwargs = to_netcdf_kwargs.copy() + encoding = other_to_netcdf_kwargs.pop('encoding', {}).copy() + encoding = _update_encoding_dataset_names(encoding, dataset, numeric_name_prefix) + encoding = _set_default_chunks(encoding, dataset) + encoding = _set_default_fill_value(encoding, dataset) + encoding = _set_default_time_encoding(encoding, dataset) + return encoding, other_to_netcdf_kwargs diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index a2edd70ab2..63f57f2e63 100644 --- a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -162,7 +162,6 @@ import numpy as np import xarray as xr from packaging.version import Version -from xarray.coding.times import CFDatetimeCoder from satpy.writers import Writer from satpy.writers.cf.coords_attrs import add_xy_coords_attrs @@ -222,98 +221,6 @@ def get_extra_ds(dataarray, keys=None): return ds_collection -# ###--------------------------------------------------------------------------. -# ### netCDF encodings - - -def _set_default_chunks(encoding, dataset): - """Update encoding to preserve current dask chunks. - - Existing user-defined chunks take precedence. - """ - for var_name, variable in dataset.variables.items(): - if variable.chunks: - chunks = tuple( - np.stack([variable.data.chunksize, - variable.shape]).min(axis=0) - ) # Chunksize may not exceed shape - encoding.setdefault(var_name, {}) - encoding[var_name].setdefault('chunksizes', chunks) - return encoding - - -def _set_default_fill_value(encoding, dataset): - """Set default fill values. - - Avoid _FillValue attribute being added to coordinate variables - (https://github.com/pydata/xarray/issues/1865). 
- """ - coord_vars = [] - for data_array in dataset.values(): - coord_vars.extend(set(data_array.dims).intersection(data_array.coords)) - for coord_var in coord_vars: - encoding.setdefault(coord_var, {}) - encoding[coord_var].update({'_FillValue': None}) - return encoding - - -def _set_default_time_encoding(encoding, dataset): - """Set default time encoding. - - Make sure time coordinates and bounds have the same units. - Default is xarray's CF datetime encoding, which can be overridden - by user-defined encoding. - """ - if 'time' in dataset: - try: - dtnp64 = dataset['time'].data[0] - except IndexError: - dtnp64 = dataset['time'].data - - default = CFDatetimeCoder().encode(xr.DataArray(dtnp64)) - time_enc = {'units': default.attrs['units'], 'calendar': default.attrs['calendar']} - time_enc.update(encoding.get('time', {})) - bounds_enc = {'units': time_enc['units'], - 'calendar': time_enc['calendar'], - '_FillValue': None} - encoding['time'] = time_enc - encoding['time_bnds'] = bounds_enc # FUTURE: Not required anymore with xarray-0.14+ - return encoding - - -def _update_encoding_dataset_names(encoding, dataset, numeric_name_prefix): - """Ensure variable names of the encoding dictionary account for numeric_name_prefix. - - A lot of channel names in satpy starts with a digit. - When preparing CF-compliant datasets, these channels are prefixed with numeric_name_prefix. - - If variables names in the encoding dictionary are numeric digits, their name is prefixed - with numeric_name_prefix - """ - for var_name in list(dataset.variables): - if not numeric_name_prefix or not var_name.startswith(numeric_name_prefix): - continue - orig_var_name = var_name.replace(numeric_name_prefix, '') - if orig_var_name in encoding: - encoding[var_name] = encoding.pop(orig_var_name) - return encoding - - -def update_encoding(dataset, to_netcdf_kwargs, numeric_name_prefix='CHANNEL_'): - """Update encoding. - - Preserve dask chunks, avoid fill values in coordinate variables and make sure that - time & time bounds have the same units. - """ - other_to_netcdf_kwargs = to_netcdf_kwargs.copy() - encoding = other_to_netcdf_kwargs.pop('encoding', {}).copy() - encoding = _update_encoding_dataset_names(encoding, dataset, numeric_name_prefix) - encoding = _set_default_chunks(encoding, dataset) - encoding = _set_default_fill_value(encoding, dataset) - encoding = _set_default_time_encoding(encoding, dataset) - return encoding, other_to_netcdf_kwargs - - # ###--------------------------------------------------------------------------. # ### CF-conversion @@ -681,8 +588,10 @@ def da2cf(dataarray, epoch=EPOCH, flatten_attrs=False, exclude_attrs=None, @staticmethod def update_encoding(dataset, to_netcdf_kwargs): """Update encoding info (deprecated).""" + from satpy.writers.cf.encoding import update_encoding + warnings.warn('CFWriter.update_encoding is deprecated. ' - 'Use satpy.writers.cf_writer.update_encoding instead.', + 'Use satpy.writers.cf.encoding.update_encoding instead.', DeprecationWarning, stacklevel=3) return update_encoding(dataset, to_netcdf_kwargs) @@ -728,6 +637,8 @@ def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, Prefix to add the each variable with name starting with a digit. Use '' or None to leave this out. 
""" + from satpy.writers.cf.encoding import update_encoding + logger.info('Saving datasets to NetCDF4/CF.') _check_backend_versions() From f664c60925e7edf844c1e66c8bf7bf3e810f9db6 Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 27 Jun 2023 23:56:48 +0200 Subject: [PATCH 06/37] Refactor CF-compliant DataArray creation --- .../writer_tests/cf_tests/test_dataaarray.py | 197 ++++++++++++++++++ satpy/tests/writer_tests/test_cf.py | 174 +--------------- satpy/writers/cf/dataarray.py | 97 +++++++++ satpy/writers/cf/time.py | 2 + satpy/writers/cf_writer.py | 119 ++--------- 5 files changed, 320 insertions(+), 269 deletions(-) create mode 100644 satpy/tests/writer_tests/cf_tests/test_dataaarray.py create mode 100644 satpy/writers/cf/dataarray.py diff --git a/satpy/tests/writer_tests/cf_tests/test_dataaarray.py b/satpy/tests/writer_tests/cf_tests/test_dataaarray.py new file mode 100644 index 0000000000..20c893d0a6 --- /dev/null +++ b/satpy/tests/writer_tests/cf_tests/test_dataaarray.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2017-2023 Satpy developers +# +# This file is part of satpy. +# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# satpy. If not, see . +"""Tests CF-compliant DataArray creation.""" + +import datetime +from collections import OrderedDict + +import numpy as np +import xarray as xr + +from satpy.tests.utils import make_dsq + + +def test_preprocess_dataarray_name(): + """Test saving an array to netcdf/cf where dataset name starting with a digit with prefix include orig name.""" + from satpy import Scene + from satpy.writers.cf.dataarray import _preprocess_dataarray_name + + scn = Scene() + scn['1'] = xr.DataArray([1, 2, 3]) + dataarray = scn['1'] + # If numeric_name_prefix is a string, test add the original_name attributes + out_da = _preprocess_dataarray_name(dataarray, numeric_name_prefix="TEST", include_orig_name=True) + assert out_da.attrs['original_name'] == '1' + + # If numeric_name_prefix is empty string, False or None, test do not add original_name attributes + out_da = _preprocess_dataarray_name(dataarray, numeric_name_prefix="", include_orig_name=True) + assert "original_name" not in out_da.attrs + + out_da = _preprocess_dataarray_name(dataarray, numeric_name_prefix=False, include_orig_name=True) + assert "original_name" not in out_da.attrs + + out_da = _preprocess_dataarray_name(dataarray, numeric_name_prefix=None, include_orig_name=True) + assert "original_name" not in out_da.attrs + + +class TestCFWriter: + """Test creation of CF DataArray.""" + + def get_test_attrs(self): + """Create some dataset attributes for testing purpose. 
+ + Returns: + Attributes, encoded attributes, encoded and flattened attributes + + """ + # TODO: also used by cf/test_attrs.py + attrs = {'name': 'IR_108', + 'start_time': datetime.datetime(2018, 1, 1, 0), + 'end_time': datetime.datetime(2018, 1, 1, 0, 15), + 'int': 1, + 'float': 1.0, + 'none': None, # should be dropped + 'numpy_int': np.uint8(1), + 'numpy_float': np.float32(1), + 'numpy_bool': True, + 'numpy_void': np.void(0), + 'numpy_bytes': np.bytes_('test'), + 'numpy_string': np.string_('test'), + 'list': [1, 2, np.float64(3)], + 'nested_list': ["1", ["2", [3]]], + 'bool': True, + 'array': np.array([1, 2, 3], dtype='uint8'), + 'array_bool': np.array([True, False, True]), + 'array_2d': np.array([[1, 2], [3, 4]]), + 'array_3d': np.array([[[1, 2], [3, 4]], [[1, 2], [3, 4]]]), + 'dict': {'a': 1, 'b': 2}, + 'nested_dict': {'l1': {'l2': {'l3': np.array([1, 2, 3], dtype='uint8')}}}, + 'raw_metadata': OrderedDict([ + ('recarray', np.zeros(3, dtype=[('x', 'i4'), ('y', 'u1')])), + ('flag', np.bool_(True)), + ('dict', OrderedDict([('a', 1), ('b', np.array([1, 2, 3], dtype='uint8'))])) + ])} + encoded = {'name': 'IR_108', + 'start_time': '2018-01-01 00:00:00', + 'end_time': '2018-01-01 00:15:00', + 'int': 1, + 'float': 1.0, + 'numpy_int': np.uint8(1), + 'numpy_float': np.float32(1), + 'numpy_bool': 'true', + 'numpy_void': '[]', + 'numpy_bytes': 'test', + 'numpy_string': 'test', + 'list': [1, 2, np.float64(3)], + 'nested_list': '["1", ["2", [3]]]', + 'bool': 'true', + 'array': np.array([1, 2, 3], dtype='uint8'), + 'array_bool': ['true', 'false', 'true'], + 'array_2d': '[[1, 2], [3, 4]]', + 'array_3d': '[[[1, 2], [3, 4]], [[1, 2], [3, 4]]]', + 'dict': '{"a": 1, "b": 2}', + 'nested_dict': '{"l1": {"l2": {"l3": [1, 2, 3]}}}', + 'raw_metadata': '{"recarray": [[0, 0], [0, 0], [0, 0]], ' + '"flag": "true", "dict": {"a": 1, "b": [1, 2, 3]}}'} + encoded_flat = {'name': 'IR_108', + 'start_time': '2018-01-01 00:00:00', + 'end_time': '2018-01-01 00:15:00', + 'int': 1, + 'float': 1.0, + 'numpy_int': np.uint8(1), + 'numpy_float': np.float32(1), + 'numpy_bool': 'true', + 'numpy_void': '[]', + 'numpy_bytes': 'test', + 'numpy_string': 'test', + 'list': [1, 2, np.float64(3)], + 'nested_list': '["1", ["2", [3]]]', + 'bool': 'true', + 'array': np.array([1, 2, 3], dtype='uint8'), + 'array_bool': ['true', 'false', 'true'], + 'array_2d': '[[1, 2], [3, 4]]', + 'array_3d': '[[[1, 2], [3, 4]], [[1, 2], [3, 4]]]', + 'dict_a': 1, + 'dict_b': 2, + 'nested_dict_l1_l2_l3': np.array([1, 2, 3], dtype='uint8'), + 'raw_metadata_recarray': '[[0, 0], [0, 0], [0, 0]]', + 'raw_metadata_flag': 'true', + 'raw_metadata_dict_a': 1, + 'raw_metadata_dict_b': np.array([1, 2, 3], dtype='uint8')} + return attrs, encoded, encoded_flat + + def assertDictWithArraysEqual(self, d1, d2): + """Check that dicts containing arrays are equal.""" + # TODO: also used by cf/test_attrs.py + assert set(d1.keys()) == set(d2.keys()) + for key, val1 in d1.items(): + val2 = d2[key] + if isinstance(val1, np.ndarray): + np.testing.assert_array_equal(val1, val2) + assert val1.dtype == val2.dtype + else: + assert val1 == val2 + if isinstance(val1, (np.floating, np.integer, np.bool_)): + assert isinstance(val2, np.generic) + assert val1.dtype == val2.dtype + + def test_make_cf_dataarray(self): + """Test the conversion of a DataArray to a CF-compatible DataArray.""" + from satpy.writers.cf.dataarray import make_cf_dataarray + + # Create set of test attributes + attrs, attrs_expected, attrs_expected_flat = self.get_test_attrs() + attrs['area'] = 'some_area' + 
attrs['prerequisites'] = [make_dsq(name='hej')]
+        attrs['_satpy_id_name'] = 'myname'
+
+        # Adjust expected attributes
+        expected_prereq = ("DataQuery(name='hej')")
+        update = {'prerequisites': [expected_prereq], 'long_name': attrs['name']}
+
+        attrs_expected.update(update)
+        attrs_expected_flat.update(update)
+
+        attrs_expected.pop('name')
+        attrs_expected_flat.pop('name')
+
+        # Create test data array
+        arr = xr.DataArray(np.array([[1, 2], [3, 4]]), attrs=attrs, dims=('y', 'x'),
+                           coords={'y': [0, 1], 'x': [1, 2], 'acq_time': ('y', [3, 4])})
+
+        # Test conversion to something cf-compliant
+        res = make_cf_dataarray(arr)
+        np.testing.assert_array_equal(res['x'], arr['x'])
+        np.testing.assert_array_equal(res['y'], arr['y'])
+        np.testing.assert_array_equal(res['acq_time'], arr['acq_time'])
+        assert res['x'].attrs == {'units': 'm', 'standard_name': 'projection_x_coordinate'}
+        assert res['y'].attrs == {'units': 'm', 'standard_name': 'projection_y_coordinate'}
+        self.assertDictWithArraysEqual(res.attrs, attrs_expected)
+
+        # Test attribute kwargs
+        res_flat = make_cf_dataarray(arr, flatten_attrs=True, exclude_attrs=['int'])
+        attrs_expected_flat.pop('int')
+        self.assertDictWithArraysEqual(res_flat.attrs, attrs_expected_flat)
+
+    def test_make_cf_dataarray_one_dimensional_array(self):
+        """Test the conversion of a 1d DataArray to a CF-compatible DataArray."""
+
+        arr = xr.DataArray(np.array([1, 2, 3, 4]), attrs={}, dims=('y',),
+                           coords={'y': [0, 1, 2, 3], 'acq_time': ('y', [0, 1, 2, 3])})
+        _ = make_cf_dataarray(arr)
diff --git a/satpy/tests/writer_tests/test_cf.py b/satpy/tests/writer_tests/test_cf.py
index 1d9e9c7650..96cc09069a 100644
--- a/satpy/tests/writer_tests/test_cf.py
+++ b/satpy/tests/writer_tests/test_cf.py
@@ -21,7 +21,6 @@
 import os
 import tempfile
 import warnings
-from collections import OrderedDict
 from datetime import datetime
 
 import numpy as np
@@ -90,7 +89,7 @@ def test_lonlat_storage(tmp_path):
 def test_make_cf_dataarray_lonlat():
     """Test correct CF encoding for area with lon/lat units."""
     from satpy.resample import add_crs_xy_coords
-    from satpy.writers.cf_writer import make_cf_dataarray
+    from satpy.writers.cf.dataarray import make_cf_dataarray
 
     area = create_area_def("mavas", 4326, shape=(5, 5),
                            center=(0, 0), resolution=(1, 1))
@@ -131,29 +130,6 @@ def test_is_projected(caplog):
     assert "Failed to tell if data are projected."
in caplog.text -def test_preprocess_dataarray_name(): - """Test saving an array to netcdf/cf where dataset name starting with a digit with prefix include orig name.""" - from satpy import Scene - from satpy.writers.cf_writer import _preprocess_dataarray_name - - scn = Scene() - scn['1'] = xr.DataArray([1, 2, 3]) - dataarray = scn['1'] - # If numeric_name_prefix is a string, test add the original_name attributes - out_da = _preprocess_dataarray_name(dataarray, numeric_name_prefix="TEST", include_orig_name=True) - assert out_da.attrs['original_name'] == '1' - - # If numeric_name_prefix is empty string, False or None, test do not add original_name attributes - out_da = _preprocess_dataarray_name(dataarray, numeric_name_prefix="", include_orig_name=True) - assert "original_name" not in out_da.attrs - - out_da = _preprocess_dataarray_name(dataarray, numeric_name_prefix=False, include_orig_name=True) - assert "original_name" not in out_da.attrs - - out_da = _preprocess_dataarray_name(dataarray, numeric_name_prefix=None, include_orig_name=True) - assert "original_name" not in out_da.attrs - - def test_empty_collect_cf_datasets(): """Test that if no DataArrays, collect_cf_datasets raise error.""" from satpy.writers.cf_writer import collect_cf_datasets @@ -495,150 +471,6 @@ def test_header_attrs(self): assert f.attrs['bool_'] == 'true' assert 'none' not in f.attrs.keys() - def get_test_attrs(self): - """Create some dataset attributes for testing purpose. - - Returns: - Attributes, encoded attributes, encoded and flattened attributes - - """ - # TODO: also used by cf/test_attrs.py - attrs = {'name': 'IR_108', - 'start_time': datetime(2018, 1, 1, 0), - 'end_time': datetime(2018, 1, 1, 0, 15), - 'int': 1, - 'float': 1.0, - 'none': None, # should be dropped - 'numpy_int': np.uint8(1), - 'numpy_float': np.float32(1), - 'numpy_bool': True, - 'numpy_void': np.void(0), - 'numpy_bytes': np.bytes_('test'), - 'numpy_string': np.string_('test'), - 'list': [1, 2, np.float64(3)], - 'nested_list': ["1", ["2", [3]]], - 'bool': True, - 'array': np.array([1, 2, 3], dtype='uint8'), - 'array_bool': np.array([True, False, True]), - 'array_2d': np.array([[1, 2], [3, 4]]), - 'array_3d': np.array([[[1, 2], [3, 4]], [[1, 2], [3, 4]]]), - 'dict': {'a': 1, 'b': 2}, - 'nested_dict': {'l1': {'l2': {'l3': np.array([1, 2, 3], dtype='uint8')}}}, - 'raw_metadata': OrderedDict([ - ('recarray', np.zeros(3, dtype=[('x', 'i4'), ('y', 'u1')])), - ('flag', np.bool_(True)), - ('dict', OrderedDict([('a', 1), ('b', np.array([1, 2, 3], dtype='uint8'))])) - ])} - encoded = {'name': 'IR_108', - 'start_time': '2018-01-01 00:00:00', - 'end_time': '2018-01-01 00:15:00', - 'int': 1, - 'float': 1.0, - 'numpy_int': np.uint8(1), - 'numpy_float': np.float32(1), - 'numpy_bool': 'true', - 'numpy_void': '[]', - 'numpy_bytes': 'test', - 'numpy_string': 'test', - 'list': [1, 2, np.float64(3)], - 'nested_list': '["1", ["2", [3]]]', - 'bool': 'true', - 'array': np.array([1, 2, 3], dtype='uint8'), - 'array_bool': ['true', 'false', 'true'], - 'array_2d': '[[1, 2], [3, 4]]', - 'array_3d': '[[[1, 2], [3, 4]], [[1, 2], [3, 4]]]', - 'dict': '{"a": 1, "b": 2}', - 'nested_dict': '{"l1": {"l2": {"l3": [1, 2, 3]}}}', - 'raw_metadata': '{"recarray": [[0, 0], [0, 0], [0, 0]], ' - '"flag": "true", "dict": {"a": 1, "b": [1, 2, 3]}}'} - encoded_flat = {'name': 'IR_108', - 'start_time': '2018-01-01 00:00:00', - 'end_time': '2018-01-01 00:15:00', - 'int': 1, - 'float': 1.0, - 'numpy_int': np.uint8(1), - 'numpy_float': np.float32(1), - 'numpy_bool': 'true', - 'numpy_void': 
'[]', - 'numpy_bytes': 'test', - 'numpy_string': 'test', - 'list': [1, 2, np.float64(3)], - 'nested_list': '["1", ["2", [3]]]', - 'bool': 'true', - 'array': np.array([1, 2, 3], dtype='uint8'), - 'array_bool': ['true', 'false', 'true'], - 'array_2d': '[[1, 2], [3, 4]]', - 'array_3d': '[[[1, 2], [3, 4]], [[1, 2], [3, 4]]]', - 'dict_a': 1, - 'dict_b': 2, - 'nested_dict_l1_l2_l3': np.array([1, 2, 3], dtype='uint8'), - 'raw_metadata_recarray': '[[0, 0], [0, 0], [0, 0]]', - 'raw_metadata_flag': 'true', - 'raw_metadata_dict_a': 1, - 'raw_metadata_dict_b': np.array([1, 2, 3], dtype='uint8')} - return attrs, encoded, encoded_flat - - def assertDictWithArraysEqual(self, d1, d2): - """Check that dicts containing arrays are equal.""" - # TODO: also used by cf/test_attrs.py - assert set(d1.keys()) == set(d2.keys()) - for key, val1 in d1.items(): - val2 = d2[key] - if isinstance(val1, np.ndarray): - np.testing.assert_array_equal(val1, val2) - assert val1.dtype == val2.dtype - else: - assert val1 == val2 - if isinstance(val1, (np.floating, np.integer, np.bool_)): - assert isinstance(val2, np.generic) - assert val1.dtype == val2.dtype - - def test_make_cf_dataarray(self): - """Test the conversion of a DataArray to a CF-compatible DataArray.""" - from satpy.writers.cf_writer import make_cf_dataarray - - # Create set of test attributes - attrs, attrs_expected, attrs_expected_flat = self.get_test_attrs() - attrs['area'] = 'some_area' - attrs['prerequisites'] = [make_dsq(name='hej')] - attrs['_satpy_id_name'] = 'myname' - - # Adjust expected attributes - expected_prereq = ("DataQuery(name='hej')") - update = {'prerequisites': [expected_prereq], 'long_name': attrs['name']} - - attrs_expected.update(update) - attrs_expected_flat.update(update) - - attrs_expected.pop('name') - attrs_expected_flat.pop('name') - - # Create test data array - arr = xr.DataArray(np.array([[1, 2], [3, 4]]), attrs=attrs, dims=('y', 'x'), - coords={'y': [0, 1], 'x': [1, 2], 'acq_time': ('y', [3, 4])}) - - # Test conversion to something cf-compliant - res = make_cf_dataarray(arr) - np.testing.assert_array_equal(res['x'], arr['x']) - np.testing.assert_array_equal(res['y'], arr['y']) - np.testing.assert_array_equal(res['acq_time'], arr['acq_time']) - assert res['x'].attrs == {'units': 'm', 'standard_name': 'projection_x_coordinate'} - assert res['y'].attrs == {'units': 'm', 'standard_name': 'projection_y_coordinate'} - self.assertDictWithArraysEqual(res.attrs, attrs_expected) - - # Test attribute kwargs - res_flat = make_cf_dataarray(arr, flatten_attrs=True, exclude_attrs=['int']) - attrs_expected_flat.pop('int') - self.assertDictWithArraysEqual(res_flat.attrs, attrs_expected_flat) - - def test_make_cf_dataarray_one_dimensional_array(self): - """Test the conversion of an 1d DataArray to a CF-compatible DataArray.""" - from satpy.writers.cf_writer import make_cf_dataarray - - arr = xr.DataArray(np.array([1, 2, 3, 4]), attrs={}, dims=('y',), - coords={'y': [0, 1, 2, 3], 'acq_time': ('y', [0, 1, 2, 3])}) - _ = make_cf_dataarray(arr) - def test_collect_cf_dataarrays(self): """Test collecting CF datasets from a DataArray objects.""" from satpy.writers.cf_writer import _collect_cf_dataset @@ -808,7 +640,7 @@ def test_collect_cf_dataarrays_with_latitude_named_lat(self, datasets): assert ds2['var1']['longitude'].attrs['name'] == 'longitude' -class TestNETCDFEncodingKwargs: +class TestNetcdfEncodingKwargs: """Test netCDF compression encodings.""" @pytest.fixture @@ -900,7 +732,7 @@ def test_no_warning_if_backends_match(self, scene, filename, 
monkeypatch):
         warnings.simplefilter("error")
 
 
-class TestEncodingAttribute(TestNETCDFEncodingKwargs):
+class TestEncodingAttribute(TestNetcdfEncodingKwargs):
     """Test CF writer with 'encoding' dataset attribute."""
 
     @pytest.fixture
diff --git a/satpy/writers/cf/dataarray.py b/satpy/writers/cf/dataarray.py
new file mode 100644
index 0000000000..9ca90ae52f
--- /dev/null
+++ b/satpy/writers/cf/dataarray.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2017-2023 Satpy developers
+#
+# This file is part of satpy.
+#
+# satpy is free software: you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# satpy. If not, see <http://www.gnu.org/licenses/>.
+"""Utility to generate a CF-compliant DataArray."""
+import warnings
+
+from satpy.writers.cf.attrs import preprocess_datarray_attrs
+from satpy.writers.cf.coords_attrs import add_xy_coords_attrs
+from satpy.writers.cf.time import EPOCH, _process_time_coord
+
+
+def _handle_dataarray_name(original_name, numeric_name_prefix):
+    if original_name[0].isdigit():
+        if numeric_name_prefix:
+            new_name = numeric_name_prefix + original_name
+        else:
+            warnings.warn(
+                f'Invalid NetCDF dataset name: {original_name} starts with a digit.',
+                stacklevel=5
+            )
+            new_name = original_name  # occurs when numeric_name_prefix = '', None or False
+    else:
+        new_name = original_name
+    return original_name, new_name
+
+
+def _preprocess_dataarray_name(dataarray, numeric_name_prefix, include_orig_name):
+    """Change the DataArray name by prepending numeric_name_prefix if the name is a digit."""
+    original_name = None
+    dataarray = dataarray.copy()
+    if 'name' in dataarray.attrs:
+        original_name = dataarray.attrs.pop('name')
+        original_name, new_name = _handle_dataarray_name(original_name, numeric_name_prefix)
+        dataarray = dataarray.rename(new_name)
+
+    if include_orig_name and numeric_name_prefix and original_name and original_name != new_name:
+        dataarray.attrs['original_name'] = original_name
+
+    return dataarray
+
+
+def make_cf_dataarray(dataarray,
+                      epoch=EPOCH,
+                      flatten_attrs=False,
+                      exclude_attrs=None,
+                      include_orig_name=True,
+                      numeric_name_prefix='CHANNEL_'):
+    """Make the xr.DataArray CF-compliant.
+
+    Parameters
+    ----------
+    dataarray : xr.DataArray
+        The data array to be made CF-compliant.
+    epoch : str, optional
+        Reference time for encoding of time coordinates.
+    flatten_attrs : bool, optional
+        If True, flatten dict-type attributes.
+        The default is False.
+    exclude_attrs : list, optional
+        List of dataset attributes to be excluded.
+        The default is None.
+    include_orig_name : bool, optional
+        Include the original dataset name in the netcdf variable attributes.
+        The default is True.
+    numeric_name_prefix : str, optional
+        Prepend dataset name with this if starting with a digit.
+        The default is ``"CHANNEL_"``.
+
+    Returns
+    -------
+    new_data : xr.DataArray
+        CF-compliant xr.DataArray.
+ + """ + dataarray = _preprocess_dataarray_name(dataarray=dataarray, + numeric_name_prefix=numeric_name_prefix, + include_orig_name=include_orig_name) + dataarray = preprocess_datarray_attrs(dataarray=dataarray, + flatten_attrs=flatten_attrs, + exclude_attrs=exclude_attrs) + dataarray = add_xy_coords_attrs(dataarray) + dataarray = _process_time_coord(dataarray, epoch=epoch) + return dataarray diff --git a/satpy/writers/cf/time.py b/satpy/writers/cf/time.py index 6308f42364..821f7b47b0 100644 --- a/satpy/writers/cf/time.py +++ b/satpy/writers/cf/time.py @@ -19,6 +19,8 @@ import numpy as np import xarray as xr +EPOCH = u"seconds since 1970-01-01 00:00:00" + def add_time_bounds_dimension(ds, time="time"): """Add time bound dimension to xr.Dataset.""" diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index 63f57f2e63..d7503860b4 100644 --- a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -164,12 +164,10 @@ from packaging.version import Version from satpy.writers import Writer -from satpy.writers.cf.coords_attrs import add_xy_coords_attrs +from satpy.writers.cf.time import EPOCH logger = logging.getLogger(__name__) -EPOCH = u"seconds since 1970-01-01 00:00:00" - # Check availability of either netCDF4 or h5netcdf package try: import netCDF4 @@ -225,101 +223,6 @@ def get_extra_ds(dataarray, keys=None): # ### CF-conversion -def _handle_dataarray_name(original_name, numeric_name_prefix): - if original_name[0].isdigit(): - if numeric_name_prefix: - new_name = numeric_name_prefix + original_name - else: - warnings.warn( - f'Invalid NetCDF dataset name: {original_name} starts with a digit.', - stacklevel=5 - ) - new_name = original_name # occurs when numeric_name_prefix = '', None or False - else: - new_name = original_name - return original_name, new_name - - -def _preprocess_dataarray_name(dataarray, numeric_name_prefix, include_orig_name): - """Change the DataArray name by prepending numeric_name_prefix if the name is a digit.""" - original_name = None - dataarray = dataarray.copy() - if 'name' in dataarray.attrs: - original_name = dataarray.attrs.pop('name') - original_name, new_name = _handle_dataarray_name(original_name, numeric_name_prefix) - dataarray = dataarray.rename(new_name) - - if include_orig_name and numeric_name_prefix and original_name and original_name != new_name: - dataarray.attrs['original_name'] = original_name - - return dataarray - - -def _get_groups(groups, list_datarrays): - """Return a dictionary with the list of xr.DataArray associated to each group. - - If no groups (groups=None), return all DataArray attached to a single None key. - Else, collect the DataArrays associated to each group. - """ - if groups is None: - grouped_dataarrays = {None: list_datarrays} - else: - grouped_dataarrays = defaultdict(list) - for datarray in list_datarrays: - for group_name, group_members in groups.items(): - if datarray.attrs['name'] in group_members: - grouped_dataarrays[group_name].append(datarray) - break - return grouped_dataarrays - - -def make_cf_dataarray(dataarray, - epoch=EPOCH, - flatten_attrs=False, - exclude_attrs=None, - include_orig_name=True, - numeric_name_prefix='CHANNEL_'): - """Make the xr.DataArray CF-compliant. - - Parameters - ---------- - dataarray : xr.DataArray - The data array to be made CF-compliant. - epoch : str, optional - Reference time for encoding of time coordinates. - flatten_attrs : bool, optional - If True, flatten dict-type attributes. - The default is False. 
- exclude_attrs : list, optional - List of dataset attributes to be excluded. - The default is None. - include_orig_name : bool, optional - Include the original dataset name in the netcdf variable attributes. - The default is True. - numeric_name_prefix : TYPE, optional - Prepend dataset name with this if starting with a digit. - The default is ``"CHANNEL_"``. - - Returns - ------- - new_data : xr.DataArray - CF-compliant xr.DataArray. - - """ - from satpy.writers.cf.attrs import preprocess_datarray_attrs - from satpy.writers.cf.time import _process_time_coord - - dataarray = _preprocess_dataarray_name(dataarray=dataarray, - numeric_name_prefix=numeric_name_prefix, - include_orig_name=include_orig_name) - dataarray = preprocess_datarray_attrs(dataarray=dataarray, - flatten_attrs=flatten_attrs, - exclude_attrs=exclude_attrs) - dataarray = add_xy_coords_attrs(dataarray) - dataarray = _process_time_coord(dataarray, epoch=epoch) - return dataarray - - def _collect_cf_dataset(list_dataarrays, epoch=EPOCH, flatten_attrs=False, @@ -365,6 +268,7 @@ def _collect_cf_dataset(list_dataarrays, link_coords, make_alt_coords_unique, ) + from satpy.writers.cf.dataarray import make_cf_dataarray # Create dictionary of input datarrays # --> Since keys=None, it doesn't never retrieve ancillary variables !!! @@ -534,6 +438,24 @@ def collect_cf_datasets(list_dataarrays, return grouped_datasets, header_attrs +def _get_groups(groups, list_datarrays): + """Return a dictionary with the list of xr.DataArray associated to each group. + + If no groups (groups=None), return all DataArray attached to a single None key. + Else, collect the DataArrays associated to each group. + """ + if groups is None: + grouped_dataarrays = {None: list_datarrays} + else: + grouped_dataarrays = defaultdict(list) + for datarray in list_datarrays: + for group_name, group_members in groups.items(): + if datarray.attrs['name'] in group_members: + grouped_dataarrays[group_name].append(datarray) + break + return grouped_dataarrays + + def _sanitize_writer_kwargs(writer_kwargs): """Remove satpy-specific kwargs.""" writer_kwargs = copy.deepcopy(writer_kwargs) @@ -575,6 +497,7 @@ def da2cf(dataarray, epoch=EPOCH, flatten_attrs=False, exclude_attrs=None, numeric_name_prefix (str): Prepend dataset name with this if starting with a digit """ + from satpy.writers.cf.dataarray import make_cf_dataarray warnings.warn('CFWriter.da2cf is deprecated.' 
'Use satpy.writers.cf_writer.make_cf_dataarray instead.', DeprecationWarning, stacklevel=3) From 3953319ca3c8f94886e67030d589a35a68491fcc Mon Sep 17 00:00:00 2001 From: ghiggi Date: Wed, 28 Jun 2023 00:19:13 +0200 Subject: [PATCH 07/37] Refactor CF-compliant Datasets creation --- .../writer_tests/cf_tests/test_dataaarray.py | 19 + .../writer_tests/cf_tests/test_datasets.py | 129 +++++++ satpy/tests/writer_tests/test_cf.py | 86 ----- satpy/writers/cf/datasets.py | 272 ++++++++++++++ satpy/writers/cf_writer.py | 340 +++--------------- 5 files changed, 466 insertions(+), 380 deletions(-) create mode 100644 satpy/tests/writer_tests/cf_tests/test_datasets.py create mode 100644 satpy/writers/cf/datasets.py diff --git a/satpy/tests/writer_tests/cf_tests/test_dataaarray.py b/satpy/tests/writer_tests/cf_tests/test_dataaarray.py index 20c893d0a6..13ed11474e 100644 --- a/satpy/tests/writer_tests/cf_tests/test_dataaarray.py +++ b/satpy/tests/writer_tests/cf_tests/test_dataaarray.py @@ -49,6 +49,25 @@ def test_preprocess_dataarray_name(): assert "original_name" not in out_da.attrs +def test_make_cf_dataarray_lonlat(): + """Test correct CF encoding for area with lon/lat units.""" + from pyresample import create_area_def + + from satpy.resample import add_crs_xy_coords + from satpy.writers.cf.dataarray import make_cf_dataarray + + area = create_area_def("mavas", 4326, shape=(5, 5), + center=(0, 0), resolution=(1, 1)) + da = xr.DataArray( + np.arange(25).reshape(5, 5), + dims=("y", "x"), + attrs={"area": area}) + da = add_crs_xy_coords(da, area) + new_da = make_cf_dataarray(da) + assert new_da["x"].attrs["units"] == "degrees_east" + assert new_da["y"].attrs["units"] == "degrees_north" + + class TestCFWriter: """Test creation of CF DataArray.""" diff --git a/satpy/tests/writer_tests/cf_tests/test_datasets.py b/satpy/tests/writer_tests/cf_tests/test_datasets.py new file mode 100644 index 0000000000..d6784cd78f --- /dev/null +++ b/satpy/tests/writer_tests/cf_tests/test_datasets.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2017-2023 Satpy developers +# +# This file is part of satpy. +# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# satpy. If not, see . 
+"""Tests CF-compliant DataArray creation.""" +import datetime + +import pytest +import xarray as xr +from pyresample import AreaDefinition + + +def test_empty_collect_cf_datasets(): + """Test that if no DataArrays, collect_cf_datasets raise error.""" + from satpy.writers.cf.datasets import collect_cf_datasets + + with pytest.raises(RuntimeError): + collect_cf_datasets(list_dataarrays=[]) + + +class TestCollectCfDatasets: + """Test case for collect_cf_dataset.""" + + def test_collect_cf_dataarrays(self): + """Test collecting CF datasets from a DataArray objects.""" + from satpy.writers.cf.datasets import _collect_cf_dataset + + geos = AreaDefinition( + area_id='geos', + description='geos', + proj_id='geos', + projection={'proj': 'geos', 'h': 35785831., 'a': 6378169., 'b': 6356583.8}, + width=2, height=2, + area_extent=[-1, -1, 1, 1]) + + # Define test datasets + data = [[1, 2], [3, 4]] + y = [1, 2] + x = [1, 2] + time = [1, 2] + tstart = datetime.datetime(2019, 4, 1, 12, 0) + tend = datetime.datetime(2019, 4, 1, 12, 15) + list_dataarrays = [xr.DataArray(data=data, dims=('y', 'x'), coords={'y': y, 'x': x, 'acq_time': ('y', time)}, + attrs={'name': 'var1', 'start_time': tstart, 'end_time': tend, 'area': geos}), + xr.DataArray(data=data, dims=('y', 'x'), coords={'y': y, 'x': x, 'acq_time': ('y', time)}, + attrs={'name': 'var2', 'long_name': 'variable 2'})] + + # Collect datasets + ds = _collect_cf_dataset(list_dataarrays, include_lonlats=True) + + # Test results + assert len(ds.keys()) == 3 + assert set(ds.keys()) == {'var1', 'var2', 'geos'} + + da_var1 = ds['var1'] + da_var2 = ds['var2'] + assert da_var1.name == 'var1' + assert da_var1.attrs['grid_mapping'] == 'geos' + assert da_var1.attrs['long_name'] == 'var1' + # variable 2 + assert 'grid_mapping' not in da_var2.attrs + assert da_var2.attrs['long_name'] == 'variable 2' + + def test_collect_cf_dataarrays_with_latitude_named_lat(self): + """Test collecting CF datasets with latitude named lat.""" + from satpy.writers.cf.datasets import _collect_cf_dataset + + data = [[75, 2], [3, 4]] + y = [1, 2] + x = [1, 2] + geos = AreaDefinition( + area_id='geos', + description='geos', + proj_id='geos', + projection={'proj': 'geos', 'h': 35785831., 'a': 6378169., 'b': 6356583.8}, + width=2, height=2, + area_extent=[-1, -1, 1, 1]) + datasets = { + 'var1': xr.DataArray(data=data, + dims=('y', 'x'), + coords={'y': y, 'x': x}), + 'var2': xr.DataArray(data=data, + dims=('y', 'x'), + coords={'y': y, 'x': x}), + 'lat': xr.DataArray(data=data, + dims=('y', 'x'), + coords={'y': y, 'x': x}), + 'lon': xr.DataArray(data=data, + dims=('y', 'x'), + coords={'y': y, 'x': x})} + datasets['lat'].attrs['standard_name'] = 'latitude' + datasets['var1'].attrs['standard_name'] = 'dummy' + datasets['var2'].attrs['standard_name'] = 'dummy' + datasets['var2'].attrs['area'] = geos + datasets['var1'].attrs['area'] = geos + datasets['lat'].attrs['name'] = 'lat' + datasets['var1'].attrs['name'] = 'var1' + datasets['var2'].attrs['name'] = 'var2' + datasets['lon'].attrs['name'] = 'lon' + + datasets_list = [datasets[key] for key in datasets.keys()] + datasets_list_no_latlon = [datasets[key] for key in ['var1', 'var2']] + + # Collect datasets + ds = _collect_cf_dataset(datasets_list, include_lonlats=True) + ds2 = _collect_cf_dataset(datasets_list_no_latlon, include_lonlats=True) + + # Test results + assert len(ds.keys()) == 5 + assert set(ds.keys()) == {'var1', 'var2', 'lon', 'lat', 'geos'} + with pytest.raises(KeyError): + ds['var1'].attrs["latitude"] + with pytest.raises(KeyError): + 
ds['var1'].attrs["longitude"] + assert ds2['var1']['latitude'].attrs['name'] == 'latitude' + assert ds2['var1']['longitude'].attrs['name'] == 'longitude' diff --git a/satpy/tests/writer_tests/test_cf.py b/satpy/tests/writer_tests/test_cf.py index 96cc09069a..628a32fed3 100644 --- a/satpy/tests/writer_tests/test_cf.py +++ b/satpy/tests/writer_tests/test_cf.py @@ -86,23 +86,6 @@ def test_lonlat_storage(tmp_path): np.testing.assert_allclose(ds["mavas"].attrs["inverse_flattening"], 298.257223563) -def test_make_cf_dataarray_lonlat(): - """Test correct CF encoding for area with lon/lat units.""" - from satpy.resample import add_crs_xy_coords - from satpy.writers.cf.dataarray import make_cf_dataarray - - area = create_area_def("mavas", 4326, shape=(5, 5), - center=(0, 0), resolution=(1, 1)) - da = xr.DataArray( - np.arange(25).reshape(5, 5), - dims=("y", "x"), - attrs={"area": area}) - da = add_crs_xy_coords(da, area) - new_da = make_cf_dataarray(da) - assert new_da["x"].attrs["units"] == "degrees_east" - assert new_da["y"].attrs["units"] == "degrees_north" - - def test_is_projected(caplog): """Tests for private _is_projected function.""" from satpy.writers.cf.crs import _is_projected @@ -130,14 +113,6 @@ def test_is_projected(caplog): assert "Failed to tell if data are projected." in caplog.text -def test_empty_collect_cf_datasets(): - """Test that if no DataArrays, collect_cf_datasets raise error.""" - from satpy.writers.cf_writer import collect_cf_datasets - - with pytest.raises(RuntimeError): - collect_cf_datasets(list_dataarrays=[]) - - class TestCFWriter: """Test case for CF writer.""" @@ -471,46 +446,6 @@ def test_header_attrs(self): assert f.attrs['bool_'] == 'true' assert 'none' not in f.attrs.keys() - def test_collect_cf_dataarrays(self): - """Test collecting CF datasets from a DataArray objects.""" - from satpy.writers.cf_writer import _collect_cf_dataset - - geos = pyresample.geometry.AreaDefinition( - area_id='geos', - description='geos', - proj_id='geos', - projection={'proj': 'geos', 'h': 35785831., 'a': 6378169., 'b': 6356583.8}, - width=2, height=2, - area_extent=[-1, -1, 1, 1]) - - # Define test datasets - data = [[1, 2], [3, 4]] - y = [1, 2] - x = [1, 2] - time = [1, 2] - tstart = datetime(2019, 4, 1, 12, 0) - tend = datetime(2019, 4, 1, 12, 15) - list_dataarrays = [xr.DataArray(data=data, dims=('y', 'x'), coords={'y': y, 'x': x, 'acq_time': ('y', time)}, - attrs={'name': 'var1', 'start_time': tstart, 'end_time': tend, 'area': geos}), - xr.DataArray(data=data, dims=('y', 'x'), coords={'y': y, 'x': x, 'acq_time': ('y', time)}, - attrs={'name': 'var2', 'long_name': 'variable 2'})] - - # Collect datasets - ds = _collect_cf_dataset(list_dataarrays, include_lonlats=True) - - # Test results - assert len(ds.keys()) == 3 - assert set(ds.keys()) == {'var1', 'var2', 'geos'} - - da_var1 = ds['var1'] - da_var2 = ds['var2'] - assert da_var1.name == 'var1' - assert da_var1.attrs['grid_mapping'] == 'geos' - assert da_var1.attrs['long_name'] == 'var1' - # variable 2 - assert 'grid_mapping' not in da_var2.attrs - assert da_var2.attrs['long_name'] == 'variable 2' - def test_load_module_with_old_pyproj(self): """Test that cf_writer can still be loaded with pyproj 1.9.6.""" import importlib @@ -618,27 +553,6 @@ def test_has_projection_coords(self, datasets): datasets['lat'].attrs['standard_name'] = 'dummy' assert not has_projection_coords(datasets) - def test_collect_cf_dataarrays_with_latitude_named_lat(self, datasets): - """Test collecting CF datasets with latitude named lat.""" - from 
satpy.writers.cf_writer import _collect_cf_dataset
-
-        datasets_list = [datasets[key] for key in datasets.keys()]
-        datasets_list_no_latlon = [datasets[key] for key in ['var1', 'var2']]
-
-        # Collect datasets
-        ds = _collect_cf_dataset(datasets_list, include_lonlats=True)
-        ds2 = _collect_cf_dataset(datasets_list_no_latlon, include_lonlats=True)
-
-        # Test results
-        assert len(ds.keys()) == 5
-        assert set(ds.keys()) == {'var1', 'var2', 'lon', 'lat', 'geos'}
-        with pytest.raises(KeyError):
-            ds['var1'].attrs["latitude"]
-        with pytest.raises(KeyError):
-            ds['var1'].attrs["longitude"]
-        assert ds2['var1']['latitude'].attrs['name'] == 'latitude'
-        assert ds2['var1']['longitude'].attrs['name'] == 'longitude'
-
 
 class TestNetcdfEncodingKwargs:
     """Test netCDF compression encodings."""
diff --git a/satpy/writers/cf/datasets.py b/satpy/writers/cf/datasets.py
new file mode 100644
index 0000000000..09726c0e32
--- /dev/null
+++ b/satpy/writers/cf/datasets.py
@@ -0,0 +1,272 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2017-2023 Satpy developers
+#
+# This file is part of satpy.
+#
+# satpy is free software: you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# satpy. If not, see .
+"""Utilities to generate CF-compliant Datasets."""
+import warnings
+from collections import defaultdict
+
+import xarray as xr
+
+from satpy.writers.cf.time import EPOCH
+from satpy.writers.cf_writer import CF_DTYPES, CF_VERSION
+
+
+def get_extra_ds(dataarray, keys=None):
+    """Get the ancillary_variables DataArrays associated with a dataset."""
+    ds_collection = {}
+    # Retrieve ancillary variable dataarrays
+    for ancillary_dataarray in dataarray.attrs.get('ancillary_variables', []):
+        ancillary_variable = ancillary_dataarray.name
+        if keys and ancillary_variable not in keys:
+            keys.append(ancillary_variable)
+            ds_collection.update(get_extra_ds(ancillary_dataarray, keys=keys))
+    # Add input dataarray
+    ds_collection[dataarray.attrs['name']] = dataarray
+    return ds_collection
+
+
+def _get_groups(groups, list_datarrays):
+    """Return a dictionary with the list of xr.DataArray associated with each group.
+
+    If groups is None, return all DataArrays attached to a single None key.
+    Otherwise, collect the DataArrays associated with each group.
+    """
+    if groups is None:
+        grouped_dataarrays = {None: list_datarrays}
+    else:
+        grouped_dataarrays = defaultdict(list)
+        for datarray in list_datarrays:
+            for group_name, group_members in groups.items():
+                if datarray.attrs['name'] in group_members:
+                    grouped_dataarrays[group_name].append(datarray)
+                    break
+    return grouped_dataarrays
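+
+
+# A minimal sketch of the grouping behaviour (illustrative only; ``da_a`` and
+# ``da_b`` stand for DataArrays whose attrs["name"] are "a" and "b"):
+#
+#     _get_groups(None, [da_a, da_b])            # -> {None: [da_a, da_b]}
+#     _get_groups({"g1": ["a"]}, [da_a, da_b])   # -> {"g1": [da_a]}
+#
+# DataArrays whose name is not listed in any group are silently dropped.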
+
+
+def _collect_cf_dataset(list_dataarrays,
+                        epoch=EPOCH,
+                        flatten_attrs=False,
+                        exclude_attrs=None,
+                        include_lonlats=True,
+                        pretty=False,
+                        include_orig_name=True,
+                        numeric_name_prefix='CHANNEL_'):
+    """Process a list of xr.DataArray and return a dictionary with CF-compliant xr.Dataset.
+
+    Parameters
+    ----------
+    list_dataarrays : list
+        List of DataArrays to make CF compliant and merge into an xr.Dataset.
+    epoch : str
+        Reference time for encoding the time coordinates (if available).
+        Example format: "seconds since 1970-01-01 00:00:00".
+        If None, the default reference time is retrieved using `from satpy.writers.cf.time import EPOCH`.
+    flatten_attrs : bool, optional
+        If True, flatten dict-type attributes.
+    exclude_attrs : list, optional
+        List of xr.DataArray attribute names to be excluded.
+    include_lonlats : bool, optional
+        If True, include 'latitude' and 'longitude' coordinates also for a satpy Scene defined on an AreaDefinition.
+        If the 'area' attribute is a SwathDefinition, the latitude and longitude coordinates are always included.
+    pretty : bool, optional
+        Don't modify coordinate names, if possible. Makes the file prettier, but possibly less consistent.
+    include_orig_name : bool, optional
+        Include the original dataset name as a variable attribute in the xr.Dataset.
+    numeric_name_prefix : str, optional
+        Prefix to add to each variable whose name starts with a digit.
+        Use '' or None to leave this out.
+
+    Returns
+    -------
+    ds : xr.Dataset
+        A partially CF-compliant xr.Dataset
+    """
+    from satpy.writers.cf.area import (
+        area2cf,
+        assert_xy_unique,
+        has_projection_coords,
+        link_coords,
+        make_alt_coords_unique,
+    )
+    from satpy.writers.cf.dataarray import make_cf_dataarray
+
+    # Create dictionary of input dataarrays
+    # --> Since keys=None, it never retrieves ancillary variables !!!
+    ds_collection = {}
+    for dataarray in list_dataarrays:
+        ds_collection.update(get_extra_ds(dataarray))
+
+    # Check if one DataArray in the collection has 'longitude' or 'latitude'
+    got_lonlats = has_projection_coords(ds_collection)
+
+    # Sort dictionary by keys name
+    ds_collection = dict(sorted(ds_collection.items()))
+
+    dict_dataarrays = {}
+    for dataarray in ds_collection.values():
+        dataarray_type = dataarray.dtype
+        if dataarray_type not in CF_DTYPES:
+            warnings.warn(
+                f'dtype {dataarray_type} not compatible with {CF_VERSION}.',
+                stacklevel=3
+            )
+        # Deep copy the dataarray since we add/modify attributes and coordinates
+        dataarray = dataarray.copy(deep=True)
+
+        # Add CF-compliant area information from the pyresample area
+        # - If include_lonlats=True, add latitude and longitude coordinates
+        # - Add grid_mapping attribute to the DataArray
+        # - Return the CRS DataArray as first list element
+        # - Return the CF-compliant input DataArray as second list element
+        try:
+            list_new_dataarrays = area2cf(dataarray,
+                                          include_lonlats=include_lonlats,
+                                          got_lonlats=got_lonlats)
+        except KeyError:
+            list_new_dataarrays = [dataarray]
+
+        # Ensure each DataArray is CF-compliant
+        # --> NOTE: Here the CRS DataArray is repeatedly overwritten
+        # --> NOTE: If the input list_dataarrays have different pyresample areas with the same name,
+        #           area information can be lost here !!!
+        for new_dataarray in list_new_dataarrays:
+            new_dataarray = make_cf_dataarray(new_dataarray,
+                                              epoch=epoch,
+                                              flatten_attrs=flatten_attrs,
+                                              exclude_attrs=exclude_attrs,
+                                              include_orig_name=include_orig_name,
+                                              numeric_name_prefix=numeric_name_prefix)
+            dict_dataarrays[new_dataarray.name] = new_dataarray
+
+    # Check that the x and y coordinates are identical across all DataArrays
+    assert_xy_unique(dict_dataarrays)
+
+    # Deal with the 'coordinates' attributes indicating lat/lon coords
+    # NOTE: these attributes are currently dropped by default !!!
+    link_coords(dict_dataarrays)
+
+    # Ensure non-dimensional coordinates are unique across DataArrays
+    # --> If not unique, prepend the DataArray name to the coordinate
+    # --> If unique, prepend the DataArray name only if pretty=False
+    # --> 'longitude' and 'latitude' coordinates are not prepended
+    dict_dataarrays = make_alt_coords_unique(dict_dataarrays, pretty=pretty)
+
+    # Create an xr.Dataset
+    ds = xr.Dataset(dict_dataarrays)
+    return ds
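+
+
+# Hedged usage sketch (illustrative only; the DataArray is made up and any
+# DataArray with a "name" attribute would do; assumes ``import xarray as xr``):
+#
+#     arr = xr.DataArray([[1., 2.], [3., 4.]], dims=("y", "x"),
+#                        attrs={"name": "var1"})
+#     ds = _collect_cf_dataset([arr], include_lonlats=False)
+#     assert "var1" in ds.data_vars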
+
+
+def collect_cf_datasets(list_dataarrays,
+                        header_attrs=None,
+                        exclude_attrs=None,
+                        flatten_attrs=False,
+                        pretty=True,
+                        include_lonlats=True,
+                        epoch=EPOCH,
+                        include_orig_name=True,
+                        numeric_name_prefix='CHANNEL_',
+                        groups=None):
+    """Process a list of xr.DataArray and return a dictionary with CF-compliant xr.Datasets.
+
+    If the xr.DataArrays do not share the same dimensions, it creates a collection
+    of xr.Datasets sharing the same dimensions.
+
+    Parameters
+    ----------
+    list_dataarrays (list):
+        List of DataArrays to make CF compliant and merge into groups of xr.Datasets.
+    header_attrs (dict):
+        Global attributes of the output xr.Dataset.
+    epoch (str):
+        Reference time for encoding the time coordinates (if available).
+        Example format: "seconds since 1970-01-01 00:00:00".
+        If None, the default reference time is retrieved using `from satpy.writers.cf.time import EPOCH`.
+    flatten_attrs (bool):
+        If True, flatten dict-type attributes.
+    exclude_attrs (list):
+        List of xr.DataArray attribute names to be excluded.
+    include_lonlats (bool):
+        If True, include 'latitude' and 'longitude' coordinates also for a satpy Scene defined on an AreaDefinition.
+        If the 'area' attribute is a SwathDefinition, the latitude and longitude coordinates are always included.
+    pretty (bool):
+        Don't modify coordinate names, if possible. Makes the file prettier, but possibly less consistent.
+    include_orig_name (bool):
+        Include the original dataset name as a variable attribute in the xr.Dataset.
+    numeric_name_prefix (str):
+        Prefix to add to each variable whose name starts with a digit.
+        Use '' or None to leave this out.
+    groups (dict):
+        Group datasets according to the given assignment:
+
+            `{'<group_name>': ['dataset_name1', 'dataset_name2', ...]}`
+
+        It is used to create grouped netCDFs using the CFWriter.
+        If None (the default), no groups will be created.
+
+    Returns
+    -------
+    grouped_datasets : dict
+        A dictionary of CF-compliant xr.Dataset: {group_name: xr.Dataset}
+    header_attrs : dict
+        Global attributes to be attached to the xr.Dataset / netCDF4.
+    """
+    from satpy.writers.cf.attrs import preprocess_header_attrs
+    from satpy.writers.cf.time import add_time_bounds_dimension
+
+    if not list_dataarrays:
+        raise RuntimeError("None of the requested datasets have been "
+                           "generated or could not be loaded. Requested "
+                           "composite inputs may need to have matching "
+                           "dimensions (eg. through resampling).")
+
+    header_attrs = preprocess_header_attrs(header_attrs=header_attrs,
+                                           flatten_attrs=flatten_attrs)
+
+    # Retrieve groups
+    # - If groups is None: {None: list_dataarrays}
+    # - If groups is not None: {group_name: [xr.DataArray, xr.DataArray, ...], ...}
+    # Note: if all dataset names are wrong, it behaves like groups=None !
+    grouped_dataarrays = _get_groups(groups, list_dataarrays)
+    is_grouped = len(grouped_dataarrays) >= 2
+
+    # If not grouped, add CF conventions.
+    # - If the 'Conventions' key is already present, do not overwrite it !
+    if "Conventions" not in header_attrs and not is_grouped:
+        header_attrs['Conventions'] = CF_VERSION
+
+    # Create dictionary of group xr.Datasets
+    # --> If no groups (groups=None) --> group_name=None
+    grouped_datasets = {}
+    for group_name, group_dataarrays in grouped_dataarrays.items():
+        ds = _collect_cf_dataset(
+            list_dataarrays=group_dataarrays,
+            epoch=epoch,
+            flatten_attrs=flatten_attrs,
+            exclude_attrs=exclude_attrs,
+            include_lonlats=include_lonlats,
+            pretty=pretty,
+            include_orig_name=include_orig_name,
+            numeric_name_prefix=numeric_name_prefix)
+
+        if not is_grouped:
+            ds.attrs = header_attrs
+
+        if 'time' in ds:
+            ds = add_time_bounds_dimension(ds, time="time")
+
+        grouped_datasets[group_name] = ds
+    return grouped_datasets, header_attrs
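+
+
+# Sketch of grouped collection (illustrative only; ``arr1`` and ``arr2`` are
+# assumed to be DataArrays named "var1" and "var2"):
+#
+#     datasets, header_attrs = collect_cf_datasets(
+#         [arr1, arr2],
+#         header_attrs={"title": "demo"},
+#         groups={"vis": ["var1"], "ir": ["var2"]})
+#     # datasets maps each group name to a CF-compliant xr.Dataset,
+#     # e.g. datasets["vis"] contains only "var1".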
+ if "Conventions" not in header_attrs and not is_grouped: + header_attrs['Conventions'] = CF_VERSION + + # Create dictionary of group xr.Datasets + # --> If no groups (groups=None) --> group_name=None + grouped_datasets = {} + for group_name, group_dataarrays in grouped_dataarrays.items(): + ds = _collect_cf_dataset( + list_dataarrays=group_dataarrays, + epoch=epoch, + flatten_attrs=flatten_attrs, + exclude_attrs=exclude_attrs, + include_lonlats=include_lonlats, + pretty=pretty, + include_orig_name=include_orig_name, + numeric_name_prefix=numeric_name_prefix) + + if not is_grouped: + ds.attrs = header_attrs + + if 'time' in ds: + ds = add_time_bounds_dimension(ds, time="time") + + grouped_datasets[group_name] = ds + return grouped_datasets, header_attrs diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index d7503860b4..85c6fe999b 100644 --- a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -157,7 +157,6 @@ import copy import logging import warnings -from collections import defaultdict import numpy as np import xarray as xr @@ -183,6 +182,10 @@ if netCDF4 is None and h5netcdf is None: raise ImportError('Ensure that the netCDF4 or h5netcdf package is installed.') + +CF_VERSION = 'CF-1.7' + + # Numpy datatypes compatible with all netCDF4 backends. ``np.unicode_`` is # excluded because h5py (and thus h5netcdf) has problems with unicode, see # https://github.com/h5py/h5py/issues/624.""" @@ -202,259 +205,6 @@ np.dtype('float64'), np.string_] -CF_VERSION = 'CF-1.7' - - -def get_extra_ds(dataarray, keys=None): - """Get the ancillary_variables DataArrays associated to a dataset.""" - ds_collection = {} - # Retrieve ancillary variable datarrays - for ancillary_dataarray in dataarray.attrs.get('ancillary_variables', []): - ancillary_variable = ancillary_dataarray.name - if keys and ancillary_variable not in keys: - keys.append(ancillary_variable) - ds_collection.update(get_extra_ds(ancillary_dataarray, keys=keys)) - # Add input dataarray - ds_collection[dataarray.attrs['name']] = dataarray - return ds_collection - - -# ###--------------------------------------------------------------------------. -# ### CF-conversion - - -def _collect_cf_dataset(list_dataarrays, - epoch=EPOCH, - flatten_attrs=False, - exclude_attrs=None, - include_lonlats=True, - pretty=False, - include_orig_name=True, - numeric_name_prefix='CHANNEL_'): - """Process a list of xr.DataArray and return a dictionary with CF-compliant xr.Dataset. - - Parameters - ---------- - list_dataarrays : list - List of DataArrays to make CF compliant and merge into a xr.Dataset. - epoch : str - Reference time for encoding the time coordinates (if available). - Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is retrieved using `from satpy.cf_writer import EPOCH` - flatten_attrs : bool, optional - If True, flatten dict-type attributes. - exclude_attrs : list, optional - List of xr.DataArray attribute names to be excluded. - include_lonlats : bool, optional - If True, it includes 'latitude' and 'longitude' coordinates also for satpy scene defined on an AreaDefinition. - If the 'area' attribute is a SwathDefinition, it always include latitude and longitude coordinates. - pretty : bool, optional - Don't modify coordinate names, if possible. Makes the file prettier, but possibly less consistent. - include_orig_name : bool, optional - Include the original dataset name as a variable attribute in the xr.Dataset. 
- numeric_name_prefix : str, optional - Prefix to add the each variable with name starting with a digit. - Use '' or None to leave this out. - - Returns - ------- - ds : xr.Dataset - A partially CF-compliant xr.Dataset - """ - from satpy.writers.cf.area import ( - area2cf, - assert_xy_unique, - has_projection_coords, - link_coords, - make_alt_coords_unique, - ) - from satpy.writers.cf.dataarray import make_cf_dataarray - - # Create dictionary of input datarrays - # --> Since keys=None, it doesn't never retrieve ancillary variables !!! - ds_collection = {} - for dataarray in list_dataarrays: - ds_collection.update(get_extra_ds(dataarray)) - - # Check if one DataArray in the collection has 'longitude' or 'latitude' - got_lonlats = has_projection_coords(ds_collection) - - # Sort dictionary by keys name - ds_collection = dict(sorted(ds_collection.items())) - - dict_dataarrays = {} - for dataarray in ds_collection.values(): - dataarray_type = dataarray.dtype - if dataarray_type not in CF_DTYPES: - warnings.warn( - f'dtype {dataarray_type} not compatible with {CF_VERSION}.', - stacklevel=3 - ) - # Deep copy the datarray since adding/modifying attributes and coordinates - dataarray = dataarray.copy(deep=True) - - # Add CF-compliant area information from the pyresample area - # - If include_lonlats=True, add latitude and longitude coordinates - # - Add grid_mapping attribute to the DataArray - # - Return the CRS DataArray as first list element - # - Return the CF-compliant input DataArray as second list element - try: - list_new_dataarrays = area2cf(dataarray, - include_lonlats=include_lonlats, - got_lonlats=got_lonlats) - except KeyError: - list_new_dataarrays = [dataarray] - - # Ensure each DataArray is CF-compliant - # --> NOTE: Here the CRS DataArray is repeatedly overwrited - # --> NOTE: If the input list_dataarrays have different pyresample areas with the same name - # area information can be lost here !!! - for new_dataarray in list_new_dataarrays: - new_dataarray = make_cf_dataarray(new_dataarray, - epoch=epoch, - flatten_attrs=flatten_attrs, - exclude_attrs=exclude_attrs, - include_orig_name=include_orig_name, - numeric_name_prefix=numeric_name_prefix) - dict_dataarrays[new_dataarray.name] = new_dataarray - - # Check all DataArray have same size - assert_xy_unique(dict_dataarrays) - - # Deal with the 'coordinates' attributes indicating lat/lon coords - # NOTE: this currently is dropped by default !!! - link_coords(dict_dataarrays) - - # Ensure non-dimensional coordinates to be unique across DataArrays - # --> If not unique, prepend the DataArray name to the coordinate - # --> If unique, does not prepend the DataArray name only if pretty=True - # --> 'longitude' and 'latitude' coordinates are not prepended - dict_dataarrays = make_alt_coords_unique(dict_dataarrays, pretty=pretty) - - # Create a xr.Dataset - ds = xr.Dataset(dict_dataarrays) - return ds - - -def collect_cf_datasets(list_dataarrays, - header_attrs=None, - exclude_attrs=None, - flatten_attrs=False, - pretty=True, - include_lonlats=True, - epoch=EPOCH, - include_orig_name=True, - numeric_name_prefix='CHANNEL_', - groups=None): - """Process a list of xr.DataArray and return a dictionary with CF-compliant xr.Datasets. - - If the xr.DataArrays does not share the same dimensions, it creates a collection - of xr.Datasets sharing the same dimensions. - - Parameters - ---------- - list_dataarrays (list): - List of DataArrays to make CF compliant and merge into groups of xr.Datasets. 
- header_attrs: (dict): - Global attributes of the output xr.Dataset. - epoch (str): - Reference time for encoding the time coordinates (if available). - Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is retrieved using `from satpy.cf_writer import EPOCH` - flatten_attrs (bool): - If True, flatten dict-type attributes. - exclude_attrs (list): - List of xr.DataArray attribute names to be excluded. - include_lonlats (bool): - If True, it includes 'latitude' and 'longitude' coordinates also for satpy scene defined on an AreaDefinition. - If the 'area' attribute is a SwathDefinition, it always include latitude and longitude coordinates. - pretty (bool): - Don't modify coordinate names, if possible. Makes the file prettier, but possibly less consistent. - include_orig_name (bool). - Include the original dataset name as a variable attribute in the xr.Dataset. - numeric_name_prefix (str): - Prefix to add the each variable with name starting with a digit. - Use '' or None to leave this out. - groups (dict): - Group datasets according to the given assignment: - - `{'': ['dataset_name1', 'dataset_name2', ...]}` - - It is used to create grouped netCDFs using the CF_Writer. - If None (the default), no groups will be created. - - Returns - ------- - grouped_datasets : dict - A dictionary of CF-compliant xr.Dataset: {group_name: xr.Dataset} - header_attrs : dict - Global attributes to be attached to the xr.Dataset / netCDF4. - """ - from satpy.writers.cf.attrs import preprocess_header_attrs - from satpy.writers.cf.time import add_time_bounds_dimension - - if not list_dataarrays: - raise RuntimeError("None of the requested datasets have been " - "generated or could not be loaded. Requested " - "composite inputs may need to have matching " - "dimensions (eg. through resampling).") - - header_attrs = preprocess_header_attrs(header_attrs=header_attrs, - flatten_attrs=flatten_attrs) - - # Retrieve groups - # - If groups is None: {None: list_dataarrays} - # - if groups not None: {group_name: [xr.DataArray, xr.DataArray ,..], ...} - # Note: if all dataset names are wrong, behave like groups = None ! - grouped_dataarrays = _get_groups(groups, list_dataarrays) - is_grouped = len(grouped_dataarrays) >= 2 - - # If not grouped, add CF conventions. - # - If 'Conventions' key already present, do not overwrite ! - if "Conventions" not in header_attrs and not is_grouped: - header_attrs['Conventions'] = CF_VERSION - - # Create dictionary of group xr.Datasets - # --> If no groups (groups=None) --> group_name=None - grouped_datasets = {} - for group_name, group_dataarrays in grouped_dataarrays.items(): - ds = _collect_cf_dataset( - list_dataarrays=group_dataarrays, - epoch=epoch, - flatten_attrs=flatten_attrs, - exclude_attrs=exclude_attrs, - include_lonlats=include_lonlats, - pretty=pretty, - include_orig_name=include_orig_name, - numeric_name_prefix=numeric_name_prefix) - - if not is_grouped: - ds.attrs = header_attrs - - if 'time' in ds: - ds = add_time_bounds_dimension(ds, time="time") - - grouped_datasets[group_name] = ds - return grouped_datasets, header_attrs - - -def _get_groups(groups, list_datarrays): - """Return a dictionary with the list of xr.DataArray associated to each group. - - If no groups (groups=None), return all DataArray attached to a single None key. - Else, collect the DataArrays associated to each group. 
- """ - if groups is None: - grouped_dataarrays = {None: list_datarrays} - else: - grouped_dataarrays = defaultdict(list) - for datarray in list_datarrays: - for group_name, group_members in groups.items(): - if datarray.attrs['name'] in group_members: - grouped_dataarrays[group_name].append(datarray) - break - return grouped_dataarrays - def _sanitize_writer_kwargs(writer_kwargs): """Remove satpy-specific kwargs.""" @@ -478,46 +228,6 @@ def _initialize_root_netcdf(filename, engine, header_attrs, to_netcdf_kwargs): class CFWriter(Writer): """Writer producing NetCDF/CF compatible datasets.""" - @staticmethod - def da2cf(dataarray, epoch=EPOCH, flatten_attrs=False, exclude_attrs=None, - include_orig_name=True, numeric_name_prefix='CHANNEL_'): - """Convert the dataarray to something cf-compatible. - - Args: - dataarray (xr.DataArray): - The data array to be converted - epoch (str): - Reference time for encoding of time coordinates - flatten_attrs (bool): - If True, flatten dict-type attributes - exclude_attrs (list): - List of dataset attributes to be excluded - include_orig_name (bool): - Include the original dataset name in the netcdf variable attributes - numeric_name_prefix (str): - Prepend dataset name with this if starting with a digit - """ - from satpy.writers.cf.dataarray import make_cf_dataarray - warnings.warn('CFWriter.da2cf is deprecated.' - 'Use satpy.writers.cf_writer.make_cf_dataarray instead.', - DeprecationWarning, stacklevel=3) - return make_cf_dataarray(dataarray=dataarray, - epoch=epoch, - flatten_attrs=flatten_attrs, - exclude_attrs=exclude_attrs, - include_orig_name=include_orig_name, - numeric_name_prefix=numeric_name_prefix) - - @staticmethod - def update_encoding(dataset, to_netcdf_kwargs): - """Update encoding info (deprecated).""" - from satpy.writers.cf.encoding import update_encoding - - warnings.warn('CFWriter.update_encoding is deprecated. ' - 'Use satpy.writers.cf.encoding.update_encoding instead.', - DeprecationWarning, stacklevel=3) - return update_encoding(dataset, to_netcdf_kwargs) - def save_dataset(self, dataset, filename=None, fill_value=None, **kwargs): """Save the *dataset* to a given *filename*.""" return self.save_datasets([dataset], filename, **kwargs) @@ -560,6 +270,7 @@ def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, Prefix to add the each variable with name starting with a digit. Use '' or None to leave this out. """ + from satpy.writers.cf.datasets import collect_cf_datasets from satpy.writers.cf.encoding import update_encoding logger.info('Saving datasets to NetCDF4/CF.') @@ -614,6 +325,47 @@ def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, written.append(res) return written + @staticmethod + def da2cf(dataarray, epoch=EPOCH, flatten_attrs=False, exclude_attrs=None, + include_orig_name=True, numeric_name_prefix='CHANNEL_'): + """Convert the dataarray to something cf-compatible. + + Args: + dataarray (xr.DataArray): + The data array to be converted + epoch (str): + Reference time for encoding of time coordinates + flatten_attrs (bool): + If True, flatten dict-type attributes + exclude_attrs (list): + List of dataset attributes to be excluded + include_orig_name (bool): + Include the original dataset name in the netcdf variable attributes + numeric_name_prefix (str): + Prepend dataset name with this if starting with a digit + """ + from satpy.writers.cf.dataarray import make_cf_dataarray + warnings.warn('CFWriter.da2cf is deprecated.' 
+ 'Use satpy.writers.cf_writer.make_cf_dataarray instead.', + DeprecationWarning, stacklevel=3) + return make_cf_dataarray(dataarray=dataarray, + epoch=epoch, + flatten_attrs=flatten_attrs, + exclude_attrs=exclude_attrs, + include_orig_name=include_orig_name, + numeric_name_prefix=numeric_name_prefix) + + @staticmethod + def update_encoding(dataset, to_netcdf_kwargs): + """Update encoding info (deprecated).""" + from satpy.writers.cf.encoding import update_encoding + + warnings.warn('CFWriter.update_encoding is deprecated. ' + 'Use satpy.writers.cf.encoding.update_encoding instead.', + DeprecationWarning, stacklevel=3) + return update_encoding(dataset, to_netcdf_kwargs) + + # --------------------------------------------------------------------------. # NetCDF version From 064558d25f460b2ba8c3a59082e0c9dee85d568a Mon Sep 17 00:00:00 2001 From: ghiggi Date: Wed, 28 Jun 2023 00:20:53 +0200 Subject: [PATCH 08/37] Fix changed imports --- satpy/_scene_converters.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/satpy/_scene_converters.py b/satpy/_scene_converters.py index 25fe728b9f..ba4432a58f 100644 --- a/satpy/_scene_converters.py +++ b/satpy/_scene_converters.py @@ -90,7 +90,8 @@ def to_xarray(scn, A CF-compliant xr.Dataset """ - from satpy.writers.cf_writer import EPOCH, collect_cf_datasets + from satpy.writers.cf.datasets import collect_cf_datasets + from satpy.writers.cf.time import EPOCH if epoch is None: epoch = EPOCH From 5762950332a450f4f3d36ff9af01bae26271e6e4 Mon Sep 17 00:00:00 2001 From: ghiggi Date: Wed, 28 Jun 2023 00:44:41 +0200 Subject: [PATCH 09/37] Fix all writers tests --- .../tests/writer_tests/cf_tests/test_area.py | 87 ++++++++++++++ .../writer_tests/cf_tests/test_datasets.py | 23 +++- satpy/tests/writer_tests/test_cf.py | 108 ------------------ 3 files changed, 109 insertions(+), 109 deletions(-) diff --git a/satpy/tests/writer_tests/cf_tests/test_area.py b/satpy/tests/writer_tests/cf_tests/test_area.py index e293ff39a6..e3454b3170 100644 --- a/satpy/tests/writer_tests/cf_tests/test_area.py +++ b/satpy/tests/writer_tests/cf_tests/test_area.py @@ -16,12 +16,21 @@ # You should have received a copy of the GNU General Public License along with # satpy. If not, see . 
"""Tests for the CF Area.""" +import logging + import dask.array as da import numpy as np import pytest import xarray as xr from pyresample import AreaDefinition, SwathDefinition +logger = logging.getLogger(__name__) + + +# NOTE: +# The following fixtures are not defined in this file, but are used and injected by Pytest: +# - caplog + class TestCFArea: """Test case for CF Area.""" @@ -399,3 +408,81 @@ def test_add_lonlat_coords(self): np.testing.assert_array_equal(lon.data, lons_ref) assert {'name': 'latitude', 'standard_name': 'latitude', 'units': 'degrees_north'}.items() <= lat.attrs.items() assert {'name': 'longitude', 'standard_name': 'longitude', 'units': 'degrees_east'}.items() <= lon.attrs.items() + + def test_is_projected(self, caplog): + """Tests for private _is_projected function.""" + from satpy.writers.cf.crs import _is_projected + + # test case with units but no area + da = xr.DataArray( + np.arange(25).reshape(5, 5), + dims=("y", "x"), + coords={"x": xr.DataArray(np.arange(5), dims=("x",), attrs={"units": "m"}), + "y": xr.DataArray(np.arange(5), dims=("y",), attrs={"units": "m"})}) + assert _is_projected(da) + + da = xr.DataArray( + np.arange(25).reshape(5, 5), + dims=("y", "x"), + coords={"x": xr.DataArray(np.arange(5), dims=("x",), attrs={"units": "degrees_east"}), + "y": xr.DataArray(np.arange(5), dims=("y",), attrs={"units": "degrees_north"})}) + assert not _is_projected(da) + + da = xr.DataArray( + np.arange(25).reshape(5, 5), + dims=("y", "x")) + with caplog.at_level(logging.WARNING): + assert _is_projected(da) + assert "Failed to tell if data are projected." in caplog.text + + @pytest.fixture + def datasets(self): + """Create test dataset.""" + data = [[75, 2], [3, 4]] + y = [1, 2] + x = [1, 2] + geos = AreaDefinition( + area_id='geos', + description='geos', + proj_id='geos', + projection={'proj': 'geos', 'h': 35785831., 'a': 6378169., 'b': 6356583.8}, + width=2, height=2, + area_extent=[-1, -1, 1, 1]) + datasets = { + 'var1': xr.DataArray(data=data, + dims=('y', 'x'), + coords={'y': y, 'x': x}), + 'var2': xr.DataArray(data=data, + dims=('y', 'x'), + coords={'y': y, 'x': x}), + 'lat': xr.DataArray(data=data, + dims=('y', 'x'), + coords={'y': y, 'x': x}), + 'lon': xr.DataArray(data=data, + dims=('y', 'x'), + coords={'y': y, 'x': x})} + datasets['lat'].attrs['standard_name'] = 'latitude' + datasets['var1'].attrs['standard_name'] = 'dummy' + datasets['var2'].attrs['standard_name'] = 'dummy' + datasets['var2'].attrs['area'] = geos + datasets['var1'].attrs['area'] = geos + datasets['lat'].attrs['name'] = 'lat' + datasets['var1'].attrs['name'] = 'var1' + datasets['var2'].attrs['name'] = 'var2' + datasets['lon'].attrs['name'] = 'lon' + return datasets + + def test_is_lon_or_lat_dataarray(self, datasets): + """Test the is_lon_or_lat_dataarray function.""" + from satpy.writers.cf.area import is_lon_or_lat_dataarray + + assert is_lon_or_lat_dataarray(datasets['lat']) + assert not is_lon_or_lat_dataarray(datasets['var1']) + + def test_has_projection_coords(self, datasets): + """Test the has_projection_coords function.""" + from satpy.writers.cf.area import has_projection_coords + + assert has_projection_coords(datasets) + datasets['lat'].attrs['standard_name'] = 'dummy' + assert not has_projection_coords(datasets) diff --git a/satpy/tests/writer_tests/cf_tests/test_datasets.py b/satpy/tests/writer_tests/cf_tests/test_datasets.py index d6784cd78f..b094feecbc 100644 --- a/satpy/tests/writer_tests/cf_tests/test_datasets.py +++ b/satpy/tests/writer_tests/cf_tests/test_datasets.py @@ 
-18,9 +18,10 @@
 """Tests for CF-compliant Dataset creation."""
 import datetime
 
+import numpy as np
 import pytest
 import xarray as xr
-from pyresample import AreaDefinition
+from pyresample import AreaDefinition, create_area_def
 
 
 def test_empty_collect_cf_datasets():
@@ -127,3 +128,23 @@
             ds['var1'].attrs["longitude"]
         assert ds2['var1']['latitude'].attrs['name'] == 'latitude'
         assert ds2['var1']['longitude'].attrs['name'] == 'longitude'
+
+    def test_geographic_area_coords_attrs(self):
+        """Test correct storage for area with lon/lat units."""
+        from satpy.tests.utils import make_fake_scene
+        from satpy.writers.cf.datasets import _collect_cf_dataset
+
+        scn = make_fake_scene(
+            {"ketolysis": np.arange(25).reshape(5, 5)},
+            daskify=True,
+            area=create_area_def("mavas", 4326, shape=(5, 5),
+                                 center=(0, 0), resolution=(1, 1)))
+
+        ds = _collect_cf_dataset([scn["ketolysis"]], include_lonlats=False)
+        assert ds["ketolysis"].attrs["grid_mapping"] == "mavas"
+        assert ds["mavas"].attrs["grid_mapping_name"] == "latitude_longitude"
+        assert ds["x"].attrs["units"] == "degrees_east"
+        assert ds["y"].attrs["units"] == "degrees_north"
+        assert ds["mavas"].attrs["longitude_of_prime_meridian"] == 0.0
+        np.testing.assert_allclose(ds["mavas"].attrs["semi_major_axis"], 6378137.0)
+        np.testing.assert_allclose(ds["mavas"].attrs["inverse_flattening"], 298.257223563)
diff --git a/satpy/tests/writer_tests/test_cf.py b/satpy/tests/writer_tests/test_cf.py
index 628a32fed3..046e689002 100644
--- a/satpy/tests/writer_tests/test_cf.py
+++ b/satpy/tests/writer_tests/test_cf.py
@@ -17,18 +17,15 @@
 # satpy. If not, see .
 """Tests for the CF writer."""
-import logging
 import os
 import tempfile
 import warnings
 from datetime import datetime
 
 import numpy as np
-import pyresample.geometry
 import pytest
 import xarray as xr
 from packaging.version import Version
-from pyresample import create_area_def
 
 from satpy import Scene
 from satpy.tests.utils import make_dsq
@@ -42,7 +39,6 @@
 # NOTE:
 # The following fixtures are not defined in this file, but are used and injected by Pytest:
 # - tmp_path
-# - caplog
 # - request
 
 
@@ -65,54 +61,6 @@ def __exit__(self, *args):
         os.remove(self.filename)
 
 
-def test_lonlat_storage(tmp_path):
-    """Test correct storage for area with lon/lat units."""
-    from ..utils import make_fake_scene
-    scn = make_fake_scene(
-        {"ketolysis": np.arange(25).reshape(5, 5)},
-        daskify=True,
-        area=create_area_def("mavas", 4326, shape=(5, 5),
-                             center=(0, 0), resolution=(1, 1)))
-
-    filename = os.fspath(tmp_path / "test.nc")
-    scn.save_datasets(filename=filename, writer="cf", include_lonlats=False)
-    with xr.open_dataset(filename) as ds:
-        assert ds["ketolysis"].attrs["grid_mapping"] == "mavas"
-        assert ds["mavas"].attrs["grid_mapping_name"] == "latitude_longitude"
-        assert ds["x"].attrs["units"] == "degrees_east"
-        assert ds["y"].attrs["units"] == "degrees_north"
-        assert ds["mavas"].attrs["longitude_of_prime_meridian"] == 0.0
-        np.testing.assert_allclose(ds["mavas"].attrs["semi_major_axis"], 6378137.0)
-        np.testing.assert_allclose(ds["mavas"].attrs["inverse_flattening"], 298.257223563)
-
-
-def test_is_projected(caplog):
-    """Tests for private _is_projected function."""
-    from satpy.writers.cf.crs import _is_projected
-
-    # test case with units but no area
-    da = xr.DataArray(
-        np.arange(25).reshape(5, 5),
-        dims=("y", "x"),
-        coords={"x": xr.DataArray(np.arange(5), dims=("x",), attrs={"units": "m"}),
-                "y": xr.DataArray(np.arange(5), dims=("y",), attrs={"units":
"m"})}) - assert _is_projected(da) - - da = xr.DataArray( - np.arange(25).reshape(5, 5), - dims=("y", "x"), - coords={"x": xr.DataArray(np.arange(5), dims=("x",), attrs={"units": "degrees_east"}), - "y": xr.DataArray(np.arange(5), dims=("y",), attrs={"units": "degrees_north"})}) - assert not _is_projected(da) - - da = xr.DataArray( - np.arange(25).reshape(5, 5), - dims=("y", "x")) - with caplog.at_level(logging.WARNING): - assert _is_projected(da) - assert "Failed to tell if data are projected." in caplog.text - - class TestCFWriter: """Test case for CF writer.""" @@ -498,62 +446,6 @@ def test_global_attr_history_and_Conventions(self): assert 'Created by pytroll/satpy on' in f.attrs['history'] -class TestCFWriterData: - """Test case for CF writer where data arrays are needed.""" - - @pytest.fixture - def datasets(self): - """Create test dataset.""" - data = [[75, 2], [3, 4]] - y = [1, 2] - x = [1, 2] - geos = pyresample.geometry.AreaDefinition( - area_id='geos', - description='geos', - proj_id='geos', - projection={'proj': 'geos', 'h': 35785831., 'a': 6378169., 'b': 6356583.8}, - width=2, height=2, - area_extent=[-1, -1, 1, 1]) - datasets = { - 'var1': xr.DataArray(data=data, - dims=('y', 'x'), - coords={'y': y, 'x': x}), - 'var2': xr.DataArray(data=data, - dims=('y', 'x'), - coords={'y': y, 'x': x}), - 'lat': xr.DataArray(data=data, - dims=('y', 'x'), - coords={'y': y, 'x': x}), - 'lon': xr.DataArray(data=data, - dims=('y', 'x'), - coords={'y': y, 'x': x})} - datasets['lat'].attrs['standard_name'] = 'latitude' - datasets['var1'].attrs['standard_name'] = 'dummy' - datasets['var2'].attrs['standard_name'] = 'dummy' - datasets['var2'].attrs['area'] = geos - datasets['var1'].attrs['area'] = geos - datasets['lat'].attrs['name'] = 'lat' - datasets['var1'].attrs['name'] = 'var1' - datasets['var2'].attrs['name'] = 'var2' - datasets['lon'].attrs['name'] = 'lon' - return datasets - - def test_is_lon_or_lat_dataarray(self, datasets): - """Test the is_lon_or_lat_dataarray function.""" - from satpy.writers.cf.area import is_lon_or_lat_dataarray - - assert is_lon_or_lat_dataarray(datasets['lat']) - assert not is_lon_or_lat_dataarray(datasets['var1']) - - def test_has_projection_coords(self, datasets): - """Test the has_projection_coords function.""" - from satpy.writers.cf.area import has_projection_coords - - assert has_projection_coords(datasets) - datasets['lat'].attrs['standard_name'] = 'dummy' - assert not has_projection_coords(datasets) - - class TestNetcdfEncodingKwargs: """Test netCDF compression encodings.""" From dabaa44223ed4f6acd634b9ab518ec149ca2fbfc Mon Sep 17 00:00:00 2001 From: ghiggi Date: Wed, 28 Jun 2023 00:49:10 +0200 Subject: [PATCH 10/37] Add logging on top-file --- satpy/tests/writer_tests/cf_tests/test_area.py | 3 --- satpy/writers/cf/attrs.py | 4 +++- satpy/writers/cf/dataarray.py | 3 +++ satpy/writers/cf/datasets.py | 3 +++ satpy/writers/cf/encoding.py | 3 +++ satpy/writers/cf/time.py | 5 +++++ 6 files changed, 17 insertions(+), 4 deletions(-) diff --git a/satpy/tests/writer_tests/cf_tests/test_area.py b/satpy/tests/writer_tests/cf_tests/test_area.py index e3454b3170..1dd82ddd9d 100644 --- a/satpy/tests/writer_tests/cf_tests/test_area.py +++ b/satpy/tests/writer_tests/cf_tests/test_area.py @@ -24,9 +24,6 @@ import xarray as xr from pyresample import AreaDefinition, SwathDefinition -logger = logging.getLogger(__name__) - - # NOTE: # The following fixtures are not defined in this file, but are used and injected by Pytest: # - caplog diff --git a/satpy/writers/cf/attrs.py 
b/satpy/writers/cf/attrs.py
index aac0f5f289..153d645594 100644
--- a/satpy/writers/cf/attrs.py
+++ b/satpy/writers/cf/attrs.py
@@ -16,15 +16,17 @@
 # You should have received a copy of the GNU General Public License along with
 # satpy. If not, see .
 """CF processing of attributes."""
-
 import datetime
 import json
+import logging
 from collections import OrderedDict
 
 import numpy as np
 
 from satpy.writers.utils import flatten_dict
 
+logger = logging.getLogger(__name__)
+
 
 class AttributeEncoder(json.JSONEncoder):
     """JSON encoder for dataset attributes."""
diff --git a/satpy/writers/cf/dataarray.py b/satpy/writers/cf/dataarray.py
index 9ca90ae52f..fd9b20be5e 100644
--- a/satpy/writers/cf/dataarray.py
+++ b/satpy/writers/cf/dataarray.py
@@ -16,12 +16,15 @@
 # You should have received a copy of the GNU General Public License along with
 # satpy. If not, see .
 """Utility to generate a CF-compliant DataArray."""
+import logging
 import warnings
 
 from satpy.writers.cf.attrs import preprocess_datarray_attrs
 from satpy.writers.cf.coords_attrs import add_xy_coords_attrs
 from satpy.writers.cf.time import EPOCH, _process_time_coord
 
+logger = logging.getLogger(__name__)
+
 
 def _handle_dataarray_name(original_name, numeric_name_prefix):
diff --git a/satpy/writers/cf/datasets.py b/satpy/writers/cf/datasets.py
index 09726c0e32..4baabbc894 100644
--- a/satpy/writers/cf/datasets.py
+++ b/satpy/writers/cf/datasets.py
@@ -16,6 +16,7 @@
 # You should have received a copy of the GNU General Public License along with
 # satpy. If not, see .
 """Utilities to generate CF-compliant Datasets."""
+import logging
 import warnings
 from collections import defaultdict
@@ -24,6 +25,8 @@
 from satpy.writers.cf.time import EPOCH
 from satpy.writers.cf_writer import CF_DTYPES, CF_VERSION
 
+logger = logging.getLogger(__name__)
+
 
 def get_extra_ds(dataarray, keys=None):
     """Get the ancillary_variables DataArrays associated with a dataset."""
diff --git a/satpy/writers/cf/encoding.py b/satpy/writers/cf/encoding.py
index c8ea0f25f4..55a48f70fd 100644
--- a/satpy/writers/cf/encoding.py
+++ b/satpy/writers/cf/encoding.py
@@ -16,11 +16,14 @@
 # You should have received a copy of the GNU General Public License along with
 # satpy. If not, see .
 """CF encoding."""
+import logging
 
 import numpy as np
 import xarray as xr
 from xarray.coding.times import CFDatetimeCoder
 
+logger = logging.getLogger(__name__)
+
 
 def _set_default_chunks(encoding, dataset):
     """Update encoding to preserve current dask chunks.
diff --git a/satpy/writers/cf/time.py b/satpy/writers/cf/time.py
index 821f7b47b0..05b90c4641 100644
--- a/satpy/writers/cf/time.py
+++ b/satpy/writers/cf/time.py
@@ -16,9 +16,14 @@
 # You should have received a copy of the GNU General Public License along with
 # satpy. If not, see .
"""CF processing of time dimension and coordinates.""" +import logging + import numpy as np import xarray as xr +logger = logging.getLogger(__name__) + + EPOCH = u"seconds since 1970-01-01 00:00:00" From ea12d9b348cac72cf86c3c3df6abe140fce5d42e Mon Sep 17 00:00:00 2001 From: Gionata Ghiggi Date: Wed, 28 Jun 2023 11:54:21 +0200 Subject: [PATCH 11/37] Update satpy/tests/writer_tests/cf_tests/test_dataaarray.py --- satpy/tests/writer_tests/cf_tests/test_dataaarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/tests/writer_tests/cf_tests/test_dataaarray.py b/satpy/tests/writer_tests/cf_tests/test_dataaarray.py index 13ed11474e..896de5c55b 100644 --- a/satpy/tests/writer_tests/cf_tests/test_dataaarray.py +++ b/satpy/tests/writer_tests/cf_tests/test_dataaarray.py @@ -68,7 +68,7 @@ def test_make_cf_dataarray_lonlat(): assert new_da["y"].attrs["units"] == "degrees_north" -class TestCFWriter: +class TestCfDataArray: """Test creation of CF DataArray.""" def get_test_attrs(self): From fe43fc8e1be7047aaebbe23e82a40cfbd771b5f1 Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 10 Oct 2023 16:43:20 +0200 Subject: [PATCH 12/37] Dummy changes --- satpy/tests/writer_tests/cf_tests/test_attrs.py | 2 +- satpy/tests/writer_tests/cf_tests/test_dataaarray.py | 2 +- satpy/writers/cf/attrs.py | 2 +- satpy/writers/cf/encoding.py | 8 ++++---- satpy/writers/cf_writer.py | 10 +++++----- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/satpy/tests/writer_tests/cf_tests/test_attrs.py b/satpy/tests/writer_tests/cf_tests/test_attrs.py index 87cdfd173d..5895f115c3 100644 --- a/satpy/tests/writer_tests/cf_tests/test_attrs.py +++ b/satpy/tests/writer_tests/cf_tests/test_attrs.py @@ -45,7 +45,7 @@ def get_test_attrs(self): 'numpy_bool': True, 'numpy_void': np.void(0), 'numpy_bytes': np.bytes_('test'), - 'numpy_string': np.string_('test'), + 'numpy_string': np.str_('test'), 'list': [1, 2, np.float64(3)], 'nested_list': ["1", ["2", [3]]], 'bool': True, diff --git a/satpy/tests/writer_tests/cf_tests/test_dataaarray.py b/satpy/tests/writer_tests/cf_tests/test_dataaarray.py index 896de5c55b..a67cae9ca2 100644 --- a/satpy/tests/writer_tests/cf_tests/test_dataaarray.py +++ b/satpy/tests/writer_tests/cf_tests/test_dataaarray.py @@ -90,7 +90,7 @@ def get_test_attrs(self): 'numpy_bool': True, 'numpy_void': np.void(0), 'numpy_bytes': np.bytes_('test'), - 'numpy_string': np.string_('test'), + 'numpy_string': np.str_('test'), 'list': [1, 2, np.float64(3)], 'nested_list': ["1", ["2", [3]]], 'bool': True, diff --git a/satpy/writers/cf/attrs.py b/satpy/writers/cf/attrs.py index 153d645594..fad60fe97e 100644 --- a/satpy/writers/cf/attrs.py +++ b/satpy/writers/cf/attrs.py @@ -166,7 +166,7 @@ def _remove_satpy_attrs(new_data): def _format_prerequisites_attrs(dataarray): """Reformat prerequisites attribute value to string.""" if 'prerequisites' in dataarray.attrs: - dataarray.attrs['prerequisites'] = [np.string_(str(prereq)) for prereq in dataarray.attrs['prerequisites']] + dataarray.attrs['prerequisites'] = [np.bytes_(str(prereq)) for prereq in dataarray.attrs['prerequisites']] return dataarray diff --git a/satpy/writers/cf/encoding.py b/satpy/writers/cf/encoding.py index 55a48f70fd..4e244e82f9 100644 --- a/satpy/writers/cf/encoding.py +++ b/satpy/writers/cf/encoding.py @@ -98,16 +98,16 @@ def _update_encoding_dataset_names(encoding, dataset, numeric_name_prefix): return encoding -def update_encoding(dataset, to_netcdf_kwargs, numeric_name_prefix='CHANNEL_'): +def update_encoding(dataset, 
to_engine_kwargs, numeric_name_prefix='CHANNEL_'): """Update encoding. Preserve dask chunks, avoid fill values in coordinate variables and make sure that time & time bounds have the same units. """ - other_to_netcdf_kwargs = to_netcdf_kwargs.copy() - encoding = other_to_netcdf_kwargs.pop('encoding', {}).copy() + other_to_engine_kwargs = to_engine_kwargs.copy() + encoding = other_to_engine_kwargs.pop('encoding', {}).copy() encoding = _update_encoding_dataset_names(encoding, dataset, numeric_name_prefix) encoding = _set_default_chunks(encoding, dataset) encoding = _set_default_fill_value(encoding, dataset) encoding = _set_default_time_encoding(encoding, dataset) - return encoding, other_to_netcdf_kwargs + return encoding, other_to_engine_kwargs diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index 85c6fe999b..30ca7e784e 100644 --- a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -186,7 +186,7 @@ CF_VERSION = 'CF-1.7' -# Numpy datatypes compatible with all netCDF4 backends. ``np.unicode_`` is +# Numpy datatypes compatible with all netCDF4 backends. ``np.str_`` is # excluded because h5py (and thus h5netcdf) has problems with unicode, see # https://github.com/h5py/h5py/issues/624.""" NC4_DTYPES = [np.dtype('int8'), np.dtype('uint8'), @@ -194,7 +194,7 @@ np.dtype('int32'), np.dtype('uint32'), np.dtype('int64'), np.dtype('uint64'), np.dtype('float32'), np.dtype('float64'), - np.string_] + np.bytes_] # Unsigned and int64 isn't CF 1.7 compatible # Note: Unsigned and int64 are CF 1.9 compatible @@ -203,7 +203,7 @@ np.dtype('int32'), np.dtype('float32'), np.dtype('float64'), - np.string_] + np.bytes_] def _sanitize_writer_kwargs(writer_kwargs): @@ -314,7 +314,7 @@ def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, # - If single netCDF, it write directly for group_name, ds in grouped_datasets.items(): encoding, other_to_netcdf_kwargs = update_encoding(ds, - to_netcdf_kwargs=to_netcdf_kwargs, + to_engine_kwargs=to_netcdf_kwargs, numeric_name_prefix=numeric_name_prefix) res = ds.to_netcdf(filename, engine=engine, @@ -346,7 +346,7 @@ def da2cf(dataarray, epoch=EPOCH, flatten_attrs=False, exclude_attrs=None, """ from satpy.writers.cf.dataarray import make_cf_dataarray warnings.warn('CFWriter.da2cf is deprecated.' 
- 'Use satpy.writers.cf_writer.make_cf_dataarray instead.', + 'Use satpy.writers.cf.dataarray.make_cf_dataarray instead.', DeprecationWarning, stacklevel=3) return make_cf_dataarray(dataarray=dataarray, epoch=epoch, From c37fcb763110e0c3fc4bbfc5bb72df249a4b6017 Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 10 Oct 2023 17:08:21 +0200 Subject: [PATCH 13/37] Set private functions --- satpy/readers/amsr2_l2_gaasp.py | 4 ++-- satpy/tests/writer_tests/cf_tests/test_area.py | 18 +++++++++--------- .../tests/writer_tests/cf_tests/test_attrs.py | 6 +++--- satpy/writers/cf/area.py | 10 +++++----- satpy/writers/cf/attrs.py | 16 ++++++++-------- satpy/writers/cf/dataarray.py | 4 ++-- satpy/writers/cf/datasets.py | 6 +++--- satpy/writers/cf/time.py | 2 +- 8 files changed, 33 insertions(+), 33 deletions(-) diff --git a/satpy/readers/amsr2_l2_gaasp.py b/satpy/readers/amsr2_l2_gaasp.py index 5f91e2d965..4f045057b4 100644 --- a/satpy/readers/amsr2_l2_gaasp.py +++ b/satpy/readers/amsr2_l2_gaasp.py @@ -189,7 +189,7 @@ def _available_if_this_file_type(self, configured_datasets): continue yield self.file_type_matches(ds_info['file_type']), ds_info - def _add_lonlat_coords(self, data_arr, ds_info): + def __add_lonlat_coords(self, data_arr, ds_info): lat_coord = None lon_coord = None for coord_name in data_arr.coords: @@ -209,7 +209,7 @@ def _get_ds_info_for_data_arr(self, var_name, data_arr): if x_dim_name in self.dim_resolutions: ds_info['resolution'] = self.dim_resolutions[x_dim_name] if not self.is_gridded and data_arr.coords: - self._add_lonlat_coords(data_arr, ds_info) + self.__add_lonlat_coords(data_arr, ds_info) return ds_info def _is_2d_yx_data_array(self, data_arr): diff --git a/satpy/tests/writer_tests/cf_tests/test_area.py b/satpy/tests/writer_tests/cf_tests/test_area.py index 1dd82ddd9d..92088f6d68 100644 --- a/satpy/tests/writer_tests/cf_tests/test_area.py +++ b/satpy/tests/writer_tests/cf_tests/test_area.py @@ -356,9 +356,9 @@ def _gm_matches(gmapping, expected): assert new_ds.attrs['grid_mapping'] == 'geos' _gm_matches(grid_mapping, geos_expected) - def test_add_lonlat_coords(self): + def test__add_lonlat_coords(self): """Test the conversion from areas to lon/lat.""" - from satpy.writers.cf.area import add_lonlat_coords + from satpy.writers.cf.area import _add_lonlat_coords area = AreaDefinition( 'seviri', @@ -371,7 +371,7 @@ def test_add_lonlat_coords(self): lons_ref, lats_ref = area.get_lonlats() dataarray = xr.DataArray(data=[[1, 2], [3, 4]], dims=('y', 'x'), attrs={'area': area}) - res = add_lonlat_coords(dataarray) + res = _add_lonlat_coords(dataarray) # original should be unmodified assert 'longitude' not in dataarray.coords @@ -394,7 +394,7 @@ def test_add_lonlat_coords(self): lons_ref, lats_ref = area.get_lonlats() dataarray = xr.DataArray(data=da.from_array(np.arange(3 * 10 * 10).reshape(3, 10, 10), chunks=(1, 5, 5)), dims=('bands', 'y', 'x'), attrs={'area': area}) - res = add_lonlat_coords(dataarray) + res = _add_lonlat_coords(dataarray) # original should be unmodified assert 'longitude' not in dataarray.coords @@ -469,12 +469,12 @@ def datasets(self): datasets['lon'].attrs['name'] = 'lon' return datasets - def test_is_lon_or_lat_dataarray(self, datasets): - """Test the is_lon_or_lat_dataarray function.""" - from satpy.writers.cf.area import is_lon_or_lat_dataarray + def test__is_lon_or_lat_dataarray(self, datasets): + """Test the _is_lon_or_lat_dataarray function.""" + from satpy.writers.cf.area import _is_lon_or_lat_dataarray - assert is_lon_or_lat_dataarray(datasets['lat']) - 
assert not is_lon_or_lat_dataarray(datasets['var1']) + assert _is_lon_or_lat_dataarray(datasets['lat']) + assert not _is_lon_or_lat_dataarray(datasets['var1']) def test_has_projection_coords(self, datasets): """Test the has_projection_coords function.""" diff --git a/satpy/tests/writer_tests/cf_tests/test_attrs.py b/satpy/tests/writer_tests/cf_tests/test_attrs.py index 5895f115c3..a969765181 100644 --- a/satpy/tests/writer_tests/cf_tests/test_attrs.py +++ b/satpy/tests/writer_tests/cf_tests/test_attrs.py @@ -124,14 +124,14 @@ def assertDictWithArraysEqual(self, d1, d2): assert isinstance(val2, np.generic) assert val1.dtype == val2.dtype - def test_encode_attrs_nc(self): + def test__encode_attrs_nc(self): """Test attributes encoding.""" - from satpy.writers.cf.attrs import encode_attrs_nc + from satpy.writers.cf.attrs import _encode_attrs_nc attrs, expected, _ = self.get_test_attrs() # Test encoding - encoded = encode_attrs_nc(attrs) + encoded = _encode_attrs_nc(attrs) self.assertDictWithArraysEqual(expected, encoded) # Test decoding of json-encoded attributes diff --git a/satpy/writers/cf/area.py b/satpy/writers/cf/area.py index 68113c1ee2..5916083d62 100644 --- a/satpy/writers/cf/area.py +++ b/satpy/writers/cf/area.py @@ -28,7 +28,7 @@ logger = logging.getLogger(__name__) -def add_lonlat_coords(dataarray): +def _add_lonlat_coords(dataarray): """Add 'longitude' and 'latitude' coordinates to DataArray.""" dataarray = dataarray.copy() area = dataarray.attrs['area'] @@ -73,7 +73,7 @@ def area2cf(dataarray, include_lonlats=False, got_lonlats=False): """Convert an area to at CF grid mapping or lon and lats.""" res = [] if not got_lonlats and (isinstance(dataarray.attrs['area'], SwathDefinition) or include_lonlats): - dataarray = add_lonlat_coords(dataarray) + dataarray = _add_lonlat_coords(dataarray) if isinstance(dataarray.attrs['area'], AreaDefinition): dataarray, gmapping = _add_grid_mapping(dataarray) res.append(gmapping) @@ -81,7 +81,7 @@ def area2cf(dataarray, include_lonlats=False, got_lonlats=False): return res -def is_lon_or_lat_dataarray(dataarray): +def _is_lon_or_lat_dataarray(dataarray): """Check if the DataArray represents the latitude or longitude coordinate.""" if 'standard_name' in dataarray.attrs and dataarray.attrs['standard_name'] in ['longitude', 'latitude']: return True @@ -91,7 +91,7 @@ def is_lon_or_lat_dataarray(dataarray): def has_projection_coords(ds_collection): """Check if DataArray collection has a "longitude" or "latitude" DataArray.""" for dataarray in ds_collection.values(): - if is_lon_or_lat_dataarray(dataarray): + if _is_lon_or_lat_dataarray(dataarray): return True return False @@ -124,7 +124,7 @@ def make_alt_coords_unique(datas, pretty=False): tokens = defaultdict(set) for dataset in datas.values(): for coord_name in dataset.coords: - if not is_lon_or_lat_dataarray(dataset[coord_name]) and coord_name not in dataset.dims: + if not _is_lon_or_lat_dataarray(dataset[coord_name]) and coord_name not in dataset.dims: tokens[coord_name].add(tokenize(dataset[coord_name].data)) coords_unique = dict([(coord_name, len(tokens) == 1) for coord_name, tokens in tokens.items()]) diff --git a/satpy/writers/cf/attrs.py b/satpy/writers/cf/attrs.py index fad60fe97e..d4a41f2bfc 100644 --- a/satpy/writers/cf/attrs.py +++ b/satpy/writers/cf/attrs.py @@ -65,7 +65,7 @@ def _encode(self, obj): return str(obj) -def _encode_nc(obj): +def __encode_nc(obj): """Try to encode `obj` as a netCDF/Zarr compatible datatype which most closely resembles the object's nature. 
Raises: @@ -90,7 +90,7 @@ def _encode_nc(obj): raise ValueError('Unable to encode') -def encode_nc(obj): +def _encode_nc(obj): """Encode the given object as a netcdf compatible datatype.""" try: return obj.to_cf() @@ -104,9 +104,9 @@ def _encode_python_objects(obj): If on failure, encode as a string. Plain lists are encoded recursively. """ if isinstance(obj, (list, tuple)) and all([not isinstance(item, (list, tuple)) for item in obj]): - return [encode_nc(item) for item in obj] + return [_encode_nc(item) for item in obj] try: - dump = _encode_nc(obj) + dump = __encode_nc(obj) except ValueError: try: # Decode byte-strings @@ -117,7 +117,7 @@ def _encode_python_objects(obj): return dump -def encode_attrs_nc(attrs): +def _encode_attrs_nc(attrs): """Encode dataset attributes in a netcdf compatible datatype. Args: @@ -130,7 +130,7 @@ def encode_attrs_nc(attrs): encoded_attrs = [] for key, val in sorted(attrs.items()): if val is not None: - encoded_attrs.append((key, encode_nc(val))) + encoded_attrs.append((key, _encode_nc(val))) return OrderedDict(encoded_attrs) @@ -193,7 +193,7 @@ def preprocess_datarray_attrs(dataarray, flatten_attrs, exclude_attrs): if flatten_attrs: dataarray.attrs = flatten_dict(dataarray.attrs) - dataarray.attrs = encode_attrs_nc(dataarray.attrs) + dataarray.attrs = _encode_attrs_nc(dataarray.attrs) return dataarray @@ -215,7 +215,7 @@ def preprocess_header_attrs(header_attrs, flatten_attrs=False): if header_attrs is not None: if flatten_attrs: header_attrs = flatten_dict(header_attrs) - header_attrs = encode_attrs_nc(header_attrs) # OrderedDict + header_attrs = _encode_attrs_nc(header_attrs) # OrderedDict else: header_attrs = {} header_attrs = _add_history(header_attrs) diff --git a/satpy/writers/cf/dataarray.py b/satpy/writers/cf/dataarray.py index fd9b20be5e..a5322cfee4 100644 --- a/satpy/writers/cf/dataarray.py +++ b/satpy/writers/cf/dataarray.py @@ -21,7 +21,7 @@ from satpy.writers.cf.attrs import preprocess_datarray_attrs from satpy.writers.cf.coords_attrs import add_xy_coords_attrs -from satpy.writers.cf.time import EPOCH, _process_time_coord +from satpy.writers.cf.time import EPOCH, process_time_coord logger = logging.getLogger(__name__) @@ -96,5 +96,5 @@ def make_cf_dataarray(dataarray, flatten_attrs=flatten_attrs, exclude_attrs=exclude_attrs) dataarray = add_xy_coords_attrs(dataarray) - dataarray = _process_time_coord(dataarray, epoch=epoch) + dataarray = process_time_coord(dataarray, epoch=epoch) return dataarray diff --git a/satpy/writers/cf/datasets.py b/satpy/writers/cf/datasets.py index 4baabbc894..c87e6673d4 100644 --- a/satpy/writers/cf/datasets.py +++ b/satpy/writers/cf/datasets.py @@ -28,7 +28,7 @@ logger = logging.getLogger(__name__) -def get_extra_ds(dataarray, keys=None): +def _get_extra_ds(dataarray, keys=None): """Get the ancillary_variables DataArrays associated to a dataset.""" ds_collection = {} # Retrieve ancillary variable datarrays @@ -36,7 +36,7 @@ def get_extra_ds(dataarray, keys=None): ancillary_variable = ancillary_dataarray.name if keys and ancillary_variable not in keys: keys.append(ancillary_variable) - ds_collection.update(get_extra_ds(ancillary_dataarray, keys=keys)) + ds_collection.update(_get_extra_ds(ancillary_dataarray, keys=keys)) # Add input dataarray ds_collection[dataarray.attrs['name']] = dataarray return ds_collection @@ -111,7 +111,7 @@ def _collect_cf_dataset(list_dataarrays, # --> Since keys=None, it doesn't never retrieve ancillary variables !!! 
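# A rough sketch of what ``_get_extra_ds`` gathers for the loop below (names
# are illustrative; each DataArray, ancillaries included, is assumed to carry
# a 'name' attribute):
#
#     anc = xr.DataArray([3, 4], attrs={'name': 'anc'})
#     main = xr.DataArray([1, 2],
#                         attrs={'name': 'main', 'ancillary_variables': [anc]})
#     _get_extra_ds(main)  # -> {'anc': anc, 'main': main}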
ds_collection = {} for dataarray in list_dataarrays: - ds_collection.update(get_extra_ds(dataarray)) + ds_collection.update(_get_extra_ds(dataarray)) # Check if one DataArray in the collection has 'longitude' or 'latitude' got_lonlats = has_projection_coords(ds_collection) diff --git a/satpy/writers/cf/time.py b/satpy/writers/cf/time.py index 05b90c4641..4c5cbf5bc9 100644 --- a/satpy/writers/cf/time.py +++ b/satpy/writers/cf/time.py @@ -47,7 +47,7 @@ def add_time_bounds_dimension(ds, time="time"): return ds -def _process_time_coord(dataarray, epoch): +def process_time_coord(dataarray, epoch): """Process the 'time' coordinate, if existing. It expand the DataArray with a time dimension if does not yet exists. From 22a8d097b698dd87039a13d36a336ccb21fcf277 Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 10 Oct 2023 18:11:46 +0200 Subject: [PATCH 14/37] Reorg files --- satpy/_scene_converters.py | 4 +- satpy/scene.py | 2 +- satpy/tests/utils.py | 15 ++ .../tests/writer_tests/cf_tests/test_area.py | 32 ----- .../tests/writer_tests/cf_tests/test_attrs.py | 18 +-- .../{test_time_coords.py => test_coords.py} | 41 +++++- .../writer_tests/cf_tests/test_dataaarray.py | 22 +-- .../writer_tests/cf_tests/test_datasets.py | 27 ++-- .../writer_tests/cf_tests/test_encoding.py | 4 +- satpy/writers/cf/__init__.py | 2 + satpy/writers/cf/coords.py | 128 ++++++++++++++++++ satpy/writers/cf/coords_attrs.py | 46 ------- satpy/writers/cf/crs.py | 47 ------- satpy/writers/cf/dataarray.py | 4 +- satpy/writers/cf/datasets.py | 8 +- satpy/writers/cf/time.py | 69 ---------- satpy/writers/cf_writer.py | 2 +- 17 files changed, 219 insertions(+), 252 deletions(-) rename satpy/tests/writer_tests/cf_tests/{test_time_coords.py => test_coords.py} (56%) create mode 100644 satpy/writers/cf/coords.py delete mode 100644 satpy/writers/cf/coords_attrs.py delete mode 100644 satpy/writers/cf/crs.py delete mode 100644 satpy/writers/cf/time.py diff --git a/satpy/_scene_converters.py b/satpy/_scene_converters.py index ba4432a58f..c5c0b1c896 100644 --- a/satpy/_scene_converters.py +++ b/satpy/_scene_converters.py @@ -66,7 +66,7 @@ def to_xarray(scn, epoch (str): Reference time for encoding the time coordinates (if available). Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is retrieved using "from satpy.cf_writer import EPOCH" + If None, the default reference time is retrieved using "from satpy.writers.cf import EPOCH" flatten_attrs (bool): If True, flatten dict-type attributes. exclude_attrs (list): @@ -90,8 +90,8 @@ def to_xarray(scn, A CF-compliant xr.Dataset """ + from satpy.writers.cf import EPOCH from satpy.writers.cf.datasets import collect_cf_datasets - from satpy.writers.cf.time import EPOCH if epoch is None: epoch = EPOCH diff --git a/satpy/scene.py b/satpy/scene.py index e3e71811e9..52580d14e8 100644 --- a/satpy/scene.py +++ b/satpy/scene.py @@ -1128,7 +1128,7 @@ def to_xarray(self, epoch (str): Reference time for encoding the time coordinates (if available). Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is retrieved using "from satpy.cf_writer import EPOCH" + If None, the default reference time is retrieved using "from satpy.writers.cf import EPOCH" flatten_attrs (bool): If True, flatten dict-type attributes. 
exclude_attrs (list): diff --git a/satpy/tests/utils.py b/satpy/tests/utils.py index c87cd1055c..155916aca1 100644 --- a/satpy/tests/utils.py +++ b/satpy/tests/utils.py @@ -407,3 +407,18 @@ def assert_attrs_equal(attrs, attrs_exp, tolerance=0): ) except TypeError: assert attrs[key] == attrs_exp[key], err_msg + + +def assert_dict_array_equality(d1, d2): + """Check that dicts containing arrays are equal.""" + assert set(d1.keys()) == set(d2.keys()) + for key, val1 in d1.items(): + val2 = d2[key] + if isinstance(val1, np.ndarray): + np.testing.assert_array_equal(val1, val2) + assert val1.dtype == val2.dtype + else: + assert val1 == val2 + if isinstance(val1, (np.floating, np.integer, np.bool_)): + assert isinstance(val2, np.generic) + assert val1.dtype == val2.dtype diff --git a/satpy/tests/writer_tests/cf_tests/test_area.py b/satpy/tests/writer_tests/cf_tests/test_area.py index 92088f6d68..5b7dd86d38 100644 --- a/satpy/tests/writer_tests/cf_tests/test_area.py +++ b/satpy/tests/writer_tests/cf_tests/test_area.py @@ -16,18 +16,12 @@ # You should have received a copy of the GNU General Public License along with # satpy. If not, see . """Tests for the CF Area.""" -import logging - import dask.array as da import numpy as np import pytest import xarray as xr from pyresample import AreaDefinition, SwathDefinition -# NOTE: -# The following fixtures are not defined in this file, but are used and injected by Pytest: -# - caplog - class TestCFArea: """Test case for CF Area.""" @@ -406,32 +400,6 @@ def test__add_lonlat_coords(self): assert {'name': 'latitude', 'standard_name': 'latitude', 'units': 'degrees_north'}.items() <= lat.attrs.items() assert {'name': 'longitude', 'standard_name': 'longitude', 'units': 'degrees_east'}.items() <= lon.attrs.items() - def test_is_projected(self, caplog): - """Tests for private _is_projected function.""" - from satpy.writers.cf.crs import _is_projected - - # test case with units but no area - da = xr.DataArray( - np.arange(25).reshape(5, 5), - dims=("y", "x"), - coords={"x": xr.DataArray(np.arange(5), dims=("x",), attrs={"units": "m"}), - "y": xr.DataArray(np.arange(5), dims=("y",), attrs={"units": "m"})}) - assert _is_projected(da) - - da = xr.DataArray( - np.arange(25).reshape(5, 5), - dims=("y", "x"), - coords={"x": xr.DataArray(np.arange(5), dims=("x",), attrs={"units": "degrees_east"}), - "y": xr.DataArray(np.arange(5), dims=("y",), attrs={"units": "degrees_north"})}) - assert not _is_projected(da) - - da = xr.DataArray( - np.arange(25).reshape(5, 5), - dims=("y", "x")) - with caplog.at_level(logging.WARNING): - assert _is_projected(da) - assert "Failed to tell if data are projected." 
in caplog.text - @pytest.fixture def datasets(self): """Create test dataset.""" diff --git a/satpy/tests/writer_tests/cf_tests/test_attrs.py b/satpy/tests/writer_tests/cf_tests/test_attrs.py index a969765181..6988e761ee 100644 --- a/satpy/tests/writer_tests/cf_tests/test_attrs.py +++ b/satpy/tests/writer_tests/cf_tests/test_attrs.py @@ -109,30 +109,16 @@ def get_test_attrs(self): 'raw_metadata_dict_b': np.array([1, 2, 3], dtype='uint8')} return attrs, encoded, encoded_flat - def assertDictWithArraysEqual(self, d1, d2): - """Check that dicts containing arrays are equal.""" - # TODO: this is also used by test_da2cf - assert set(d1.keys()) == set(d2.keys()) - for key, val1 in d1.items(): - val2 = d2[key] - if isinstance(val1, np.ndarray): - np.testing.assert_array_equal(val1, val2) - assert val1.dtype == val2.dtype - else: - assert val1 == val2 - if isinstance(val1, (np.floating, np.integer, np.bool_)): - assert isinstance(val2, np.generic) - assert val1.dtype == val2.dtype - def test__encode_attrs_nc(self): """Test attributes encoding.""" + from satpy.tests.utils import assert_dict_array_equality from satpy.writers.cf.attrs import _encode_attrs_nc attrs, expected, _ = self.get_test_attrs() # Test encoding encoded = _encode_attrs_nc(attrs) - self.assertDictWithArraysEqual(expected, encoded) + assert_dict_array_equality(expected, encoded) # Test decoding of json-encoded attributes raw_md_roundtrip = {'recarray': [[0, 0], [0, 0], [0, 0]], diff --git a/satpy/tests/writer_tests/cf_tests/test_time_coords.py b/satpy/tests/writer_tests/cf_tests/test_coords.py similarity index 56% rename from satpy/tests/writer_tests/cf_tests/test_time_coords.py rename to satpy/tests/writer_tests/cf_tests/test_coords.py index ce7845dcca..9e9d8c4607 100644 --- a/satpy/tests/writer_tests/cf_tests/test_time_coords.py +++ b/satpy/tests/writer_tests/cf_tests/test_coords.py @@ -16,16 +16,22 @@ # You should have received a copy of the GNU General Public License along with # satpy. If not, see . 
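# Context for the renamed test module below: ``add_time_bounds_dimension``
# (now hosted in satpy.writers.cf.coords) attaches a CF-style bounds variable
# and points the time coordinate at it. A rough sketch of the resulting
# layout (values illustrative):
#
#     ds["time_bnds"]                      # shape (1, 2): [[start, end]]
#     ds["time"].attrs["bounds"]           # "time_bnds"
#     ds["time"].attrs["standard_name"]    # "time"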
"""CF processing of time information (coordinates and dimensions).""" +import logging + import numpy as np import xarray as xr +# NOTE: +# The following fixtures are not defined in this file, but are used and injected by Pytest: +# - caplog + class TestCFtime: """Test cases for CF time dimension and coordinates.""" def test_add_time_bounds_dimension(self): """Test addition of CF-compliant time attributes.""" - from satpy.writers.cf.time import add_time_bounds_dimension + from satpy.writers.cf.coords import add_time_bounds_dimension test_array = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) times = np.array(['2018-05-30T10:05:00', '2018-05-30T10:05:01', @@ -42,3 +48,36 @@ def test_add_time_bounds_dimension(self): assert "time_bnds" in list(ds.data_vars) assert "bounds" in ds["time"].attrs assert "standard_name" in ds["time"].attrs + + +class TestCFcoords: + """Test cases for CF spatial dimension and coordinates.""" + + def test_is_projected(self, caplog): + """Tests for private _is_projected function.""" + from satpy.writers.cf.coords import _is_projected + + # test case with units but no area + da = xr.DataArray( + np.arange(25).reshape(5, 5), + dims=("y", "x"), + coords={"x": xr.DataArray(np.arange(5), dims=("x",), attrs={"units": "m"}), + "y": xr.DataArray(np.arange(5), dims=("y",), attrs={"units": "m"})}) + assert _is_projected(da) + + da = xr.DataArray( + np.arange(25).reshape(5, 5), + dims=("y", "x"), + coords={"x": xr.DataArray(np.arange(5), dims=("x",), attrs={"units": "degrees_east"}), + "y": xr.DataArray(np.arange(5), dims=("y",), attrs={"units": "degrees_north"})}) + assert not _is_projected(da) + + da = xr.DataArray( + np.arange(25).reshape(5, 5), + dims=("y", "x")) + with caplog.at_level(logging.WARNING): + assert _is_projected(da) + assert "Failed to tell if data are projected." 
in caplog.text + + # add_xy_coords_attrs + # process_time_coord diff --git a/satpy/tests/writer_tests/cf_tests/test_dataaarray.py b/satpy/tests/writer_tests/cf_tests/test_dataaarray.py index a67cae9ca2..43b87cfc20 100644 --- a/satpy/tests/writer_tests/cf_tests/test_dataaarray.py +++ b/satpy/tests/writer_tests/cf_tests/test_dataaarray.py @@ -154,23 +154,9 @@ def get_test_attrs(self): 'raw_metadata_dict_b': np.array([1, 2, 3], dtype='uint8')} return attrs, encoded, encoded_flat - def assertDictWithArraysEqual(self, d1, d2): - """Check that dicts containing arrays are equal.""" - # TODO: also used by cf/test_attrs.py - assert set(d1.keys()) == set(d2.keys()) - for key, val1 in d1.items(): - val2 = d2[key] - if isinstance(val1, np.ndarray): - np.testing.assert_array_equal(val1, val2) - assert val1.dtype == val2.dtype - else: - assert val1 == val2 - if isinstance(val1, (np.floating, np.integer, np.bool_)): - assert isinstance(val2, np.generic) - assert val1.dtype == val2.dtype - def test_make_cf_dataarray(self): """Test the conversion of a DataArray to a CF-compatible DataArray.""" + from satpy.tests.utils import assert_dict_array_equality from satpy.writers.cf.dataarray import make_cf_dataarray # Create set of test attributes @@ -200,12 +186,12 @@ def test_make_cf_dataarray(self): np.testing.assert_array_equal(res['acq_time'], arr['acq_time']) assert res['x'].attrs == {'units': 'm', 'standard_name': 'projection_x_coordinate'} assert res['y'].attrs == {'units': 'm', 'standard_name': 'projection_y_coordinate'} - self.assertDictWithArraysEqual(res.attrs, attrs_expected) + assert_dict_array_equality(res.attrs, attrs_expected) # Test attribute kwargs res_flat = make_cf_dataarray(arr, flatten_attrs=True, exclude_attrs=['int']) attrs_expected_flat.pop('int') - self.assertDictWithArraysEqual(res_flat.attrs, attrs_expected_flat) + assert_dict_array_equality(res_flat.attrs, attrs_expected_flat) def test_make_cf_dataarray_one_dimensional_array(self): """Test the conversion of an 1d DataArray to a CF-compatible DataArray.""" @@ -214,3 +200,5 @@ def test_make_cf_dataarray_one_dimensional_array(self): arr = xr.DataArray(np.array([1, 2, 3, 4]), attrs={}, dims=('y',), coords={'y': [0, 1, 2, 3], 'acq_time': ('y', [0, 1, 2, 3])}) _ = make_cf_dataarray(arr) + + # _handle_dataarray_name diff --git a/satpy/tests/writer_tests/cf_tests/test_datasets.py b/satpy/tests/writer_tests/cf_tests/test_datasets.py index b094feecbc..d92099e869 100644 --- a/satpy/tests/writer_tests/cf_tests/test_datasets.py +++ b/satpy/tests/writer_tests/cf_tests/test_datasets.py @@ -15,7 +15,7 @@ # # You should have received a copy of the GNU General Public License along with # satpy. If not, see . 
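# The shared helper that replaces the two copies of ``assertDictWithArraysEqual``
# above lives in satpy.tests.utils. A minimal usage sketch (data illustrative):
#
#     import numpy as np
#     from satpy.tests.utils import assert_dict_array_equality
#
#     d1 = {"a": np.array([1, 2], dtype="uint8"), "b": 1.0}
#     d2 = {"a": np.array([1, 2], dtype="uint8"), "b": 1.0}
#     assert_dict_array_equality(d1, d2)  # passes: keys, values and dtypes match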
-"""Tests CF-compliant DataArray creation.""" +"""Tests CF-compliant Dataset(s) creation.""" import datetime import numpy as np @@ -24,18 +24,10 @@ from pyresample import AreaDefinition, create_area_def -def test_empty_collect_cf_datasets(): - """Test that if no DataArrays, collect_cf_datasets raise error.""" - from satpy.writers.cf.datasets import collect_cf_datasets - - with pytest.raises(RuntimeError): - collect_cf_datasets(list_dataarrays=[]) - - -class TestCollectCfDatasets: +class TestCollectCfDataset: """Test case for collect_cf_dataset.""" - def test_collect_cf_dataarrays(self): + def test_collect_cf_dataset(self): """Test collecting CF datasets from a DataArray objects.""" from satpy.writers.cf.datasets import _collect_cf_dataset @@ -75,7 +67,7 @@ def test_collect_cf_dataarrays(self): assert 'grid_mapping' not in da_var2.attrs assert da_var2.attrs['long_name'] == 'variable 2' - def test_collect_cf_dataarrays_with_latitude_named_lat(self): + def test_collect_cf_dataset_with_latitude_named_lat(self): """Test collecting CF datasets with latitude named lat.""" from satpy.writers.cf.datasets import _collect_cf_dataset @@ -148,3 +140,14 @@ def test_geographic_area_coords_attrs(self): assert ds["mavas"].attrs["longitude_of_prime_meridian"] == 0.0 np.testing.assert_allclose(ds["mavas"].attrs["semi_major_axis"], 6378137.0) np.testing.assert_allclose(ds["mavas"].attrs["inverse_flattening"], 298.257223563) + + +class TestCollectCfDatasets: + """Test case for collect_cf_datasets.""" + + def test_empty_collect_cf_datasets(self): + """Test that if no DataArrays, collect_cf_datasets raise error.""" + from satpy.writers.cf.datasets import collect_cf_datasets + + with pytest.raises(RuntimeError): + collect_cf_datasets(list_dataarrays=[]) diff --git a/satpy/tests/writer_tests/cf_tests/test_encoding.py b/satpy/tests/writer_tests/cf_tests/test_encoding.py index 66f7c72a48..125c7eec94 100644 --- a/satpy/tests/writer_tests/cf_tests/test_encoding.py +++ b/satpy/tests/writer_tests/cf_tests/test_encoding.py @@ -22,8 +22,8 @@ import xarray as xr -class TestUpdateDatasetEncodings: - """Test update of Dataset encodings.""" +class TestUpdateEncoding: + """Test update of dataset encodings.""" @pytest.fixture def fake_ds(self): diff --git a/satpy/writers/cf/__init__.py b/satpy/writers/cf/__init__.py index f597a9264c..c48acebcf9 100644 --- a/satpy/writers/cf/__init__.py +++ b/satpy/writers/cf/__init__.py @@ -1,3 +1,5 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """Code for generation of CF-compliant datasets.""" + +EPOCH = u"seconds since 1970-01-01 00:00:00" diff --git a/satpy/writers/cf/coords.py b/satpy/writers/cf/coords.py new file mode 100644 index 0000000000..dee28952b5 --- /dev/null +++ b/satpy/writers/cf/coords.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +"""Set CF-compliant spatial and temporal coordinates.""" + +import logging +from contextlib import suppress + +import numpy as np +import xarray as xr +from pyresample.geometry import AreaDefinition, SwathDefinition + +logger = logging.getLogger(__name__) + + +def add_xy_coords_attrs(dataarray): + """Add relevant attributes to x, y coordinates.""" + # If there are no coords, return dataarray + if not dataarray.coords.keys() & {"x", "y", "crs"}: + return dataarray + # If projected area + if _is_projected(dataarray): + dataarray = _add_xy_projected_coords_attrs(dataarray) + else: + dataarray = _add_xy_geographic_coords_attrs(dataarray) + if 'crs' in dataarray.coords: + dataarray = dataarray.drop_vars('crs') + return 
dataarray
+
+
+def _is_projected(dataarray):
+    """Guess whether data are projected or not."""
+    crs = _try_to_get_crs(dataarray)
+    if crs:
+        return crs.is_projected
+    units = _try_get_units_from_coords(dataarray)
+    if units:
+        if units.endswith("m"):
+            return True
+        if units.startswith("degrees"):
+            return False
+    logger.warning("Failed to tell if data are projected. Assuming yes.")
+    return True
+
+
+def _try_to_get_crs(dataarray):
+    """Try to get a CRS from attributes."""
+    if "area" in dataarray.attrs:
+        if isinstance(dataarray.attrs["area"], AreaDefinition):
+            return dataarray.attrs["area"].crs
+        if not isinstance(dataarray.attrs["area"], SwathDefinition):
+            logger.warning(
+                f"Could not tell CRS from area of type {type(dataarray.attrs['area']).__name__:s}. "
+                "Assuming projected CRS.")
+    if "crs" in dataarray.coords:
+        return dataarray.coords["crs"].item()
+
+
+def _try_get_units_from_coords(dataarray):
+    """Try to retrieve coordinate x/y units."""
+    for c in ["x", "y"]:
+        with suppress(KeyError):
+            # If the data has only 1 dimension, it has only one of x or y coords
+            if "units" in dataarray.coords[c].attrs:
+                return dataarray.coords[c].attrs["units"]
+
+
+def _add_xy_projected_coords_attrs(dataarray, x='x', y='y'):
+    """Add relevant attributes to x, y coordinates of a projected CRS."""
+    if x in dataarray.coords:
+        dataarray[x].attrs['standard_name'] = 'projection_x_coordinate'
+        dataarray[x].attrs['units'] = 'm'
+    if y in dataarray.coords:
+        dataarray[y].attrs['standard_name'] = 'projection_y_coordinate'
+        dataarray[y].attrs['units'] = 'm'
+    return dataarray
+
+
+def _add_xy_geographic_coords_attrs(dataarray, x='x', y='y'):
+    """Add relevant attributes to x, y coordinates of a geographic CRS."""
+    if x in dataarray.coords:
+        dataarray[x].attrs['standard_name'] = 'longitude'
+        dataarray[x].attrs['units'] = 'degrees_east'
+    if y in dataarray.coords:
+        dataarray[y].attrs['standard_name'] = 'latitude'
+        dataarray[y].attrs['units'] = 'degrees_north'
+    return dataarray
+
+
+def add_time_bounds_dimension(ds, time="time"):
+    """Add time bound dimension to xr.Dataset."""
+    start_times = []
+    end_times = []
+    for _var_name, data_array in ds.items():
+        start_times.append(data_array.attrs.get("start_time", None))
+        end_times.append(data_array.attrs.get("end_time", None))
+
+    start_time = min(start_time for start_time in start_times
+                     if start_time is not None)
+    end_time = min(end_time for end_time in end_times
+                   if end_time is not None)
+    ds['time_bnds'] = xr.DataArray([[np.datetime64(start_time),
+                                     np.datetime64(end_time)]],
+                                   dims=['time', 'bnds_1d'])
+    ds[time].attrs['bounds'] = "time_bnds"
+    ds[time].attrs['standard_name'] = "time"
+    return ds
+
+
+def process_time_coord(dataarray, epoch):
+    """Process the 'time' coordinate, if it exists.
+
+    It expands the DataArray with a time dimension if one does not yet exist.
+ + The function assumes + + - that x and y dimensions have at least shape > 1 + - the time coordinate has size 1 + + """ + if 'time' in dataarray.coords: + dataarray['time'].encoding['units'] = epoch + dataarray['time'].attrs['standard_name'] = 'time' + dataarray['time'].attrs.pop('bounds', None) + + if 'time' not in dataarray.dims and dataarray["time"].size not in dataarray.shape: + dataarray = dataarray.expand_dims('time') + + return dataarray diff --git a/satpy/writers/cf/coords_attrs.py b/satpy/writers/cf/coords_attrs.py deleted file mode 100644 index c7e559adc2..0000000000 --- a/satpy/writers/cf/coords_attrs.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -"""Set CF-compliant attributes to x and y spatial dimensions.""" - -import logging - -from satpy.writers.cf.crs import _is_projected - -logger = logging.getLogger(__name__) - - -def add_xy_coords_attrs(dataarray): - """Add relevant attributes to x, y coordinates.""" - # If there are no coords, return dataarray - if not dataarray.coords.keys() & {"x", "y", "crs"}: - return dataarray - # If projected area - if _is_projected(dataarray): - dataarray = _add_xy_projected_coords_attrs(dataarray) - else: - dataarray = _add_xy_geographic_coords_attrs(dataarray) - if 'crs' in dataarray.coords: - dataarray = dataarray.drop_vars('crs') - return dataarray - - -def _add_xy_projected_coords_attrs(dataarray, x='x', y='y'): - """Add relevant attributes to x, y coordinates of a projected CRS.""" - if x in dataarray.coords: - dataarray[x].attrs['standard_name'] = 'projection_x_coordinate' - dataarray[x].attrs['units'] = 'm' - if y in dataarray.coords: - dataarray[y].attrs['standard_name'] = 'projection_y_coordinate' - dataarray[y].attrs['units'] = 'm' - return dataarray - - -def _add_xy_geographic_coords_attrs(dataarray, x='x', y='y'): - """Add relevant attributes to x, y coordinates of a geographic CRS.""" - if x in dataarray.coords: - dataarray[x].attrs['standard_name'] = 'longitude' - dataarray[x].attrs['units'] = 'degrees_east' - if y in dataarray.coords: - dataarray[y].attrs['standard_name'] = 'latitude' - dataarray[y].attrs['units'] = 'degrees_north' - return dataarray diff --git a/satpy/writers/cf/crs.py b/satpy/writers/cf/crs.py deleted file mode 100644 index e6952a484f..0000000000 --- a/satpy/writers/cf/crs.py +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -"""CRS utility.""" - -import logging -from contextlib import suppress - -from pyresample.geometry import AreaDefinition, SwathDefinition - -logger = logging.getLogger(__name__) - - -def _is_projected(dataarray): - """Guess whether data are projected or not.""" - crs = _try_to_get_crs(dataarray) - if crs: - return crs.is_projected - units = _try_get_units_from_coords(dataarray) - if units: - if units.endswith("m"): - return True - if units.startswith("degrees"): - return False - logger.warning("Failed to tell if data are projected. Assuming yes.") - return True - - -def _try_to_get_crs(dataarray): - """Try to get a CRS from attributes.""" - if "area" in dataarray.attrs: - if isinstance(dataarray.attrs["area"], AreaDefinition): - return dataarray.attrs["area"].crs - if not isinstance(dataarray.attrs["area"], SwathDefinition): - logger.warning( - f"Could not tell CRS from area of type {type(dataarray.attrs['area']).__name__:s}. 
" - "Assuming projected CRS.") - if "crs" in dataarray.coords: - return dataarray.coords["crs"].item() - - -def _try_get_units_from_coords(dataarray): - """Try to retrieve coordinate x/y units.""" - for c in ["x", "y"]: - with suppress(KeyError): - # If the data has only 1 dimension, it has only one of x or y coords - if "units" in dataarray.coords[c].attrs: - return dataarray.coords[c].attrs["units"] diff --git a/satpy/writers/cf/dataarray.py b/satpy/writers/cf/dataarray.py index a5322cfee4..df52406f96 100644 --- a/satpy/writers/cf/dataarray.py +++ b/satpy/writers/cf/dataarray.py @@ -19,9 +19,9 @@ import logging import warnings +from satpy.writers.cf import EPOCH from satpy.writers.cf.attrs import preprocess_datarray_attrs -from satpy.writers.cf.coords_attrs import add_xy_coords_attrs -from satpy.writers.cf.time import EPOCH, process_time_coord +from satpy.writers.cf.coords import add_xy_coords_attrs, process_time_coord logger = logging.getLogger(__name__) diff --git a/satpy/writers/cf/datasets.py b/satpy/writers/cf/datasets.py index c87e6673d4..0cdf2b8210 100644 --- a/satpy/writers/cf/datasets.py +++ b/satpy/writers/cf/datasets.py @@ -22,7 +22,7 @@ import xarray as xr -from satpy.writers.cf.time import EPOCH +from satpy.writers.cf import EPOCH from satpy.writers.cf_writer import CF_DTYPES, CF_VERSION logger = logging.getLogger(__name__) @@ -77,7 +77,7 @@ def _collect_cf_dataset(list_dataarrays, epoch : str Reference time for encoding the time coordinates (if available). Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is retrieved using `from satpy.cf_writer import EPOCH` + If None, the default reference time is retrieved using `from satpy.writers.cf import EPOCH` flatten_attrs : bool, optional If True, flatten dict-type attributes. exclude_attrs : list, optional @@ -197,7 +197,7 @@ def collect_cf_datasets(list_dataarrays, epoch (str): Reference time for encoding the time coordinates (if available). Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is retrieved using `from satpy.cf_writer import EPOCH` + If None, the default reference time is retrieved using `from satpy.writers.cf import EPOCH` flatten_attrs (bool): If True, flatten dict-type attributes. exclude_attrs (list): @@ -228,7 +228,7 @@ def collect_cf_datasets(list_dataarrays, Global attributes to be attached to the xr.Dataset / netCDF4. """ from satpy.writers.cf.attrs import preprocess_header_attrs - from satpy.writers.cf.time import add_time_bounds_dimension + from satpy.writers.cf.coords import add_time_bounds_dimension if not list_dataarrays: raise RuntimeError("None of the requested datasets have been " diff --git a/satpy/writers/cf/time.py b/satpy/writers/cf/time.py deleted file mode 100644 index 4c5cbf5bc9..0000000000 --- a/satpy/writers/cf/time.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2017-2023 Satpy developers -# -# This file is part of satpy. -# -# satpy is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. -# -# satpy is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License along with -# satpy. If not, see . -"""CF processing of time dimension and coordinates.""" -import logging - -import numpy as np -import xarray as xr - -logger = logging.getLogger(__name__) - - -EPOCH = u"seconds since 1970-01-01 00:00:00" - - -def add_time_bounds_dimension(ds, time="time"): - """Add time bound dimension to xr.Dataset.""" - start_times = [] - end_times = [] - for _var_name, data_array in ds.items(): - start_times.append(data_array.attrs.get("start_time", None)) - end_times.append(data_array.attrs.get("end_time", None)) - - start_time = min(start_time for start_time in start_times - if start_time is not None) - end_time = min(end_time for end_time in end_times - if end_time is not None) - ds['time_bnds'] = xr.DataArray([[np.datetime64(start_time), - np.datetime64(end_time)]], - dims=['time', 'bnds_1d']) - ds[time].attrs['bounds'] = "time_bnds" - ds[time].attrs['standard_name'] = "time" - return ds - - -def process_time_coord(dataarray, epoch): - """Process the 'time' coordinate, if existing. - - It expand the DataArray with a time dimension if does not yet exists. - - The function assumes - - - that x and y dimensions have at least shape > 1 - - the time coordinate has size 1 - - """ - if 'time' in dataarray.coords: - dataarray['time'].encoding['units'] = epoch - dataarray['time'].attrs['standard_name'] = 'time' - dataarray['time'].attrs.pop('bounds', None) - - if 'time' not in dataarray.dims and dataarray["time"].size not in dataarray.shape: - dataarray = dataarray.expand_dims('time') - - return dataarray diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index 30ca7e784e..096293e2b7 100644 --- a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -163,7 +163,7 @@ from packaging.version import Version from satpy.writers import Writer -from satpy.writers.cf.time import EPOCH +from satpy.writers.cf import EPOCH logger = logging.getLogger(__name__) From e3df20e32171257eeb999bcf272de5d0ae08c7bf Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 10 Oct 2023 18:27:48 +0200 Subject: [PATCH 15/37] Unest cf directories --- doc/source/writers.rst | 4 ++-- satpy/_scene_converters.py | 6 ++--- satpy/{writers => }/cf/__init__.py | 0 satpy/{writers => }/cf/area.py | 0 satpy/{writers => }/cf/attrs.py | 2 +- satpy/{writers => }/cf/coords.py | 0 satpy/{writers => }/cf/dataarray.py | 6 ++--- satpy/{writers => }/cf/datasets.py | 22 +++++++------------ satpy/{writers => }/cf/encoding.py | 0 satpy/etc/writers/cf.yaml | 2 +- satpy/scene.py | 2 +- .../{writer_tests => }/cf_tests/__init__.py | 0 .../{writer_tests => }/cf_tests/test_area.py | 16 +++++++------- .../{writer_tests => }/cf_tests/test_attrs.py | 2 +- .../cf_tests/test_coords.py | 4 ++-- .../cf_tests/test_dataaarray.py | 8 +++---- .../cf_tests/test_datasets.py | 8 +++---- .../cf_tests/test_encoding.py | 6 ++--- satpy/tests/writer_tests/test_cf.py | 8 +++---- satpy/writers/cf_writer.py | 14 ++++++------ 20 files changed, 52 insertions(+), 58 deletions(-) rename satpy/{writers => }/cf/__init__.py (100%) rename satpy/{writers => }/cf/area.py (100%) rename satpy/{writers => }/cf/attrs.py (99%) rename satpy/{writers => }/cf/coords.py (100%) rename satpy/{writers => }/cf/dataarray.py (95%) rename satpy/{writers => }/cf/datasets.py (95%) rename satpy/{writers => }/cf/encoding.py (100%) rename satpy/tests/{writer_tests => }/cf_tests/__init__.py (100%) rename satpy/tests/{writer_tests => }/cf_tests/test_area.py (97%) rename 
satpy/tests/{writer_tests => }/cf_tests/test_attrs.py (99%) rename satpy/tests/{writer_tests => }/cf_tests/test_coords.py (96%) rename satpy/tests/{writer_tests => }/cf_tests/test_dataaarray.py (97%) rename satpy/tests/{writer_tests => }/cf_tests/test_datasets.py (95%) rename satpy/tests/{writer_tests => }/cf_tests/test_encoding.py (96%) diff --git a/doc/source/writers.rst b/doc/source/writers.rst index f453f4d5a5..e5c33ecba2 100644 --- a/doc/source/writers.rst +++ b/doc/source/writers.rst @@ -44,9 +44,9 @@ One common parameter across almost all Writers is ``filename`` and - Deprecated from NinJo 7 (use ninjogeotiff) - * - NetCDF (Standard CF) - - :class:`cf ` + - :class:`cf ` - Beta - - :mod:`Usage example ` + - :mod:`Usage example ` * - AWIPS II Tiled NetCDF4 - :class:`awips_tiled ` - Beta diff --git a/satpy/_scene_converters.py b/satpy/_scene_converters.py index c5c0b1c896..a620817236 100644 --- a/satpy/_scene_converters.py +++ b/satpy/_scene_converters.py @@ -66,7 +66,7 @@ def to_xarray(scn, epoch (str): Reference time for encoding the time coordinates (if available). Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is retrieved using "from satpy.writers.cf import EPOCH" + If None, the default reference time is retrieved using "from satpy.cf import EPOCH" flatten_attrs (bool): If True, flatten dict-type attributes. exclude_attrs (list): @@ -90,8 +90,8 @@ def to_xarray(scn, A CF-compliant xr.Dataset """ - from satpy.writers.cf import EPOCH - from satpy.writers.cf.datasets import collect_cf_datasets + from satpy.cf import EPOCH + from satpy.cf.datasets import collect_cf_datasets if epoch is None: epoch = EPOCH diff --git a/satpy/writers/cf/__init__.py b/satpy/cf/__init__.py similarity index 100% rename from satpy/writers/cf/__init__.py rename to satpy/cf/__init__.py diff --git a/satpy/writers/cf/area.py b/satpy/cf/area.py similarity index 100% rename from satpy/writers/cf/area.py rename to satpy/cf/area.py diff --git a/satpy/writers/cf/attrs.py b/satpy/cf/attrs.py similarity index 99% rename from satpy/writers/cf/attrs.py rename to satpy/cf/attrs.py index d4a41f2bfc..28cc41be98 100644 --- a/satpy/writers/cf/attrs.py +++ b/satpy/cf/attrs.py @@ -71,7 +71,7 @@ def __encode_nc(obj): Raises: ValueError if no such datatype could be found """ - from satpy.writers.cf_writer import NC4_DTYPES + from satpy.cf_writer import NC4_DTYPES if isinstance(obj, int) and not isinstance(obj, (bool, np.bool_)): return obj diff --git a/satpy/writers/cf/coords.py b/satpy/cf/coords.py similarity index 100% rename from satpy/writers/cf/coords.py rename to satpy/cf/coords.py diff --git a/satpy/writers/cf/dataarray.py b/satpy/cf/dataarray.py similarity index 95% rename from satpy/writers/cf/dataarray.py rename to satpy/cf/dataarray.py index df52406f96..661981681b 100644 --- a/satpy/writers/cf/dataarray.py +++ b/satpy/cf/dataarray.py @@ -19,9 +19,9 @@ import logging import warnings -from satpy.writers.cf import EPOCH -from satpy.writers.cf.attrs import preprocess_datarray_attrs -from satpy.writers.cf.coords import add_xy_coords_attrs, process_time_coord +from satpy.cf import EPOCH +from satpy.cf.attrs import preprocess_datarray_attrs +from satpy.cf.coords import add_xy_coords_attrs, process_time_coord logger = logging.getLogger(__name__) diff --git a/satpy/writers/cf/datasets.py b/satpy/cf/datasets.py similarity index 95% rename from satpy/writers/cf/datasets.py rename to satpy/cf/datasets.py index 0cdf2b8210..bcfe622f18 100644 --- a/satpy/writers/cf/datasets.py +++ 
b/satpy/cf/datasets.py @@ -22,8 +22,8 @@ import xarray as xr -from satpy.writers.cf import EPOCH -from satpy.writers.cf_writer import CF_DTYPES, CF_VERSION +from satpy.cf import EPOCH +from satpy.cf_writer import CF_DTYPES, CF_VERSION logger = logging.getLogger(__name__) @@ -77,7 +77,7 @@ def _collect_cf_dataset(list_dataarrays, epoch : str Reference time for encoding the time coordinates (if available). Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is retrieved using `from satpy.writers.cf import EPOCH` + If None, the default reference time is retrieved using `from satpy.cf import EPOCH` flatten_attrs : bool, optional If True, flatten dict-type attributes. exclude_attrs : list, optional @@ -98,14 +98,8 @@ def _collect_cf_dataset(list_dataarrays, ds : xr.Dataset A partially CF-compliant xr.Dataset """ - from satpy.writers.cf.area import ( - area2cf, - assert_xy_unique, - has_projection_coords, - link_coords, - make_alt_coords_unique, - ) - from satpy.writers.cf.dataarray import make_cf_dataarray + from satpy.cf.area import area2cf, assert_xy_unique, has_projection_coords, link_coords, make_alt_coords_unique + from satpy.cf.dataarray import make_cf_dataarray # Create dictionary of input datarrays # --> Since keys=None, it doesn't never retrieve ancillary variables !!! @@ -197,7 +191,7 @@ def collect_cf_datasets(list_dataarrays, epoch (str): Reference time for encoding the time coordinates (if available). Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is retrieved using `from satpy.writers.cf import EPOCH` + If None, the default reference time is retrieved using `from satpy.cf import EPOCH` flatten_attrs (bool): If True, flatten dict-type attributes. exclude_attrs (list): @@ -227,8 +221,8 @@ def collect_cf_datasets(list_dataarrays, header_attrs : dict Global attributes to be attached to the xr.Dataset / netCDF4. """ - from satpy.writers.cf.attrs import preprocess_header_attrs - from satpy.writers.cf.coords import add_time_bounds_dimension + from satpy.cf.attrs import preprocess_header_attrs + from satpy.cf.coords import add_time_bounds_dimension if not list_dataarrays: raise RuntimeError("None of the requested datasets have been " diff --git a/satpy/writers/cf/encoding.py b/satpy/cf/encoding.py similarity index 100% rename from satpy/writers/cf/encoding.py rename to satpy/cf/encoding.py diff --git a/satpy/etc/writers/cf.yaml b/satpy/etc/writers/cf.yaml index 5c4a875bec..b141a68c09 100644 --- a/satpy/etc/writers/cf.yaml +++ b/satpy/etc/writers/cf.yaml @@ -1,7 +1,7 @@ writer: name: cf description: Generic netCDF4/CF Writer - writer: !!python/name:satpy.writers.cf_writer.CFWriter + writer: !!python/name:satpy.cf_writer.CFWriter filename: '{name}_{start_time:%Y%m%d_%H%M%S}.nc' compress: DEFLATE zlevel: 6 diff --git a/satpy/scene.py b/satpy/scene.py index 52580d14e8..cebf57253b 100644 --- a/satpy/scene.py +++ b/satpy/scene.py @@ -1128,7 +1128,7 @@ def to_xarray(self, epoch (str): Reference time for encoding the time coordinates (if available). Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is retrieved using "from satpy.writers.cf import EPOCH" + If None, the default reference time is retrieved using "from satpy.cf import EPOCH" flatten_attrs (bool): If True, flatten dict-type attributes. 
exclude_attrs (list): diff --git a/satpy/tests/writer_tests/cf_tests/__init__.py b/satpy/tests/cf_tests/__init__.py similarity index 100% rename from satpy/tests/writer_tests/cf_tests/__init__.py rename to satpy/tests/cf_tests/__init__.py diff --git a/satpy/tests/writer_tests/cf_tests/test_area.py b/satpy/tests/cf_tests/test_area.py similarity index 97% rename from satpy/tests/writer_tests/cf_tests/test_area.py rename to satpy/tests/cf_tests/test_area.py index 5b7dd86d38..352bf35a2e 100644 --- a/satpy/tests/writer_tests/cf_tests/test_area.py +++ b/satpy/tests/cf_tests/test_area.py @@ -28,7 +28,7 @@ class TestCFArea: def test_assert_xy_unique(self): """Test that the x and y coordinates are unique.""" - from satpy.writers.cf.area import assert_xy_unique + from satpy.cf.area import assert_xy_unique dummy = [[1, 2], [3, 4]] datas = {'a': xr.DataArray(data=dummy, dims=('y', 'x'), coords={'y': [1, 2], 'x': [3, 4]}), @@ -42,7 +42,7 @@ def test_assert_xy_unique(self): def test_link_coords(self): """Check that coordinates link has been established correctly.""" - from satpy.writers.cf.area import link_coords + from satpy.cf.area import link_coords data = [[1, 2], [3, 4]] lon = np.zeros((2, 2)) @@ -77,7 +77,7 @@ def test_link_coords(self): def test_make_alt_coords_unique(self): """Test that created coordinate variables are unique.""" - from satpy.writers.cf.area import make_alt_coords_unique + from satpy.cf.area import make_alt_coords_unique data = [[1, 2], [3, 4]] y = [1, 2] @@ -122,7 +122,7 @@ def test_make_alt_coords_unique(self): def test_area2cf(self): """Test the conversion of an area to CF standards.""" - from satpy.writers.cf.area import area2cf + from satpy.cf.area import area2cf ds_base = xr.DataArray(data=[[1, 2], [3, 4]], dims=('y', 'x'), coords={'y': [1, 2], 'x': [3, 4]}, attrs={'name': 'var1'}) @@ -168,7 +168,7 @@ def test_area2cf(self): def test__add_grid_mapping(self): """Test the conversion from pyresample area object to CF grid mapping.""" - from satpy.writers.cf.area import _add_grid_mapping + from satpy.cf.area import _add_grid_mapping def _gm_matches(gmapping, expected): """Assert that all keys in ``expected`` match the values in ``gmapping``.""" @@ -352,7 +352,7 @@ def _gm_matches(gmapping, expected): def test__add_lonlat_coords(self): """Test the conversion from areas to lon/lat.""" - from satpy.writers.cf.area import _add_lonlat_coords + from satpy.cf.area import _add_lonlat_coords area = AreaDefinition( 'seviri', @@ -439,14 +439,14 @@ def datasets(self): def test__is_lon_or_lat_dataarray(self, datasets): """Test the _is_lon_or_lat_dataarray function.""" - from satpy.writers.cf.area import _is_lon_or_lat_dataarray + from satpy.cf.area import _is_lon_or_lat_dataarray assert _is_lon_or_lat_dataarray(datasets['lat']) assert not _is_lon_or_lat_dataarray(datasets['var1']) def test_has_projection_coords(self, datasets): """Test the has_projection_coords function.""" - from satpy.writers.cf.area import has_projection_coords + from satpy.cf.area import has_projection_coords assert has_projection_coords(datasets) datasets['lat'].attrs['standard_name'] = 'dummy' diff --git a/satpy/tests/writer_tests/cf_tests/test_attrs.py b/satpy/tests/cf_tests/test_attrs.py similarity index 99% rename from satpy/tests/writer_tests/cf_tests/test_attrs.py rename to satpy/tests/cf_tests/test_attrs.py index 6988e761ee..7eb86e172b 100644 --- a/satpy/tests/writer_tests/cf_tests/test_attrs.py +++ b/satpy/tests/cf_tests/test_attrs.py @@ -111,8 +111,8 @@ def get_test_attrs(self): def 
test__encode_attrs_nc(self): """Test attributes encoding.""" + from satpy.cf.attrs import _encode_attrs_nc from satpy.tests.utils import assert_dict_array_equality - from satpy.writers.cf.attrs import _encode_attrs_nc attrs, expected, _ = self.get_test_attrs() diff --git a/satpy/tests/writer_tests/cf_tests/test_coords.py b/satpy/tests/cf_tests/test_coords.py similarity index 96% rename from satpy/tests/writer_tests/cf_tests/test_coords.py rename to satpy/tests/cf_tests/test_coords.py index 9e9d8c4607..1361c0d5a4 100644 --- a/satpy/tests/writer_tests/cf_tests/test_coords.py +++ b/satpy/tests/cf_tests/test_coords.py @@ -31,7 +31,7 @@ class TestCFtime: def test_add_time_bounds_dimension(self): """Test addition of CF-compliant time attributes.""" - from satpy.writers.cf.coords import add_time_bounds_dimension + from satpy.cf.coords import add_time_bounds_dimension test_array = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) times = np.array(['2018-05-30T10:05:00', '2018-05-30T10:05:01', @@ -55,7 +55,7 @@ class TestCFcoords: def test_is_projected(self, caplog): """Tests for private _is_projected function.""" - from satpy.writers.cf.coords import _is_projected + from satpy.cf.coords import _is_projected # test case with units but no area da = xr.DataArray( diff --git a/satpy/tests/writer_tests/cf_tests/test_dataaarray.py b/satpy/tests/cf_tests/test_dataaarray.py similarity index 97% rename from satpy/tests/writer_tests/cf_tests/test_dataaarray.py rename to satpy/tests/cf_tests/test_dataaarray.py index 43b87cfc20..0fd3a9d41c 100644 --- a/satpy/tests/writer_tests/cf_tests/test_dataaarray.py +++ b/satpy/tests/cf_tests/test_dataaarray.py @@ -29,7 +29,7 @@ def test_preprocess_dataarray_name(): """Test saving an array to netcdf/cf where dataset name starting with a digit with prefix include orig name.""" from satpy import Scene - from satpy.writers.cf.dataarray import _preprocess_dataarray_name + from satpy.cf.dataarray import _preprocess_dataarray_name scn = Scene() scn['1'] = xr.DataArray([1, 2, 3]) @@ -53,8 +53,8 @@ def test_make_cf_dataarray_lonlat(): """Test correct CF encoding for area with lon/lat units.""" from pyresample import create_area_def + from satpy.cf.dataarray import make_cf_dataarray from satpy.resample import add_crs_xy_coords - from satpy.writers.cf.dataarray import make_cf_dataarray area = create_area_def("mavas", 4326, shape=(5, 5), center=(0, 0), resolution=(1, 1)) @@ -156,8 +156,8 @@ def get_test_attrs(self): def test_make_cf_dataarray(self): """Test the conversion of a DataArray to a CF-compatible DataArray.""" + from satpy.cf.dataarray import make_cf_dataarray from satpy.tests.utils import assert_dict_array_equality - from satpy.writers.cf.dataarray import make_cf_dataarray # Create set of test attributes attrs, attrs_expected, attrs_expected_flat = self.get_test_attrs() @@ -195,7 +195,7 @@ def test_make_cf_dataarray(self): def test_make_cf_dataarray_one_dimensional_array(self): """Test the conversion of an 1d DataArray to a CF-compatible DataArray.""" - from satpy.writers.cf.dataarray import make_cf_dataarray + from satpy.cf.dataarray import make_cf_dataarray arr = xr.DataArray(np.array([1, 2, 3, 4]), attrs={}, dims=('y',), coords={'y': [0, 1, 2, 3], 'acq_time': ('y', [0, 1, 2, 3])}) diff --git a/satpy/tests/writer_tests/cf_tests/test_datasets.py b/satpy/tests/cf_tests/test_datasets.py similarity index 95% rename from satpy/tests/writer_tests/cf_tests/test_datasets.py rename to satpy/tests/cf_tests/test_datasets.py index d92099e869..dc30d1b9d1 100644 --- 
a/satpy/tests/writer_tests/cf_tests/test_datasets.py +++ b/satpy/tests/cf_tests/test_datasets.py @@ -29,7 +29,7 @@ class TestCollectCfDataset: def test_collect_cf_dataset(self): """Test collecting CF datasets from a DataArray objects.""" - from satpy.writers.cf.datasets import _collect_cf_dataset + from satpy.cf.datasets import _collect_cf_dataset geos = AreaDefinition( area_id='geos', @@ -69,7 +69,7 @@ def test_collect_cf_dataset(self): def test_collect_cf_dataset_with_latitude_named_lat(self): """Test collecting CF datasets with latitude named lat.""" - from satpy.writers.cf.datasets import _collect_cf_dataset + from satpy.cf.datasets import _collect_cf_dataset data = [[75, 2], [3, 4]] y = [1, 2] @@ -123,8 +123,8 @@ def test_collect_cf_dataset_with_latitude_named_lat(self): def test_geographic_area_coords_attrs(self): """Test correct storage for area with lon/lat units.""" + from satpy.cf.datasets import _collect_cf_dataset from satpy.tests.utils import make_fake_scene - from satpy.writers.cf.datasets import _collect_cf_dataset scn = make_fake_scene( {"ketolysis": np.arange(25).reshape(5, 5)}, @@ -147,7 +147,7 @@ class TestCollectCfDatasets: def test_empty_collect_cf_datasets(self): """Test that if no DataArrays, collect_cf_datasets raise error.""" - from satpy.writers.cf.datasets import collect_cf_datasets + from satpy.cf.datasets import collect_cf_datasets with pytest.raises(RuntimeError): collect_cf_datasets(list_dataarrays=[]) diff --git a/satpy/tests/writer_tests/cf_tests/test_encoding.py b/satpy/tests/cf_tests/test_encoding.py similarity index 96% rename from satpy/tests/writer_tests/cf_tests/test_encoding.py rename to satpy/tests/cf_tests/test_encoding.py index 125c7eec94..5d49ebc647 100644 --- a/satpy/tests/writer_tests/cf_tests/test_encoding.py +++ b/satpy/tests/cf_tests/test_encoding.py @@ -47,7 +47,7 @@ def fake_ds_digit(self): def test_dataset_name_digit(self, fake_ds_digit): """Test data with dataset name staring with a digit.""" - from satpy.writers.cf.encoding import update_encoding + from satpy.cf.encoding import update_encoding # Dataset with name staring with digit ds_digit = fake_ds_digit @@ -66,7 +66,7 @@ def test_dataset_name_digit(self, fake_ds_digit): def test_without_time(self, fake_ds): """Test data with no time dimension.""" - from satpy.writers.cf.encoding import update_encoding + from satpy.cf.encoding import update_encoding # Without time dimension ds = fake_ds.chunk(2) @@ -98,7 +98,7 @@ def test_without_time(self, fake_ds): def test_with_time(self, fake_ds): """Test data with a time dimension.""" - from satpy.writers.cf.encoding import update_encoding + from satpy.cf.encoding import update_encoding # With time dimension ds = fake_ds.chunk(8).expand_dims({'time': [datetime.datetime(2009, 7, 1, 12, 15)]}) diff --git a/satpy/tests/writer_tests/test_cf.py b/satpy/tests/writer_tests/test_cf.py index f50a8e99d3..ba00ae4545 100644 --- a/satpy/tests/writer_tests/test_cf.py +++ b/satpy/tests/writer_tests/test_cf.py @@ -28,8 +28,8 @@ from packaging.version import Version from satpy import Scene +from satpy.cf_writer import _get_backend_versions from satpy.tests.utils import make_dsq -from satpy.writers.cf_writer import _get_backend_versions try: from pyproj import CRS @@ -66,8 +66,8 @@ class TestCFWriter: def test_init(self): """Test initializing the CFWriter class.""" + from satpy.cf_writer import CFWriter from satpy.writers import configs_for_writer - from satpy.writers.cf_writer import CFWriter CFWriter(config_files=list(configs_for_writer('cf'))[0]) @@ -403,11 
+403,11 @@ def test_load_module_with_old_pyproj(self): old_version = sys.modules['pyproj'].__version__ sys.modules['pyproj'].__version__ = "1.9.6" try: - importlib.reload(sys.modules['satpy.writers.cf_writer']) + importlib.reload(sys.modules['satpy.cf_writer']) finally: # Tear down sys.modules['pyproj'].__version__ = old_version - importlib.reload(sys.modules['satpy.writers.cf_writer']) + importlib.reload(sys.modules['satpy.cf_writer']) def test_global_attr_default_history_and_Conventions(self): """Test saving global attributes history and Conventions.""" diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index 096293e2b7..4093b7877b 100644 --- a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -162,8 +162,8 @@ import xarray as xr from packaging.version import Version +from satpy.cf import EPOCH from satpy.writers import Writer -from satpy.writers.cf import EPOCH logger = logging.getLogger(__name__) @@ -270,8 +270,8 @@ def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, Prefix to add the each variable with name starting with a digit. Use '' or None to leave this out. """ - from satpy.writers.cf.datasets import collect_cf_datasets - from satpy.writers.cf.encoding import update_encoding + from satpy.cf.datasets import collect_cf_datasets + from satpy.cf.encoding import update_encoding logger.info('Saving datasets to NetCDF4/CF.') _check_backend_versions() @@ -344,9 +344,9 @@ def da2cf(dataarray, epoch=EPOCH, flatten_attrs=False, exclude_attrs=None, numeric_name_prefix (str): Prepend dataset name with this if starting with a digit """ - from satpy.writers.cf.dataarray import make_cf_dataarray + from satpy.cf.dataarray import make_cf_dataarray warnings.warn('CFWriter.da2cf is deprecated.' - 'Use satpy.writers.cf.dataarray.make_cf_dataarray instead.', + 'Use satpy.cf.dataarray.make_cf_dataarray instead.', DeprecationWarning, stacklevel=3) return make_cf_dataarray(dataarray=dataarray, epoch=epoch, @@ -358,10 +358,10 @@ def da2cf(dataarray, epoch=EPOCH, flatten_attrs=False, exclude_attrs=None, @staticmethod def update_encoding(dataset, to_netcdf_kwargs): """Update encoding info (deprecated).""" - from satpy.writers.cf.encoding import update_encoding + from satpy.cf.encoding import update_encoding warnings.warn('CFWriter.update_encoding is deprecated. 
' - 'Use satpy.writers.cf.encoding.update_encoding instead.', + 'Use satpy.cf.encoding.update_encoding instead.', DeprecationWarning, stacklevel=3) return update_encoding(dataset, to_netcdf_kwargs) From bf337195456cab9c541d633bf0b35d6d866149d2 Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 10 Oct 2023 18:50:16 +0200 Subject: [PATCH 16/37] Fix imports error --- doc/source/writers.rst | 4 ++-- satpy/cf/attrs.py | 2 +- satpy/cf/datasets.py | 2 +- satpy/readers/amsr2_l2_gaasp.py | 4 ++-- satpy/tests/writer_tests/test_cf.py | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/source/writers.rst b/doc/source/writers.rst index e5c33ecba2..f453f4d5a5 100644 --- a/doc/source/writers.rst +++ b/doc/source/writers.rst @@ -44,9 +44,9 @@ One common parameter across almost all Writers is ``filename`` and - Deprecated from NinJo 7 (use ninjogeotiff) - * - NetCDF (Standard CF) - - :class:`cf ` + - :class:`cf ` - Beta - - :mod:`Usage example ` + - :mod:`Usage example ` * - AWIPS II Tiled NetCDF4 - :class:`awips_tiled ` - Beta diff --git a/satpy/cf/attrs.py b/satpy/cf/attrs.py index 28cc41be98..d4a41f2bfc 100644 --- a/satpy/cf/attrs.py +++ b/satpy/cf/attrs.py @@ -71,7 +71,7 @@ def __encode_nc(obj): Raises: ValueError if no such datatype could be found """ - from satpy.cf_writer import NC4_DTYPES + from satpy.writers.cf_writer import NC4_DTYPES if isinstance(obj, int) and not isinstance(obj, (bool, np.bool_)): return obj diff --git a/satpy/cf/datasets.py b/satpy/cf/datasets.py index bcfe622f18..c2799ab8d1 100644 --- a/satpy/cf/datasets.py +++ b/satpy/cf/datasets.py @@ -23,7 +23,7 @@ import xarray as xr from satpy.cf import EPOCH -from satpy.cf_writer import CF_DTYPES, CF_VERSION +from satpy.writers.cf_writer import CF_DTYPES, CF_VERSION logger = logging.getLogger(__name__) diff --git a/satpy/readers/amsr2_l2_gaasp.py b/satpy/readers/amsr2_l2_gaasp.py index 4f045057b4..5f91e2d965 100644 --- a/satpy/readers/amsr2_l2_gaasp.py +++ b/satpy/readers/amsr2_l2_gaasp.py @@ -189,7 +189,7 @@ def _available_if_this_file_type(self, configured_datasets): continue yield self.file_type_matches(ds_info['file_type']), ds_info - def __add_lonlat_coords(self, data_arr, ds_info): + def _add_lonlat_coords(self, data_arr, ds_info): lat_coord = None lon_coord = None for coord_name in data_arr.coords: @@ -209,7 +209,7 @@ def _get_ds_info_for_data_arr(self, var_name, data_arr): if x_dim_name in self.dim_resolutions: ds_info['resolution'] = self.dim_resolutions[x_dim_name] if not self.is_gridded and data_arr.coords: - self.__add_lonlat_coords(data_arr, ds_info) + self._add_lonlat_coords(data_arr, ds_info) return ds_info def _is_2d_yx_data_array(self, data_arr): diff --git a/satpy/tests/writer_tests/test_cf.py b/satpy/tests/writer_tests/test_cf.py index ba00ae4545..31e59a2fb7 100644 --- a/satpy/tests/writer_tests/test_cf.py +++ b/satpy/tests/writer_tests/test_cf.py @@ -28,8 +28,8 @@ from packaging.version import Version from satpy import Scene -from satpy.cf_writer import _get_backend_versions from satpy.tests.utils import make_dsq +from satpy.writers.cf_writer import _get_backend_versions try: from pyproj import CRS @@ -66,8 +66,8 @@ class TestCFWriter: def test_init(self): """Test initializing the CFWriter class.""" - from satpy.cf_writer import CFWriter from satpy.writers import configs_for_writer + from satpy.writers.cf_writer import CFWriter CFWriter(config_files=list(configs_for_writer('cf'))[0]) From 0dae7746f5180d7f7ab9f960dd7150e63b710ef5 Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 10 Oct 2023 
20:23:25 +0200 Subject: [PATCH 17/37] Rename functions and refactor --- CHANGELOG.md | 2 +- satpy/cf/area.py | 114 ---------------- satpy/cf/coords.py | 168 ++++++++++++++++++++---- satpy/cf/dataarray.py | 5 +- satpy/cf/datasets.py | 45 ++++--- satpy/etc/writers/cf.yaml | 2 +- satpy/tests/cf_tests/_test_data.py | 111 ++++++++++++++++ satpy/tests/cf_tests/test_area.py | 147 --------------------- satpy/tests/cf_tests/test_attrs.py | 90 +------------ satpy/tests/cf_tests/test_coords.py | 151 ++++++++++++++++++++- satpy/tests/cf_tests/test_dataaarray.py | 90 +------------ 11 files changed, 442 insertions(+), 483 deletions(-) create mode 100644 satpy/tests/cf_tests/_test_data.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 799ae0a867..12c8e50194 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1143,7 +1143,7 @@ In this release 6 issues were closed. * [PR 1606](https://github.com/pytroll/satpy/pull/1606) - Make FSFile hashable again ([1605](https://github.com/pytroll/satpy/issues/1605), [1604](https://github.com/pytroll/satpy/issues/1604)) * [PR 1603](https://github.com/pytroll/satpy/pull/1603) - Update slstr_l2.yaml * [PR 1600](https://github.com/pytroll/satpy/pull/1600) - When setting `upper_right_corner` make sure that all dataset coordinates are flipped -* [PR 1588](https://github.com/pytroll/satpy/pull/1588) - Bugfix of link_coords ([1493](https://github.com/pytroll/satpy/issues/1493)) +* [PR 1588](https://github.com/pytroll/satpy/pull/1588) - Bugfix of add_coordinates_attrs_coords ([1493](https://github.com/pytroll/satpy/issues/1493)) #### Features added diff --git a/satpy/cf/area.py b/satpy/cf/area.py index 5916083d62..2eec0efd4a 100644 --- a/satpy/cf/area.py +++ b/satpy/cf/area.py @@ -17,11 +17,8 @@ # satpy. If not, see . """CF processing of pyresample area information.""" import logging -import warnings -from collections import defaultdict import xarray as xr -from dask.base import tokenize from packaging.version import Version from pyresample.geometry import AreaDefinition, SwathDefinition @@ -79,114 +76,3 @@ def area2cf(dataarray, include_lonlats=False, got_lonlats=False): res.append(gmapping) res.append(dataarray) return res - - -def _is_lon_or_lat_dataarray(dataarray): - """Check if the DataArray represents the latitude or longitude coordinate.""" - if 'standard_name' in dataarray.attrs and dataarray.attrs['standard_name'] in ['longitude', 'latitude']: - return True - return False - - -def has_projection_coords(ds_collection): - """Check if DataArray collection has a "longitude" or "latitude" DataArray.""" - for dataarray in ds_collection.values(): - if _is_lon_or_lat_dataarray(dataarray): - return True - return False - - -def make_alt_coords_unique(datas, pretty=False): - """Make non-dimensional coordinates unique among all datasets. - - Non-dimensional (or alternative) coordinates, such as scanline timestamps, - may occur in multiple datasets with the same name and dimension - but different values. - - In order to avoid conflicts, prepend the dataset name to the coordinate name. - If a non-dimensional coordinate is unique among all datasets and ``pretty=True``, - its name will not be modified. - - Since all datasets must have the same projection coordinates, - this is not applied to latitude and longitude. - - Args: - datas (dict): - Dictionary of (dataset name, dataset) - pretty (bool): - Don't modify coordinate names, if possible. Makes the file prettier, but possibly less consistent. 
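For context on the mechanism this helper (and its renamed successor in coords.py below) relies on: coordinate equality is decided by hashing array content with dask's tokenize, not by object identity. A minimal sketch of that check, assuming only numpy and dask are available:

    import numpy as np
    from dask.base import tokenize

    # Arrays with identical content hash to the same token; differing content does not.
    assert tokenize(np.array([1, 2])) == tokenize(np.array([1, 2]))
    assert tokenize(np.array([1, 2])) != tokenize(np.array([1, 3]))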
- - Returns: - Dictionary holding the updated datasets - - """ - # Determine which non-dimensional coordinates are unique - tokens = defaultdict(set) - for dataset in datas.values(): - for coord_name in dataset.coords: - if not _is_lon_or_lat_dataarray(dataset[coord_name]) and coord_name not in dataset.dims: - tokens[coord_name].add(tokenize(dataset[coord_name].data)) - coords_unique = dict([(coord_name, len(tokens) == 1) for coord_name, tokens in tokens.items()]) - - # Prepend dataset name, if not unique or no pretty-format desired - new_datas = datas.copy() - for coord_name, unique in coords_unique.items(): - if not pretty or not unique: - if pretty: - warnings.warn( - 'Cannot pretty-format "{}" coordinates because they are ' - 'not identical among the given datasets'.format(coord_name), - stacklevel=2 - ) - for ds_name, dataset in datas.items(): - if coord_name in dataset.coords: - rename = {coord_name: '{}_{}'.format(ds_name, coord_name)} - new_datas[ds_name] = new_datas[ds_name].rename(rename) - - return new_datas - - -def assert_xy_unique(datas): - """Check that all datasets share the same projection coordinates x/y.""" - unique_x = set() - unique_y = set() - for dataset in datas.values(): - if 'y' in dataset.dims: - token_y = tokenize(dataset['y'].data) - unique_y.add(token_y) - if 'x' in dataset.dims: - token_x = tokenize(dataset['x'].data) - unique_x.add(token_x) - if len(unique_x) > 1 or len(unique_y) > 1: - raise ValueError('Datasets to be saved in one file (or one group) must have identical projection coordinates. ' - 'Please group them by area or save them in separate files.') - - -def link_coords(datas): - """Link dataarrays and coordinates. - - If the `coordinates` attribute of a data array links to other dataarrays in the scene, for example - `coordinates='lon lat'`, add them as coordinates to the data array and drop that attribute. In the final call to - `xr.Dataset.to_netcdf()` all coordinate relations will be resolved and the `coordinates` attributes be set - automatically. - - """ - for da_name, data in datas.items(): - declared_coordinates = data.attrs.get('coordinates', []) - if isinstance(declared_coordinates, str): - declared_coordinates = declared_coordinates.split(' ') - for coord in declared_coordinates: - if coord not in data.coords: - try: - dimensions_not_in_data = list(set(datas[coord].dims) - set(data.dims)) - data[coord] = datas[coord].squeeze(dimensions_not_in_data, drop=True) - except KeyError: - warnings.warn( - 'Coordinate "{}" referenced by dataarray {} does not ' - 'exist, dropping reference.'.format(coord, da_name), - stacklevel=2 - ) - continue - - # Drop 'coordinates' attribute in any case to avoid conflicts in xr.Dataset.to_netcdf() - data.attrs.pop('coordinates', None) diff --git a/satpy/cf/coords.py b/satpy/cf/coords.py index dee28952b5..0c5acc7df4 100644 --- a/satpy/cf/coords.py +++ b/satpy/cf/coords.py @@ -3,10 +3,13 @@ """Set CF-compliant spatial and temporal coordinates.""" import logging +import warnings +from collections import defaultdict from contextlib import suppress import numpy as np import xarray as xr +from dask.base import tokenize from pyresample.geometry import AreaDefinition, SwathDefinition logger = logging.getLogger(__name__) @@ -86,6 +89,149 @@ def _add_xy_geographic_coords_attrs(dataarray, x='x', y='y'): return dataarray +def set_cf_time_info(dataarray, epoch): + """Set CF time attributes and encoding. + + It expands the DataArray with a time dimension if it does not yet exist. 
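As a quick illustration of the behaviour just described, a usage sketch with a hypothetical scalar-time array (assumes numpy, xarray and the set_cf_time_info introduced in this patch):

    import numpy as np
    import xarray as xr
    from satpy.cf.coords import set_cf_time_info

    da = xr.DataArray(np.zeros((2, 2)), dims=("y", "x"),
                      coords={"time": np.datetime64("2018-05-30T10:05:00")})
    da = set_cf_time_info(da, epoch="seconds since 1970-01-01 00:00:00")
    # Expected: "time" is now a dimension of size 1, and the epoch string
    # ends up in da["time"].encoding["units"].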
+ + The function assumes + + - that x and y dimensions have at least shape > 1 + - the time coordinate has size 1 + + """ + dataarray['time'].encoding['units'] = epoch + dataarray['time'].attrs['standard_name'] = 'time' + dataarray['time'].attrs.pop('bounds', None) + + if 'time' not in dataarray.dims and dataarray["time"].size not in dataarray.shape: + dataarray = dataarray.expand_dims('time') + + return dataarray + + +def _is_lon_or_lat_dataarray(dataarray): + """Check if the DataArray represents the latitude or longitude coordinate.""" + if 'standard_name' in dataarray.attrs and dataarray.attrs['standard_name'] in ['longitude', 'latitude']: + return True + return False + + +def has_projection_coords(dict_datarrays): + """Check if DataArray collection has a "longitude" or "latitude" DataArray.""" + for dataarray in dict_datarrays.values(): + if _is_lon_or_lat_dataarray(dataarray): + return True + return False + + +def ensure_unique_nondimensional_coords(dict_dataarrays, pretty=False): + """Make non-dimensional coordinates unique among all datasets. + + Non-dimensional coordinates, such as scanline timestamps, + may occur in multiple datasets with the same name and dimension + but different values. + + In order to avoid conflicts, prepend the dataset name to the coordinate name. + If a non-dimensional coordinate is unique among all datasets and ``pretty=True``, + its name will not be modified. + + Since all datasets must have the same projection coordinates, + this is not applied to latitude and longitude. + + Args: + datas (dict): + Dictionary of (dataset name, dataset) + pretty (bool): + Don't modify coordinate names, if possible. Makes the file prettier, but possibly less consistent. + + Returns: + Dictionary holding the updated datasets + + """ + # Determine which non-dimensional coordinates are unique + # - coords_unique has structure: {coord_name: True/False} + tokens = defaultdict(set) + for dataarray in dict_dataarrays.values(): + for coord_name in dataarray.coords: + if not _is_lon_or_lat_dataarray(dataarray[coord_name]) and coord_name not in dataarray.dims: + tokens[coord_name].add(tokenize(dataarray[coord_name].data)) + coords_unique = dict([(coord_name, len(tokens) == 1) for coord_name, tokens in tokens.items()]) + + # Prepend dataset name, if not unique or no pretty-format desired + new_dict_dataarrays = dict_dataarrays.copy() + for coord_name, unique in coords_unique.items(): + if not pretty or not unique: + if pretty: + warnings.warn( + 'Cannot pretty-format "{}" coordinates because they are ' + 'not identical among the given datasets'.format(coord_name), + stacklevel=2 + ) + for name, dataarray in dict_dataarrays.items(): + if coord_name in dataarray.coords: + rename = {coord_name: '{}_{}'.format(name, coord_name)} + new_dict_dataarrays[name] = new_dict_dataarrays[name].rename(rename) + + return new_dict_dataarrays + + +def check_unique_projection_coords(dict_dataarrays): + """Check that all datasets share the same projection coordinates x/y.""" + unique_x = set() + unique_y = set() + for dataarray in dict_dataarrays.values(): + if 'y' in dataarray.dims: + token_y = tokenize(dataarray['y'].data) + unique_y.add(token_y) + if 'x' in dataarray.dims: + token_x = tokenize(dataarray['x'].data) + unique_x.add(token_x) + if len(unique_x) > 1 or len(unique_y) > 1: + raise ValueError('Datasets to be saved in one file (or one group) must have identical projection coordinates. 
' 'Please group them by area or save them in separate files.') + + +def add_coordinates_attrs_coords(dict_dataarrays): + """Add to DataArrays the coordinates specified in the 'coordinates' attribute. + + It deals with the 'coordinates' attributes indicating lat/lon coords. + The 'coordinates' attribute is dropped from each DataArray. + + If the `coordinates` attribute of a data array links to other dataarrays in the scene, for example + `coordinates='lon lat'`, add them as coordinates to the data array and drop that attribute. + + In the final call to `xr.Dataset.to_netcdf()` all coordinate relations will be resolved + and the `coordinates` attributes will be set automatically. + """ + for da_name, dataarray in dict_dataarrays.items(): + declared_coordinates = _get_coordinates_list(dataarray) + for coord in declared_coordinates: + if coord not in dataarray.coords: + try: + dimensions_not_in_data = list(set(dict_dataarrays[coord].dims) - set(dataarray.dims)) + dataarray[coord] = dict_dataarrays[coord].squeeze(dimensions_not_in_data, drop=True) + except KeyError: + warnings.warn( + 'Coordinate "{}" referenced by dataarray {} does not ' + 'exist, dropping reference.'.format(coord, da_name), + stacklevel=2 + ) + continue + + # Drop 'coordinates' attribute in any case to avoid conflicts in xr.Dataset.to_netcdf() + dataarray.attrs.pop('coordinates', None) + return dict_dataarrays + + +def _get_coordinates_list(dataarray): + """Return a list with the coordinate names specified in the 'coordinates' attribute.""" + declared_coordinates = dataarray.attrs.get('coordinates', []) + if isinstance(declared_coordinates, str): + declared_coordinates = declared_coordinates.split(' ') + return declared_coordinates + + def add_time_bounds_dimension(ds, time="time"): """Add time bound dimension to xr.Dataset.""" start_times = [] @@ -104,25 +250,3 @@ def add_time_bounds_dimension(ds, time="time"): ds[time].attrs['bounds'] = "time_bnds" ds[time].attrs['standard_name'] = "time" return ds - - -def process_time_coord(dataarray, epoch): - """Process the 'time' coordinate, if existing. - - It expand the DataArray with a time dimension if does not yet exists. 
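To make the linking behaviour of add_coordinates_attrs_coords concrete, a small sketch with hypothetical arrays (it mirrors the test added later in this patch):

    import numpy as np
    import xarray as xr
    from satpy.cf.coords import add_coordinates_attrs_coords

    arrs = {"var1": xr.DataArray(np.zeros((2, 2)), dims=("y", "x"),
                                 attrs={"coordinates": "lon lat"}),
            "lon": xr.DataArray(np.zeros((2, 2)), dims=("y", "x")),
            "lat": xr.DataArray(np.ones((2, 2)), dims=("y", "x"))}
    arrs = add_coordinates_attrs_coords(arrs)
    assert "lon" in arrs["var1"].coords and "lat" in arrs["var1"].coords
    assert "coordinates" not in arrs["var1"].attrs  # attribute dropped after linking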
- - The function assumes - - - that x and y dimensions have at least shape > 1 - - the time coordinate has size 1 - - """ - if 'time' in dataarray.coords: - dataarray['time'].encoding['units'] = epoch - dataarray['time'].attrs['standard_name'] = 'time' - dataarray['time'].attrs.pop('bounds', None) - - if 'time' not in dataarray.dims and dataarray["time"].size not in dataarray.shape: - dataarray = dataarray.expand_dims('time') - - return dataarray diff --git a/satpy/cf/dataarray.py b/satpy/cf/dataarray.py index 661981681b..8a3c78a476 100644 --- a/satpy/cf/dataarray.py +++ b/satpy/cf/dataarray.py @@ -21,7 +21,7 @@ from satpy.cf import EPOCH from satpy.cf.attrs import preprocess_datarray_attrs -from satpy.cf.coords import add_xy_coords_attrs, process_time_coord +from satpy.cf.coords import add_xy_coords_attrs, set_cf_time_info logger = logging.getLogger(__name__) @@ -96,5 +96,6 @@ def make_cf_dataarray(dataarray, flatten_attrs=flatten_attrs, exclude_attrs=exclude_attrs) dataarray = add_xy_coords_attrs(dataarray) - dataarray = process_time_coord(dataarray, epoch=epoch) + if 'time' in dataarray.coords: + dataarray = set_cf_time_info(dataarray, epoch=epoch) return dataarray diff --git a/satpy/cf/datasets.py b/satpy/cf/datasets.py index c2799ab8d1..3dcbb33985 100644 --- a/satpy/cf/datasets.py +++ b/satpy/cf/datasets.py @@ -30,16 +30,16 @@ def _get_extra_ds(dataarray, keys=None): """Get the ancillary_variables DataArrays associated to a dataset.""" - ds_collection = {} + dict_datarrays = {} # Retrieve ancillary variable datarrays for ancillary_dataarray in dataarray.attrs.get('ancillary_variables', []): ancillary_variable = ancillary_dataarray.name if keys and ancillary_variable not in keys: keys.append(ancillary_variable) - ds_collection.update(_get_extra_ds(ancillary_dataarray, keys=keys)) + dict_datarrays.update(_get_extra_ds(ancillary_dataarray, keys=keys)) # Add input dataarray - ds_collection[dataarray.attrs['name']] = dataarray - return ds_collection + dict_datarrays[dataarray.attrs['name']] = dataarray + return dict_datarrays def _get_groups(groups, list_datarrays): @@ -98,23 +98,29 @@ def _collect_cf_dataset(list_dataarrays, ds : xr.Dataset A partially CF-compliant xr.Dataset """ - from satpy.cf.area import area2cf, assert_xy_unique, has_projection_coords, link_coords, make_alt_coords_unique + from satpy.cf.area import area2cf + from satpy.cf.coords import ( + add_coordinates_attrs_coords, + check_unique_projection_coords, + ensure_unique_nondimensional_coords, + has_projection_coords, + ) from satpy.cf.dataarray import make_cf_dataarray # Create dictionary of input datarrays # --> Since keys=None, it doesn't never retrieve ancillary variables !!! 
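For orientation, _get_extra_ds flattens a DataArray together with its ancillary variables into a flat name-to-DataArray mapping; a sketch with hypothetical arrays:

    import xarray as xr
    from satpy.cf.datasets import _get_extra_ds

    anc = xr.DataArray([1, 2], attrs={"name": "anc_var"})
    main = xr.DataArray([3, 4], attrs={"name": "main_var",
                                       "ancillary_variables": [anc]})
    result = _get_extra_ds(main)
    # Expected: {"anc_var": anc, "main_var": main}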
- ds_collection = {} + dict_dataarrays = {} for dataarray in list_dataarrays: - ds_collection.update(_get_extra_ds(dataarray)) + dict_dataarrays.update(_get_extra_ds(dataarray)) # Check if one DataArray in the collection has 'longitude' or 'latitude' - got_lonlats = has_projection_coords(ds_collection) + got_lonlats = has_projection_coords(dict_dataarrays) # Sort dictionary by keys name - ds_collection = dict(sorted(ds_collection.items())) + dict_dataarrays = dict(sorted(dict_dataarrays.items())) - dict_dataarrays = {} - for dataarray in ds_collection.values(): + dict_cf_dataarrays = {} + for dataarray in dict_dataarrays.values(): dataarray_type = dataarray.dtype if dataarray_type not in CF_DTYPES: warnings.warn( @@ -147,23 +153,24 @@ def _collect_cf_dataset(list_dataarrays, exclude_attrs=exclude_attrs, include_orig_name=include_orig_name, numeric_name_prefix=numeric_name_prefix) - dict_dataarrays[new_dataarray.name] = new_dataarray + dict_cf_dataarrays[new_dataarray.name] = new_dataarray - # Check all DataArray have same size - assert_xy_unique(dict_dataarrays) + # Check all DataArrays have same projection coordinates + check_unique_projection_coords(dict_cf_dataarrays) - # Deal with the 'coordinates' attributes indicating lat/lon coords - # NOTE: this currently is dropped by default !!! - link_coords(dict_dataarrays) + # Add to DataArrays the coordinates specified in the 'coordinates' attribute + # - Deal with the 'coordinates' attributes indicating lat/lon coords + # - The 'coordinates' attribute is dropped from each DataArray + dict_cf_dataarrays = add_coordinates_attrs_coords(dict_cf_dataarrays) # Ensure non-dimensional coordinates to be unique across DataArrays # --> If not unique, prepend the DataArray name to the coordinate # --> If unique, does not prepend the DataArray name only if pretty=True # --> 'longitude' and 'latitude' coordinates are not prepended - dict_dataarrays = make_alt_coords_unique(dict_dataarrays, pretty=pretty) + dict_cf_dataarrays = ensure_unique_nondimensional_coords(dict_cf_dataarrays, pretty=pretty) # Create a xr.Dataset - ds = xr.Dataset(dict_dataarrays) + ds = xr.Dataset(dict_cf_dataarrays) return ds diff --git a/satpy/etc/writers/cf.yaml b/satpy/etc/writers/cf.yaml index b141a68c09..5c4a875bec 100644 --- a/satpy/etc/writers/cf.yaml +++ b/satpy/etc/writers/cf.yaml @@ -1,7 +1,7 @@ writer: name: cf description: Generic netCDF4/CF Writer - writer: !!python/name:satpy.cf_writer.CFWriter + writer: !!python/name:satpy.writers.cf_writer.CFWriter filename: '{name}_{start_time:%Y%m%d_%H%M%S}.nc' compress: DEFLATE zlevel: 6 diff --git a/satpy/tests/cf_tests/_test_data.py b/satpy/tests/cf_tests/_test_data.py new file mode 100644 index 0000000000..2ea13afd2e --- /dev/null +++ b/satpy/tests/cf_tests/_test_data.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2017-2023 Satpy developers +# +# This file is part of satpy. +# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# satpy. If not, see . 
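The new helper module below centralizes the get_test_attrs fixture that was previously duplicated in test_attrs.py and test_dataaarray.py; the test modules touched later in this patch consume it roughly like this (sketch):

    from satpy.tests.cf_tests._test_data import get_test_attrs

    attrs, encoded, encoded_flat = get_test_attrs()
    # attrs: raw attribute dict; encoded: its netCDF-safe encoding;
    # encoded_flat: the same, with nested dicts flattened into underscore-joined keys.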
+"""Functions and fixture to test CF code.""" +import datetime +from collections import OrderedDict + +import numpy as np + + +def get_test_attrs(): + """Create some dataset attributes for testing purpose. + + Returns: + Attributes, encoded attributes, encoded and flattened attributes + + """ + attrs = { + 'name': 'IR_108', + 'start_time': datetime.datetime(2018, 1, 1, 0), + 'end_time': datetime.datetime(2018, 1, 1, 0, 15), + 'int': 1, + 'float': 1.0, + 'none': None, # should be dropped + 'numpy_int': np.uint8(1), + 'numpy_float': np.float32(1), + 'numpy_bool': True, + 'numpy_void': np.void(0), + 'numpy_bytes': np.bytes_('test'), + 'numpy_string': np.str_('test'), + 'list': [1, 2, np.float64(3)], + 'nested_list': ["1", ["2", [3]]], + 'bool': True, + 'array': np.array([1, 2, 3], dtype='uint8'), + 'array_bool': np.array([True, False, True]), + 'array_2d': np.array([[1, 2], [3, 4]]), + 'array_3d': np.array([[[1, 2], [3, 4]], [[1, 2], [3, 4]]]), + 'dict': {'a': 1, 'b': 2}, + 'nested_dict': {'l1': {'l2': {'l3': np.array([1, 2, 3], dtype='uint8')}}}, + 'raw_metadata': OrderedDict([ + ('recarray', np.zeros(3, dtype=[('x', 'i4'), ('y', 'u1')])), + ('flag', np.bool_(True)), + ('dict', OrderedDict([('a', 1), ('b', np.array([1, 2, 3], dtype='uint8'))])) + ]) + } + encoded = { + 'name': 'IR_108', + 'start_time': '2018-01-01 00:00:00', + 'end_time': '2018-01-01 00:15:00', + 'int': 1, + 'float': 1.0, + 'numpy_int': np.uint8(1), + 'numpy_float': np.float32(1), + 'numpy_bool': 'true', + 'numpy_void': '[]', + 'numpy_bytes': 'test', + 'numpy_string': 'test', + 'list': [1, 2, np.float64(3)], + 'nested_list': '["1", ["2", [3]]]', + 'bool': 'true', + 'array': np.array([1, 2, 3], dtype='uint8'), + 'array_bool': ['true', 'false', 'true'], + 'array_2d': '[[1, 2], [3, 4]]', + 'array_3d': '[[[1, 2], [3, 4]], [[1, 2], [3, 4]]]', + 'dict': '{"a": 1, "b": 2}', + 'nested_dict': '{"l1": {"l2": {"l3": [1, 2, 3]}}}', + 'raw_metadata': '{"recarray": [[0, 0], [0, 0], [0, 0]], ' + '"flag": "true", "dict": {"a": 1, "b": [1, 2, 3]}}' + } + encoded_flat = { + 'name': 'IR_108', + 'start_time': '2018-01-01 00:00:00', + 'end_time': '2018-01-01 00:15:00', + 'int': 1, + 'float': 1.0, + 'numpy_int': np.uint8(1), + 'numpy_float': np.float32(1), + 'numpy_bool': 'true', + 'numpy_void': '[]', + 'numpy_bytes': 'test', + 'numpy_string': 'test', + 'list': [1, 2, np.float64(3)], + 'nested_list': '["1", ["2", [3]]]', + 'bool': 'true', + 'array': np.array([1, 2, 3], dtype='uint8'), + 'array_bool': ['true', 'false', 'true'], + 'array_2d': '[[1, 2], [3, 4]]', + 'array_3d': '[[[1, 2], [3, 4]], [[1, 2], [3, 4]]]', + 'dict_a': 1, + 'dict_b': 2, + 'nested_dict_l1_l2_l3': np.array([1, 2, 3], dtype='uint8'), + 'raw_metadata_recarray': '[[0, 0], [0, 0], [0, 0]]', + 'raw_metadata_flag': 'true', + 'raw_metadata_dict_a': 1, + 'raw_metadata_dict_b': np.array([1, 2, 3], dtype='uint8') + } + return attrs, encoded, encoded_flat diff --git a/satpy/tests/cf_tests/test_area.py b/satpy/tests/cf_tests/test_area.py index 352bf35a2e..e34118c7cb 100644 --- a/satpy/tests/cf_tests/test_area.py +++ b/satpy/tests/cf_tests/test_area.py @@ -18,7 +18,6 @@ """Tests for the CF Area.""" import dask.array as da import numpy as np -import pytest import xarray as xr from pyresample import AreaDefinition, SwathDefinition @@ -26,100 +25,6 @@ class TestCFArea: """Test case for CF Area.""" - def test_assert_xy_unique(self): - """Test that the x and y coordinates are unique.""" - from satpy.cf.area import assert_xy_unique - - dummy = [[1, 2], [3, 4]] - datas = {'a': 
xr.DataArray(data=dummy, dims=('y', 'x'), coords={'y': [1, 2], 'x': [3, 4]}), - 'b': xr.DataArray(data=dummy, dims=('y', 'x'), coords={'y': [1, 2], 'x': [3, 4]}), - 'n': xr.DataArray(data=dummy, dims=('v', 'w'), coords={'v': [1, 2], 'w': [3, 4]})} - assert_xy_unique(datas) - - datas['c'] = xr.DataArray(data=dummy, dims=('y', 'x'), coords={'y': [1, 3], 'x': [3, 4]}) - with pytest.raises(ValueError): - assert_xy_unique(datas) - - def test_link_coords(self): - """Check that coordinates link has been established correctly.""" - from satpy.cf.area import link_coords - - data = [[1, 2], [3, 4]] - lon = np.zeros((2, 2)) - lon2 = np.zeros((1, 2, 2)) - lat = np.ones((2, 2)) - datasets = { - 'var1': xr.DataArray(data=data, dims=('y', 'x'), attrs={'coordinates': 'lon lat'}), - 'var2': xr.DataArray(data=data, dims=('y', 'x')), - 'var3': xr.DataArray(data=data, dims=('y', 'x'), attrs={'coordinates': 'lon2 lat'}), - 'var4': xr.DataArray(data=data, dims=('y', 'x'), attrs={'coordinates': 'not_exist lon lat'}), - 'lon': xr.DataArray(data=lon, dims=('y', 'x')), - 'lon2': xr.DataArray(data=lon2, dims=('time', 'y', 'x')), - 'lat': xr.DataArray(data=lat, dims=('y', 'x')) - } - - link_coords(datasets) - - # Check that link has been established correctly and 'coordinate' atrribute has been dropped - assert 'lon' in datasets['var1'].coords - assert 'lat' in datasets['var1'].coords - np.testing.assert_array_equal(datasets['var1']['lon'].data, lon) - np.testing.assert_array_equal(datasets['var1']['lat'].data, lat) - assert 'coordinates' not in datasets['var1'].attrs - - # There should be no link if there was no 'coordinate' attribute - assert 'lon' not in datasets['var2'].coords - assert 'lat' not in datasets['var2'].coords - - # The non-existent dimension or coordinate should be dropped - assert 'time' not in datasets['var3'].coords - assert 'not_exist' not in datasets['var4'].coords - - def test_make_alt_coords_unique(self): - """Test that created coordinate variables are unique.""" - from satpy.cf.area import make_alt_coords_unique - - data = [[1, 2], [3, 4]] - y = [1, 2] - x = [1, 2] - time1 = [1, 2] - time2 = [3, 4] - datasets = {'var1': xr.DataArray(data=data, - dims=('y', 'x'), - coords={'y': y, 'x': x, 'acq_time': ('y', time1)}), - 'var2': xr.DataArray(data=data, - dims=('y', 'x'), - coords={'y': y, 'x': x, 'acq_time': ('y', time2)})} - - # Test that dataset names are prepended to alternative coordinates - res = make_alt_coords_unique(datasets) - np.testing.assert_array_equal(res['var1']['var1_acq_time'], time1) - np.testing.assert_array_equal(res['var2']['var2_acq_time'], time2) - assert 'acq_time' not in res['var1'].coords - assert 'acq_time' not in res['var2'].coords - - # Make sure nothing else is modified - np.testing.assert_array_equal(res['var1']['x'], x) - np.testing.assert_array_equal(res['var1']['y'], y) - np.testing.assert_array_equal(res['var2']['x'], x) - np.testing.assert_array_equal(res['var2']['y'], y) - - # Coords not unique -> Dataset names must be prepended, even if pretty=True - with pytest.warns(UserWarning, match='Cannot pretty-format "acq_time"'): - res = make_alt_coords_unique(datasets, pretty=True) - np.testing.assert_array_equal(res['var1']['var1_acq_time'], time1) - np.testing.assert_array_equal(res['var2']['var2_acq_time'], time2) - assert 'acq_time' not in res['var1'].coords - assert 'acq_time' not in res['var2'].coords - - # Coords unique and pretty=True -> Don't modify coordinate names - datasets['var2']['acq_time'] = ('y', time1) - res = make_alt_coords_unique(datasets, 
pretty=True) - np.testing.assert_array_equal(res['var1']['acq_time'], time1) - np.testing.assert_array_equal(res['var2']['acq_time'], time1) - assert 'var1_acq_time' not in res['var1'].coords - assert 'var2_acq_time' not in res['var2'].coords - def test_area2cf(self): """Test the conversion of an area to CF standards.""" from satpy.cf.area import area2cf @@ -399,55 +304,3 @@ def test__add_lonlat_coords(self): np.testing.assert_array_equal(lon.data, lons_ref) assert {'name': 'latitude', 'standard_name': 'latitude', 'units': 'degrees_north'}.items() <= lat.attrs.items() assert {'name': 'longitude', 'standard_name': 'longitude', 'units': 'degrees_east'}.items() <= lon.attrs.items() - - @pytest.fixture - def datasets(self): - """Create test dataset.""" - data = [[75, 2], [3, 4]] - y = [1, 2] - x = [1, 2] - geos = AreaDefinition( - area_id='geos', - description='geos', - proj_id='geos', - projection={'proj': 'geos', 'h': 35785831., 'a': 6378169., 'b': 6356583.8}, - width=2, height=2, - area_extent=[-1, -1, 1, 1]) - datasets = { - 'var1': xr.DataArray(data=data, - dims=('y', 'x'), - coords={'y': y, 'x': x}), - 'var2': xr.DataArray(data=data, - dims=('y', 'x'), - coords={'y': y, 'x': x}), - 'lat': xr.DataArray(data=data, - dims=('y', 'x'), - coords={'y': y, 'x': x}), - 'lon': xr.DataArray(data=data, - dims=('y', 'x'), - coords={'y': y, 'x': x})} - datasets['lat'].attrs['standard_name'] = 'latitude' - datasets['var1'].attrs['standard_name'] = 'dummy' - datasets['var2'].attrs['standard_name'] = 'dummy' - datasets['var2'].attrs['area'] = geos - datasets['var1'].attrs['area'] = geos - datasets['lat'].attrs['name'] = 'lat' - datasets['var1'].attrs['name'] = 'var1' - datasets['var2'].attrs['name'] = 'var2' - datasets['lon'].attrs['name'] = 'lon' - return datasets - - def test__is_lon_or_lat_dataarray(self, datasets): - """Test the _is_lon_or_lat_dataarray function.""" - from satpy.cf.area import _is_lon_or_lat_dataarray - - assert _is_lon_or_lat_dataarray(datasets['lat']) - assert not _is_lon_or_lat_dataarray(datasets['var1']) - - def test_has_projection_coords(self, datasets): - """Test the has_projection_coords function.""" - from satpy.cf.area import has_projection_coords - - assert has_projection_coords(datasets) - datasets['lat'].attrs['standard_name'] = 'dummy' - assert not has_projection_coords(datasets) diff --git a/satpy/tests/cf_tests/test_attrs.py b/satpy/tests/cf_tests/test_attrs.py index 7eb86e172b..787d1dc82d 100644 --- a/satpy/tests/cf_tests/test_attrs.py +++ b/satpy/tests/cf_tests/test_attrs.py @@ -16,105 +16,19 @@ # You should have received a copy of the GNU General Public License along with # satpy. If not, see . """Tests for CF-compatible attributes encoding.""" -import datetime import json -from collections import OrderedDict - -import numpy as np class TestCFAttributeEncoding: """Test case for CF attribute encodings.""" - def get_test_attrs(self): - """Create some dataset attributes for testing purpose. 
- - Returns: - Attributes, encoded attributes, encoded and flattened attributes - - """ - # TODO: this is also used by test_da2cf - attrs = {'name': 'IR_108', - 'start_time': datetime.datetime(2018, 1, 1, 0), - 'end_time': datetime.datetime(2018, 1, 1, 0, 15), - 'int': 1, - 'float': 1.0, - 'none': None, # should be dropped - 'numpy_int': np.uint8(1), - 'numpy_float': np.float32(1), - 'numpy_bool': True, - 'numpy_void': np.void(0), - 'numpy_bytes': np.bytes_('test'), - 'numpy_string': np.str_('test'), - 'list': [1, 2, np.float64(3)], - 'nested_list': ["1", ["2", [3]]], - 'bool': True, - 'array': np.array([1, 2, 3], dtype='uint8'), - 'array_bool': np.array([True, False, True]), - 'array_2d': np.array([[1, 2], [3, 4]]), - 'array_3d': np.array([[[1, 2], [3, 4]], [[1, 2], [3, 4]]]), - 'dict': {'a': 1, 'b': 2}, - 'nested_dict': {'l1': {'l2': {'l3': np.array([1, 2, 3], dtype='uint8')}}}, - 'raw_metadata': OrderedDict([ - ('recarray', np.zeros(3, dtype=[('x', 'i4'), ('y', 'u1')])), - ('flag', np.bool_(True)), - ('dict', OrderedDict([('a', 1), ('b', np.array([1, 2, 3], dtype='uint8'))])) - ])} - encoded = {'name': 'IR_108', - 'start_time': '2018-01-01 00:00:00', - 'end_time': '2018-01-01 00:15:00', - 'int': 1, - 'float': 1.0, - 'numpy_int': np.uint8(1), - 'numpy_float': np.float32(1), - 'numpy_bool': 'true', - 'numpy_void': '[]', - 'numpy_bytes': 'test', - 'numpy_string': 'test', - 'list': [1, 2, np.float64(3)], - 'nested_list': '["1", ["2", [3]]]', - 'bool': 'true', - 'array': np.array([1, 2, 3], dtype='uint8'), - 'array_bool': ['true', 'false', 'true'], - 'array_2d': '[[1, 2], [3, 4]]', - 'array_3d': '[[[1, 2], [3, 4]], [[1, 2], [3, 4]]]', - 'dict': '{"a": 1, "b": 2}', - 'nested_dict': '{"l1": {"l2": {"l3": [1, 2, 3]}}}', - 'raw_metadata': '{"recarray": [[0, 0], [0, 0], [0, 0]], ' - '"flag": "true", "dict": {"a": 1, "b": [1, 2, 3]}}'} - encoded_flat = {'name': 'IR_108', - 'start_time': '2018-01-01 00:00:00', - 'end_time': '2018-01-01 00:15:00', - 'int': 1, - 'float': 1.0, - 'numpy_int': np.uint8(1), - 'numpy_float': np.float32(1), - 'numpy_bool': 'true', - 'numpy_void': '[]', - 'numpy_bytes': 'test', - 'numpy_string': 'test', - 'list': [1, 2, np.float64(3)], - 'nested_list': '["1", ["2", [3]]]', - 'bool': 'true', - 'array': np.array([1, 2, 3], dtype='uint8'), - 'array_bool': ['true', 'false', 'true'], - 'array_2d': '[[1, 2], [3, 4]]', - 'array_3d': '[[[1, 2], [3, 4]], [[1, 2], [3, 4]]]', - 'dict_a': 1, - 'dict_b': 2, - 'nested_dict_l1_l2_l3': np.array([1, 2, 3], dtype='uint8'), - 'raw_metadata_recarray': '[[0, 0], [0, 0], [0, 0]]', - 'raw_metadata_flag': 'true', - 'raw_metadata_dict_a': 1, - 'raw_metadata_dict_b': np.array([1, 2, 3], dtype='uint8')} - return attrs, encoded, encoded_flat - def test__encode_attrs_nc(self): """Test attributes encoding.""" from satpy.cf.attrs import _encode_attrs_nc + from satpy.tests.cf_tests._test_data import get_test_attrs from satpy.tests.utils import assert_dict_array_equality - attrs, expected, _ = self.get_test_attrs() + attrs, expected, _ = get_test_attrs() # Test encoding encoded = _encode_attrs_nc(attrs) diff --git a/satpy/tests/cf_tests/test_coords.py b/satpy/tests/cf_tests/test_coords.py index 1361c0d5a4..0d0b5436cd 100644 --- a/satpy/tests/cf_tests/test_coords.py +++ b/satpy/tests/cf_tests/test_coords.py @@ -19,7 +19,9 @@ import logging import numpy as np +import pytest import xarray as xr +from pyresample import AreaDefinition # NOTE: # The following fixtures are not defined in this file, but are used and injected by Pytest: @@ -49,10 +51,106 @@ def 
test_add_time_bounds_dimension(self): assert "bounds" in ds["time"].attrs assert "standard_name" in ds["time"].attrs + # set_cf_time_info + class TestCFcoords: """Test cases for CF spatial dimension and coordinates.""" + def test_check_unique_projection_coords(self): + """Test that the x and y coordinates are unique.""" + from satpy.cf.coords import check_unique_projection_coords + + dummy = [[1, 2], [3, 4]] + datas = {'a': xr.DataArray(data=dummy, dims=('y', 'x'), coords={'y': [1, 2], 'x': [3, 4]}), + 'b': xr.DataArray(data=dummy, dims=('y', 'x'), coords={'y': [1, 2], 'x': [3, 4]}), + 'n': xr.DataArray(data=dummy, dims=('v', 'w'), coords={'v': [1, 2], 'w': [3, 4]})} + check_unique_projection_coords(datas) + + datas['c'] = xr.DataArray(data=dummy, dims=('y', 'x'), coords={'y': [1, 3], 'x': [3, 4]}) + with pytest.raises(ValueError): + check_unique_projection_coords(datas) + + def test_add_coordinates_attrs_coords(self): + """Check that coordinates link has been established correctly.""" + from satpy.cf.coords import add_coordinates_attrs_coords + + data = [[1, 2], [3, 4]] + lon = np.zeros((2, 2)) + lon2 = np.zeros((1, 2, 2)) + lat = np.ones((2, 2)) + datasets = { + 'var1': xr.DataArray(data=data, dims=('y', 'x'), attrs={'coordinates': 'lon lat'}), + 'var2': xr.DataArray(data=data, dims=('y', 'x')), + 'var3': xr.DataArray(data=data, dims=('y', 'x'), attrs={'coordinates': 'lon2 lat'}), + 'var4': xr.DataArray(data=data, dims=('y', 'x'), attrs={'coordinates': 'not_exist lon lat'}), + 'lon': xr.DataArray(data=lon, dims=('y', 'x')), + 'lon2': xr.DataArray(data=lon2, dims=('time', 'y', 'x')), + 'lat': xr.DataArray(data=lat, dims=('y', 'x')) + } + + datasets = add_coordinates_attrs_coords(datasets) + + # Check that link has been established correctly and 'coordinates' attribute has been dropped + assert 'lon' in datasets['var1'].coords + assert 'lat' in datasets['var1'].coords + np.testing.assert_array_equal(datasets['var1']['lon'].data, lon) + np.testing.assert_array_equal(datasets['var1']['lat'].data, lat) + assert 'coordinates' not in datasets['var1'].attrs + + # There should be no link if there was no 'coordinates' attribute + assert 'lon' not in datasets['var2'].coords + assert 'lat' not in datasets['var2'].coords + + # The non-existent dimension or coordinate should be dropped + assert 'time' not in datasets['var3'].coords + assert 'not_exist' not in datasets['var4'].coords + + def test_ensure_unique_nondimensional_coords(self): + """Test that created coordinate variables are unique.""" + from satpy.cf.coords import ensure_unique_nondimensional_coords + + data = [[1, 2], [3, 4]] + y = [1, 2] + x = [1, 2] + time1 = [1, 2] + time2 = [3, 4] + datasets = {'var1': xr.DataArray(data=data, + dims=('y', 'x'), + coords={'y': y, 'x': x, 'acq_time': ('y', time1)}), + 'var2': xr.DataArray(data=data, + dims=('y', 'x'), + coords={'y': y, 'x': x, 'acq_time': ('y', time2)})} + + # Test that dataset names are prepended to alternative coordinates + res = ensure_unique_nondimensional_coords(datasets) + np.testing.assert_array_equal(res['var1']['var1_acq_time'], time1) + np.testing.assert_array_equal(res['var2']['var2_acq_time'], time2) + assert 'acq_time' not in res['var1'].coords + assert 'acq_time' not in res['var2'].coords + + # Make sure nothing else is modified + np.testing.assert_array_equal(res['var1']['x'], x) + np.testing.assert_array_equal(res['var1']['y'], y) + np.testing.assert_array_equal(res['var2']['x'], x) + np.testing.assert_array_equal(res['var2']['y'], y) + + # Coords not unique -> Dataset 
names must be prepended, even if pretty=True + with pytest.warns(UserWarning, match='Cannot pretty-format "acq_time"'): + res = ensure_unique_nondimensional_coords(datasets, pretty=True) + np.testing.assert_array_equal(res['var1']['var1_acq_time'], time1) + np.testing.assert_array_equal(res['var2']['var2_acq_time'], time2) + assert 'acq_time' not in res['var1'].coords + assert 'acq_time' not in res['var2'].coords + + # Coords unique and pretty=True -> Don't modify coordinate names + datasets['var2']['acq_time'] = ('y', time1) + res = ensure_unique_nondimensional_coords(datasets, pretty=True) + np.testing.assert_array_equal(res['var1']['acq_time'], time1) + np.testing.assert_array_equal(res['var2']['acq_time'], time1) + assert 'var1_acq_time' not in res['var1'].coords + assert 'var2_acq_time' not in res['var2'].coords + def test_is_projected(self, caplog): """Tests for private _is_projected function.""" from satpy.cf.coords import _is_projected @@ -79,5 +177,56 @@ def test_is_projected(self, caplog): assert _is_projected(da) assert "Failed to tell if data are projected." in caplog.text + @pytest.fixture + def datasets(self): + """Create test dataset.""" + data = [[75, 2], [3, 4]] + y = [1, 2] + x = [1, 2] + geos = AreaDefinition( + area_id='geos', + description='geos', + proj_id='geos', + projection={'proj': 'geos', 'h': 35785831., 'a': 6378169., 'b': 6356583.8}, + width=2, height=2, + area_extent=[-1, -1, 1, 1]) + datasets = { + 'var1': xr.DataArray(data=data, + dims=('y', 'x'), + coords={'y': y, 'x': x}), + 'var2': xr.DataArray(data=data, + dims=('y', 'x'), + coords={'y': y, 'x': x}), + 'lat': xr.DataArray(data=data, + dims=('y', 'x'), + coords={'y': y, 'x': x}), + 'lon': xr.DataArray(data=data, + dims=('y', 'x'), + coords={'y': y, 'x': x})} + datasets['lat'].attrs['standard_name'] = 'latitude' + datasets['var1'].attrs['standard_name'] = 'dummy' + datasets['var2'].attrs['standard_name'] = 'dummy' + datasets['var2'].attrs['area'] = geos + datasets['var1'].attrs['area'] = geos + datasets['lat'].attrs['name'] = 'lat' + datasets['var1'].attrs['name'] = 'var1' + datasets['var2'].attrs['name'] = 'var2' + datasets['lon'].attrs['name'] = 'lon' + return datasets + + def test__is_lon_or_lat_dataarray(self, datasets): + """Test the _is_lon_or_lat_dataarray function.""" + from satpy.cf.coords import _is_lon_or_lat_dataarray + + assert _is_lon_or_lat_dataarray(datasets['lat']) + assert not _is_lon_or_lat_dataarray(datasets['var1']) + + def test_has_projection_coords(self, datasets): + """Test the has_projection_coords function.""" + from satpy.cf.coords import has_projection_coords + + assert has_projection_coords(datasets) + datasets['lat'].attrs['standard_name'] = 'dummy' + assert not has_projection_coords(datasets) + # add_xy_coords_attrs - # process_time_coord diff --git a/satpy/tests/cf_tests/test_dataaarray.py b/satpy/tests/cf_tests/test_dataaarray.py index 0fd3a9d41c..90fbae4558 100644 --- a/satpy/tests/cf_tests/test_dataaarray.py +++ b/satpy/tests/cf_tests/test_dataaarray.py @@ -16,10 +16,6 @@ # You should have received a copy of the GNU General Public License along with # satpy. If not, see . """Tests CF-compliant DataArray creation.""" - -import datetime -from collections import OrderedDict - import numpy as np import xarray as xr @@ -71,96 +67,14 @@ def test_make_cf_dataarray_lonlat(): class TestCfDataArray: """Test creation of CF DataArray.""" - def get_test_attrs(self): - """Create some dataset attributes for testing purpose. 
- - Returns: - Attributes, encoded attributes, encoded and flattened attributes - - """ - # TODO: also used by cf/test_attrs.py - attrs = {'name': 'IR_108', - 'start_time': datetime.datetime(2018, 1, 1, 0), - 'end_time': datetime.datetime(2018, 1, 1, 0, 15), - 'int': 1, - 'float': 1.0, - 'none': None, # should be dropped - 'numpy_int': np.uint8(1), - 'numpy_float': np.float32(1), - 'numpy_bool': True, - 'numpy_void': np.void(0), - 'numpy_bytes': np.bytes_('test'), - 'numpy_string': np.str_('test'), - 'list': [1, 2, np.float64(3)], - 'nested_list': ["1", ["2", [3]]], - 'bool': True, - 'array': np.array([1, 2, 3], dtype='uint8'), - 'array_bool': np.array([True, False, True]), - 'array_2d': np.array([[1, 2], [3, 4]]), - 'array_3d': np.array([[[1, 2], [3, 4]], [[1, 2], [3, 4]]]), - 'dict': {'a': 1, 'b': 2}, - 'nested_dict': {'l1': {'l2': {'l3': np.array([1, 2, 3], dtype='uint8')}}}, - 'raw_metadata': OrderedDict([ - ('recarray', np.zeros(3, dtype=[('x', 'i4'), ('y', 'u1')])), - ('flag', np.bool_(True)), - ('dict', OrderedDict([('a', 1), ('b', np.array([1, 2, 3], dtype='uint8'))])) - ])} - encoded = {'name': 'IR_108', - 'start_time': '2018-01-01 00:00:00', - 'end_time': '2018-01-01 00:15:00', - 'int': 1, - 'float': 1.0, - 'numpy_int': np.uint8(1), - 'numpy_float': np.float32(1), - 'numpy_bool': 'true', - 'numpy_void': '[]', - 'numpy_bytes': 'test', - 'numpy_string': 'test', - 'list': [1, 2, np.float64(3)], - 'nested_list': '["1", ["2", [3]]]', - 'bool': 'true', - 'array': np.array([1, 2, 3], dtype='uint8'), - 'array_bool': ['true', 'false', 'true'], - 'array_2d': '[[1, 2], [3, 4]]', - 'array_3d': '[[[1, 2], [3, 4]], [[1, 2], [3, 4]]]', - 'dict': '{"a": 1, "b": 2}', - 'nested_dict': '{"l1": {"l2": {"l3": [1, 2, 3]}}}', - 'raw_metadata': '{"recarray": [[0, 0], [0, 0], [0, 0]], ' - '"flag": "true", "dict": {"a": 1, "b": [1, 2, 3]}}'} - encoded_flat = {'name': 'IR_108', - 'start_time': '2018-01-01 00:00:00', - 'end_time': '2018-01-01 00:15:00', - 'int': 1, - 'float': 1.0, - 'numpy_int': np.uint8(1), - 'numpy_float': np.float32(1), - 'numpy_bool': 'true', - 'numpy_void': '[]', - 'numpy_bytes': 'test', - 'numpy_string': 'test', - 'list': [1, 2, np.float64(3)], - 'nested_list': '["1", ["2", [3]]]', - 'bool': 'true', - 'array': np.array([1, 2, 3], dtype='uint8'), - 'array_bool': ['true', 'false', 'true'], - 'array_2d': '[[1, 2], [3, 4]]', - 'array_3d': '[[[1, 2], [3, 4]], [[1, 2], [3, 4]]]', - 'dict_a': 1, - 'dict_b': 2, - 'nested_dict_l1_l2_l3': np.array([1, 2, 3], dtype='uint8'), - 'raw_metadata_recarray': '[[0, 0], [0, 0], [0, 0]]', - 'raw_metadata_flag': 'true', - 'raw_metadata_dict_a': 1, - 'raw_metadata_dict_b': np.array([1, 2, 3], dtype='uint8')} - return attrs, encoded, encoded_flat - def test_make_cf_dataarray(self): """Test the conversion of a DataArray to a CF-compatible DataArray.""" from satpy.cf.dataarray import make_cf_dataarray + from satpy.tests.cf_tests._test_data import get_test_attrs from satpy.tests.utils import assert_dict_array_equality # Create set of test attributes - attrs, attrs_expected, attrs_expected_flat = self.get_test_attrs() + attrs, attrs_expected, attrs_expected_flat = get_test_attrs() attrs['area'] = 'some_area' attrs['prerequisites'] = [make_dsq(name='hej')] attrs['_satpy_id_name'] = 'myname' From 57afb82959ac68129e89ba4179880fecc9f1fa59 Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 10 Oct 2023 20:45:32 +0200 Subject: [PATCH 18/37] Fix cf_writer module path --- satpy/tests/writer_tests/test_cf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/satpy/tests/writer_tests/test_cf.py b/satpy/tests/writer_tests/test_cf.py index 31e59a2fb7..f50a8e99d3 100644 --- a/satpy/tests/writer_tests/test_cf.py +++ b/satpy/tests/writer_tests/test_cf.py @@ -403,11 +403,11 @@ def test_load_module_with_old_pyproj(self): old_version = sys.modules['pyproj'].__version__ sys.modules['pyproj'].__version__ = "1.9.6" try: - importlib.reload(sys.modules['satpy.cf_writer']) + importlib.reload(sys.modules['satpy.writers.cf_writer']) finally: # Tear down sys.modules['pyproj'].__version__ = old_version - importlib.reload(sys.modules['satpy.cf_writer']) + importlib.reload(sys.modules['satpy.writers.cf_writer']) def test_global_attr_default_history_and_Conventions(self): """Test saving global attributes history and Conventions.""" From 9ef2af9cbc3642c0dc90a55e8e4e097d475997f8 Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 14 Nov 2023 11:54:48 +0100 Subject: [PATCH 19/37] Avoid modification to CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 12c8e50194..799ae0a867 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1143,7 +1143,7 @@ In this release 6 issues were closed. * [PR 1606](https://github.com/pytroll/satpy/pull/1606) - Make FSFile hashable again ([1605](https://github.com/pytroll/satpy/issues/1605), [1604](https://github.com/pytroll/satpy/issues/1604)) * [PR 1603](https://github.com/pytroll/satpy/pull/1603) - Update slstr_l2.yaml * [PR 1600](https://github.com/pytroll/satpy/pull/1600) - When setting `upper_right_corner` make sure that all dataset coordinates are flipped -* [PR 1588](https://github.com/pytroll/satpy/pull/1588) - Bugfix of add_coordinates_attrs_coords ([1493](https://github.com/pytroll/satpy/issues/1493)) +* [PR 1588](https://github.com/pytroll/satpy/pull/1588) - Bugfix of link_coords ([1493](https://github.com/pytroll/satpy/issues/1493)) #### Features added From e96f0de1f935865146a9967e35db65c8f0063b2f Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 14 Nov 2023 12:02:42 +0100 Subject: [PATCH 20/37] Rename _encode* functions --- satpy/cf/attrs.py | 30 +++++++++++++++--------------- satpy/tests/cf_tests/test_attrs.py | 6 +++--- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/satpy/cf/attrs.py b/satpy/cf/attrs.py index d4a41f2bfc..f4d6ece0d0 100644 --- a/satpy/cf/attrs.py +++ b/satpy/cf/attrs.py @@ -65,7 +65,7 @@ def _encode(self, obj): return str(obj) -def __encode_nc(obj): +def _encode_object(obj): """Try to encode `obj` as a netCDF/Zarr compatible datatype which most closely resembles the object's nature. Raises: @@ -90,23 +90,15 @@ def __encode_nc(obj): raise ValueError('Unable to encode') -def _encode_nc(obj): - """Encode the given object as a netcdf compatible datatype.""" - try: - return obj.to_cf() - except AttributeError: - return _encode_python_objects(obj) - - def _encode_python_objects(obj): """Try to find the datatype which most closely resembles the object's nature. If on failure, encode as a string. Plain lists are encoded recursively. 
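A sketch of the renamed fallback chain, with expected outputs taken from the test data introduced earlier in this series (assumes numpy as np and the private helpers in satpy.cf.attrs):

    _encode_to_cf(np.bool_(True))  # expected: 'true' (via _encode_object)
    _encode_to_cf({"a": 1})        # expected: '{"a": 1}' (JSON-serialized dict)
    _encode_to_cf([1, 2, 3])       # expected: [1, 2, 3] (plain lists element-wise)
    _encode_to_cf(np.void(0))      # expected: '[]' (string fallback)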
""" if isinstance(obj, (list, tuple)) and all([not isinstance(item, (list, tuple)) for item in obj]): - return [_encode_nc(item) for item in obj] + return [_encode_to_cf(item) for item in obj] try: - dump = __encode_nc(obj) + dump = _encode_object(obj) except ValueError: try: # Decode byte-strings @@ -117,7 +109,15 @@ def _encode_python_objects(obj): return dump -def _encode_attrs_nc(attrs): +def _encode_to_cf(obj): + """Encode the given object as a netcdf compatible datatype.""" + try: + return obj.to_cf() + except AttributeError: + return _encode_python_objects(obj) + + +def _encode_nc_attrs(attrs): """Encode dataset attributes in a netcdf compatible datatype. Args: @@ -130,7 +130,7 @@ def _encode_attrs_nc(attrs): encoded_attrs = [] for key, val in sorted(attrs.items()): if val is not None: - encoded_attrs.append((key, _encode_nc(val))) + encoded_attrs.append((key, _encode_to_cf(val))) return OrderedDict(encoded_attrs) @@ -193,7 +193,7 @@ def preprocess_datarray_attrs(dataarray, flatten_attrs, exclude_attrs): if flatten_attrs: dataarray.attrs = flatten_dict(dataarray.attrs) - dataarray.attrs = _encode_attrs_nc(dataarray.attrs) + dataarray.attrs = _encode_nc_attrs(dataarray.attrs) return dataarray @@ -215,7 +215,7 @@ def preprocess_header_attrs(header_attrs, flatten_attrs=False): if header_attrs is not None: if flatten_attrs: header_attrs = flatten_dict(header_attrs) - header_attrs = _encode_attrs_nc(header_attrs) # OrderedDict + header_attrs = _encode_nc_attrs(header_attrs) # OrderedDict else: header_attrs = {} header_attrs = _add_history(header_attrs) diff --git a/satpy/tests/cf_tests/test_attrs.py b/satpy/tests/cf_tests/test_attrs.py index 787d1dc82d..9306ae9749 100644 --- a/satpy/tests/cf_tests/test_attrs.py +++ b/satpy/tests/cf_tests/test_attrs.py @@ -22,16 +22,16 @@ class TestCFAttributeEncoding: """Test case for CF attribute encodings.""" - def test__encode_attrs_nc(self): + def test__encode_nc_attrs(self): """Test attributes encoding.""" - from satpy.cf.attrs import _encode_attrs_nc + from satpy.cf.attrs import _encode_nc_attrs from satpy.tests.cf_tests._test_data import get_test_attrs from satpy.tests.utils import assert_dict_array_equality attrs, expected, _ = get_test_attrs() # Test encoding - encoded = _encode_attrs_nc(attrs) + encoded = _encode_nc_attrs(attrs) assert_dict_array_equality(expected, encoded) # Test decoding of json-encoded attributes From 48df162107648870be89f0af9ddbd4fa6d1d9aba Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 14 Nov 2023 12:04:24 +0100 Subject: [PATCH 21/37] Update xarray version --- satpy/tests/writer_tests/test_cf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/tests/writer_tests/test_cf.py b/satpy/tests/writer_tests/test_cf.py index f50a8e99d3..fc1fa67343 100644 --- a/satpy/tests/writer_tests/test_cf.py +++ b/satpy/tests/writer_tests/test_cf.py @@ -570,5 +570,5 @@ def _should_use_compression_keyword(): versions = _get_backend_versions() return ( versions["libnetcdf"] >= Version("4.9.0") and - versions["xarray"] >= Version("2023.10") + versions["xarray"] >= Version("2023.11") ) From 14b1f066416190b464b630750d12519f00ef5fe4 Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 14 Nov 2023 13:36:54 +0100 Subject: [PATCH 22/37] Set default epoch argument to None --- satpy/_scene_converters.py | 4 --- satpy/cf/coords.py | 2 +- satpy/cf/dataarray.py | 6 ++++- satpy/cf/datasets.py | 33 ++++++++++++++----------- satpy/scene.py | 2 +- satpy/tests/cf_tests/test_coords.py | 2 -- satpy/tests/cf_tests/test_dataaarray.py | 2 -- 
satpy/tests/test_writers.py | 3 +-- satpy/writers/cf_writer.py | 20 ++++++++------- 9 files changed, 37 insertions(+), 37 deletions(-) diff --git a/satpy/_scene_converters.py b/satpy/_scene_converters.py index 54ccf85ac9..a890963a06 100644 --- a/satpy/_scene_converters.py +++ b/satpy/_scene_converters.py @@ -90,12 +90,8 @@ def to_xarray(scn, A CF-compliant xr.Dataset """ - from satpy.cf import EPOCH from satpy.cf.datasets import collect_cf_datasets - if epoch is None: - epoch = EPOCH - # Get list of DataArrays if datasets is None: datasets = list(scn.keys()) # list all loaded DataIDs diff --git a/satpy/cf/coords.py b/satpy/cf/coords.py index f486502a44..6e7a0892e9 100644 --- a/satpy/cf/coords.py +++ b/satpy/cf/coords.py @@ -188,7 +188,7 @@ def check_unique_projection_coords(dict_dataarrays): token_x = tokenize(dataarray["x"].data) unique_x.add(token_x) if len(unique_x) > 1 or len(unique_y) > 1: - raise ValueError("Datasets to be saved in one file (or one group) must have identical projection coordinates. " + raise ValueError("Datasets to be saved in one file (or one group) must have identical projection coordinates." "Please group them by area or save them in separate files.") diff --git a/satpy/cf/dataarray.py b/satpy/cf/dataarray.py index 5a7779f4c1..a8ac78d9a3 100644 --- a/satpy/cf/dataarray.py +++ b/satpy/cf/dataarray.py @@ -57,7 +57,7 @@ def _preprocess_dataarray_name(dataarray, numeric_name_prefix, include_orig_name def make_cf_dataarray(dataarray, - epoch=EPOCH, + epoch=None, flatten_attrs=False, exclude_attrs=None, include_orig_name=True, @@ -70,6 +70,7 @@ def make_cf_dataarray(dataarray, The data array to be made CF-compliant. epoch : str, optional Reference time for encoding of time coordinates. + If None, the default reference time is retrieved using `from satpy.cf import EPOCH` flatten_attrs : bool, optional If True, flatten dict-type attributes. The default is False. @@ -89,6 +90,9 @@ def make_cf_dataarray(dataarray, CF-compliant xr.DataArray. """ + if epoch is None: + epoch = EPOCH + dataarray = _preprocess_dataarray_name(dataarray=dataarray, numeric_name_prefix=numeric_name_prefix, include_orig_name=include_orig_name) diff --git a/satpy/cf/datasets.py b/satpy/cf/datasets.py index e801be2fdf..d85a943fe7 100644 --- a/satpy/cf/datasets.py +++ b/satpy/cf/datasets.py @@ -61,13 +61,14 @@ def _get_groups(groups, list_datarrays): def _collect_cf_dataset(list_dataarrays, - epoch=EPOCH, - flatten_attrs=False, - exclude_attrs=None, - include_lonlats=True, - pretty=False, - include_orig_name=True, - numeric_name_prefix="CHANNEL_"): + epoch, + flatten_attrs, + exclude_attrs, + include_lonlats, + pretty, + include_orig_name, + numeric_name_prefix, + ): """Process a list of xr.DataArray and return a dictionary with CF-compliant xr.Dataset. Parameters @@ -77,19 +78,18 @@ def _collect_cf_dataset(list_dataarrays, epoch : str Reference time for encoding the time coordinates (if available). Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is retrieved using `from satpy.cf import EPOCH` - flatten_attrs : bool, optional + flatten_attrs : bool If True, flatten dict-type attributes. - exclude_attrs : list, optional + exclude_attrs : list List of xr.DataArray attribute names to be excluded. - include_lonlats : bool, optional + include_lonlats : bool If True, it includes 'latitude' and 'longitude' coordinates also for satpy scene defined on an AreaDefinition. If the 'area' attribute is a SwathDefinition, it always include latitude and longitude coordinates. 
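The net effect of the epoch change in this patch is that the library default is resolved once inside the implementation instead of being baked into every signature; a usage sketch (da is a hypothetical DataArray):

    from satpy.cf.dataarray import make_cf_dataarray

    cf_da = make_cf_dataarray(da)  # epoch=None falls back to satpy.cf.EPOCH
    cf_da = make_cf_dataarray(da, epoch="seconds since 2000-01-01 00:00:00")  # explicit override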
- pretty : bool, optional + pretty : bool Don't modify coordinate names, if possible. Makes the file prettier, but possibly less consistent. - include_orig_name : bool, optional + include_orig_name : bool Include the original dataset name as a variable attribute in the xr.Dataset. - numeric_name_prefix : str, optional + numeric_name_prefix : str Prefix to add the each variable with name starting with a digit. Use '' or None to leave this out. @@ -180,7 +180,7 @@ def collect_cf_datasets(list_dataarrays, flatten_attrs=False, pretty=True, include_lonlats=True, - epoch=EPOCH, + epoch=None, include_orig_name=True, numeric_name_prefix="CHANNEL_", groups=None): @@ -231,6 +231,9 @@ def collect_cf_datasets(list_dataarrays, from satpy.cf.attrs import preprocess_header_attrs from satpy.cf.coords import add_time_bounds_dimension + if epoch is None: + epoch = EPOCH + if not list_dataarrays: raise RuntimeError("None of the requested datasets have been " "generated or could not be loaded. Requested " diff --git a/satpy/scene.py b/satpy/scene.py index 587721a7be..bb8cf0ffab 100644 --- a/satpy/scene.py +++ b/satpy/scene.py @@ -1128,7 +1128,7 @@ def to_xarray(self, epoch (str): Reference time for encoding the time coordinates (if available). Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is retrieved using "from satpy.cf import EPOCH" + If None, the default reference time is defined using "from satpy.cf import EPOCH" flatten_attrs (bool): If True, flatten dict-type attributes. exclude_attrs (list): diff --git a/satpy/tests/cf_tests/test_coords.py b/satpy/tests/cf_tests/test_coords.py index 2462f59181..68ba319741 100644 --- a/satpy/tests/cf_tests/test_coords.py +++ b/satpy/tests/cf_tests/test_coords.py @@ -228,5 +228,3 @@ def test_has_projection_coords(self, datasets): assert has_projection_coords(datasets) datasets["lat"].attrs["standard_name"] = "dummy" assert not has_projection_coords(datasets) - - # add_xy_coords_attrs diff --git a/satpy/tests/cf_tests/test_dataaarray.py b/satpy/tests/cf_tests/test_dataaarray.py index 099013bbbc..d0154cd84f 100644 --- a/satpy/tests/cf_tests/test_dataaarray.py +++ b/satpy/tests/cf_tests/test_dataaarray.py @@ -114,5 +114,3 @@ def test_make_cf_dataarray_one_dimensional_array(self): arr = xr.DataArray(np.array([1, 2, 3, 4]), attrs={}, dims=("y",), coords={"y": [0, 1, 2, 3], "acq_time": ("y", [0, 1, 2, 3])}) _ = make_cf_dataarray(arr) - - # _handle_dataarray_name diff --git a/satpy/tests/test_writers.py b/satpy/tests/test_writers.py index c2d049dae1..c11066d3f6 100644 --- a/satpy/tests/test_writers.py +++ b/satpy/tests/test_writers.py @@ -874,9 +874,8 @@ def test_group_results_by_output_file(tmp_path): """ from pyresample import create_area_def + from satpy.tests.utils import make_fake_scene from satpy.writers import group_results_by_output_file - - from .utils import make_fake_scene x = 10 fake_area = create_area_def("sargasso", 4326, resolution=1, width=x, height=x, center=(0, 0)) fake_scene = make_fake_scene( diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index de3b445025..7076cc841d 100644 --- a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -162,7 +162,7 @@ import xarray as xr from packaging.version import Version -from satpy.cf import EPOCH +from satpy.cf import EPOCH # noqa: F401 (for backward compatibility) from satpy.writers import Writer logger = logging.getLogger(__name__) @@ -232,7 +232,7 @@ def save_dataset(self, dataset, filename=None, fill_value=None, **kwargs): """Save the 
*dataset* to a given *filename*.""" return self.save_datasets([dataset], filename, **kwargs) - def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, engine=None, epoch=EPOCH, + def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, engine=None, epoch=None, flatten_attrs=False, exclude_attrs=None, include_lonlats=True, pretty=False, include_orig_name=True, numeric_name_prefix="CHANNEL_", **to_netcdf_kwargs): """Save the given datasets in one netCDF file. @@ -256,6 +256,7 @@ def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, preference for 'netcdf4'. epoch (str): Reference time for encoding of time coordinates. + If None, the default reference time is defined using `from satpy.cf import EPOCH` flatten_attrs (bool): If True, flatten dict-type attributes. exclude_attrs (list): @@ -326,23 +327,24 @@ def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, return written @staticmethod - def da2cf(dataarray, epoch=EPOCH, flatten_attrs=False, exclude_attrs=None, + def da2cf(dataarray, epoch=None, flatten_attrs=False, exclude_attrs=None, include_orig_name=True, numeric_name_prefix="CHANNEL_"): """Convert the dataarray to something cf-compatible. Args: dataarray (xr.DataArray): - The data array to be converted + The data array to be converted. epoch (str): - Reference time for encoding of time coordinates + Reference time for encoding of time coordinates. + If None, the default reference time is defined using `from satpy.cf import EPOCH` flatten_attrs (bool): - If True, flatten dict-type attributes + If True, flatten dict-type attributes. exclude_attrs (list): - List of dataset attributes to be excluded + List of dataset attributes to be excluded. include_orig_name (bool): - Include the original dataset name in the netcdf variable attributes + Include the original dataset name in the netcdf variable attributes. numeric_name_prefix (str): - Prepend dataset name with this if starting with a digit + Prepend dataset name with this if starting with a digit. """ from satpy.cf.dataarray import make_cf_dataarray warnings.warn("CFWriter.da2cf is deprecated." From ec5f8fc4cafb1a89a39a42a7311077c896ef59a1 Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 14 Nov 2023 13:41:05 +0100 Subject: [PATCH 23/37] Reduce future risk of circular imports --- satpy/_scene_converters.py | 2 +- satpy/cf/dataarray.py | 5 +++-- satpy/cf/datasets.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/satpy/_scene_converters.py b/satpy/_scene_converters.py index a890963a06..c400a159f1 100644 --- a/satpy/_scene_converters.py +++ b/satpy/_scene_converters.py @@ -66,7 +66,7 @@ def to_xarray(scn, epoch (str): Reference time for encoding the time coordinates (if available). Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is retrieved using "from satpy.cf import EPOCH" + If None, the default reference time is defined using "from satpy.cf import EPOCH" flatten_attrs (bool): If True, flatten dict-type attributes. exclude_attrs (list): diff --git a/satpy/cf/dataarray.py b/satpy/cf/dataarray.py index a8ac78d9a3..39b5d82dc9 100644 --- a/satpy/cf/dataarray.py +++ b/satpy/cf/dataarray.py @@ -19,7 +19,6 @@ import logging import warnings -from satpy.cf import EPOCH from satpy.cf.attrs import preprocess_datarray_attrs from satpy.cf.coords import add_xy_coords_attrs, set_cf_time_info @@ -70,7 +69,7 @@ def make_cf_dataarray(dataarray, The data array to be made CF-compliant. 
epoch : str, optional Reference time for encoding of time coordinates. - If None, the default reference time is retrieved using `from satpy.cf import EPOCH` + If None, the default reference time is defined using `from satpy.cf import EPOCH` flatten_attrs : bool, optional If True, flatten dict-type attributes. The default is False. @@ -90,6 +89,8 @@ def make_cf_dataarray(dataarray, CF-compliant xr.DataArray. """ + from satpy.cf import EPOCH + if epoch is None: epoch = EPOCH diff --git a/satpy/cf/datasets.py b/satpy/cf/datasets.py index d85a943fe7..43b85003de 100644 --- a/satpy/cf/datasets.py +++ b/satpy/cf/datasets.py @@ -22,7 +22,6 @@ import xarray as xr -from satpy.cf import EPOCH from satpy.writers.cf_writer import CF_DTYPES, CF_VERSION logger = logging.getLogger(__name__) @@ -228,6 +227,7 @@ def collect_cf_datasets(list_dataarrays, header_attrs : dict Global attributes to be attached to the xr.Dataset / netCDF4. """ + from satpy.cf import EPOCH from satpy.cf.attrs import preprocess_header_attrs from satpy.cf.coords import add_time_bounds_dimension From 9a8810f4303630c561addd28eab90dfb1ed93d32 Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 14 Nov 2023 14:04:55 +0100 Subject: [PATCH 24/37] Fix tests --- satpy/cf/coords.py | 5 +++++ satpy/cf/dataarray.py | 5 ----- satpy/cf/datasets.py | 26 ++++++++++++++------------ satpy/tests/cf_tests/test_datasets.py | 4 +++- 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/satpy/cf/coords.py b/satpy/cf/coords.py index 6e7a0892e9..05e8a792fd 100644 --- a/satpy/cf/coords.py +++ b/satpy/cf/coords.py @@ -100,6 +100,11 @@ def set_cf_time_info(dataarray, epoch): - the time coordinate has size 1 """ + from satpy.cf import EPOCH + + if epoch is None: + epoch = EPOCH + dataarray["time"].encoding["units"] = epoch dataarray["time"].attrs["standard_name"] = "time" dataarray["time"].attrs.pop("bounds", None) diff --git a/satpy/cf/dataarray.py b/satpy/cf/dataarray.py index 39b5d82dc9..078c53c462 100644 --- a/satpy/cf/dataarray.py +++ b/satpy/cf/dataarray.py @@ -89,11 +89,6 @@ def make_cf_dataarray(dataarray, CF-compliant xr.DataArray. """ - from satpy.cf import EPOCH - - if epoch is None: - epoch = EPOCH - dataarray = _preprocess_dataarray_name(dataarray=dataarray, numeric_name_prefix=numeric_name_prefix, include_orig_name=include_orig_name) diff --git a/satpy/cf/datasets.py b/satpy/cf/datasets.py index 43b85003de..3cb72af442 100644 --- a/satpy/cf/datasets.py +++ b/satpy/cf/datasets.py @@ -60,14 +60,13 @@ def _get_groups(groups, list_datarrays): def _collect_cf_dataset(list_dataarrays, - epoch, - flatten_attrs, - exclude_attrs, - include_lonlats, - pretty, - include_orig_name, - numeric_name_prefix, - ): + epoch=None, + flatten_attrs=False, + exclude_attrs=None, + include_lonlats=True, + pretty=False, + include_orig_name=True, + numeric_name_prefix="CHANNEL_"): """Process a list of xr.DataArray and return a dictionary with CF-compliant xr.Dataset. Parameters @@ -77,17 +76,24 @@ def _collect_cf_dataset(list_dataarrays, epoch : str Reference time for encoding the time coordinates (if available). Example format: "seconds since 1970-01-01 00:00:00". + If None, the default reference time is defined using `from satpy.cf import EPOCH` + flatten_attrs : bool, optional flatten_attrs : bool If True, flatten dict-type attributes. + exclude_attrs : list, optional exclude_attrs : list List of xr.DataArray attribute names to be excluded. 
+ include_lonlats : bool, optional include_lonlats : bool If True, it includes 'latitude' and 'longitude' coordinates also for satpy scene defined on an AreaDefinition. If the 'area' attribute is a SwathDefinition, it always include latitude and longitude coordinates. + pretty : bool, optional pretty : bool Don't modify coordinate names, if possible. Makes the file prettier, but possibly less consistent. + include_orig_name : bool, optional include_orig_name : bool Include the original dataset name as a variable attribute in the xr.Dataset. + numeric_name_prefix : str, optional numeric_name_prefix : str Prefix to add the each variable with name starting with a digit. Use '' or None to leave this out. @@ -227,13 +233,9 @@ def collect_cf_datasets(list_dataarrays, header_attrs : dict Global attributes to be attached to the xr.Dataset / netCDF4. """ - from satpy.cf import EPOCH from satpy.cf.attrs import preprocess_header_attrs from satpy.cf.coords import add_time_bounds_dimension - if epoch is None: - epoch = EPOCH - if not list_dataarrays: raise RuntimeError("None of the requested datasets have been " "generated or could not be loaded. Requested " diff --git a/satpy/tests/cf_tests/test_datasets.py b/satpy/tests/cf_tests/test_datasets.py index b234a7c945..e943026509 100644 --- a/satpy/tests/cf_tests/test_datasets.py +++ b/satpy/tests/cf_tests/test_datasets.py @@ -52,7 +52,9 @@ def test_collect_cf_dataset(self): attrs={"name": "var2", "long_name": "variable 2"})] # Collect datasets - ds = _collect_cf_dataset(list_dataarrays, include_lonlats=True) + ds = _collect_cf_dataset(list_dataarrays, + include_lonlats=True, + ) # Test results assert len(ds.keys()) == 3 From 09325c2fe97a1e966e281c638c32f47fec34b811 Mon Sep 17 00:00:00 2001 From: ghiggi Date: Thu, 16 Nov 2023 12:25:55 +0100 Subject: [PATCH 25/37] Fix docstrings --- satpy/_scene_converters.py | 58 ++++++++----------- satpy/cf/__init__.py | 2 - satpy/cf/area.py | 2 - satpy/cf/attrs.py | 2 - satpy/cf/coords.py | 2 - satpy/cf/dataarray.py | 38 ++++-------- satpy/cf/datasets.py | 115 ++++++++++++++----------------------- satpy/cf/encoding.py | 2 - satpy/writers/cf_writer.py | 51 +++++++--------- 9 files changed, 98 insertions(+), 174 deletions(-) diff --git a/satpy/_scene_converters.py b/satpy/_scene_converters.py index c400a159f1..fbc0a7a627 100644 --- a/satpy/_scene_converters.py +++ b/satpy/_scene_converters.py @@ -52,42 +52,32 @@ def to_xarray(scn, If Scene DataArrays are on different areas, currently it fails, although in future we might return a DataTree object, grouped by area. - Parameters - ---------- - scn: satpy.Scene - Satpy Scene. - datasets (iterable): - List of Satpy Scene datasets to include in the output xr.Dataset. - Elements can be string name, a wavelength as a number, a DataID, - or DataQuery object. - If None (the default), it include all loaded Scene datasets. - header_attrs: - Global attributes of the output xr.Dataset. - epoch (str): - Reference time for encoding the time coordinates (if available). - Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is defined using "from satpy.cf import EPOCH" - flatten_attrs (bool): - If True, flatten dict-type attributes. - exclude_attrs (list): - List of xr.DataArray attribute names to be excluded. - include_lonlats (bool): - If True, it includes 'latitude' and 'longitude' coordinates. - If the 'area' attribute is a SwathDefinition, it always includes - latitude and longitude coordinates. 
-    pretty (bool):
-        Don't modify coordinate names, if possible. Makes the file prettier,
-        but possibly less consistent.
-    include_orig_name (bool).
-        Include the original dataset name as a variable attribute in the xr.Dataset.
-    numeric_name_prefix (str):
-        Prefix to add the each variable with name starting with a digit.
-        Use '' or None to leave this out.
+    Args:
+        scn (satpy.Scene): Satpy Scene.
+        datasets (iterable, optional): List of Satpy Scene datasets to include in
+            the output xr.Dataset. Elements can be string name, a wavelength as a
+            number, a DataID, or DataQuery object. If None (the default), it
+            includes all loaded Scene datasets.
+        header_attrs: Global attributes of the output xr.Dataset.
+        epoch (str, optional): Reference time for encoding the time coordinates
+            (if available). Format example: "seconds since 1970-01-01 00:00:00".
+            If None, the default reference time is retrieved using
+            "from satpy.cf import EPOCH".
+        flatten_attrs (bool, optional): If True, flatten dict-type attributes.
+        exclude_attrs (list, optional): List of xr.DataArray attribute names to
+            be excluded.
+        include_lonlats (bool, optional): If True, includes 'latitude' and
+            'longitude' coordinates. If the 'area' attribute is a SwathDefinition,
+            it always includes latitude and longitude coordinates.
+        pretty (bool, optional): Don't modify coordinate names, if possible. Makes
+            the file prettier, but possibly less consistent.
+        include_orig_name (bool, optional): Include the original dataset name as a
+            variable attribute in the xr.Dataset.
+        numeric_name_prefix (str, optional): Prefix to add to each variable with
+            name starting with a digit. Use '' or None to leave this out.
 
     Returns:
-    -------
-    ds, xr.Dataset
-        A CF-compliant xr.Dataset
+        xr.Dataset: A CF-compliant xr.Dataset
 
     """
     from satpy.cf.datasets import collect_cf_datasets
diff --git a/satpy/cf/__init__.py b/satpy/cf/__init__.py
index c48acebcf9..63fac5261c 100644
--- a/satpy/cf/__init__.py
+++ b/satpy/cf/__init__.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 """Code for generation of CF-compliant datasets."""
 
 EPOCH = u"seconds since 1970-01-01 00:00:00"
diff --git a/satpy/cf/area.py b/satpy/cf/area.py
index 5ce9f1e0c3..041338efd8 100644
--- a/satpy/cf/area.py
+++ b/satpy/cf/area.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 # Copyright (c) 2017-2023 Satpy developers
 #
 # This file is part of satpy.
diff --git a/satpy/cf/attrs.py b/satpy/cf/attrs.py
index af1977b1f4..2cf9ffa528 100644
--- a/satpy/cf/attrs.py
+++ b/satpy/cf/attrs.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 # Copyright (c) 2017-2023 Satpy developers
 #
 # This file is part of satpy.
diff --git a/satpy/cf/coords.py b/satpy/cf/coords.py
index 05e8a792fd..af11a62e43 100644
--- a/satpy/cf/coords.py
+++ b/satpy/cf/coords.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 """Set CF-compliant spatial and temporal coordinates."""
 
 import logging
diff --git a/satpy/cf/dataarray.py b/satpy/cf/dataarray.py
index 078c53c462..3c97a70336 100644
--- a/satpy/cf/dataarray.py
+++ b/satpy/cf/dataarray.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 # Copyright (c) 2017-2023 Satpy developers
 #
 # This file is part of satpy.
@@ -63,31 +61,19 @@ def make_cf_dataarray(dataarray,
                       numeric_name_prefix="CHANNEL_"):
     """Make the xr.DataArray CF-compliant.
 
-    Parameters
-    ----------
-    dataarray : xr.DataArray
-        The data array to be made CF-compliant.
- epoch : str, optional - Reference time for encoding of time coordinates. - If None, the default reference time is defined using `from satpy.cf import EPOCH` - flatten_attrs : bool, optional - If True, flatten dict-type attributes. - The default is False. - exclude_attrs : list, optional - List of dataset attributes to be excluded. - The default is None. - include_orig_name : bool, optional - Include the original dataset name in the netcdf variable attributes. - The default is True. - numeric_name_prefix : TYPE, optional - Prepend dataset name with this if starting with a digit. - The default is ``"CHANNEL_"``. - - Returns - ------- - new_data : xr.DataArray - CF-compliant xr.DataArray. + Args: + dataarray (xr.DataArray): The data array to be made CF-compliant. + epoch (str, optional): Reference time for encoding of time coordinates. + If None, the default reference time is defined using `from satpy.cf import EPOCH`. + flatten_attrs (bool, optional): If True, flatten dict-type attributes. Defaults to False. + exclude_attrs (list, optional): List of dataset attributes to be excluded. Defaults to None. + include_orig_name (bool, optional): Include the original dataset name in the netcdf variable attributes. + Defaults to True. + numeric_name_prefix (str, optional): Prepend dataset name with this if starting with a digit. + Defaults to "CHANNEL_". + Returns: + xr.DataArray: A CF-compliant xr.DataArray. """ dataarray = _preprocess_dataarray_name(dataarray=dataarray, numeric_name_prefix=numeric_name_prefix, diff --git a/satpy/cf/datasets.py b/satpy/cf/datasets.py index 3cb72af442..70ac3fb014 100644 --- a/satpy/cf/datasets.py +++ b/satpy/cf/datasets.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- # Copyright (c) 2017-2023 Satpy developers # # This file is part of satpy. @@ -69,39 +67,24 @@ def _collect_cf_dataset(list_dataarrays, numeric_name_prefix="CHANNEL_"): """Process a list of xr.DataArray and return a dictionary with CF-compliant xr.Dataset. - Parameters - ---------- - list_dataarrays : list - List of DataArrays to make CF compliant and merge into a xr.Dataset. - epoch : str - Reference time for encoding the time coordinates (if available). - Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is defined using `from satpy.cf import EPOCH` - flatten_attrs : bool, optional - flatten_attrs : bool - If True, flatten dict-type attributes. - exclude_attrs : list, optional - exclude_attrs : list - List of xr.DataArray attribute names to be excluded. - include_lonlats : bool, optional - include_lonlats : bool - If True, it includes 'latitude' and 'longitude' coordinates also for satpy scene defined on an AreaDefinition. - If the 'area' attribute is a SwathDefinition, it always include latitude and longitude coordinates. - pretty : bool, optional - pretty : bool - Don't modify coordinate names, if possible. Makes the file prettier, but possibly less consistent. - include_orig_name : bool, optional - include_orig_name : bool - Include the original dataset name as a variable attribute in the xr.Dataset. - numeric_name_prefix : str, optional - numeric_name_prefix : str - Prefix to add the each variable with name starting with a digit. - Use '' or None to leave this out. - - Returns - ------- - ds : xr.Dataset - A partially CF-compliant xr.Dataset + Args: + list_dataarrays (list): List of DataArrays to make CF compliant and merge into an xr.Dataset. + epoch (str, optional): Reference time for encoding the time coordinates. 
+ Example format: "seconds since 1970-01-01 00:00:00". + If None, the default reference time is defined using `from satpy.cf import EPOCH`. + flatten_attrs (bool, optional): If True, flatten dict-type attributes. + exclude_attrs (list, optional): List of xr.DataArray attribute names to be excluded. + include_lonlats (bool, optional): If True, includes 'latitude' and 'longitude' coordinates also for a + satpy.Scene defined on an AreaDefinition. + If the 'area' attribute is a SwathDefinition, it always includes latitude and longitude coordinates. + pretty (bool, optional): Don't modify coordinate names, if possible. + Makes the file prettier, but possibly less consistent. + include_orig_name (bool, optional): Include the original dataset name as a variable attribute in the xr.Dataset. + numeric_name_prefix (str, optional): Prefix to add to each variable with a name starting with a digit. + Use '' or None to leave this out. + + Returns: + xr.Dataset: A partially CF-compliant xr.Dataset. """ from satpy.cf.area import area2cf from satpy.cf.coords import ( @@ -194,44 +177,30 @@ def collect_cf_datasets(list_dataarrays, If the xr.DataArrays does not share the same dimensions, it creates a collection of xr.Datasets sharing the same dimensions. - Parameters - ---------- - list_dataarrays (list): - List of DataArrays to make CF compliant and merge into groups of xr.Datasets. - header_attrs: (dict): - Global attributes of the output xr.Dataset. - epoch (str): - Reference time for encoding the time coordinates (if available). - Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is retrieved using `from satpy.cf import EPOCH` - flatten_attrs (bool): - If True, flatten dict-type attributes. - exclude_attrs (list): - List of xr.DataArray attribute names to be excluded. - include_lonlats (bool): - If True, it includes 'latitude' and 'longitude' coordinates also for satpy scene defined on an AreaDefinition. - If the 'area' attribute is a SwathDefinition, it always include latitude and longitude coordinates. - pretty (bool): - Don't modify coordinate names, if possible. Makes the file prettier, but possibly less consistent. - include_orig_name (bool). - Include the original dataset name as a variable attribute in the xr.Dataset. - numeric_name_prefix (str): - Prefix to add the each variable with name starting with a digit. - Use '' or None to leave this out. - groups (dict): - Group datasets according to the given assignment: - - `{'': ['dataset_name1', 'dataset_name2', ...]}` - - It is used to create grouped netCDFs using the CF_Writer. - If None (the default), no groups will be created. - - Returns - ------- - grouped_datasets : dict - A dictionary of CF-compliant xr.Dataset: {group_name: xr.Dataset} - header_attrs : dict - Global attributes to be attached to the xr.Dataset / netCDF4. + Args: + list_dataarrays (list): List of DataArrays to make CF compliant and merge into groups of xr.Datasets. + header_attrs (dict): Global attributes of the output xr.Dataset. + epoch (str, optional): Reference time for encoding the time coordinates. + Example format: "seconds since 1970-01-01 00:00:00". + If None, the default reference time is retrieved using `from satpy.cf import EPOCH`. + flatten_attrs (bool, optional): If True, flatten dict-type attributes. + exclude_attrs (list, optional): List of xr.DataArray attribute names to be excluded. 
+ include_lonlats (bool, optional): If True, includes 'latitude' and 'longitude' coordinates also + for a satpy.Scene defined on an AreaDefinition. + If the 'area' attribute is a SwathDefinition, it always includes latitude and longitude coordinates. + pretty (bool, optional): Don't modify coordinate names, if possible. + Makes the file prettier, but possibly less consistent. + include_orig_name (bool, optional): Include the original dataset name as a variable attribute in the xr.Dataset. + numeric_name_prefix (str, optional): Prefix to add to each variable with a name starting with a digit. + Use '' or None to leave this out. + groups (dict, optional): Group datasets according to the given assignment: + `{'': ['dataset_name1', 'dataset_name2', ...]}`. + Used to create grouped netCDFs using the CF_Writer. If None, no groups will be created. + + Returns: + tuple: A tuple containing: + - grouped_datasets (dict): A dictionary of CF-compliant xr.Dataset: {group_name: xr.Dataset}. + - header_attrs (dict): Global attributes to be attached to the xr.Dataset / netCDF4. """ from satpy.cf.attrs import preprocess_header_attrs from satpy.cf.coords import add_time_bounds_dimension diff --git a/satpy/cf/encoding.py b/satpy/cf/encoding.py index 3cdf1fdf1d..5c77b6d69f 100644 --- a/satpy/cf/encoding.py +++ b/satpy/cf/encoding.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- # Copyright (c) 2017-2023 Satpy developers # # This file is part of satpy. diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index 7076cc841d..1204754bd0 100644 --- a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- # Copyright (c) 2017-2019 Satpy developers # # This file is part of satpy. @@ -240,36 +238,27 @@ def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, Note that all datasets (if grouping: in one group) must have the same projection coordinates. Args: - datasets (list): - List of xr.DataArray to be saved. - filename (str): - Output file - groups (dict): - Group datasets according to the given assignment: `{'group_name': ['dataset1', 'dataset2', ...]}`. - Group name `None` corresponds to the root of the file, i.e. no group will be created. + datasets (list): List of xr.DataArray to be saved. + filename (str): Output file. + groups (dict): Group datasets according to the given assignment: + `{'group_name': ['dataset1', 'dataset2', ...]}`. + The group name `None` corresponds to the root of the file, i.e., no group will be created. Warning: The results will not be fully CF compliant! - header_attrs: - Global attributes to be included. - engine (str): - Module to be used for writing netCDF files. Follows xarray's - :meth:`~xarray.Dataset.to_netcdf` engine choices with a - preference for 'netcdf4'. - epoch (str): - Reference time for encoding of time coordinates. - If None, the default reference time is defined using `from satpy.cf import EPOCH` - flatten_attrs (bool): - If True, flatten dict-type attributes. - exclude_attrs (list): - List of dataset attributes to be excluded. - include_lonlats (bool): - Always include latitude and longitude coordinates, even for datasets with area definition. - pretty (bool): - Don't modify coordinate names, if possible. Makes the file prettier, but possibly less consistent. - include_orig_name (bool). - Include the original dataset name as a variable attribute in the final netCDF. 
- numeric_name_prefix (str): - Prefix to add the each variable with name starting with a digit. Use '' or None to leave this out. - + header_attrs: Global attributes to be included. + engine (str, optional): Module to be used for writing netCDF files. Follows xarray's + :meth:`~xarray.Dataset.to_netcdf` engine choices with a preference for 'netcdf4'. + epoch (str, optional): Reference time for encoding of time coordinates. + If None, the default reference time is defined using `from satpy.cf import EPOCH`. + flatten_attrs (bool, optional): If True, flatten dict-type attributes. + exclude_attrs (list, optional): List of dataset attributes to be excluded. + include_lonlats (bool, optional): Always include latitude and longitude coordinates, + even for datasets with area definition. + pretty (bool, optional): Don't modify coordinate names, if possible. + Makes the file prettier, but possibly less consistent. + include_orig_name (bool, optional): Include the original dataset name as a variable + attribute in the final netCDF. + numeric_name_prefix (str, optional): Prefix to add to each variable with a name starting with a digit. + Use '' or None to leave this out. """ from satpy.cf.datasets import collect_cf_datasets from satpy.cf.encoding import update_encoding From c42d1edbde9b48667ef2d3be1092ed0da329073b Mon Sep 17 00:00:00 2001 From: ghiggi Date: Thu, 16 Nov 2023 12:30:43 +0100 Subject: [PATCH 26/37] Move EPOCH to satpy.cf.coords --- satpy/cf/__init__.py | 2 -- satpy/cf/coords.py | 5 +++-- satpy/cf/dataarray.py | 2 +- satpy/cf/datasets.py | 4 ++-- satpy/scene.py | 2 +- satpy/writers/cf_writer.py | 6 +++--- 6 files changed, 10 insertions(+), 11 deletions(-) diff --git a/satpy/cf/__init__.py b/satpy/cf/__init__.py index 63fac5261c..f8f662a93b 100644 --- a/satpy/cf/__init__.py +++ b/satpy/cf/__init__.py @@ -1,3 +1 @@ """Code for generation of CF-compliant datasets.""" - -EPOCH = u"seconds since 1970-01-01 00:00:00" diff --git a/satpy/cf/coords.py b/satpy/cf/coords.py index af11a62e43..80ce22de39 100644 --- a/satpy/cf/coords.py +++ b/satpy/cf/coords.py @@ -13,6 +13,9 @@ logger = logging.getLogger(__name__) +EPOCH = u"seconds since 1970-01-01 00:00:00" + + def add_xy_coords_attrs(dataarray): """Add relevant attributes to x, y coordinates.""" # If there are no coords, return dataarray @@ -98,8 +101,6 @@ def set_cf_time_info(dataarray, epoch): - the time coordinate has size 1 """ - from satpy.cf import EPOCH - if epoch is None: epoch = EPOCH diff --git a/satpy/cf/dataarray.py b/satpy/cf/dataarray.py index 3c97a70336..5df68da887 100644 --- a/satpy/cf/dataarray.py +++ b/satpy/cf/dataarray.py @@ -64,7 +64,7 @@ def make_cf_dataarray(dataarray, Args: dataarray (xr.DataArray): The data array to be made CF-compliant. epoch (str, optional): Reference time for encoding of time coordinates. - If None, the default reference time is defined using `from satpy.cf import EPOCH`. + If None, the default reference time is defined using `from satpy.cf.coords import EPOCH`. flatten_attrs (bool, optional): If True, flatten dict-type attributes. Defaults to False. exclude_attrs (list, optional): List of dataset attributes to be excluded. Defaults to None. include_orig_name (bool, optional): Include the original dataset name in the netcdf variable attributes. 
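The recurring change in patches 22 through 26 is to take `epoch=EPOCH` out of function signatures and to resolve the default inside the function body instead. A minimal standalone sketch of the pattern (hypothetical names, not satpy code), showing why the call-time lookup is preferred:

# Hypothetical module sketching the lazy epoch default adopted above.
EPOCH = "seconds since 1970-01-01 00:00:00"


def encode_time_units(epoch=None):
    """Return the CF time units string, falling back to the module default."""
    # With `epoch=EPOCH` in the signature, the constant would be baked in at
    # import time; with None it is looked up at call time, so the import of
    # the constant can live inside the function (reducing circular-import risk).
    if epoch is None:
        epoch = EPOCH
    return epoch


print(encode_time_units())                                     # module default
print(encode_time_units("seconds since 2000-01-01 00:00:00"))  # caller override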
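What the epoch string ultimately controls is how the time coordinate is serialized: CF stores times as numbers counted from the reference time. A hedged illustration with plain numpy and xarray, mirroring what `set_cf_time_info` requests through the encoding dict:

import numpy as np
import xarray as xr

# CF stores times as numeric offsets from a reference time ("epoch").
times = np.array(["2023-11-14T12:00", "2023-11-14T12:15"], dtype="datetime64[s]")
epoch = np.datetime64("1970-01-01T00:00:00", "s")
print((times - epoch).astype("int64"))  # offsets in seconds since the epoch

# xarray applies the same conversion at write time when the time coordinate
# carries a units encoding, which is what set_cf_time_info assigns:
da = xr.DataArray([1.0, 2.0], dims="time", coords={"time": times})
da["time"].encoding["units"] = "seconds since 1970-01-01 00:00:00"
# da.to_netcdf(...) would now write "time" as numbers with these units.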
diff --git a/satpy/cf/datasets.py b/satpy/cf/datasets.py index 70ac3fb014..c6ea6fd351 100644 --- a/satpy/cf/datasets.py +++ b/satpy/cf/datasets.py @@ -71,7 +71,7 @@ def _collect_cf_dataset(list_dataarrays, list_dataarrays (list): List of DataArrays to make CF compliant and merge into an xr.Dataset. epoch (str, optional): Reference time for encoding the time coordinates. Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is defined using `from satpy.cf import EPOCH`. + If None, the default reference time is defined using `from satpy.cf.coords import EPOCH`. flatten_attrs (bool, optional): If True, flatten dict-type attributes. exclude_attrs (list, optional): List of xr.DataArray attribute names to be excluded. include_lonlats (bool, optional): If True, includes 'latitude' and 'longitude' coordinates also for a @@ -182,7 +182,7 @@ def collect_cf_datasets(list_dataarrays, header_attrs (dict): Global attributes of the output xr.Dataset. epoch (str, optional): Reference time for encoding the time coordinates. Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is retrieved using `from satpy.cf import EPOCH`. + If None, the default reference time is retrieved using `from satpy.cf.coords import EPOCH`. flatten_attrs (bool, optional): If True, flatten dict-type attributes. exclude_attrs (list, optional): List of xr.DataArray attribute names to be excluded. include_lonlats (bool, optional): If True, includes 'latitude' and 'longitude' coordinates also diff --git a/satpy/scene.py b/satpy/scene.py index bb8cf0ffab..4722a47533 100644 --- a/satpy/scene.py +++ b/satpy/scene.py @@ -1128,7 +1128,7 @@ def to_xarray(self, epoch (str): Reference time for encoding the time coordinates (if available). Example format: "seconds since 1970-01-01 00:00:00". - If None, the default reference time is defined using "from satpy.cf import EPOCH" + If None, the default reference time is defined using "from satpy.cf.coords import EPOCH" flatten_attrs (bool): If True, flatten dict-type attributes. exclude_attrs (list): diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index 1204754bd0..b64a288213 100644 --- a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -160,7 +160,7 @@ import xarray as xr from packaging.version import Version -from satpy.cf import EPOCH # noqa: F401 (for backward compatibility) +from satpy.cf.coords import EPOCH # noqa: F401 (for backward compatibility) from satpy.writers import Writer logger = logging.getLogger(__name__) @@ -248,7 +248,7 @@ def save_datasets(self, datasets, filename=None, groups=None, header_attrs=None, engine (str, optional): Module to be used for writing netCDF files. Follows xarray's :meth:`~xarray.Dataset.to_netcdf` engine choices with a preference for 'netcdf4'. epoch (str, optional): Reference time for encoding of time coordinates. - If None, the default reference time is defined using `from satpy.cf import EPOCH`. + If None, the default reference time is defined using `from satpy.cf.coords import EPOCH`. flatten_attrs (bool, optional): If True, flatten dict-type attributes. exclude_attrs (list, optional): List of dataset attributes to be excluded. include_lonlats (bool, optional): Always include latitude and longitude coordinates, @@ -325,7 +325,7 @@ def da2cf(dataarray, epoch=None, flatten_attrs=False, exclude_attrs=None, The data array to be converted. epoch (str): Reference time for encoding of time coordinates. 
- If None, the default reference time is defined using `from satpy.cf import EPOCH` + If None, the default reference time is defined using `from satpy.cf.coords import EPOCH` flatten_attrs (bool): If True, flatten dict-type attributes. exclude_attrs (list): From ba09f1859368a4faec05e4079301026fd83c9e9c Mon Sep 17 00:00:00 2001 From: ghiggi Date: Thu, 16 Nov 2023 18:32:32 +0100 Subject: [PATCH 27/37] Simplify functions for CodeScene happiness --- satpy/cf/area.py | 6 ++- satpy/cf/attrs.py | 45 ++++++++++------ satpy/cf/coords.py | 122 ++++++++++++++++++++++++++++-------------- satpy/cf/dataarray.py | 5 +- satpy/cf/datasets.py | 19 ++++--- 5 files changed, 127 insertions(+), 70 deletions(-) diff --git a/satpy/cf/area.py b/satpy/cf/area.py index 041338efd8..93c8b28eed 100644 --- a/satpy/cf/area.py +++ b/satpy/cf/area.py @@ -67,9 +67,11 @@ def _add_grid_mapping(dataarray): def area2cf(dataarray, include_lonlats=False, got_lonlats=False): """Convert an area to at CF grid mapping or lon and lats.""" res = [] - if not got_lonlats and (isinstance(dataarray.attrs["area"], SwathDefinition) or include_lonlats): + include_lonlats = include_lonlats or isinstance(dataarray.attrs["area"], SwathDefinition) + is_area_def = isinstance(dataarray.attrs["area"], AreaDefinition) + if not got_lonlats and include_lonlats: dataarray = _add_lonlat_coords(dataarray) - if isinstance(dataarray.attrs["area"], AreaDefinition): + if is_area_def: dataarray, gmapping = _add_grid_mapping(dataarray) res.append(gmapping) res.append(dataarray) diff --git a/satpy/cf/attrs.py b/satpy/cf/attrs.py index 2cf9ffa528..eb562c1c93 100644 --- a/satpy/cf/attrs.py +++ b/satpy/cf/attrs.py @@ -59,35 +59,50 @@ def _encode(self, obj): return tuple(obj) elif isinstance(obj, np.ndarray): return obj.tolist() - return str(obj) +def _encode_numpy_array(obj): + """Encode numpy array as a netCDF4 serializable datatype.""" + from satpy.writers.cf_writer import NC4_DTYPES + + # Only plain 1-d arrays are supported. Skip record arrays and multi-dimensional arrays. + is_plain_1d = not obj.dtype.fields and len(obj.shape) <= 1 + if is_plain_1d: + if obj.dtype in NC4_DTYPES: + return obj + elif obj.dtype == np.bool_: + # Boolean arrays are not supported, convert to array of strings. + return [s.lower() for s in obj.astype(str)] + return obj.tolist() + else: + raise ValueError("Only a 1D numpy array can be encoded as netCDF attribute.") + + def _encode_object(obj): """Try to encode `obj` as a netCDF/Zarr compatible datatype which most closely resembles the object's nature. Raises: ValueError if no such datatype could be found """ - from satpy.writers.cf_writer import NC4_DTYPES - if isinstance(obj, int) and not isinstance(obj, (bool, np.bool_)): return obj elif isinstance(obj, (float, str, np.integer, np.floating)): return obj elif isinstance(obj, np.ndarray): - # Only plain 1-d arrays are supported. Skip record arrays and multi-dimensional arrays. - is_plain_1d = not obj.dtype.fields and len(obj.shape) <= 1 - if is_plain_1d: - if obj.dtype in NC4_DTYPES: - return obj - elif obj.dtype == np.bool_: - # Boolean arrays are not supported, convert to array of strings. 
-                return [s.lower() for s in obj.astype(str)]
-            return obj.tolist()
+        return _encode_numpy_array(obj)
     raise ValueError("Unable to encode")
 
 
+def _try_decode_object(obj):
+    """Try to decode a byte string."""
+    try:
+        decoded = obj.decode()
+    except AttributeError:
+        decoded = obj
+    return decoded
+
+
 def _encode_python_objects(obj):
     """Try to find the datatype which most closely resembles the object's nature.
 
@@ -98,11 +113,7 @@ def _encode_python_objects(obj):
     try:
         dump = _encode_object(obj)
     except ValueError:
-        try:
-            # Decode byte-strings
-            decoded = obj.decode()
-        except AttributeError:
-            decoded = obj
+        decoded = _try_decode_object(obj)
         dump = json.dumps(decoded, cls=AttributeEncoder).strip('"')
     return dump
diff --git a/satpy/cf/coords.py b/satpy/cf/coords.py
index 80ce22de39..ba1d195663 100644
--- a/satpy/cf/coords.py
+++ b/satpy/cf/coords.py
@@ -46,12 +46,26 @@ def _is_projected(dataarray):
     return True
 
 
+def _is_area(dataarray):
+    if isinstance(dataarray.attrs["area"], AreaDefinition):
+        return True
+    else:
+        return False
+
+
+def _is_swath(dataarray):
+    if isinstance(dataarray.attrs["area"], SwathDefinition):
+        return True
+    else:
+        return False
+
+
 def _try_to_get_crs(dataarray):
     """Try to get a CRS from attributes."""
     if "area" in dataarray.attrs:
-        if isinstance(dataarray.attrs["area"], AreaDefinition):
+        if _is_area(dataarray):
             return dataarray.attrs["area"].crs
-        if not isinstance(dataarray.attrs["area"], SwathDefinition):
+        if not _is_swath(dataarray):
             logger.warning(
                 f"Could not tell CRS from area of type {type(dataarray.attrs['area']).__name__:s}. "
                 "Assuming projected CRS.")
@@ -116,9 +130,7 @@ def set_cf_time_info(dataarray, epoch):
 
 def _is_lon_or_lat_dataarray(dataarray):
     """Check if the DataArray represents the latitude or longitude coordinate."""
-    if "standard_name" in dataarray.attrs and dataarray.attrs["standard_name"] in ["longitude", "latitude"]:
-        return True
-    return False
+    return dataarray.attrs.get("standard_name", "") in ("longitude", "latitude")
 
 
 def has_projection_coords(dict_datarrays):
@@ -129,6 +141,35 @@ def has_projection_coords(dict_datarrays):
     return False
 
 
+def _get_is_nondimensional_coords_dict(dict_dataarrays):
+    tokens = defaultdict(set)
+    for dataarray in dict_dataarrays.values():
+        for coord_name in dataarray.coords:
+            if not _is_lon_or_lat_dataarray(dataarray[coord_name]) and coord_name not in dataarray.dims:
+                tokens[coord_name].add(tokenize(dataarray[coord_name].data))
+    coords_unique = dict([(coord_name, len(coord_tokens) == 1) for coord_name, coord_tokens in tokens.items()])
+    return coords_unique
+
+
+def _warn_if_pretty_but_not_unique(pretty, coord_name):
+    """Warn if coordinates cannot be pretty-formatted due to non-uniqueness."""
+    if pretty:
+        warnings.warn(
+            f'Cannot pretty-format "{coord_name}" coordinates because they are '
+            'not identical among the given datasets',
+            stacklevel=2
+        )
+
+
+def _rename_coords(dict_dataarrays, coord_name):
+    """Rename coordinates in the datasets."""
+    for name, dataarray in dict_dataarrays.items():
+        if coord_name in dataarray.coords:
+            rename = {coord_name: f"{name}_{coord_name}"}
+            dict_dataarrays[name] = dataarray.rename(rename)
+    return dict_dataarrays
+
+
 def ensure_unique_nondimensional_coords(dict_dataarrays, pretty=False):
     """Make non-dimensional coordinates unique among all datasets.
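The uniqueness test introduced above is built on `dask.base.tokenize`, which returns a deterministic, content-based hash. A small illustrative sketch (made-up data, not satpy code): if every dataset's `acq_time` coordinate hashes to the same token, the token set has length one and a single shared coordinate is safe; otherwise the coordinate must be renamed per dataset, as `_rename_coords` does.

import numpy as np
from dask.base import tokenize

acq_time_var1 = np.array([0, 1, 2])
acq_time_var2 = np.array([0, 1, 2])  # same content -> same token
acq_time_var3 = np.array([5, 6, 7])  # different content -> new token

tokens = {tokenize(acq_time_var1), tokenize(acq_time_var2)}
print(len(tokens) == 1)  # True: one shared "acq_time" coordinate is safe

tokens.add(tokenize(acq_time_var3))
print(len(tokens) == 1)  # False: rename to "var1_acq_time", "var3_acq_time", ...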
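The attribute encoding being reshuffled in this patch keeps one strategy throughout: pass netCDF-safe scalars and plain 1-D arrays through unchanged, and fall back to a JSON string for everything else. A simplified, self-contained sketch of that fallback; `to_netcdf_safe` is a made-up name and the real functions handle more cases:

import json
import numpy as np


def to_netcdf_safe(value):
    """Return a netCDF-attribute-safe version of *value* (illustrative only)."""
    if isinstance(value, (int, float, str)) and not isinstance(value, bool):
        return value  # natively supported scalar
    if isinstance(value, np.ndarray) and value.ndim <= 1 and value.dtype != np.bool_:
        return value  # plain 1-D numeric array
    # Everything else (dicts, bools, datetimes, ...) becomes a JSON string.
    return json.dumps(value, default=str).strip('"')


print(to_netcdf_safe(42))                    # 42
print(to_netcdf_safe(True))                  # 'true'
print(to_netcdf_safe({"sensor": "seviri"}))  # '{"sensor": "seviri"}'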
@@ -155,28 +196,14 @@ def ensure_unique_nondimensional_coords(dict_dataarrays, pretty=False): """ # Determine which non-dimensional coordinates are unique # - coords_unique has structure: {coord_name: True/False} - tokens = defaultdict(set) - for dataarray in dict_dataarrays.values(): - for coord_name in dataarray.coords: - if not _is_lon_or_lat_dataarray(dataarray[coord_name]) and coord_name not in dataarray.dims: - tokens[coord_name].add(tokenize(dataarray[coord_name].data)) - coords_unique = dict([(coord_name, len(tokens) == 1) for coord_name, tokens in tokens.items()]) + is_coords_unique_dict = _get_is_nondimensional_coords_dict(dict_dataarrays) # Prepend dataset name, if not unique or no pretty-format desired new_dict_dataarrays = dict_dataarrays.copy() - for coord_name, unique in coords_unique.items(): + for coord_name, unique in is_coords_unique_dict.items(): if not pretty or not unique: - if pretty: - warnings.warn( - 'Cannot pretty-format "{}" coordinates because they are ' - 'not identical among the given datasets'.format(coord_name), - stacklevel=2 - ) - for name, dataarray in dict_dataarrays.items(): - if coord_name in dataarray.coords: - rename = {coord_name: "{}_{}".format(name, coord_name)} - new_dict_dataarrays[name] = new_dict_dataarrays[name].rename(rename) - + _warn_if_pretty_but_not_unique(pretty, coord_name) + new_dict_dataarrays = _rename_coords(new_dict_dataarrays, coord_name) return new_dict_dataarrays @@ -196,6 +223,7 @@ def check_unique_projection_coords(dict_dataarrays): "Please group them by area or save them in separate files.") + def add_coordinates_attrs_coords(dict_dataarrays): """Add to DataArrays the coordinates specified in the 'coordinates' attribute. @@ -208,23 +236,39 @@ def add_coordinates_attrs_coords(dict_dataarrays): In the final call to `xr.Dataset.to_netcdf()` all coordinate relations will be resolved and the `coordinates` attributes be set automatically. 
""" - for da_name, dataarray in dict_dataarrays.items(): - declared_coordinates = _get_coordinates_list(dataarray) - for coord in declared_coordinates: - if coord not in dataarray.coords: - try: - dimensions_not_in_data = list(set(dict_dataarrays[coord].dims) - set(dataarray.dims)) - dataarray[coord] = dict_dataarrays[coord].squeeze(dimensions_not_in_data, drop=True) - except KeyError: - warnings.warn( - 'Coordinate "{}" referenced by dataarray {} does not ' - 'exist, dropping reference.'.format(coord, da_name), - stacklevel=2 - ) - continue - + for dataarray_name in dict_dataarrays.keys(): + dict_dataarrays = _add_declared_coordinates(dict_dataarrays, + dataarray_name=dataarray_name) # Drop 'coordinates' attribute in any case to avoid conflicts in xr.Dataset.to_netcdf() - dataarray.attrs.pop("coordinates", None) + dict_dataarrays[dataarray_name].attrs.pop("coordinates", None) + return dict_dataarrays + + +def _add_declared_coordinates(dict_dataarrays, dataarray_name): + """Add declared coordinates to the dataarray if they exist.""" + dataarray = dict_dataarrays[dataarray_name] + declared_coordinates = _get_coordinates_list(dataarray) + for coord in declared_coordinates: + if coord not in dataarray.coords: + dict_dataarrays = _try_add_coordinate(dict_dataarrays, + dataarray_name=dataarray_name, + coord=coord) + return dict_dataarrays + + +def _try_add_coordinate(dict_dataarrays, dataarray_name, coord): + """Try to add a coordinate to the dataarray, warn if not possible.""" + try: + dataarray_dims = set(dict_dataarrays[dataarray_name].dims) + coordinate_dims = set(dict_dataarrays[coord].dims) + dimensions_to_squeeze = list(coordinate_dims - dataarray_dims) + dict_dataarrays[dataarray_name][coord] = dict_dataarrays[coord].squeeze(dimensions_to_squeeze, drop=True) + except KeyError: + warnings.warn( + f'Coordinate "{coord}" referenced by dataarray {dataarray_name} does not ' + 'exist, dropping reference.', + stacklevel=2 + ) return dict_dataarrays diff --git a/satpy/cf/dataarray.py b/satpy/cf/dataarray.py index 5df68da887..dc2ae7d6c1 100644 --- a/satpy/cf/dataarray.py +++ b/satpy/cf/dataarray.py @@ -41,15 +41,16 @@ def _handle_dataarray_name(original_name, numeric_name_prefix): def _preprocess_dataarray_name(dataarray, numeric_name_prefix, include_orig_name): """Change the DataArray name by prepending numeric_name_prefix if the name is a digit.""" original_name = None + named_has_changed = False dataarray = dataarray.copy() if "name" in dataarray.attrs: original_name = dataarray.attrs.pop("name") original_name, new_name = _handle_dataarray_name(original_name, numeric_name_prefix) dataarray = dataarray.rename(new_name) + named_has_changed = original_name != new_name - if include_orig_name and numeric_name_prefix and original_name and original_name != new_name: + if named_has_changed and include_orig_name: dataarray.attrs["original_name"] = original_name - return dataarray diff --git a/satpy/cf/datasets.py b/satpy/cf/datasets.py index c6ea6fd351..cab71de58c 100644 --- a/satpy/cf/datasets.py +++ b/satpy/cf/datasets.py @@ -16,7 +16,6 @@ """Utility to generate a CF-compliant Datasets.""" import logging import warnings -from collections import defaultdict import xarray as xr @@ -39,6 +38,11 @@ def _get_extra_ds(dataarray, keys=None): return dict_datarrays +def _get_group_dataarrays(group_members, list_dataarrays): + """Yield DataArrays that are part of a specific group.""" + return [da for da in list_dataarrays if da.attrs["name"] in group_members] + + def _get_groups(groups, list_datarrays): 
"""Return a dictionary with the list of xr.DataArray associated to each group. @@ -46,15 +50,10 @@ def _get_groups(groups, list_datarrays): Else, collect the DataArrays associated to each group. """ if groups is None: - grouped_dataarrays = {None: list_datarrays} - else: - grouped_dataarrays = defaultdict(list) - for datarray in list_datarrays: - for group_name, group_members in groups.items(): - if datarray.attrs["name"] in group_members: - grouped_dataarrays[group_name].append(datarray) - break - return grouped_dataarrays + return {None: list_datarrays} + + return {group_name: _get_group_dataarrays(group_members, list_datarrays) + for group_name, group_members in groups.items()} def _collect_cf_dataset(list_dataarrays, From 83e815d3b67404fa0b6f9cec458f56f63b430f12 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Thu, 16 Nov 2023 12:09:38 -0600 Subject: [PATCH 28/37] Cleanup CF attrs functions --- satpy/cf/attrs.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/satpy/cf/attrs.py b/satpy/cf/attrs.py index eb562c1c93..987aeec6cb 100644 --- a/satpy/cf/attrs.py +++ b/satpy/cf/attrs.py @@ -68,15 +68,14 @@ def _encode_numpy_array(obj): # Only plain 1-d arrays are supported. Skip record arrays and multi-dimensional arrays. is_plain_1d = not obj.dtype.fields and len(obj.shape) <= 1 - if is_plain_1d: - if obj.dtype in NC4_DTYPES: - return obj - elif obj.dtype == np.bool_: - # Boolean arrays are not supported, convert to array of strings. - return [s.lower() for s in obj.astype(str)] - return obj.tolist() - else: + if not is_plain_1d: raise ValueError("Only a 1D numpy array can be encoded as netCDF attribute.") + if obj.dtype in NC4_DTYPES: + return obj + if obj.dtype == np.bool_: + # Boolean arrays are not supported, convert to array of strings. 
+ return [s.lower() for s in obj.astype(str)] + return obj.tolist() def _encode_object(obj): @@ -85,9 +84,9 @@ def _encode_object(obj): Raises: ValueError if no such datatype could be found """ - if isinstance(obj, int) and not isinstance(obj, (bool, np.bool_)): - return obj - elif isinstance(obj, (float, str, np.integer, np.floating)): + is_nonbool_int = isinstance(obj, int) and not isinstance(obj, (bool, np.bool_)) + is_encode_type = isinstance(obj, (float, str, np.integer, np.floating)) + if is_nonbool_int or is_encode_type: return obj elif isinstance(obj, np.ndarray): return _encode_numpy_array(obj) @@ -194,10 +193,10 @@ def preprocess_datarray_attrs(dataarray, flatten_attrs, exclude_attrs): dataarray = _drop_exclude_attrs(dataarray, exclude_attrs) dataarray = _format_prerequisites_attrs(dataarray) dataarray = _remove_none_attrs(dataarray) - _ = dataarray.attrs.pop("area", None) + dataarray.attrs.pop("area", None) if "long_name" not in dataarray.attrs and "standard_name" not in dataarray.attrs: - dataarray.attrs["long_name"] = dataarray.name + dataarray.attrs["long_name"] = dataarray.attrs["name"] if flatten_attrs: dataarray.attrs = flatten_dict(dataarray.attrs) From cadcfef1129c2a74d5c4e47cb6f4deadb4b5b358 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Thu, 16 Nov 2023 12:36:54 -0600 Subject: [PATCH 29/37] Remove commented out tests --- satpy/tests/test_writers.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/satpy/tests/test_writers.py b/satpy/tests/test_writers.py index c11066d3f6..c40b51fa01 100644 --- a/satpy/tests/test_writers.py +++ b/satpy/tests/test_writers.py @@ -594,26 +594,6 @@ def test_geotiff(self): compute_writer_results([res]) assert os.path.isfile(fname) -# FIXME: This reader needs more information than exist at the moment -# def test_mitiff(self): -# """Test writing to mitiff file""" -# fname = os.path.join(self.base_dir, 'mitiff.tif') -# res = self.scn.save_datasets(filename=fname, -# datasets=['test'], -# writer='mitiff') -# compute_writer_results([res]) -# self.assertTrue(os.path.isfile(fname)) - -# FIXME: This reader needs more information than exist at the moment -# def test_cf(self): -# """Test writing to NetCDF4 file""" -# fname = os.path.join(self.base_dir, 'cf.nc') -# res = self.scn.save_datasets(filename=fname, -# datasets=['test'], -# writer='cf') -# compute_writer_results([res]) -# self.assertTrue(os.path.isfile(fname)) - def test_multiple_geotiff(self): """Test writing to mitiff file.""" from satpy.writers import compute_writer_results From ceabff9532d2d7aaac0803fd00e2ab3251278a96 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Thu, 16 Nov 2023 15:29:03 -0600 Subject: [PATCH 30/37] Refactor attribute handling --- satpy/cf/attrs.py | 97 ++++++++++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 48 deletions(-) diff --git a/satpy/cf/attrs.py b/satpy/cf/attrs.py index 987aeec6cb..3b355748b8 100644 --- a/satpy/cf/attrs.py +++ b/satpy/cf/attrs.py @@ -14,12 +14,15 @@ # You should have received a copy of the GNU General Public License along with # satpy. If not, see . 
"""CF processing of attributes.""" +from __future__ import annotations + import datetime import json import logging from collections import OrderedDict import numpy as np +import xarray as xr from satpy.writers.utils import flatten_dict @@ -142,68 +145,66 @@ def _encode_nc_attrs(attrs): return OrderedDict(encoded_attrs) -def _add_ancillary_variables_attrs(dataarray): - """Replace ancillary_variables DataArray with a list of their name.""" - list_ancillary_variable_names = [da_ancillary.attrs["name"] - for da_ancillary in dataarray.attrs.get("ancillary_variables", [])] - if list_ancillary_variable_names: - dataarray.attrs["ancillary_variables"] = " ".join(list_ancillary_variable_names) - else: - dataarray.attrs.pop("ancillary_variables", None) - return dataarray +def preprocess_datarray_attrs( + dataarray: xr.DataArray, + flatten_attrs: bool, + exclude_attrs: list[str] | None +) -> xr.DataArray: + """Preprocess DataArray attributes to be written into CF-compliant netCDF/Zarr.""" + _drop_attrs(dataarray, exclude_attrs) + _add_ancillary_variables_attrs(dataarray) + _format_prerequisites_attrs(dataarray) + if "long_name" not in dataarray.attrs and "standard_name" not in dataarray.attrs: + dataarray.attrs["long_name"] = dataarray.name -def _drop_exclude_attrs(dataarray, exclude_attrs): - """Remove user-specified list of attributes.""" - if exclude_attrs is None: - exclude_attrs = [] - for key in exclude_attrs: - dataarray.attrs.pop(key, None) - return dataarray + if flatten_attrs: + dataarray.attrs = flatten_dict(dataarray.attrs) + dataarray.attrs = _encode_nc_attrs(dataarray.attrs) -def _remove_satpy_attrs(new_data): - """Remove _satpy attribute.""" - satpy_attrs = [key for key in new_data.attrs if key.startswith("_satpy")] - for satpy_attr in satpy_attrs: - new_data.attrs.pop(satpy_attr) - new_data.attrs.pop("_last_resampler", None) - return new_data + return dataarray -def _format_prerequisites_attrs(dataarray): - """Reformat prerequisites attribute value to string.""" - if "prerequisites" in dataarray.attrs: - dataarray.attrs["prerequisites"] = [np.bytes_(str(prereq)) for prereq in dataarray.attrs["prerequisites"]] - return dataarray +def _drop_attrs( + dataarray: xr.DataArray, + user_excluded_attrs: list[str] | None +) -> None: + """Remove undesirable attributes.""" + attrs_to_drop = ( + (user_excluded_attrs or []) + + _get_satpy_attrs(dataarray) + + _get_none_attrs(dataarray) + + ["area"] + ) + for key in attrs_to_drop: + dataarray.attrs.pop(key, None) -def _remove_none_attrs(dataarray): - """Remove attribute keys with None value.""" - for key, val in dataarray.attrs.copy().items(): - if val is None: - dataarray.attrs.pop(key) - return dataarray +def _get_satpy_attrs(new_data): + """Remove _satpy attribute.""" + return [key for key in new_data.attrs if key.startswith("_satpy")] + ["_last_resampler"] -def preprocess_datarray_attrs(dataarray, flatten_attrs, exclude_attrs): - """Preprocess DataArray attributes to be written into CF-compliant netCDF/Zarr.""" - dataarray = _remove_satpy_attrs(dataarray) - dataarray = _add_ancillary_variables_attrs(dataarray) - dataarray = _drop_exclude_attrs(dataarray, exclude_attrs) - dataarray = _format_prerequisites_attrs(dataarray) - dataarray = _remove_none_attrs(dataarray) - dataarray.attrs.pop("area", None) +def _get_none_attrs(dataarray): + """Remove attribute keys with None value.""" + return [attr_name for attr_name, attr_val in dataarray.attrs.items() if attr_val is None] - if "long_name" not in dataarray.attrs and "standard_name" not in 
dataarray.attrs: - dataarray.attrs["long_name"] = dataarray.attrs["name"] - if flatten_attrs: - dataarray.attrs = flatten_dict(dataarray.attrs) +def _add_ancillary_variables_attrs(dataarray: xr.DataArray) -> None: + """Replace ancillary_variables DataArray with a list of their name.""" + list_ancillary_variable_names = [da_ancillary.attrs["name"] + for da_ancillary in dataarray.attrs.get("ancillary_variables", [])] + if list_ancillary_variable_names: + dataarray.attrs["ancillary_variables"] = " ".join(list_ancillary_variable_names) + else: + dataarray.attrs.pop("ancillary_variables", None) - dataarray.attrs = _encode_nc_attrs(dataarray.attrs) - return dataarray +def _format_prerequisites_attrs(dataarray: xr.DataArray) -> None: + """Reformat prerequisites attribute value to string.""" + if "prerequisites" in dataarray.attrs: + dataarray.attrs["prerequisites"] = [np.bytes_(str(prereq)) for prereq in dataarray.attrs["prerequisites"]] def _add_history(attrs): From bf681f2f88204aab02334dd9c12ddfcb6a2f9fc1 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Thu, 16 Nov 2023 15:54:07 -0600 Subject: [PATCH 31/37] Rename dataarray to data_arr and add type annotations --- satpy/cf/area.py | 46 ++--- satpy/cf/attrs.py | 54 +++--- satpy/cf/coords.py | 207 +++++++++++------------ satpy/cf/{dataarray.py => data_array.py} | 32 ++-- satpy/cf/datasets.py | 14 +- satpy/tests/cf_tests/test_dataaarray.py | 24 +-- satpy/writers/cf_writer.py | 16 +- 7 files changed, 196 insertions(+), 197 deletions(-) rename satpy/cf/{dataarray.py => data_array.py} (74%) diff --git a/satpy/cf/area.py b/satpy/cf/area.py index 93c8b28eed..88a12a3c52 100644 --- a/satpy/cf/area.py +++ b/satpy/cf/area.py @@ -23,24 +23,24 @@ logger = logging.getLogger(__name__) -def _add_lonlat_coords(dataarray): +def _add_lonlat_coords(data_arr: xr.DataArray) -> xr.DataArray: """Add 'longitude' and 'latitude' coordinates to DataArray.""" - dataarray = dataarray.copy() - area = dataarray.attrs["area"] - ignore_dims = {dim: 0 for dim in dataarray.dims if dim not in ["x", "y"]} - chunks = getattr(dataarray.isel(**ignore_dims), "chunks", None) + data_arr = data_arr.copy() + area = data_arr.attrs["area"] + ignore_dims = {dim: 0 for dim in data_arr.dims if dim not in ["x", "y"]} + chunks = getattr(data_arr.isel(**ignore_dims), "chunks", None) lons, lats = area.get_lonlats(chunks=chunks) - dataarray["longitude"] = xr.DataArray(lons, dims=["y", "x"], - attrs={"name": "longitude", + data_arr["longitude"] = xr.DataArray(lons, dims=["y", "x"], + attrs={"name": "longitude", "standard_name": "longitude", "units": "degrees_east"}, - name="longitude") - dataarray["latitude"] = xr.DataArray(lats, dims=["y", "x"], - attrs={"name": "latitude", + name="longitude") + data_arr["latitude"] = xr.DataArray(lats, dims=["y", "x"], + attrs={"name": "latitude", "standard_name": "latitude", "units": "degrees_north"}, - name="latitude") - return dataarray + name="latitude") + return data_arr def _create_grid_mapping(area): @@ -55,24 +55,24 @@ def _create_grid_mapping(area): return area.area_id, grid_mapping -def _add_grid_mapping(dataarray): +def _add_grid_mapping(data_arr: xr.DataArray) -> tuple[xr.DataArray, xr.DataArray]: """Convert an area to at CF grid mapping.""" - dataarray = dataarray.copy() - area = dataarray.attrs["area"] + data_arr = data_arr.copy() + area = data_arr.attrs["area"] gmapping_var_name, attrs = _create_grid_mapping(area) - dataarray.attrs["grid_mapping"] = gmapping_var_name - return dataarray, xr.DataArray(0, attrs=attrs, name=gmapping_var_name) + 
data_arr.attrs["grid_mapping"] = gmapping_var_name + return data_arr, xr.DataArray(0, attrs=attrs, name=gmapping_var_name) -def area2cf(dataarray, include_lonlats=False, got_lonlats=False): +def area2cf(data_arr: xr.DataArray, include_lonlats: bool = False, got_lonlats: bool = False) -> list[xr.DataArray]: """Convert an area to at CF grid mapping or lon and lats.""" res = [] - include_lonlats = include_lonlats or isinstance(dataarray.attrs["area"], SwathDefinition) - is_area_def = isinstance(dataarray.attrs["area"], AreaDefinition) + include_lonlats = include_lonlats or isinstance(data_arr.attrs["area"], SwathDefinition) + is_area_def = isinstance(data_arr.attrs["area"], AreaDefinition) if not got_lonlats and include_lonlats: - dataarray = _add_lonlat_coords(dataarray) + data_arr = _add_lonlat_coords(data_arr) if is_area_def: - dataarray, gmapping = _add_grid_mapping(dataarray) + data_arr, gmapping = _add_grid_mapping(data_arr) res.append(gmapping) - res.append(dataarray) + res.append(data_arr) return res diff --git a/satpy/cf/attrs.py b/satpy/cf/attrs.py index 3b355748b8..f9d49416c8 100644 --- a/satpy/cf/attrs.py +++ b/satpy/cf/attrs.py @@ -145,66 +145,66 @@ def _encode_nc_attrs(attrs): return OrderedDict(encoded_attrs) -def preprocess_datarray_attrs( - dataarray: xr.DataArray, +def preprocess_attrs( + data_arr: xr.DataArray, flatten_attrs: bool, exclude_attrs: list[str] | None ) -> xr.DataArray: """Preprocess DataArray attributes to be written into CF-compliant netCDF/Zarr.""" - _drop_attrs(dataarray, exclude_attrs) - _add_ancillary_variables_attrs(dataarray) - _format_prerequisites_attrs(dataarray) + _drop_attrs(data_arr, exclude_attrs) + _add_ancillary_variables_attrs(data_arr) + _format_prerequisites_attrs(data_arr) - if "long_name" not in dataarray.attrs and "standard_name" not in dataarray.attrs: - dataarray.attrs["long_name"] = dataarray.name + if "long_name" not in data_arr.attrs and "standard_name" not in data_arr.attrs: + data_arr.attrs["long_name"] = data_arr.name if flatten_attrs: - dataarray.attrs = flatten_dict(dataarray.attrs) + data_arr.attrs = flatten_dict(data_arr.attrs) - dataarray.attrs = _encode_nc_attrs(dataarray.attrs) + data_arr.attrs = _encode_nc_attrs(data_arr.attrs) - return dataarray + return data_arr def _drop_attrs( - dataarray: xr.DataArray, + data_arr: xr.DataArray, user_excluded_attrs: list[str] | None ) -> None: """Remove undesirable attributes.""" attrs_to_drop = ( - (user_excluded_attrs or []) + - _get_satpy_attrs(dataarray) + - _get_none_attrs(dataarray) + - ["area"] + (user_excluded_attrs or []) + + _get_satpy_attrs(data_arr) + + _get_none_attrs(data_arr) + + ["area"] ) for key in attrs_to_drop: - dataarray.attrs.pop(key, None) + data_arr.attrs.pop(key, None) -def _get_satpy_attrs(new_data): +def _get_satpy_attrs(data_arr: xr.DataArray) -> list[str]: """Remove _satpy attribute.""" - return [key for key in new_data.attrs if key.startswith("_satpy")] + ["_last_resampler"] + return [key for key in data_arr.attrs if key.startswith("_satpy")] + ["_last_resampler"] -def _get_none_attrs(dataarray): +def _get_none_attrs(data_arr: xr.DataArray) -> list[str]: """Remove attribute keys with None value.""" - return [attr_name for attr_name, attr_val in dataarray.attrs.items() if attr_val is None] + return [attr_name for attr_name, attr_val in data_arr.attrs.items() if attr_val is None] -def _add_ancillary_variables_attrs(dataarray: xr.DataArray) -> None: +def _add_ancillary_variables_attrs(data_arr: xr.DataArray) -> None: """Replace ancillary_variables DataArray 
with a list of their name.""" list_ancillary_variable_names = [da_ancillary.attrs["name"] - for da_ancillary in dataarray.attrs.get("ancillary_variables", [])] + for da_ancillary in data_arr.attrs.get("ancillary_variables", [])] if list_ancillary_variable_names: - dataarray.attrs["ancillary_variables"] = " ".join(list_ancillary_variable_names) + data_arr.attrs["ancillary_variables"] = " ".join(list_ancillary_variable_names) else: - dataarray.attrs.pop("ancillary_variables", None) + data_arr.attrs.pop("ancillary_variables", None) -def _format_prerequisites_attrs(dataarray: xr.DataArray) -> None: +def _format_prerequisites_attrs(data_arr: xr.DataArray) -> None: """Reformat prerequisites attribute value to string.""" - if "prerequisites" in dataarray.attrs: - dataarray.attrs["prerequisites"] = [np.bytes_(str(prereq)) for prereq in dataarray.attrs["prerequisites"]] + if "prerequisites" in data_arr.attrs: + data_arr.attrs["prerequisites"] = [np.bytes_(str(prereq)) for prereq in data_arr.attrs["prerequisites"]] def _add_history(attrs): diff --git a/satpy/cf/coords.py b/satpy/cf/coords.py index ba1d195663..48a0748509 100644 --- a/satpy/cf/coords.py +++ b/satpy/cf/coords.py @@ -1,4 +1,5 @@ """Set CF-compliant spatial and temporal coordinates.""" +from __future__ import annotations import logging import warnings @@ -8,6 +9,7 @@ import numpy as np import xarray as xr from dask.base import tokenize +from pyproj import CRS from pyresample.geometry import AreaDefinition, SwathDefinition logger = logging.getLogger(__name__) @@ -16,27 +18,27 @@ EPOCH = u"seconds since 1970-01-01 00:00:00" -def add_xy_coords_attrs(dataarray): +def add_xy_coords_attrs(data_arr: xr.DataArray) -> xr.DataArray: """Add relevant attributes to x, y coordinates.""" # If there are no coords, return dataarray - if not dataarray.coords.keys() & {"x", "y", "crs"}: - return dataarray + if not data_arr.coords.keys() & {"x", "y", "crs"}: + return data_arr # If projected area - if _is_projected(dataarray): - dataarray = _add_xy_projected_coords_attrs(dataarray) + if _is_projected(data_arr): + data_arr = _add_xy_projected_coords_attrs(data_arr) else: - dataarray = _add_xy_geographic_coords_attrs(dataarray) - if "crs" in dataarray.coords: - dataarray = dataarray.drop_vars("crs") - return dataarray + data_arr = _add_xy_geographic_coords_attrs(data_arr) + if "crs" in data_arr.coords: + data_arr = data_arr.drop_vars("crs") + return data_arr -def _is_projected(dataarray): +def _is_projected(data_arr: xr.DataArray) -> bool: """Guess whether data are projected or not.""" - crs = _try_to_get_crs(dataarray) + crs = _try_to_get_crs(data_arr) if crs: return crs.is_projected - units = _try_get_units_from_coords(dataarray) + units = _try_get_units_from_coords(data_arr) if units: if units.endswith("m"): return True @@ -46,65 +48,60 @@ def _is_projected(dataarray): return True -def _is_area(dataarray): - if isinstance(dataarray.attrs["area"], AreaDefinition): - return True - else: - return False +def _is_area(data_arr: xr.DataArray) -> bool: + return isinstance(data_arr.attrs["area"], AreaDefinition) -def _is_swath(dataarray): - if isinstance(dataarray.attrs["area"], SwathDefinition): - return True - else: - return False +def _is_swath(data_arr: xr.DataArray) -> bool: + return isinstance(data_arr.attrs["area"], SwathDefinition) -def _try_to_get_crs(dataarray): +def _try_to_get_crs(data_arr: xr.DataArray) -> CRS: """Try to get a CRS from attributes.""" - if "area" in dataarray.attrs: - if _is_area(dataarray): - return dataarray.attrs["area"].crs - if not 
_is_swath(dataarray): + if "area" in data_arr.attrs: + if _is_area(data_arr): + return data_arr.attrs["area"].crs + if not _is_swath(data_arr): logger.warning( - f"Could not tell CRS from area of type {type(dataarray.attrs['area']).__name__:s}. " + f"Could not tell CRS from area of type {type(data_arr.attrs['area']).__name__:s}. " "Assuming projected CRS.") - if "crs" in dataarray.coords: - return dataarray.coords["crs"].item() + if "crs" in data_arr.coords: + return data_arr.coords["crs"].item() -def _try_get_units_from_coords(dataarray): +def _try_get_units_from_coords(data_arr: xr.DataArray) -> str | None: """Try to retrieve coordinate x/y units.""" for c in ["x", "y"]: with suppress(KeyError): # If the data has only 1 dimension, it has only one of x or y coords - if "units" in dataarray.coords[c].attrs: - return dataarray.coords[c].attrs["units"] + if "units" in data_arr.coords[c].attrs: + return data_arr.coords[c].attrs["units"] + return None -def _add_xy_projected_coords_attrs(dataarray, x="x", y="y"): +def _add_xy_projected_coords_attrs(data_arr: xr.DataArray, x: str = "x", y: str = "y") -> xr.DataArray: """Add relevant attributes to x, y coordinates of a projected CRS.""" - if x in dataarray.coords: - dataarray[x].attrs["standard_name"] = "projection_x_coordinate" - dataarray[x].attrs["units"] = "m" - if y in dataarray.coords: - dataarray[y].attrs["standard_name"] = "projection_y_coordinate" - dataarray[y].attrs["units"] = "m" - return dataarray + if x in data_arr.coords: + data_arr[x].attrs["standard_name"] = "projection_x_coordinate" + data_arr[x].attrs["units"] = "m" + if y in data_arr.coords: + data_arr[y].attrs["standard_name"] = "projection_y_coordinate" + data_arr[y].attrs["units"] = "m" + return data_arr -def _add_xy_geographic_coords_attrs(dataarray, x="x", y="y"): +def _add_xy_geographic_coords_attrs(data_arr: xr.DataArray, x: str = "x", y: str = "y") -> xr.DataArray: """Add relevant attributes to x, y coordinates of a geographic CRS.""" - if x in dataarray.coords: - dataarray[x].attrs["standard_name"] = "longitude" - dataarray[x].attrs["units"] = "degrees_east" - if y in dataarray.coords: - dataarray[y].attrs["standard_name"] = "latitude" - dataarray[y].attrs["units"] = "degrees_north" - return dataarray + if x in data_arr.coords: + data_arr[x].attrs["standard_name"] = "longitude" + data_arr[x].attrs["units"] = "degrees_east" + if y in data_arr.coords: + data_arr[y].attrs["standard_name"] = "latitude" + data_arr[y].attrs["units"] = "degrees_north" + return data_arr -def set_cf_time_info(dataarray, epoch): +def set_cf_time_info(data_arr: xr.DataArray, epoch: str | None) -> xr.DataArray: """Set CF time attributes and encoding. It expand the DataArray with a time dimension if does not yet exists. 
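
[Editor's note: for reference, a minimal standalone sketch of the CF time
handling that the next hunk implements — illustrative only, not part of the
patch; plain xarray/pandas, no satpy required:

    import pandas as pd
    import xarray as xr

    EPOCH = "seconds since 1970-01-01 00:00:00"

    data_arr = xr.DataArray([[1, 2], [3, 4]], dims=("y", "x"),
                            coords={"time": pd.Timestamp("2023-06-27T22:17:22")})

    # Encode the time coordinate relative to a fixed epoch and mark it as CF time.
    data_arr["time"].encoding["units"] = EPOCH
    data_arr["time"].attrs["standard_name"] = "time"
    data_arr["time"].attrs.pop("bounds", None)

    # Promote the scalar time coordinate to a real dimension so that files can
    # later be concatenated along "time".
    if "time" not in data_arr.dims and data_arr["time"].size not in data_arr.shape:
        data_arr = data_arr.expand_dims("time")

    print(data_arr.dims)  # ('time', 'y', 'x')
]
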
@@ -118,37 +115,33 @@ def set_cf_time_info(dataarray, epoch): if epoch is None: epoch = EPOCH - dataarray["time"].encoding["units"] = epoch - dataarray["time"].attrs["standard_name"] = "time" - dataarray["time"].attrs.pop("bounds", None) + data_arr["time"].encoding["units"] = epoch + data_arr["time"].attrs["standard_name"] = "time" + data_arr["time"].attrs.pop("bounds", None) - if "time" not in dataarray.dims and dataarray["time"].size not in dataarray.shape: - dataarray = dataarray.expand_dims("time") + if "time" not in data_arr.dims and data_arr["time"].size not in data_arr.shape: + data_arr = data_arr.expand_dims("time") - return dataarray + return data_arr -def _is_lon_or_lat_dataarray(dataarray): - """Check if the DataArray represents the latitude or longitude coordinate.""" - return dataarray.attrs.get("standard_name", "") in ("longitude", "latitude") +def has_projection_coords(data_arrays: dict[str, xr.DataArray]) -> bool: + """Check if DataArray collection has a "longitude" or "latitude" DataArray.""" + return any(_is_lon_or_lat_dataarray(data_arr) for data_arr in data_arrays.values()) -def has_projection_coords(dict_datarrays): - """Check if DataArray collection has a "longitude" or "latitude" DataArray.""" - for dataarray in dict_datarrays.values(): - if _is_lon_or_lat_dataarray(dataarray): - return True - return False +def _is_lon_or_lat_dataarray(data_arr: xr.DataArray) -> bool: + """Check if the DataArray represents the latitude or longitude coordinate.""" + return data_arr.attrs.get("standard_name", "") in ("longitude", "latitude") -def _get_is_nondimensional_coords_dict(dict_dataarrays): +def _get_is_nondimensional_coords_dict(data_arrays: dict[str, xr.DataArray]) -> dict[str, bool]: tokens = defaultdict(set) - for dataarray in dict_dataarrays.values(): - for coord_name in dataarray.coords: - if not _is_lon_or_lat_dataarray(dataarray[coord_name]) and coord_name not in dataarray.dims: - tokens[coord_name].add(tokenize(dataarray[coord_name].data)) - coords_unique = dict([(coord_name, len(tokens) == 1) for coord_name, tokens in tokens.items()]) - return coords_unique + for data_arr in data_arrays.values(): + for coord_name in data_arr.coords: + if not _is_lon_or_lat_dataarray(data_arr[coord_name]) and coord_name not in data_arr.dims: + tokens[coord_name].add(tokenize(data_arr[coord_name].data)) + return dict([(coord_name, len(tokens) == 1) for coord_name, tokens in tokens.items()]) def _warn_if_pretty_but_not_unique(pretty, coord_name): @@ -161,16 +154,19 @@ def _warn_if_pretty_but_not_unique(pretty, coord_name): ) -def _rename_coords(dict_dataarrays, coord_name): +def _rename_coords(data_arrays: dict[str, xr.DataArray], coord_name: str) -> dict[str, xr.DataArray]: """Rename coordinates in the datasets.""" - for name, dataarray in dict_dataarrays.items(): + for name, dataarray in data_arrays.items(): if coord_name in dataarray.coords: rename = {coord_name: f"{name}_{coord_name}"} - dict_dataarrays[name] = dataarray.rename(rename) - return dict_dataarrays + data_arrays[name] = dataarray.rename(rename) + return data_arrays -def ensure_unique_nondimensional_coords(dict_dataarrays, pretty=False): +def ensure_unique_nondimensional_coords( + data_arrays: dict[str, xr.DataArray], + pretty: bool = False +) -> dict[str, xr.DataArray]: """Make non-dimensional coordinates unique among all datasets. 
Non-dimensional coordinates, such as scanline timestamps, @@ -185,9 +181,9 @@ def ensure_unique_nondimensional_coords(dict_dataarrays, pretty=False): this is not applied to latitude and longitude. Args: - datas (dict): + datas: Dictionary of (dataset name, dataset) - pretty (bool): + pretty: Don't modify coordinate names, if possible. Makes the file prettier, but possibly less consistent. Returns: @@ -196,10 +192,10 @@ def ensure_unique_nondimensional_coords(dict_dataarrays, pretty=False): """ # Determine which non-dimensional coordinates are unique # - coords_unique has structure: {coord_name: True/False} - is_coords_unique_dict = _get_is_nondimensional_coords_dict(dict_dataarrays) + is_coords_unique_dict = _get_is_nondimensional_coords_dict(data_arrays) # Prepend dataset name, if not unique or no pretty-format desired - new_dict_dataarrays = dict_dataarrays.copy() + new_dict_dataarrays = data_arrays.copy() for coord_name, unique in is_coords_unique_dict.items(): if not pretty or not unique: _warn_if_pretty_but_not_unique(pretty, coord_name) @@ -207,11 +203,11 @@ def ensure_unique_nondimensional_coords(dict_dataarrays, pretty=False): return new_dict_dataarrays -def check_unique_projection_coords(dict_dataarrays): +def check_unique_projection_coords(data_arrays: dict[str, xr.DataArray]) -> None: """Check that all datasets share the same projection coordinates x/y.""" unique_x = set() unique_y = set() - for dataarray in dict_dataarrays.values(): + for dataarray in data_arrays.values(): if "y" in dataarray.dims: token_y = tokenize(dataarray["y"].data) unique_y.add(token_y) @@ -223,8 +219,7 @@ def check_unique_projection_coords(dict_dataarrays): "Please group them by area or save them in separate files.") - -def add_coordinates_attrs_coords(dict_dataarrays): +def add_coordinates_attrs_coords(data_arrays: dict[str, xr.DataArray]) -> dict[str, xr.DataArray]: """Add to DataArrays the coordinates specified in the 'coordinates' attribute. It deal with the 'coordinates' attributes indicating lat/lon coords @@ -236,51 +231,55 @@ def add_coordinates_attrs_coords(dict_dataarrays): In the final call to `xr.Dataset.to_netcdf()` all coordinate relations will be resolved and the `coordinates` attributes be set automatically. 
""" - for dataarray_name in dict_dataarrays.keys(): - dict_dataarrays = _add_declared_coordinates(dict_dataarrays, - dataarray_name=dataarray_name) + for dataarray_name in data_arrays.keys(): + data_arrays = _add_declared_coordinates(data_arrays, + dataarray_name=dataarray_name) # Drop 'coordinates' attribute in any case to avoid conflicts in xr.Dataset.to_netcdf() - dict_dataarrays[dataarray_name].attrs.pop("coordinates", None) - return dict_dataarrays + data_arrays[dataarray_name].attrs.pop("coordinates", None) + return data_arrays -def _add_declared_coordinates(dict_dataarrays, dataarray_name): +def _add_declared_coordinates(data_arrays: dict[str, xr.DataArray], dataarray_name: str) -> dict[str, xr.DataArray]: """Add declared coordinates to the dataarray if they exist.""" - dataarray = dict_dataarrays[dataarray_name] + dataarray = data_arrays[dataarray_name] declared_coordinates = _get_coordinates_list(dataarray) for coord in declared_coordinates: if coord not in dataarray.coords: - dict_dataarrays = _try_add_coordinate(dict_dataarrays, - dataarray_name=dataarray_name, - coord=coord) - return dict_dataarrays + data_arrays = _try_add_coordinate(data_arrays, + dataarray_name=dataarray_name, + coord=coord) + return data_arrays -def _try_add_coordinate(dict_dataarrays, dataarray_name, coord): +def _try_add_coordinate( + data_arrays: dict[str, xr.DataArray], + dataarray_name: str, + coord: str +) -> dict[str, xr.DataArray]: """Try to add a coordinate to the dataarray, warn if not possible.""" try: - dataarray_dims = set(dict_dataarrays[dataarray_name].dims) - coordinate_dims = set(dict_dataarrays[coord].dims) + dataarray_dims = set(data_arrays[dataarray_name].dims) + coordinate_dims = set(data_arrays[coord].dims) dimensions_to_squeeze = list(coordinate_dims - dataarray_dims) - dict_dataarrays[dataarray_name][coord] = dict_dataarrays[coord].squeeze(dimensions_to_squeeze, drop=True) + data_arrays[dataarray_name][coord] = data_arrays[coord].squeeze(dimensions_to_squeeze, drop=True) except KeyError: warnings.warn( f'Coordinate "{coord}" referenced by dataarray {dataarray_name} does not ' 'exist, dropping reference.', stacklevel=2 ) - return dict_dataarrays + return data_arrays -def _get_coordinates_list(dataarray): +def _get_coordinates_list(data_arr: xr.DataArray) -> list[str]: """Return a list with the coordinates names specified in the 'coordinates' attribute.""" - declared_coordinates = dataarray.attrs.get("coordinates", []) + declared_coordinates = data_arr.attrs.get("coordinates", []) if isinstance(declared_coordinates, str): declared_coordinates = declared_coordinates.split(" ") return declared_coordinates -def add_time_bounds_dimension(ds, time="time"): +def add_time_bounds_dimension(ds: xr.Dataset, time: str = "time") -> xr.Dataset: """Add time bound dimension to xr.Dataset.""" start_times = [] end_times = [] diff --git a/satpy/cf/dataarray.py b/satpy/cf/data_array.py similarity index 74% rename from satpy/cf/dataarray.py rename to satpy/cf/data_array.py index dc2ae7d6c1..ef86953f84 100644 --- a/satpy/cf/dataarray.py +++ b/satpy/cf/data_array.py @@ -17,13 +17,13 @@ import logging import warnings -from satpy.cf.attrs import preprocess_datarray_attrs +from satpy.cf.attrs import preprocess_attrs from satpy.cf.coords import add_xy_coords_attrs, set_cf_time_info logger = logging.getLogger(__name__) -def _handle_dataarray_name(original_name, numeric_name_prefix): +def _handle_data_array_name(original_name, numeric_name_prefix): if original_name[0].isdigit(): if numeric_name_prefix: new_name = 
numeric_name_prefix + original_name @@ -38,14 +38,14 @@ def _handle_dataarray_name(original_name, numeric_name_prefix): return original_name, new_name -def _preprocess_dataarray_name(dataarray, numeric_name_prefix, include_orig_name): +def _preprocess_data_array_name(dataarray, numeric_name_prefix, include_orig_name): """Change the DataArray name by prepending numeric_name_prefix if the name is a digit.""" original_name = None named_has_changed = False dataarray = dataarray.copy() if "name" in dataarray.attrs: original_name = dataarray.attrs.pop("name") - original_name, new_name = _handle_dataarray_name(original_name, numeric_name_prefix) + original_name, new_name = _handle_data_array_name(original_name, numeric_name_prefix) dataarray = dataarray.rename(new_name) named_has_changed = original_name != new_name @@ -54,12 +54,12 @@ def _preprocess_dataarray_name(dataarray, numeric_name_prefix, include_orig_name return dataarray -def make_cf_dataarray(dataarray, - epoch=None, - flatten_attrs=False, - exclude_attrs=None, - include_orig_name=True, - numeric_name_prefix="CHANNEL_"): +def make_cf_data_array(dataarray, + epoch=None, + flatten_attrs=False, + exclude_attrs=None, + include_orig_name=True, + numeric_name_prefix="CHANNEL_"): """Make the xr.DataArray CF-compliant. Args: @@ -76,12 +76,12 @@ def make_cf_dataarray(dataarray, Returns: xr.DataArray: A CF-compliant xr.DataArray. """ - dataarray = _preprocess_dataarray_name(dataarray=dataarray, - numeric_name_prefix=numeric_name_prefix, - include_orig_name=include_orig_name) - dataarray = preprocess_datarray_attrs(dataarray=dataarray, - flatten_attrs=flatten_attrs, - exclude_attrs=exclude_attrs) + dataarray = _preprocess_data_array_name(dataarray=dataarray, + numeric_name_prefix=numeric_name_prefix, + include_orig_name=include_orig_name) + dataarray = preprocess_attrs(data_arr=dataarray, + flatten_attrs=flatten_attrs, + exclude_attrs=exclude_attrs) dataarray = add_xy_coords_attrs(dataarray) if "time" in dataarray.coords: dataarray = set_cf_time_info(dataarray, epoch=epoch) diff --git a/satpy/cf/datasets.py b/satpy/cf/datasets.py index cab71de58c..2c5080ee42 100644 --- a/satpy/cf/datasets.py +++ b/satpy/cf/datasets.py @@ -92,7 +92,7 @@ def _collect_cf_dataset(list_dataarrays, ensure_unique_nondimensional_coords, has_projection_coords, ) - from satpy.cf.dataarray import make_cf_dataarray + from satpy.cf.data_array import make_cf_data_array # Create dictionary of input datarrays # --> Since keys=None, it doesn't never retrieve ancillary variables !!! @@ -134,12 +134,12 @@ def _collect_cf_dataset(list_dataarrays, # --> NOTE: If the input list_dataarrays have different pyresample areas with the same name # area information can be lost here !!! 
for new_dataarray in list_new_dataarrays: - new_dataarray = make_cf_dataarray(new_dataarray, - epoch=epoch, - flatten_attrs=flatten_attrs, - exclude_attrs=exclude_attrs, - include_orig_name=include_orig_name, - numeric_name_prefix=numeric_name_prefix) + new_dataarray = make_cf_data_array(new_dataarray, + epoch=epoch, + flatten_attrs=flatten_attrs, + exclude_attrs=exclude_attrs, + include_orig_name=include_orig_name, + numeric_name_prefix=numeric_name_prefix) dict_cf_dataarrays[new_dataarray.name] = new_dataarray # Check all DataArrays have same projection coordinates diff --git a/satpy/tests/cf_tests/test_dataaarray.py b/satpy/tests/cf_tests/test_dataaarray.py index d0154cd84f..50e5b54424 100644 --- a/satpy/tests/cf_tests/test_dataaarray.py +++ b/satpy/tests/cf_tests/test_dataaarray.py @@ -25,23 +25,23 @@ def test_preprocess_dataarray_name(): """Test saving an array to netcdf/cf where dataset name starting with a digit with prefix include orig name.""" from satpy import Scene - from satpy.cf.dataarray import _preprocess_dataarray_name + from satpy.cf.data_array import _preprocess_data_array_name scn = Scene() scn["1"] = xr.DataArray([1, 2, 3]) dataarray = scn["1"] # If numeric_name_prefix is a string, test add the original_name attributes - out_da = _preprocess_dataarray_name(dataarray, numeric_name_prefix="TEST", include_orig_name=True) + out_da = _preprocess_data_array_name(dataarray, numeric_name_prefix="TEST", include_orig_name=True) assert out_da.attrs["original_name"] == "1" # If numeric_name_prefix is empty string, False or None, test do not add original_name attributes - out_da = _preprocess_dataarray_name(dataarray, numeric_name_prefix="", include_orig_name=True) + out_da = _preprocess_data_array_name(dataarray, numeric_name_prefix="", include_orig_name=True) assert "original_name" not in out_da.attrs - out_da = _preprocess_dataarray_name(dataarray, numeric_name_prefix=False, include_orig_name=True) + out_da = _preprocess_data_array_name(dataarray, numeric_name_prefix=False, include_orig_name=True) assert "original_name" not in out_da.attrs - out_da = _preprocess_dataarray_name(dataarray, numeric_name_prefix=None, include_orig_name=True) + out_da = _preprocess_data_array_name(dataarray, numeric_name_prefix=None, include_orig_name=True) assert "original_name" not in out_da.attrs @@ -49,7 +49,7 @@ def test_make_cf_dataarray_lonlat(): """Test correct CF encoding for area with lon/lat units.""" from pyresample import create_area_def - from satpy.cf.dataarray import make_cf_dataarray + from satpy.cf.data_array import make_cf_data_array from satpy.resample import add_crs_xy_coords area = create_area_def("mavas", 4326, shape=(5, 5), @@ -59,7 +59,7 @@ def test_make_cf_dataarray_lonlat(): dims=("y", "x"), attrs={"area": area}) da = add_crs_xy_coords(da, area) - new_da = make_cf_dataarray(da) + new_da = make_cf_data_array(da) assert new_da["x"].attrs["units"] == "degrees_east" assert new_da["y"].attrs["units"] == "degrees_north" @@ -69,7 +69,7 @@ class TestCfDataArray: def test_make_cf_dataarray(self): """Test the conversion of a DataArray to a CF-compatible DataArray.""" - from satpy.cf.dataarray import make_cf_dataarray + from satpy.cf.data_array import make_cf_data_array from satpy.tests.cf_tests._test_data import get_test_attrs from satpy.tests.utils import assert_dict_array_equality @@ -94,7 +94,7 @@ def test_make_cf_dataarray(self): coords={"y": [0, 1], "x": [1, 2], "acq_time": ("y", [3, 4])}) # Test conversion to something cf-compliant - res = make_cf_dataarray(arr) + res = 
make_cf_data_array(arr) np.testing.assert_array_equal(res["x"], arr["x"]) np.testing.assert_array_equal(res["y"], arr["y"]) np.testing.assert_array_equal(res["acq_time"], arr["acq_time"]) @@ -103,14 +103,14 @@ def test_make_cf_dataarray(self): assert_dict_array_equality(res.attrs, attrs_expected) # Test attribute kwargs - res_flat = make_cf_dataarray(arr, flatten_attrs=True, exclude_attrs=["int"]) + res_flat = make_cf_data_array(arr, flatten_attrs=True, exclude_attrs=["int"]) attrs_expected_flat.pop("int") assert_dict_array_equality(res_flat.attrs, attrs_expected_flat) def test_make_cf_dataarray_one_dimensional_array(self): """Test the conversion of an 1d DataArray to a CF-compatible DataArray.""" - from satpy.cf.dataarray import make_cf_dataarray + from satpy.cf.data_array import make_cf_data_array arr = xr.DataArray(np.array([1, 2, 3, 4]), attrs={}, dims=("y",), coords={"y": [0, 1, 2, 3], "acq_time": ("y", [0, 1, 2, 3])}) - _ = make_cf_dataarray(arr) + _ = make_cf_data_array(arr) diff --git a/satpy/writers/cf_writer.py b/satpy/writers/cf_writer.py index b64a288213..4f67215bd1 100644 --- a/satpy/writers/cf_writer.py +++ b/satpy/writers/cf_writer.py @@ -335,16 +335,16 @@ def da2cf(dataarray, epoch=None, flatten_attrs=False, exclude_attrs=None, numeric_name_prefix (str): Prepend dataset name with this if starting with a digit. """ - from satpy.cf.dataarray import make_cf_dataarray + from satpy.cf.data_array import make_cf_data_array warnings.warn("CFWriter.da2cf is deprecated." - "Use satpy.cf.dataarray.make_cf_dataarray instead.", + "Use satpy.cf.dataarray.make_cf_data_array instead.", DeprecationWarning, stacklevel=3) - return make_cf_dataarray(dataarray=dataarray, - epoch=epoch, - flatten_attrs=flatten_attrs, - exclude_attrs=exclude_attrs, - include_orig_name=include_orig_name, - numeric_name_prefix=numeric_name_prefix) + return make_cf_data_array(dataarray=dataarray, + epoch=epoch, + flatten_attrs=flatten_attrs, + exclude_attrs=exclude_attrs, + include_orig_name=include_orig_name, + numeric_name_prefix=numeric_name_prefix) @staticmethod def update_encoding(dataset, to_netcdf_kwargs): From 055cbef95d2d13b3957206f94d3c29b775ed4e8d Mon Sep 17 00:00:00 2001 From: David Hoese Date: Thu, 16 Nov 2023 21:22:03 -0600 Subject: [PATCH 32/37] Reduce code complexity --- satpy/tests/cf_tests/test_area.py | 42 +++++++++++-------------------- satpy/tests/utils.py | 24 ++++++++++++------ 2 files changed, 30 insertions(+), 36 deletions(-) diff --git a/satpy/tests/cf_tests/test_area.py b/satpy/tests/cf_tests/test_area.py index cf8548d568..a00df3925e 100644 --- a/satpy/tests/cf_tests/test_area.py +++ b/satpy/tests/cf_tests/test_area.py @@ -18,6 +18,7 @@ """Tests for the CF Area.""" import dask.array as da import numpy as np +import pytest import xarray as xr from pyresample import AreaDefinition, SwathDefinition @@ -71,7 +72,7 @@ def test_area2cf(self): assert "latitude" in res[0].coords assert "grid_mapping" not in res[0].attrs - def test__add_grid_mapping(self): + def test_add_grid_mapping(self): """Test the conversion from pyresample area object to CF grid mapping.""" from satpy.cf.area import _add_grid_mapping @@ -255,7 +256,8 @@ def _gm_matches(gmapping, expected): assert new_ds.attrs["grid_mapping"] == "geos" _gm_matches(grid_mapping, geos_expected) - def test__add_lonlat_coords(self): + @pytest.mark.parametrize("dims", [("y", "x"), ("bands", "y", "x")]) + def test_add_lonlat_coords(self, dims): """Test the conversion from areas to lon/lat.""" from satpy.cf.area import _add_lonlat_coords @@ -268,35 
+270,19 @@ def test__add_lonlat_coords(self): [-5570248.686685662, -5567248.28340708, 5567248.28340708, 5570248.686685662] ) lons_ref, lats_ref = area.get_lonlats() - dataarray = xr.DataArray(data=[[1, 2], [3, 4]], dims=("y", "x"), attrs={"area": area}) + if len(dims) == 2: + data_arr = xr.DataArray(data=[[1, 2], [3, 4]], dims=dims, attrs={"area": area}) + else: + data_arr = xr.DataArray( + data=da.from_array(np.arange(3 * 10 * 10).reshape(3, 10, 10), chunks=(1, 5, 5)), + dims=("bands", "y", "x"), + attrs={"area": area}, + ) - res = _add_lonlat_coords(dataarray) + res = _add_lonlat_coords(data_arr) # original should be unmodified - assert "longitude" not in dataarray.coords - assert set(res.coords) == {"longitude", "latitude"} - lat = res["latitude"] - lon = res["longitude"] - np.testing.assert_array_equal(lat.data, lats_ref) - np.testing.assert_array_equal(lon.data, lons_ref) - assert {"name": "latitude", "standard_name": "latitude", "units": "degrees_north"}.items() <= lat.attrs.items() - assert {"name": "longitude", "standard_name": "longitude", "units": "degrees_east"}.items() <= lon.attrs.items() - - area = AreaDefinition( - "seviri", - "Native SEVIRI grid", - "geos", - "+a=6378169.0 +h=35785831.0 +b=6356583.8 +lon_0=0 +proj=geos", - 10, 10, - [-5570248.686685662, -5567248.28340708, 5567248.28340708, 5570248.686685662] - ) - lons_ref, lats_ref = area.get_lonlats() - dataarray = xr.DataArray(data=da.from_array(np.arange(3 * 10 * 10).reshape(3, 10, 10), chunks=(1, 5, 5)), - dims=("bands", "y", "x"), attrs={"area": area}) - res = _add_lonlat_coords(dataarray) - - # original should be unmodified - assert "longitude" not in dataarray.coords + assert "longitude" not in data_arr.coords assert set(res.coords) == {"longitude", "latitude"} lat = res["latitude"] lon = res["longitude"] diff --git a/satpy/tests/utils.py b/satpy/tests/utils.py index 1fb736d427..a6ebf8753e 100644 --- a/satpy/tests/utils.py +++ b/satpy/tests/utils.py @@ -18,6 +18,7 @@ from contextlib import contextmanager from datetime import datetime +from typing import Any from unittest import mock import dask.array as da @@ -414,14 +415,21 @@ def assert_dict_array_equality(d1, d2): assert set(d1.keys()) == set(d2.keys()) for key, val1 in d1.items(): val2 = d2[key] - if isinstance(val1, np.ndarray): - np.testing.assert_array_equal(val1, val2) - assert val1.dtype == val2.dtype - else: - assert val1 == val2 - if isinstance(val1, (np.floating, np.integer, np.bool_)): - assert isinstance(val2, np.generic) - assert val1.dtype == val2.dtype + compare_func = _compare_numpy_array if isinstance(val1, np.ndarray) else _compare_nonarray + compare_func(val1, val2) + + +def _compare_numpy_array(val1: np.ndarray, val2: np.ndarray) -> None: + np.testing.assert_array_equal(val1, val2) + assert val1.dtype == val2.dtype + + +def _compare_nonarray(val1: Any, val2: Any) -> None: + assert val1 == val2 + if isinstance(val1, (np.floating, np.integer, np.bool_)): + assert isinstance(val2, np.generic) + assert val1.dtype == val2.dtype + def xfail_skyfield_unstable_numpy2(): """Determine if skyfield-based tests should be xfail in the unstable numpy 2.x environment.""" From 2a65eea467d6bb0526fce172a121aa4f95c146cb Mon Sep 17 00:00:00 2001 From: David Hoese Date: Thu, 16 Nov 2023 21:48:34 -0600 Subject: [PATCH 33/37] Refactor CF area tests --- satpy/tests/cf_tests/test_area.py | 70 +++++++++++++++---------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/satpy/tests/cf_tests/test_area.py b/satpy/tests/cf_tests/test_area.py index 
a00df3925e..0539ebeb86 100644 --- a/satpy/tests/cf_tests/test_area.py +++ b/satpy/tests/cf_tests/test_area.py @@ -22,18 +22,25 @@ import xarray as xr from pyresample import AreaDefinition, SwathDefinition +from satpy.cf.area import area2cf + + +@pytest.fixture() +def input_data_arr() -> xr.DataArray: + return xr.DataArray( + data=[[1, 2], [3, 4]], + dims=("y", "x"), + coords={"y": [1, 2], "x": [3, 4]}, + attrs={"name": "var1"}, + ) + class TestCFArea: """Test case for CF Area.""" - def test_area2cf(self): + @pytest.mark.parametrize("include_lonlats", [False, True]) + def test_area2cf_geos_area_nolonlats(self, input_data_arr, include_lonlats): """Test the conversion of an area to CF standards.""" - from satpy.cf.area import area2cf - - ds_base = xr.DataArray(data=[[1, 2], [3, 4]], dims=("y", "x"), coords={"y": [1, 2], "x": [3, 4]}, - attrs={"name": "var1"}) - - # a) Area Definition and strict=False geos = AreaDefinition( area_id="geos", description="geos", @@ -41,32 +48,21 @@ def test_area2cf(self): projection={"proj": "geos", "h": 35785831., "a": 6378169., "b": 6356583.8}, width=2, height=2, area_extent=[-1, -1, 1, 1]) - ds = ds_base.copy(deep=True) - ds.attrs["area"] = geos + input_data_arr.attrs["area"] = geos - res = area2cf(ds, include_lonlats=False) + res = area2cf(input_data_arr, include_lonlats=include_lonlats) assert len(res) == 2 assert res[0].size == 1 # grid mapping variable assert res[0].name == res[1].attrs["grid_mapping"] + if include_lonlats: + assert "longitude" in res[1].coords + assert "latitude" in res[1].coords - # b) Area Definition and include_lonlats=False - ds = ds_base.copy(deep=True) - ds.attrs["area"] = geos - res = area2cf(ds, include_lonlats=True) - # same as above - assert len(res) == 2 - assert res[0].size == 1 # grid mapping variable - assert res[0].name == res[1].attrs["grid_mapping"] - # but now also have the lon/lats - assert "longitude" in res[1].coords - assert "latitude" in res[1].coords - - # c) Swath Definition + def test_area2cf_swath(self, input_data_arr): swath = SwathDefinition(lons=[[1, 1], [2, 2]], lats=[[1, 2], [1, 2]]) - ds = ds_base.copy(deep=True) - ds.attrs["area"] = swath + input_data_arr.attrs["area"] = swath - res = area2cf(ds, include_lonlats=False) + res = area2cf(input_data_arr, include_lonlats=False) assert len(res) == 1 assert "longitude" in res[0].coords assert "latitude" in res[0].coords @@ -76,15 +72,6 @@ def test_add_grid_mapping(self): """Test the conversion from pyresample area object to CF grid mapping.""" from satpy.cf.area import _add_grid_mapping - def _gm_matches(gmapping, expected): - """Assert that all keys in ``expected`` match the values in ``gmapping``.""" - for attr_key, attr_val in expected.attrs.items(): - test_val = gmapping.attrs[attr_key] - if attr_val is None or isinstance(attr_val, str): - assert test_val == attr_val - else: - np.testing.assert_almost_equal(test_val, attr_val, decimal=3) - ds_base = xr.DataArray(data=[[1, 2], [3, 4]], dims=("y", "x"), coords={"y": [1, 2], "x": [3, 4]}, attrs={"name": "var1"}) @@ -261,12 +248,13 @@ def test_add_lonlat_coords(self, dims): """Test the conversion from areas to lon/lat.""" from satpy.cf.area import _add_lonlat_coords + width, height = (2, 2) if len(dims) == 2 else (10, 10) area = AreaDefinition( "seviri", "Native SEVIRI grid", "geos", "+a=6378169.0 +h=35785831.0 +b=6356583.8 +lon_0=0 +proj=geos", - 2, 2, + width, height, [-5570248.686685662, -5567248.28340708, 5567248.28340708, 5570248.686685662] ) lons_ref, lats_ref = area.get_lonlats() @@ -290,3 +278,13 @@ def 
test_add_lonlat_coords(self, dims): np.testing.assert_array_equal(lon.data, lons_ref) assert {"name": "latitude", "standard_name": "latitude", "units": "degrees_north"}.items() <= lat.attrs.items() assert {"name": "longitude", "standard_name": "longitude", "units": "degrees_east"}.items() <= lon.attrs.items() + + +def _gm_matches(gmapping, expected): + """Assert that all keys in ``expected`` match the values in ``gmapping``.""" + for attr_key, attr_val in expected.attrs.items(): + test_val = gmapping.attrs[attr_key] + if attr_val is None or isinstance(attr_val, str): + assert test_val == attr_val + else: + np.testing.assert_almost_equal(test_val, attr_val, decimal=3) From 63e8407f6e23c68a0a5afdfbf9fe3e3e2f17edc6 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Thu, 16 Nov 2023 21:59:59 -0600 Subject: [PATCH 34/37] Refactor CF area tests a little more --- satpy/tests/cf_tests/test_area.py | 94 ++++++++++++++----------------- 1 file changed, 41 insertions(+), 53 deletions(-) diff --git a/satpy/tests/cf_tests/test_area.py b/satpy/tests/cf_tests/test_area.py index 0539ebeb86..31b51b6cd9 100644 --- a/satpy/tests/cf_tests/test_area.py +++ b/satpy/tests/cf_tests/test_area.py @@ -22,7 +22,7 @@ import xarray as xr from pyresample import AreaDefinition, SwathDefinition -from satpy.cf.area import area2cf +from satpy.cf.area import _add_grid_mapping, area2cf @pytest.fixture() @@ -68,14 +68,12 @@ def test_area2cf_swath(self, input_data_arr): assert "latitude" in res[0].coords assert "grid_mapping" not in res[0].attrs - def test_add_grid_mapping(self): - """Test the conversion from pyresample area object to CF grid mapping.""" - from satpy.cf.area import _add_grid_mapping + def test_add_grid_mapping_cf_repr(self, input_data_arr): + """Test the conversion from pyresample area object to CF grid mapping. - ds_base = xr.DataArray(data=[[1, 2], [3, 4]], dims=("y", "x"), coords={"y": [1, 2], "x": [3, 4]}, - attrs={"name": "var1"}) + Projection has a corresponding CF representation (e.g. geos). - # a) Projection has a corresponding CF representation (e.g. geos) + """ a = 6378169. b = 6356583.8 h = 35785831. @@ -97,9 +95,8 @@ def test_add_grid_mapping(self): # 'sweep_angle_axis': None, }) - ds = ds_base.copy() - ds.attrs["area"] = geos - new_ds, grid_mapping = _add_grid_mapping(ds) + input_data_arr.attrs["area"] = geos + new_ds, grid_mapping = _add_grid_mapping(input_data_arr) if "sweep_angle_axis" in grid_mapping.attrs: # older versions of pyproj might not include this assert grid_mapping.attrs["sweep_angle_axis"] == "y" @@ -107,9 +104,14 @@ def test_add_grid_mapping(self): assert new_ds.attrs["grid_mapping"] == "geos" _gm_matches(grid_mapping, geos_expected) # should not have been modified - assert "grid_mapping" not in ds.attrs + assert "grid_mapping" not in input_data_arr.attrs + + def test_add_grid_mapping_no_cf_repr(self, input_data_arr): + """Test the conversion from pyresample area object to CF grid mapping. + + Projection does not have a corresponding CF representation (e.g. COSMO). 
- # b) Projection does not have a corresponding CF representation (COSMO) + """ cosmo7 = AreaDefinition( area_id="cosmo7", description="cosmo7", @@ -119,11 +121,9 @@ def test_add_grid_mapping(self): width=597, height=510, area_extent=[-1812933, -1003565, 814056, 1243448] ) + input_data_arr.attrs["area"] = cosmo7 - ds = ds_base.copy() - ds.attrs["area"] = cosmo7 - - new_ds, grid_mapping = _add_grid_mapping(ds) + new_ds, grid_mapping = _add_grid_mapping(input_data_arr) assert "crs_wkt" in grid_mapping.attrs wkt = grid_mapping.attrs["crs_wkt"] assert 'ELLIPSOID["WGS 84"' in wkt @@ -133,7 +133,12 @@ def test_add_grid_mapping(self): assert 'PARAMETER["o_lon_p",-5.465' in wkt assert new_ds.attrs["grid_mapping"] == "cosmo7" - # c) Projection Transverse Mercator + def test_add_grid_mapping_transverse_mercator(self, input_data_arr): + """Test the conversion from pyresample area object to CF grid mapping. + + Projection is transverse mercator. + + """ lat_0 = 36.5 lon_0 = 15.0 @@ -154,13 +159,17 @@ def test_add_grid_mapping(self): "false_northing": 0., }) - ds = ds_base.copy() - ds.attrs["area"] = tmerc - new_ds, grid_mapping = _add_grid_mapping(ds) + input_data_arr.attrs["area"] = tmerc + new_ds, grid_mapping = _add_grid_mapping(input_data_arr) assert new_ds.attrs["grid_mapping"] == "tmerc" _gm_matches(grid_mapping, tmerc_expected) - # d) Projection that has a representation but no explicit a/b + def test_add_grid_mapping_cf_repr_no_ab(self, input_data_arr): + """Test the conversion from pyresample area object to CF grid mapping. + + Projection has a corresponding CF representation but no explicit a/b. + + """ h = 35785831. geos = AreaDefinition( area_id="geos", @@ -175,19 +184,24 @@ def test_add_grid_mapping(self): "latitude_of_projection_origin": 0, "longitude_of_projection_origin": 0, "grid_mapping_name": "geostationary", + "reference_ellipsoid_name": "WGS 84", # 'semi_major_axis': 6378137.0, # 'semi_minor_axis': 6356752.314, # 'sweep_angle_axis': None, }) - ds = ds_base.copy() - ds.attrs["area"] = geos - new_ds, grid_mapping = _add_grid_mapping(ds) + input_data_arr.attrs["area"] = geos + new_ds, grid_mapping = _add_grid_mapping(input_data_arr) assert new_ds.attrs["grid_mapping"] == "geos" _gm_matches(grid_mapping, geos_expected) - # e) oblique Mercator + def test_add_grid_mapping_oblique_mercator(self, input_data_arr): + """Test the conversion from pyresample area object to CF grid mapping. + + Projection is oblique mercator. + + """ area = AreaDefinition( area_id="omerc_otf", description="On-the-fly omerc area", @@ -211,38 +225,12 @@ def test_add_grid_mapping(self): "reference_ellipsoid_name": "WGS 84"} omerc_expected = xr.DataArray(data=0, attrs=omerc_dict) - ds = ds_base.copy() - ds.attrs["area"] = area - new_ds, grid_mapping = _add_grid_mapping(ds) + input_data_arr.attrs["area"] = area + new_ds, grid_mapping = _add_grid_mapping(input_data_arr) assert new_ds.attrs["grid_mapping"] == "omerc_otf" _gm_matches(grid_mapping, omerc_expected) - # f) Projection that has a representation but no explicit a/b - h = 35785831. 
- geos = AreaDefinition( - area_id="geos", - description="geos", - proj_id="geos", - projection={"proj": "geos", "h": h, "datum": "WGS84", "ellps": "GRS80", - "lat_0": 0, "lon_0": 0}, - width=2, height=2, - area_extent=[-1, -1, 1, 1]) - geos_expected = xr.DataArray(data=0, - attrs={"perspective_point_height": h, - "latitude_of_projection_origin": 0, - "longitude_of_projection_origin": 0, - "grid_mapping_name": "geostationary", - "reference_ellipsoid_name": "WGS 84", - }) - - ds = ds_base.copy() - ds.attrs["area"] = geos - new_ds, grid_mapping = _add_grid_mapping(ds) - - assert new_ds.attrs["grid_mapping"] == "geos" - _gm_matches(grid_mapping, geos_expected) - @pytest.mark.parametrize("dims", [("y", "x"), ("bands", "y", "x")]) def test_add_lonlat_coords(self, dims): """Test the conversion from areas to lon/lat.""" From 504220210195f671bf7a5e91d1ecb6ea2bbd6c0a Mon Sep 17 00:00:00 2001 From: David Hoese Date: Fri, 17 Nov 2023 14:17:04 -0600 Subject: [PATCH 35/37] Fix sphinx docstring error in make_cf_data_array --- satpy/cf/data_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/cf/data_array.py b/satpy/cf/data_array.py index ef86953f84..e0b26867c7 100644 --- a/satpy/cf/data_array.py +++ b/satpy/cf/data_array.py @@ -71,7 +71,7 @@ def make_cf_data_array(dataarray, include_orig_name (bool, optional): Include the original dataset name in the netcdf variable attributes. Defaults to True. numeric_name_prefix (str, optional): Prepend dataset name with this if starting with a digit. - Defaults to "CHANNEL_". + Defaults to ``"CHANNEL_"``. Returns: xr.DataArray: A CF-compliant xr.DataArray. From cc366c0d11799374f46301b2c70c1560483dbd95 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Fri, 17 Nov 2023 14:17:26 -0600 Subject: [PATCH 36/37] Add py.typed file so users get type information in their IDE --- MANIFEST.in | 1 + satpy/py.typed | 0 2 files changed, 1 insertion(+) create mode 100644 satpy/py.typed diff --git a/MANIFEST.in b/MANIFEST.in index 3a7cdb0b43..05c921b367 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -13,4 +13,5 @@ include satpy/version.py include pyproject.toml include setup.py include setup.cfg +include satpy/py.typed global-exclude *.py[cod] diff --git a/satpy/py.typed b/satpy/py.typed new file mode 100644 index 0000000000..e69de29bb2 From b4e8fa572d35b55f84151c61a73d071391f697fb Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Tue, 28 Nov 2023 15:54:31 +0100 Subject: [PATCH 37/37] Fix style --- satpy/cf/attrs.py | 2 +- satpy/cf/coords.py | 2 +- satpy/modifiers/spectral.py | 1 - satpy/tests/cf_tests/test_area.py | 2 ++ 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/satpy/cf/attrs.py b/satpy/cf/attrs.py index f9d49416c8..cdec8500d4 100644 --- a/satpy/cf/attrs.py +++ b/satpy/cf/attrs.py @@ -97,7 +97,7 @@ def _encode_object(obj): def _try_decode_object(obj): - """Try to decode byte string""" + """Try to decode byte string.""" try: decoded = obj.decode() except AttributeError: diff --git a/satpy/cf/coords.py b/satpy/cf/coords.py index 48a0748509..9220632fcb 100644 --- a/satpy/cf/coords.py +++ b/satpy/cf/coords.py @@ -181,7 +181,7 @@ def ensure_unique_nondimensional_coords( this is not applied to latitude and longitude. Args: - datas: + data_arrays: Dictionary of (dataset name, dataset) pretty: Don't modify coordinate names, if possible. Makes the file prettier, but possibly less consistent. 
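
[Editor's note: a worked illustration of the behaviour documented in the
docstring above — a standalone sketch, not satpy code. Two arrays carrying an
"acq_time" coordinate with different values cannot share one netCDF variable,
so the dataset name is prepended unless pretty=True can be honoured:

    import xarray as xr

    var1 = xr.DataArray([1, 2], dims="y", name="var1",
                        coords={"y": [0, 1], "acq_time": ("y", [10, 11])})
    var2 = xr.DataArray([3, 4], dims="y", name="var2",
                        coords={"y": [0, 1], "acq_time": ("y", [20, 21])})

    # xr.merge([var1, var2]) would raise a MergeError here: both arrays define
    # "acq_time" with conflicting values. Prepending the dataset name resolves
    # the clash.
    renamed = [arr.rename({"acq_time": f"{arr.name}_acq_time"})
               for arr in (var1, var2)]
    ds = xr.merge(renamed)
    print(sorted(ds.coords))  # ['var1_acq_time', 'var2_acq_time', 'y']
]
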
diff --git a/satpy/modifiers/spectral.py b/satpy/modifiers/spectral.py index e37f6d3c9f..18d1df2379 100644 --- a/satpy/modifiers/spectral.py +++ b/satpy/modifiers/spectral.py @@ -164,7 +164,6 @@ def __call__(self, projectables, optional_datasets=None, **info): def _get_emissivity_as_dataarray(self, nir, da_tb11, da_tb13_4, da_sun_zenith): """Get the emissivity as a dataarray.""" - logger.info("Getting emissive part of %s", nir.attrs["name"]) emissivity = self._get_emissivity_as_dask(nir.data, da_tb11, da_tb13_4, da_sun_zenith, nir.attrs) diff --git a/satpy/tests/cf_tests/test_area.py b/satpy/tests/cf_tests/test_area.py index 31b51b6cd9..ee24d0e10d 100644 --- a/satpy/tests/cf_tests/test_area.py +++ b/satpy/tests/cf_tests/test_area.py @@ -27,6 +27,7 @@ @pytest.fixture() def input_data_arr() -> xr.DataArray: + """Create a data array.""" return xr.DataArray( data=[[1, 2], [3, 4]], dims=("y", "x"), @@ -59,6 +60,7 @@ def test_area2cf_geos_area_nolonlats(self, input_data_arr, include_lonlats): assert "latitude" in res[1].coords def test_area2cf_swath(self, input_data_arr): + """Test area2cf for swath definitions.""" swath = SwathDefinition(lons=[[1, 1], [2, 2]], lats=[[1, 2], [1, 2]]) input_data_arr.attrs["area"] = swath
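
[Editor's note: for context, a usage sketch of the refactored entry point as
the swath test above exercises it — assumes a satpy checkout with this patch
series applied; data values are illustrative:

    import xarray as xr
    from pyresample import SwathDefinition

    from satpy.cf.area import area2cf

    swath = SwathDefinition(lons=[[1, 1], [2, 2]], lats=[[1, 2], [1, 2]])
    data_arr = xr.DataArray([[1, 2], [3, 4]], dims=("y", "x"),
                            attrs={"name": "var1", "area": swath})

    # Swaths carry no CF grid mapping, so area2cf returns a single DataArray
    # with explicit longitude/latitude coordinates instead of a separate
    # grid-mapping variable.
    (converted,) = area2cf(data_arr, include_lonlats=False)
    assert "longitude" in converted.coords
    assert "latitude" in converted.coords
    assert "grid_mapping" not in converted.attrs
]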