Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor CFWriter utility into CF directory #2524

Merged
merged 42 commits into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
5ac92a7
Refactor area-related functions
ghiggi Jun 27, 2023
4b19566
Refactor attrs-related functions
ghiggi Jun 27, 2023
6c27d86
Fix datetime import
ghiggi Jun 27, 2023
59f73ea
Replace deprecated CFWriter.da2cf with make_cf_dataarray
ghiggi Jun 27, 2023
396700f
Refactor Dataset encodings
ghiggi Jun 27, 2023
f664c60
Refactor CF-compliant DataArray creation
ghiggi Jun 27, 2023
3953319
Refactor CF-compliant Datasets creation
ghiggi Jun 27, 2023
064558d
Fix changed imports
ghiggi Jun 27, 2023
5762950
Fix all writers tests
ghiggi Jun 27, 2023
dabaa44
Add logging on top-file
ghiggi Jun 27, 2023
ea12d9b
Update satpy/tests/writer_tests/cf_tests/test_dataaarray.py
ghiggi Jun 28, 2023
109a80e
Merge branch 'main' into feature-cf-dataset
ghiggi Oct 10, 2023
eb0d6e7
Merge branch 'feature-cf-dataset' of github.com:ghiggi/satpy into fea…
ghiggi Oct 10, 2023
fe43fc8
Dummy changes
ghiggi Oct 10, 2023
c37fcb7
Set private functions
ghiggi Oct 10, 2023
22a8d09
Reorg files
ghiggi Oct 10, 2023
e3df20e
Unest cf directories
ghiggi Oct 10, 2023
bf33719
Fix imports error
ghiggi Oct 10, 2023
0dae774
Rename functions and refactor
ghiggi Oct 10, 2023
57afb82
Fix cf_writer module path
ghiggi Oct 10, 2023
1575849
Solve conflicts and merge main
ghiggi Oct 11, 2023
9ef2af9
Avoid modification to CHANGELOG
ghiggi Nov 14, 2023
e96f0de
Rename _encode* functions
ghiggi Nov 14, 2023
48df162
Update xarray version
ghiggi Nov 14, 2023
9372e32
Merge main
ghiggi Nov 14, 2023
14b1f06
Set default epoch argument to None
ghiggi Nov 14, 2023
ec5f8fc
Reduce future risk of circular imports
ghiggi Nov 14, 2023
9a8810f
Fix tests
ghiggi Nov 14, 2023
09325c2
Fix docstrings
ghiggi Nov 16, 2023
c42d1ed
Move EPOCH to satpy.cf.coords
ghiggi Nov 16, 2023
ba09f18
Simplify functions for CodeScene happiness
ghiggi Nov 16, 2023
83e815d
Cleanup CF attrs functions
djhoese Nov 16, 2023
cadcfef
Remove commented out tests
djhoese Nov 16, 2023
ceabff9
Refactor attribute handling
djhoese Nov 16, 2023
bf681f2
Rename dataarray to data_arr and add type annotations
djhoese Nov 16, 2023
055cbef
Reduce code complexity
djhoese Nov 17, 2023
2a65eea
Refactor CF area tests
djhoese Nov 17, 2023
63e8407
Refactor CF area tests a little more
djhoese Nov 17, 2023
5042202
Fix sphinx docstring error in make_cf_data_array
djhoese Nov 17, 2023
cc366c0
Add py.typed file so users get type information in their IDE
djhoese Nov 17, 2023
44c3c29
Merge branch 'main' into feature-cf-dataset
mraspaud Nov 28, 2023
b4e8fa5
Fix style
mraspaud Nov 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion satpy/_scene_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ def to_xarray(scn,
A CF-compliant xr.Dataset

"""
from satpy.writers.cf_writer import EPOCH, collect_cf_datasets
from satpy.writers.cf.datasets import collect_cf_datasets
from satpy.writers.cf.time import EPOCH

if epoch is None:
epoch = EPOCH
Expand Down
18 changes: 18 additions & 0 deletions satpy/tests/writer_tests/cf_tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2017-2023 Satpy developers
#
# This file is part of satpy.
#
# satpy is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# satpy. If not, see <http://www.gnu.org/licenses/>.
"""The CF dataset tests package."""
485 changes: 485 additions & 0 deletions satpy/tests/writer_tests/cf_tests/test_area.py

Large diffs are not rendered by default.

144 changes: 144 additions & 0 deletions satpy/tests/writer_tests/cf_tests/test_attrs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2017-2023 Satpy developers
#
# This file is part of satpy.
#
# satpy is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# satpy. If not, see <http://www.gnu.org/licenses/>.
"""Tests for CF-compatible attributes encoding."""
import datetime
import json
from collections import OrderedDict

import numpy as np


class TestCFAttributeEncoding:
"""Test case for CF attribute encodings."""

def get_test_attrs(self):
    """Create some dataset attributes for testing purpose.

    Returns:
        A 3-tuple ``(attrs, encoded, encoded_flat)``: the raw attributes,
        the expected encoded attributes, and the expected encoded and
        flattened attributes.

    """
    # TODO: this is also used by test_da2cf
    attrs = {'name': 'IR_108',
             'start_time': datetime.datetime(2018, 1, 1, 0),
             'end_time': datetime.datetime(2018, 1, 1, 0, 15),
             'int': 1,
             'float': 1.0,
             'none': None,  # should be dropped
             'numpy_int': np.uint8(1),
             'numpy_float': np.float32(1),
             # NOTE(review): a builtin bool despite the key name
             'numpy_bool': True,
             'numpy_void': np.void(0),
             'numpy_bytes': np.bytes_('test'),
             # np.string_ was an alias of np.bytes_ and was removed in
             # NumPy 2.0, so the surviving name is used here.
             'numpy_string': np.bytes_('test'),
             'list': [1, 2, np.float64(3)],
             'nested_list': ["1", ["2", [3]]],
             'bool': True,
             'array': np.array([1, 2, 3], dtype='uint8'),
             'array_bool': np.array([True, False, True]),
             'array_2d': np.array([[1, 2], [3, 4]]),
             'array_3d': np.array([[[1, 2], [3, 4]], [[1, 2], [3, 4]]]),
             'dict': {'a': 1, 'b': 2},
             'nested_dict': {'l1': {'l2': {'l3': np.array([1, 2, 3], dtype='uint8')}}},
             'raw_metadata': OrderedDict([
                 ('recarray', np.zeros(3, dtype=[('x', 'i4'), ('y', 'u1')])),
                 ('flag', np.bool_(True)),
                 ('dict', OrderedDict([('a', 1), ('b', np.array([1, 2, 3], dtype='uint8'))]))
             ])}
    # Expected result without flattening: nested containers are JSON strings.
    encoded = {'name': 'IR_108',
               'start_time': '2018-01-01 00:00:00',
               'end_time': '2018-01-01 00:15:00',
               'int': 1,
               'float': 1.0,
               'numpy_int': np.uint8(1),
               'numpy_float': np.float32(1),
               'numpy_bool': 'true',
               'numpy_void': '[]',
               'numpy_bytes': 'test',
               'numpy_string': 'test',
               'list': [1, 2, np.float64(3)],
               'nested_list': '["1", ["2", [3]]]',
               'bool': 'true',
               'array': np.array([1, 2, 3], dtype='uint8'),
               'array_bool': ['true', 'false', 'true'],
               'array_2d': '[[1, 2], [3, 4]]',
               'array_3d': '[[[1, 2], [3, 4]], [[1, 2], [3, 4]]]',
               'dict': '{"a": 1, "b": 2}',
               'nested_dict': '{"l1": {"l2": {"l3": [1, 2, 3]}}}',
               'raw_metadata': '{"recarray": [[0, 0], [0, 0], [0, 0]], '
                               '"flag": "true", "dict": {"a": 1, "b": [1, 2, 3]}}'}
    # Expected result with flattening: dict levels are joined into the key
    # with underscores instead of being serialized to JSON.
    encoded_flat = {'name': 'IR_108',
                    'start_time': '2018-01-01 00:00:00',
                    'end_time': '2018-01-01 00:15:00',
                    'int': 1,
                    'float': 1.0,
                    'numpy_int': np.uint8(1),
                    'numpy_float': np.float32(1),
                    'numpy_bool': 'true',
                    'numpy_void': '[]',
                    'numpy_bytes': 'test',
                    'numpy_string': 'test',
                    'list': [1, 2, np.float64(3)],
                    'nested_list': '["1", ["2", [3]]]',
                    'bool': 'true',
                    'array': np.array([1, 2, 3], dtype='uint8'),
                    'array_bool': ['true', 'false', 'true'],
                    'array_2d': '[[1, 2], [3, 4]]',
                    'array_3d': '[[[1, 2], [3, 4]], [[1, 2], [3, 4]]]',
                    'dict_a': 1,
                    'dict_b': 2,
                    'nested_dict_l1_l2_l3': np.array([1, 2, 3], dtype='uint8'),
                    'raw_metadata_recarray': '[[0, 0], [0, 0], [0, 0]]',
                    'raw_metadata_flag': 'true',
                    'raw_metadata_dict_a': 1,
                    'raw_metadata_dict_b': np.array([1, 2, 3], dtype='uint8')}
    return attrs, encoded, encoded_flat

def assertDictWithArraysEqual(self, d1, d2):
    """Assert that two dicts, possibly holding numpy values, are equal.

    Both dicts must have the same keys. Array values of ``d1`` are compared
    element-wise and by dtype; other values are compared with ``==``, and
    numpy floating/integer/bool scalars of ``d1`` must additionally match
    the dtype of a numpy scalar in ``d2``.
    """
    # TODO: this is also used by test_da2cf
    assert set(d1.keys()) == set(d2.keys())
    numpy_scalar_types = (np.floating, np.integer, np.bool_)
    for key in d1:
        first, second = d1[key], d2[key]
        if isinstance(first, np.ndarray):
            np.testing.assert_array_equal(first, second)
            assert first.dtype == second.dtype
            continue
        assert first == second
        if isinstance(first, numpy_scalar_types):
            # Equal values are not enough: the scalar type must survive too.
            assert isinstance(second, np.generic)
            assert first.dtype == second.dtype

def test_encode_attrs_nc(self):
    """Test attributes encoding and JSON decoding of the encoded values."""
    from satpy.writers.cf.attrs import encode_attrs_nc

    raw_attrs, want_encoded, _ = self.get_test_attrs()

    # Test encoding
    result = encode_attrs_nc(raw_attrs)
    self.assertDictWithArraysEqual(want_encoded, result)

    # Test decoding of json-encoded attributes
    want_decoded_by_key = {
        'raw_metadata': {'recarray': [[0, 0], [0, 0], [0, 0]],
                         'flag': 'true',
                         'dict': {'a': 1, 'b': [1, 2, 3]}},
        'array_3d': [[[1, 2], [3, 4]], [[1, 2], [3, 4]]],
        'nested_dict': {"l1": {"l2": {"l3": [1, 2, 3]}}},
        'nested_list': ["1", ["2", [3]]],
    }
    for attr_name, want_decoded in want_decoded_by_key.items():
        assert json.loads(result[attr_name]) == want_decoded
216 changes: 216 additions & 0 deletions satpy/tests/writer_tests/cf_tests/test_dataaarray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2017-2023 Satpy developers
#
# This file is part of satpy.
#
# satpy is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# satpy. If not, see <http://www.gnu.org/licenses/>.
"""Tests CF-compliant DataArray creation."""

import datetime
from collections import OrderedDict

import numpy as np
import xarray as xr

from satpy.tests.utils import make_dsq


def test_preprocess_dataarray_name():
    """Test ``original_name`` handling for a dataset whose name starts with a digit.

    With ``include_orig_name=True`` the ``original_name`` attribute must be
    present when a non-empty string prefix is given, and absent when the
    prefix is an empty string, ``False`` or ``None``.
    """
    from satpy import Scene
    from satpy.writers.cf.dataarray import _preprocess_dataarray_name

    scene = Scene()
    scene['1'] = xr.DataArray([1, 2, 3])
    data_arr = scene['1']

    # If numeric_name_prefix is a string, test add the original_name attributes
    renamed = _preprocess_dataarray_name(data_arr, numeric_name_prefix="TEST", include_orig_name=True)
    assert renamed.attrs['original_name'] == '1'

    # If numeric_name_prefix is empty string, False or None, test do not add original_name attributes
    for falsy_prefix in ("", False, None):
        renamed = _preprocess_dataarray_name(data_arr, numeric_name_prefix=falsy_prefix, include_orig_name=True)
        assert "original_name" not in renamed.attrs


def test_make_cf_dataarray_lonlat():
    """Test correct CF encoding for area with lon/lat units.

    The x/y coordinates of the converted array must carry the units
    ``degrees_east`` and ``degrees_north`` respectively.
    """
    from pyresample import create_area_def

    from satpy.resample import add_crs_xy_coords
    from satpy.writers.cf.dataarray import make_cf_dataarray

    lonlat_area = create_area_def("mavas", 4326, shape=(5, 5),
                                  center=(0, 0), resolution=(1, 1))
    values = np.arange(25).reshape(5, 5)
    data_arr = xr.DataArray(values, dims=("y", "x"), attrs={"area": lonlat_area})
    data_arr = add_crs_xy_coords(data_arr, lonlat_area)

    cf_arr = make_cf_dataarray(data_arr)

    expected_units = {"x": "degrees_east", "y": "degrees_north"}
    for axis, units in expected_units.items():
        assert cf_arr[axis].attrs["units"] == units


class TestCFWriter:
ghiggi marked this conversation as resolved.
Show resolved Hide resolved
"""Test creation of CF DataArray."""

def get_test_attrs(self):
    """Create some dataset attributes for testing purpose.

    Returns:
        A 3-tuple ``(attrs, encoded, encoded_flat)``: the raw attributes,
        the expected encoded attributes, and the expected encoded and
        flattened attributes.

    """
    # TODO: also used by cf/test_attrs.py
    attrs = {'name': 'IR_108',
             'start_time': datetime.datetime(2018, 1, 1, 0),
             'end_time': datetime.datetime(2018, 1, 1, 0, 15),
             'int': 1,
             'float': 1.0,
             'none': None,  # should be dropped
             'numpy_int': np.uint8(1),
             'numpy_float': np.float32(1),
             # NOTE(review): a builtin bool despite the key name
             'numpy_bool': True,
             'numpy_void': np.void(0),
             'numpy_bytes': np.bytes_('test'),
             # np.string_ was an alias of np.bytes_ and was removed in
             # NumPy 2.0, so the surviving name is used here.
             'numpy_string': np.bytes_('test'),
             'list': [1, 2, np.float64(3)],
             'nested_list': ["1", ["2", [3]]],
             'bool': True,
             'array': np.array([1, 2, 3], dtype='uint8'),
             'array_bool': np.array([True, False, True]),
             'array_2d': np.array([[1, 2], [3, 4]]),
             'array_3d': np.array([[[1, 2], [3, 4]], [[1, 2], [3, 4]]]),
             'dict': {'a': 1, 'b': 2},
             'nested_dict': {'l1': {'l2': {'l3': np.array([1, 2, 3], dtype='uint8')}}},
             'raw_metadata': OrderedDict([
                 ('recarray', np.zeros(3, dtype=[('x', 'i4'), ('y', 'u1')])),
                 ('flag', np.bool_(True)),
                 ('dict', OrderedDict([('a', 1), ('b', np.array([1, 2, 3], dtype='uint8'))]))
             ])}
    # Expected result without flattening: nested containers are JSON strings.
    encoded = {'name': 'IR_108',
               'start_time': '2018-01-01 00:00:00',
               'end_time': '2018-01-01 00:15:00',
               'int': 1,
               'float': 1.0,
               'numpy_int': np.uint8(1),
               'numpy_float': np.float32(1),
               'numpy_bool': 'true',
               'numpy_void': '[]',
               'numpy_bytes': 'test',
               'numpy_string': 'test',
               'list': [1, 2, np.float64(3)],
               'nested_list': '["1", ["2", [3]]]',
               'bool': 'true',
               'array': np.array([1, 2, 3], dtype='uint8'),
               'array_bool': ['true', 'false', 'true'],
               'array_2d': '[[1, 2], [3, 4]]',
               'array_3d': '[[[1, 2], [3, 4]], [[1, 2], [3, 4]]]',
               'dict': '{"a": 1, "b": 2}',
               'nested_dict': '{"l1": {"l2": {"l3": [1, 2, 3]}}}',
               'raw_metadata': '{"recarray": [[0, 0], [0, 0], [0, 0]], '
                               '"flag": "true", "dict": {"a": 1, "b": [1, 2, 3]}}'}
    # Expected result with flattening: dict levels are joined into the key
    # with underscores instead of being serialized to JSON.
    encoded_flat = {'name': 'IR_108',
                    'start_time': '2018-01-01 00:00:00',
                    'end_time': '2018-01-01 00:15:00',
                    'int': 1,
                    'float': 1.0,
                    'numpy_int': np.uint8(1),
                    'numpy_float': np.float32(1),
                    'numpy_bool': 'true',
                    'numpy_void': '[]',
                    'numpy_bytes': 'test',
                    'numpy_string': 'test',
                    'list': [1, 2, np.float64(3)],
                    'nested_list': '["1", ["2", [3]]]',
                    'bool': 'true',
                    'array': np.array([1, 2, 3], dtype='uint8'),
                    'array_bool': ['true', 'false', 'true'],
                    'array_2d': '[[1, 2], [3, 4]]',
                    'array_3d': '[[[1, 2], [3, 4]], [[1, 2], [3, 4]]]',
                    'dict_a': 1,
                    'dict_b': 2,
                    'nested_dict_l1_l2_l3': np.array([1, 2, 3], dtype='uint8'),
                    'raw_metadata_recarray': '[[0, 0], [0, 0], [0, 0]]',
                    'raw_metadata_flag': 'true',
                    'raw_metadata_dict_a': 1,
                    'raw_metadata_dict_b': np.array([1, 2, 3], dtype='uint8')}
    return attrs, encoded, encoded_flat

def assertDictWithArraysEqual(self, d1, d2):
    """Check that two dicts containing arrays are equal.

    The key sets must match. For each key, an array value in ``d1`` is
    compared element-wise and by dtype; any other value is compared with
    ``==``, and when it is a numpy floating/integer/bool scalar its
    counterpart in ``d2`` must be a numpy scalar of the same dtype.
    """
    # TODO: also used by cf/test_attrs.py
    assert set(d1.keys()) == set(d2.keys())
    for key, first in d1.items():
        second = d2[key]
        if not isinstance(first, np.ndarray):
            assert first == second
            if isinstance(first, (np.floating, np.integer, np.bool_)):
                # Guard against silent conversion to builtin Python numbers.
                assert isinstance(second, np.generic)
                assert first.dtype == second.dtype
        else:
            np.testing.assert_array_equal(first, second)
            assert first.dtype == second.dtype

def test_make_cf_dataarray(self):
    """Test the conversion of a DataArray to a CF-compatible DataArray.

    Checks that coordinates are kept, that x/y receive projection
    coordinate attributes, and that the resulting attributes match the
    expected encoded (and, with ``flatten_attrs=True``, flattened) ones.
    """
    from satpy.writers.cf.dataarray import make_cf_dataarray

    # Create set of test attributes
    src_attrs, want_attrs, want_attrs_flat = self.get_test_attrs()
    src_attrs['area'] = 'some_area'
    src_attrs['prerequisites'] = [make_dsq(name='hej')]
    src_attrs['_satpy_id_name'] = 'myname'

    # Adjust expected attributes: 'area' and '_satpy_id_name' are not
    # expected in the output, and 'name' is expected as 'long_name'.
    for expected in (want_attrs, want_attrs_flat):
        expected['prerequisites'] = ["DataQuery(name='hej')"]
        expected['long_name'] = src_attrs['name']
        del expected['name']

    # Create test data array
    data = np.array([[1, 2], [3, 4]])
    coords = {'y': [0, 1], 'x': [1, 2], 'acq_time': ('y', [3, 4])}
    arr = xr.DataArray(data, attrs=src_attrs, dims=('y', 'x'), coords=coords)

    # Test conversion to something cf-compliant
    cf_arr = make_cf_dataarray(arr)
    for coord_name in ('x', 'y', 'acq_time'):
        np.testing.assert_array_equal(cf_arr[coord_name], arr[coord_name])
    assert cf_arr['x'].attrs == {'units': 'm', 'standard_name': 'projection_x_coordinate'}
    assert cf_arr['y'].attrs == {'units': 'm', 'standard_name': 'projection_y_coordinate'}
    self.assertDictWithArraysEqual(cf_arr.attrs, want_attrs)

    # Test attribute kwargs
    cf_arr_flat = make_cf_dataarray(arr, flatten_attrs=True, exclude_attrs=['int'])
    del want_attrs_flat['int']
    self.assertDictWithArraysEqual(cf_arr_flat.attrs, want_attrs_flat)

def test_make_cf_dataarray_one_dimensional_array(self):
    """Test the conversion of an 1d DataArray to a CF-compatible DataArray.

    This is a smoke test: it only checks that the conversion does not raise.
    """
    from satpy.writers.cf.dataarray import make_cf_dataarray

    coords = {'y': [0, 1, 2, 3], 'acq_time': ('y', [0, 1, 2, 3])}
    arr = xr.DataArray(np.array([1, 2, 3, 4]), attrs={}, dims=('y',), coords=coords)
    make_cf_dataarray(arr)
Loading