
Change start_time and end_time handling in combine_metadata #2737

Merged Feb 15, 2024 (21 commits)

Changes from 15 commits

Commits
8319b4f  Change time attribute averaging to min/max for start/end times (lahtinep, Feb 1, 2024)
782ac68  Change time attribute averaging to min/max for start/end times (lahtinep, Feb 1, 2024)
cf98aa2  Allow None as time value (lahtinep, Feb 1, 2024)
ab90428  Update combine_metadata docstring (lahtinep, Feb 1, 2024)
cbe2549  Merge branch 'main' into min-max-dataset-times (lahtinep, Feb 1, 2024)
942be9c  Do not include top-level non-time objects in shared_info as times (lahtinep, Feb 1, 2024)
db2bf31  Remove combine_times kwarg from multiscene.stack and default to its d… (lahtinep, Feb 1, 2024)
e0fe845  Remove obsolete private function (lahtinep, Feb 2, 2024)
6e1342f  Remove unnecessary setting of start/end time attributes (lahtinep, Feb 5, 2024)
13168cf  Remove assertions that don't hold when not loading an actual image (lahtinep, Feb 5, 2024)
f7e7570  Combine also values of 'time_parameters' dictionary items (lahtinep, Feb 5, 2024)
4e85a8f  Separate value combination to a new function (lahtinep, Feb 6, 2024)
91dbc8e  Reword docstring of combine_metadata() to include time_parameters dict (lahtinep, Feb 8, 2024)
9d5665c  Clarify combine_metadata() docstring (lahtinep, Feb 9, 2024)
83f6bc8  Update satpy/dataset/metadata.py (djhoese, Feb 9, 2024)
b68920f  Remove douple handling of start/end times and time parameters (lahtinep, Feb 12, 2024)
d1c33a1  Use datetimes when testing times (lahtinep, Feb 12, 2024)
3e050b0  Refactor to remove unnecessary return (lahtinep, Feb 12, 2024)
aee140a  Update satpy/dataset/metadata.py (pnuu, Feb 13, 2024)
52b2d41  Update satpy/dataset/metadata.py (pnuu, Feb 13, 2024)
b8a47a9  Add a warning when trying to use removed 'average_times' kwarg (lahtinep, Feb 14, 2024)
7 changes: 0 additions & 7 deletions satpy/composites/__init__.py
@@ -1,4 +1,4 @@
# Copyright (c) 2015-2023 Satpy developers
#
# This file is part of satpy.
#
@@ -1665,13 +1665,6 @@
img.attrs["mode"] = "".join(img.bands.data)
img.attrs.pop("modifiers", None)
img.attrs.pop("calibration", None)
# Add start time if not present in the filename
if "start_time" not in img.attrs or not img.attrs["start_time"]:
import datetime as dt
img.attrs["start_time"] = dt.datetime.utcnow()
if "end_time" not in img.attrs or not img.attrs["end_time"]:
import datetime as dt
img.attrs["end_time"] = dt.datetime.utcnow()

return img

66 changes: 53 additions & 13 deletions satpy/dataset/metadata.py
@@ -27,20 +27,25 @@
from satpy.writers.utils import flatten_dict


def combine_metadata(*metadata_objects, average_times=True):
Collaborator:
Do we need a deprecation path, where a warning is raised if code passes average_times?

Member:
I'm not sure anyone is actually using this, but the fact that it exists means we thought someone might want to control it, so I agree it should be documented at the very least. A specific deprecation warning would be nice to have.

The changes in the multiscene code are also backwards incompatible, but very unlikely to be used by anyone except maybe Adam and Ernst. If I remember correctly, the default behavior is preserved and was changed when the related kwarg was added to the multiscene stacking function. So my vote is no deprecation warning on the multiscene stuff, but a warning on the metadata.py average_times would be nice to have.

Member Author:
I'll see about the deprecation warning, hopefully tomorrow.

Member Author:
I added a UserWarning if someone tries to use the average_times kwarg.
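
The warning commit (b8a47a9) is not part of the diff shown here. A minimal sketch of what such a guard could look like, assuming the removed keyword is caught via **kwargs; the exact wording and mechanism in the merged code may differ:

```python
import warnings


def combine_metadata(*metadata_objects, **kwargs):
    """Combine metadata; warn if the removed 'average_times' kwarg is still passed."""
    if "average_times" in kwargs:
        # Sketch only: the kwarg no longer has any effect, so just warn the caller.
        warnings.warn(
            "'average_times' is deprecated and has no effect; start/end times "
            "are now combined with min/max and other times are averaged.",
            UserWarning,
            stacklevel=2,
        )
    # ... actual combination logic follows ...
```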

def combine_metadata(*metadata_objects):
"""Combine the metadata of two or more Datasets.

If the values corresponding to any keys are not equal or do not
exist in all provided dictionaries then they are not included in
the returned dictionary. By default any keys with the word 'time'
in them and consisting of datetime objects will be averaged. This
is to handle cases where data were observed at almost the same time
but not exactly. In the interest of time, lazy arrays are compared by
object identity rather than by their contents.
the returned dictionary.

All values of keys containing the substring 'start_time' will be set to
the earliest value, and similarly 'end_time' keys are set to the latest
value. All other keys containing the word 'time' are averaged. Before
these adjustments, `None` values resulting from data that don't have
times associated with them are removed. These rules are also applied to
values in the 'time_parameters' dictionary.

pnuu marked this conversation as resolved.
In the interest of processing time, lazy arrays are compared by object
identity rather than by their contents.

Args:
*metadata_objects: MetadataObject or dict objects to combine
average_times (bool): Average any keys with 'time' in the name

Returns:
dict: the combined metadata
@@ -53,7 +58,7 @@

shared_keys = _shared_keys(info_dicts)

return _combine_shared_info(shared_keys, info_dicts, average_times)
return _combine_shared_info(shared_keys, info_dicts)


def _get_valid_dicts(metadata_objects):
@@ -75,17 +80,52 @@
return reduce(set.intersection, key_sets)


def _combine_shared_info(shared_keys, info_dicts, average_times):
def _combine_shared_info(shared_keys, info_dicts):
shared_info = {}
for key in shared_keys:
values = [info[key] for info in info_dicts]
if "time" in key and isinstance(values[0], datetime) and average_times:
shared_info[key] = average_datetimes(values)
elif _are_values_combinable(values):
shared_info[key] = values[0]
_combine_values(key, values, shared_info)

return shared_info


def _combine_values(key, values, shared_info):
if "time" in key:
times = _combine_times(key, values)
if times is None:
return
shared_info[key] = times
sfinkens marked this conversation as resolved.
elif _are_values_combinable(values):
shared_info[key] = values[0]


def _combine_times(key, values):
if key == "time_parameters":
return _combine_time_parameters(values)
filtered_values = _filter_time_values(values)
if not filtered_values:
return None
if "end_time" in key:
return max(filtered_values)
elif "start_time" in key:
return min(filtered_values)
return average_datetimes(filtered_values)


def _combine_time_parameters(values):
# Assume the first item has all the keys
keys = values[0].keys()
res = {}
for key in keys:
sub_values = [itm[key] for itm in values]
res[key] = _combine_times(key, sub_values)
return res


def _filter_time_values(values):
"""Remove values that are not datetime objects."""
return [v for v in values if isinstance(v, datetime)]


def average_datetimes(datetime_list):
"""Average a series of datetime objects.

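To make the new rules concrete, here is a small usage sketch of combine_metadata() under the behaviour documented in the updated docstring; the metadata values and platform name are made up for illustration:

```python
from datetime import datetime

from satpy.dataset.metadata import combine_metadata

meta1 = {
    "start_time": datetime(2024, 2, 1, 12, 0),
    "end_time": datetime(2024, 2, 1, 12, 10),
    "creation_time": datetime(2024, 2, 1, 12, 30),
    "time_parameters": {"observation_start_time": datetime(2024, 2, 1, 12, 0)},
    "platform_name": "Meteosat-12",
}
meta2 = {
    "start_time": None,  # e.g. data with no time; dropped before combining
    "end_time": datetime(2024, 2, 1, 12, 25),
    "creation_time": datetime(2024, 2, 1, 12, 40),
    "time_parameters": {"observation_start_time": datetime(2024, 2, 1, 12, 5)},
    "platform_name": "Meteosat-12",
}

combined = combine_metadata(meta1, meta2)
# combined["start_time"]    -> datetime(2024, 2, 1, 12, 0)   (earliest non-None value)
# combined["end_time"]      -> datetime(2024, 2, 1, 12, 25)  (latest value)
# combined["creation_time"] -> datetime(2024, 2, 1, 12, 35)  (other 'time' keys are averaged)
# combined["time_parameters"]["observation_start_time"] -> datetime(2024, 2, 1, 12, 0)
# combined["platform_name"] -> "Meteosat-12"                 (identical non-time values are kept)
```
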
41 changes: 8 additions & 33 deletions satpy/multiscene/_blend_funcs.py
@@ -1,6 +1,5 @@
from __future__ import annotations

from datetime import datetime
from typing import Callable, Iterable, Mapping, Optional, Sequence

import pandas as pd
@@ -13,7 +12,6 @@
def stack(
data_arrays: Sequence[xr.DataArray],
weights: Optional[Sequence[xr.DataArray]] = None,
combine_times: bool = True,
blend_type: str = "select_with_weights"
) -> xr.DataArray:
"""Combine a series of datasets in different ways.
@@ -39,19 +37,18 @@

"""
if weights:
return _stack_with_weights(data_arrays, weights, combine_times, blend_type)
return _stack_no_weights(data_arrays, combine_times)
return _stack_with_weights(data_arrays, weights, blend_type)
return _stack_no_weights(data_arrays)


def _stack_with_weights(
datasets: Sequence[xr.DataArray],
weights: Sequence[xr.DataArray],
combine_times: bool,
blend_type: str
) -> xr.DataArray:
blend_func = _get_weighted_blending_func(blend_type)
filled_weights = list(_fill_weights_for_invalid_dataset_pixels(datasets, weights))
return blend_func(datasets, filled_weights, combine_times)
return blend_func(datasets, filled_weights)


def _get_weighted_blending_func(blend_type: str) -> Callable:
@@ -84,10 +81,9 @@
def _stack_blend_by_weights(
datasets: Sequence[xr.DataArray],
weights: Sequence[xr.DataArray],
combine_times: bool
) -> xr.DataArray:
"""Stack datasets blending overlap using weights."""
attrs = _combine_stacked_attrs([data_arr.attrs for data_arr in datasets], combine_times)
attrs = _combine_stacked_attrs([data_arr.attrs for data_arr in datasets])

overlays = []
for weight, overlay in zip(weights, datasets):
@@ -109,14 +105,13 @@
def _stack_select_by_weights(
datasets: Sequence[xr.DataArray],
weights: Sequence[xr.DataArray],
combine_times: bool
) -> xr.DataArray:
"""Stack datasets selecting pixels using weights."""
indices = da.argmax(da.dstack(weights), axis=-1)
if "bands" in datasets[0].dims:
indices = [indices] * datasets[0].sizes["bands"]

attrs = _combine_stacked_attrs([data_arr.attrs for data_arr in datasets], combine_times)
attrs = _combine_stacked_attrs([data_arr.attrs for data_arr in datasets])
dims = datasets[0].dims
coords = datasets[0].coords
selected_array = xr.DataArray(da.choose(indices, datasets), dims=dims, coords=coords, attrs=attrs)
@@ -125,7 +120,6 @@

def _stack_no_weights(
datasets: Sequence[xr.DataArray],
combine_times: bool
) -> xr.DataArray:
base = datasets[0].copy()
collected_attrs = [base.attrs]
@@ -136,32 +130,13 @@
except KeyError:
base = base.where(data_arr.isnull(), data_arr)

attrs = _combine_stacked_attrs(collected_attrs, combine_times)
attrs = _combine_stacked_attrs(collected_attrs)
base.attrs = attrs
return base


def _combine_stacked_attrs(collected_attrs: Sequence[Mapping], combine_times: bool) -> dict:
attrs = combine_metadata(*collected_attrs)
if combine_times and ("start_time" in attrs or "end_time" in attrs):
new_start, new_end = _get_combined_start_end_times(collected_attrs)
if new_start:
attrs["start_time"] = new_start
if new_end:
attrs["end_time"] = new_end
return attrs


def _get_combined_start_end_times(metadata_objects: Iterable[Mapping]) -> tuple[datetime | None, datetime | None]:
"""Get the start and end times attributes valid for the entire dataset series."""
start_time = None
end_time = None
for md_obj in metadata_objects:
if "start_time" in md_obj and (start_time is None or md_obj["start_time"] < start_time):
start_time = md_obj["start_time"]
if "end_time" in md_obj and (end_time is None or md_obj["end_time"] > end_time):
end_time = md_obj["end_time"]
return start_time, end_time
def _combine_stacked_attrs(collected_attrs: Sequence[Mapping]) -> dict:
return combine_metadata(*collected_attrs)


def timeseries(datasets):
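
With the kwarg gone, multiscene stacking callers simply drop combine_times, and the combined attributes always span the full series. A rough usage sketch of the no-weights path; the import path and toy data are assumptions for illustration:

```python
from datetime import datetime

import numpy as np
import xarray as xr

from satpy.multiscene import stack  # import path assumed

first = xr.DataArray(
    np.array([[1.0, np.nan], [1.0, np.nan]]), dims=("y", "x"),
    attrs={"start_time": datetime(2023, 1, 16, 11, 9, 17),
           "end_time": datetime(2023, 1, 16, 11, 20, 12)},
)
second = xr.DataArray(
    np.full((2, 2), 2.0), dims=("y", "x"),
    attrs={"start_time": datetime(2023, 1, 16, 11, 11, 7),
           "end_time": datetime(2023, 1, 16, 11, 28, 2)},
)

blended = stack([first, second])  # no combine_times kwarg anymore
# The stacked result covers the whole series:
# blended.attrs["start_time"] -> datetime(2023, 1, 16, 11, 9, 17)  (earliest)
# blended.attrs["end_time"]   -> datetime(2023, 1, 16, 11, 28, 2)  (latest)
```
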
15 changes: 5 additions & 10 deletions satpy/tests/multiscene_tests/test_blend.py
@@ -245,42 +245,37 @@
("select_with_weights", _get_expected_stack_select),
("blend_with_weights", _get_expected_stack_blend),
])
@pytest.mark.parametrize("combine_times", [False, True])
def test_blend_two_scenes_using_stack_weighted(self, multi_scene_and_weights, groups,
scene1_with_weights, scene2_with_weights,
combine_times, blend_func, exp_result_func):
blend_func, exp_result_func):
"""Test stacking two scenes using weights.

Here we test that the start and end times can be combined so that they
describe the start and end times of the entire data series. We also test
the various types of weighted stacking functions (ex. select, blend).

"""
from functools import partial

multi_scene, weights = multi_scene_and_weights
scene1, weights1 = scene1_with_weights
scene2, weights2 = scene2_with_weights

simple_groups = {DataQuery(name="CloudType"): groups[DataQuery(name="CloudType")]}
multi_scene.group(simple_groups)

weights = [weights[0][0], weights[1][0]]
stack_func = partial(stack, weights=weights, blend_type=blend_func, combine_times=combine_times)
stack_func = partial(stack, weights=weights, blend_type=blend_func)
weighted_blend = multi_scene.blend(blend_function=stack_func)

expected = exp_result_func(scene1, scene2)
result = weighted_blend["CloudType"].compute()
# result has NaNs and xarray's xr.testing.assert_equal doesn't support NaN comparison
np.testing.assert_allclose(result.data, expected.data)

_check_stacked_metadata(result, "CloudType")
if combine_times:
assert result.attrs["start_time"] == datetime(2023, 1, 16, 11, 9, 17)
assert result.attrs["end_time"] == datetime(2023, 1, 16, 11, 28, 1, 900000)
else:
assert result.attrs["start_time"] == datetime(2023, 1, 16, 11, 11, 7, 250000)
assert result.attrs["end_time"] == datetime(2023, 1, 16, 11, 20, 11, 950000)
assert result.attrs["start_time"] == datetime(2023, 1, 16, 11, 9, 17)
assert result.attrs["end_time"] == datetime(2023, 1, 16, 11, 28, 1, 900000)

@pytest.fixture()
def datasets_and_weights(self):
@@ -329,7 +324,7 @@
input_data["weights"][1][line, :] = 2
input_data["weights"][2][:, column] = 2

stack_with_weights = partial(stack, weights=input_data["weights"], combine_times=False)
stack_with_weights = partial(stack, weights=input_data["weights"])
blend_result = stack_with_weights(input_data["datasets"][0:3])

ds1 = input_data["datasets"][0]
4 changes: 0 additions & 4 deletions satpy/tests/test_composites.py
@@ -1,4 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2018-2020 Satpy developers
#
@@ -1420,8 +1420,6 @@
filenames=["/foo.tif"])
register.assert_not_called()
retrieve.assert_not_called()
assert "start_time" in res.attrs
assert "end_time" in res.attrs
assert res.attrs["sensor"] is None
assert "modifiers" not in res.attrs
assert "calibration" not in res.attrs
@@ -1434,8 +1432,6 @@
res = comp()
Scene.assert_called_once_with(reader="generic_image",
filenames=["data_dir/foo.tif"])
assert "start_time" in res.attrs
assert "end_time" in res.attrs
assert res.attrs["sensor"] is None
assert "modifiers" not in res.attrs
assert "calibration" not in res.attrs
66 changes: 56 additions & 10 deletions satpy/tests/test_dataset.py
@@ -1,4 +1,4 @@
# Copyright (c) 2015-2023 Satpy developers
#
# This file is part of satpy.
#
@@ -101,13 +101,42 @@

def setUp(self):
"""Set up the test case."""
self.datetime_dts = (
# The times need to be in ascending order (oldest first)
self.start_time_dts = (
{"start_time": datetime(2018, 2, 1, 11, 58, 0)},
{"start_time": datetime(2018, 2, 1, 11, 59, 0)},
{"start_time": datetime(2018, 2, 1, 12, 0, 0)},
{"start_time": datetime(2018, 2, 1, 12, 1, 0)},
{"start_time": datetime(2018, 2, 1, 12, 2, 0)},
)
self.end_time_dts = (
{"end_time": datetime(2018, 2, 1, 11, 58, 0)},
{"end_time": datetime(2018, 2, 1, 11, 59, 0)},
{"end_time": datetime(2018, 2, 1, 12, 0, 0)},
{"end_time": datetime(2018, 2, 1, 12, 1, 0)},
{"end_time": datetime(2018, 2, 1, 12, 2, 0)},
)
self.other_time_dts = (
{"other_time": datetime(2018, 2, 1, 11, 58, 0)},
{"other_time": datetime(2018, 2, 1, 11, 59, 0)},
{"other_time": datetime(2018, 2, 1, 12, 0, 0)},
{"other_time": datetime(2018, 2, 1, 12, 1, 0)},
{"other_time": datetime(2018, 2, 1, 12, 2, 0)},
)
self.start_time_dts_with_none = (
{"start_time": None},
{"start_time": datetime(2018, 2, 1, 11, 59, 0)},
{"start_time": datetime(2018, 2, 1, 12, 0, 0)},
{"start_time": datetime(2018, 2, 1, 12, 1, 0)},
{"start_time": datetime(2018, 2, 1, 12, 2, 0)},
)
self.end_time_dts_with_none = (
{"end_time": datetime(2018, 2, 1, 11, 58, 0)},
{"end_time": datetime(2018, 2, 1, 11, 59, 0)},
{"end_time": datetime(2018, 2, 1, 12, 0, 0)},
{"end_time": datetime(2018, 2, 1, 12, 1, 0)},
{"end_time": None},
)

def test_average_datetimes(self):
"""Test the average_datetimes helper function."""
@@ -122,18 +151,35 @@
ret = average_datetimes(dts)
assert dts[2] == ret

def test_combine_times_with_averaging(self):
"""Test the combine_metadata with times with averaging."""
def test_combine_start_times(self):
"""Test the combine_metadata with start times."""
from satpy.dataset.metadata import combine_metadata
ret = combine_metadata(*self.start_time_dts)
assert ret["start_time"] == self.start_time_dts[0]["start_time"]

def test_combine_end_times(self):
"""Test the combine_metadata with end times."""
from satpy.dataset.metadata import combine_metadata
ret = combine_metadata(*self.end_time_dts)
assert ret["end_time"] == self.end_time_dts[-1]["end_time"]

def test_combine_start_times_with_none(self):
"""Test the combine_metadata with start times when there's a None included."""
from satpy.dataset.metadata import combine_metadata
ret = combine_metadata(*self.start_time_dts_with_none)
assert ret["start_time"] == self.start_time_dts_with_none[1]["start_time"]

def test_combine_end_times_with_none(self):
"""Test the combine_metadata with end times when there's a None included."""
from satpy.dataset.metadata import combine_metadata
ret = combine_metadata(*self.datetime_dts)
assert self.datetime_dts[2]["start_time"] == ret["start_time"]
ret = combine_metadata(*self.end_time_dts_with_none)
assert ret["end_time"] == self.end_time_dts_with_none[-2]["end_time"]

def test_combine_times_without_averaging(self):
"""Test the combine_metadata with times without averaging."""
def test_combine_other_times(self):
"""Test the combine_metadata with other time values than start or end times."""
from satpy.dataset.metadata import combine_metadata
ret = combine_metadata(*self.datetime_dts, average_times=False)
# times are not equal so don't include it in the final result
assert "start_time" not in ret
ret = combine_metadata(*self.other_time_dts)
assert ret["other_time"] == self.other_time_dts[2]["other_time"]

def test_combine_arrays(self):
"""Test the combine_metadata with arrays."""