diff --git a/c3s_eqc_data_checker/check.py b/c3s_eqc_data_checker/check.py index 2f378e1..326f752 100644 --- a/c3s_eqc_data_checker/check.py +++ b/c3s_eqc_data_checker/check.py @@ -296,23 +296,26 @@ def check_temporal_resolution( self, min: datetime.date | datetime.datetime | str | None, max: datetime.date | datetime.datetime | str | None, - resolution: str | None, + frequency: str | None, name: str | None, ) -> dict[str, Any]: # noqa: D205, D400 """ [temporal_resolution] # Check temporal resolution. # + # See pandas frequency aliases: + # https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases + # # Arguments: # * min: first time (optional) # * max: last time (optional) - # * resolution: time resolution (optional) + # * frequency: time frequency (optional). # * name: name of time dimension (optional, default: "time") # # Example: min = 1900-01-01 max = 1900-01-02 - resolution = "1 days" + frequency = "1D" name = "time" """ if name is None: @@ -334,12 +337,12 @@ def check_temporal_resolution( if (actual_max := time.max()) != pd.to_datetime(max): errors["max"] = str(actual_max.values) - if resolution is not None: - res_td = pd.to_timedelta(resolution) - if time.size <= 1 and res_td != pd.to_timedelta(0): - errors["resolution"] = "0" - elif ((actual_res := time.diff(name)) != res_td).any(): - errors["resolution"] = {str(value) for value in actual_res.values} + if frequency is not None: + expected_time = pd.date_range( + time.min().values, time.max().values, freq=frequency + ) + if time.size != expected_time.size or not (time == expected_time).all(): + errors["frequency"] = {str(value) for value in time.diff(name).values} return errors diff --git a/tests/test_10_netcdf.py b/tests/test_10_netcdf.py index 8dc4d90..9fe9396 100644 --- a/tests/test_10_netcdf.py +++ b/tests/test_10_netcdf.py @@ -73,19 +73,21 @@ def test_cf_compliance(tmp_path: pathlib.Path) -> None: def test_temporal_resolution(tmp_path: pathlib.Path) -> None: - for date in pd.date_range("1900-01-01", "1900-01-02", freq="1D"): + for date in pd.date_range("1900-01-01", "1900-02-01", freq="1MS"): da = xr.DataArray(date, name="time") da.to_netcdf(tmp_path / f"{date}.nc") checker = Checker(str(tmp_path / "*.nc"), files_format="NETCDF") - actual = checker.check_temporal_resolution("1900-01-01", "1900-01-02", "1D", "time") + actual = checker.check_temporal_resolution( + "1900-01-01", "1900-02-01", "1MS", "time" + ) assert actual == {} - actual = checker.check_temporal_resolution("2000-01-01", "2000-01-02", "2D", "time") + actual = checker.check_temporal_resolution("2000-01-01", "2000-02-01", "1D", "time") expected = { "min": "1900-01-01T00:00:00.000000000", - "max": "1900-01-02T00:00:00.000000000", - "resolution": {"86400000000000 nanoseconds"}, + "max": "1900-02-01T00:00:00.000000000", + "frequency": {"2678400000000000 nanoseconds"}, } assert actual == expected diff --git a/tests/test_20_grib.py b/tests/test_20_grib.py index 927a504..45b12ed 100644 --- a/tests/test_20_grib.py +++ b/tests/test_20_grib.py @@ -71,7 +71,7 @@ def test_cf_compliance(tmp_path: pathlib.Path, grib_path: pathlib.Path) -> None: def test_temporal_resolution(grib_path: pathlib.Path) -> None: checker = Checker(str(grib_path / "GRIB2.tmpl"), files_format="GRIB") actual = checker.check_temporal_resolution( - "2007-03-23T12", "2007-03-23T12", "0", None + "2007-03-23T12", "2007-03-23T12", None, None ) assert actual == {} @@ -81,7 +81,6 @@ def test_temporal_resolution(grib_path: pathlib.Path) -> None: expected = { "max": "2007-03-23T12:00:00.000000000", "min": "2007-03-23T12:00:00.000000000", - "resolution": "0", } assert actual == expected