Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure maximum accuracy when encoding and decoding cftime.datetime values #4758

Merged
merged 12 commits into from
Feb 10, 2021
7 changes: 7 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ Deprecations

New Features
~~~~~~~~~~~~
- Xarray now leverages updates as of cftime version 1.4.1, which enable exact I/O
rountripping of ``cftime.datetime`` objects (:pull:`4758`).
By `Spencer Clark <https://github.com/spencerkclark>`_.
- :py:meth:`~xarray.cftime_range` and :py:meth:`DataArray.resample` now support
millisecond (``"L"`` or ``"ms"``) and microsecond (``"U"`` or ``"us"``) frequencies
for ``cftime.datetime`` coordinates (:issue:`4097`, :pull:`4758`).
By `Spencer Clark <https://github.com/spencerkclark>`_.
- Significantly higher ``unstack`` performance on numpy-backed arrays which
contain missing values; 8x faster in our benchmark, and 2x faster than pandas.
(:pull:`4746`);
Expand Down
30 changes: 29 additions & 1 deletion xarray/coding/cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,26 @@ def __apply__(self, other):
return other + self.as_timedelta()


class Millisecond(BaseCFTimeOffset):
    """Offset of ``n`` milliseconds (frequency alias ``"L"``/``"ms"``)."""

    _freq = "L"

    def as_timedelta(self):
        """Express this offset as a ``datetime.timedelta``."""
        return timedelta(milliseconds=self.n)

    def __apply__(self, other):
        """Shift *other* forward by this offset."""
        delta = self.as_timedelta()
        return other + delta


class Microsecond(BaseCFTimeOffset):
    """Offset of ``n`` microseconds (frequency alias ``"U"``/``"us"``)."""

    _freq = "U"

    def as_timedelta(self):
        """Express this offset as a ``datetime.timedelta``."""
        return timedelta(microseconds=self.n)

    def __apply__(self, other):
        """Shift *other* forward by this offset."""
        delta = self.as_timedelta()
        return other + delta


_FREQUENCIES = {
"A": YearEnd,
"AS": YearBegin,
Expand All @@ -590,6 +610,10 @@ def __apply__(self, other):
"T": Minute,
"min": Minute,
"S": Second,
"L": Millisecond,
"ms": Millisecond,
"U": Microsecond,
"us": Microsecond,
"AS-JAN": partial(YearBegin, month=1),
"AS-FEB": partial(YearBegin, month=2),
"AS-MAR": partial(YearBegin, month=3),
Expand Down Expand Up @@ -824,7 +848,7 @@ def cftime_range(
`ISO-8601 format <https://en.wikipedia.org/wiki/ISO_8601>`_.
- It supports many, but not all, frequencies supported by
``pandas.date_range``. For example it does not currently support any of
the business-related, semi-monthly, or sub-second frequencies.
the business-related or semi-monthly frequencies.
- Compound sub-monthly frequencies are not supported, e.g. '1H1min', as
these can easily be written in terms of the finest common resolution,
e.g. '61min'.
Expand Down Expand Up @@ -855,6 +879,10 @@ def cftime_range(
+--------+--------------------------+
| S | Second frequency |
+--------+--------------------------+
| L, ms | Millisecond frequency |
+--------+--------------------------+
| U, us | Microsecond frequency |
+--------+--------------------------+

Any multiples of the following anchored offsets are also supported.

Expand Down
74 changes: 48 additions & 26 deletions xarray/coding/times.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import re
import warnings
from datetime import datetime
from datetime import datetime, timedelta
from distutils.version import LooseVersion
from functools import partial

Expand Down Expand Up @@ -35,6 +35,26 @@
"D": int(1e9) * 60 * 60 * 24,
}

# Number of microseconds in one unit of each netCDF time unit; used by
# _unit_timedelta_cftime to build datetime.timedelta ticks for cftime dates.
_US_PER_TIME_DELTA = {
    "microseconds": 1,
    "milliseconds": 1_000,
    "seconds": 1_000_000,
    "minutes": 60 * 1_000_000,
    "hours": 60 * 60 * 1_000_000,
    "days": 24 * 60 * 60 * 1_000_000,
}

# Candidate netCDF time units for cftime dates, ordered coarsest to finest;
# iterated by _infer_time_units_from_diff to pick the coarsest exact unit.
_NETCDF_TIME_UNITS_CFTIME = [
    "days",
    "hours",
    "minutes",
    "seconds",
    "milliseconds",
    "microseconds",
]

# numpy-backed (datetime64) dates additionally allow nanosecond units.
_NETCDF_TIME_UNITS_NUMPY = _NETCDF_TIME_UNITS_CFTIME + ["nanoseconds"]

TIME_UNITS = frozenset(
[
"days",
Expand Down Expand Up @@ -225,9 +245,7 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None):
if calendar in _STANDARD_CALENDARS:
dates = cftime_to_nptime(dates)
elif use_cftime:
dates = _decode_datetime_with_cftime(
flat_num_dates.astype(float), units, calendar
)
dates = _decode_datetime_with_cftime(flat_num_dates, units, calendar)
else:
dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar)

Expand Down Expand Up @@ -262,25 +280,33 @@ def decode_cf_timedelta(num_timedeltas, units):
return result.reshape(num_timedeltas.shape)


def _unit_timedelta_cftime(units):
    """Return one tick of *units* (a netCDF time-unit name) as a ``datetime.timedelta``."""
    microseconds = _US_PER_TIME_DELTA[units]
    return timedelta(microseconds=microseconds)


def _unit_timedelta_numpy(units):
    """Return one tick of *units* (a netCDF time-unit name) as a nanosecond ``np.timedelta64``."""
    as_numpy_unit = _netcdf_to_numpy_timeunit(units)
    nanoseconds = _NS_PER_TIME_DELTA[as_numpy_unit]
    return np.timedelta64(nanoseconds, "ns")


def _infer_time_units_from_diff(unique_timedeltas):
    """Return the coarsest netCDF time unit that evenly divides all timedeltas.

    Falls back to "seconds" when no candidate unit divides every value
    exactly.  The diff rendering of this function interleaved the old and
    new bodies; this is the coherent post-change implementation.
    """
    if unique_timedeltas.dtype == np.dtype("O"):
        # Object dtype holds ``datetime.timedelta`` values (cftime dates);
        # compare against a pure-Python zero timedelta.
        time_units = _NETCDF_TIME_UNITS_CFTIME
        unit_timedelta = _unit_timedelta_cftime
        zero_timedelta = timedelta(microseconds=0)
        timedeltas = unique_timedeltas
    else:
        time_units = _NETCDF_TIME_UNITS_NUMPY
        unit_timedelta = _unit_timedelta_numpy
        zero_timedelta = np.timedelta64(0, "ns")
        # Note that the modulus operator was only implemented for np.timedelta64
        # arrays as of NumPy version 1.16.0. Once our minimum version of NumPy
        # supported is greater than or equal to this we will no longer need to cast
        # unique_timedeltas to a TimedeltaIndex. In the meantime, however, the
        # modulus operator works for TimedeltaIndex objects.
        timedeltas = pd.TimedeltaIndex(unique_timedeltas)
    for time_unit in time_units:
        if np.all(timedeltas % unit_timedelta(time_unit) == zero_timedelta):
            return time_unit
    return "seconds"

Expand Down Expand Up @@ -309,10 +335,6 @@ def infer_datetime_units(dates):
reference_date = dates[0] if len(dates) > 0 else "1970-01-01"
reference_date = format_cftime_datetime(reference_date)
unique_timedeltas = np.unique(np.diff(dates))
if unique_timedeltas.dtype == np.dtype("O"):
# Convert to np.timedelta64 objects using pandas to work around a
# NumPy casting bug: https://github.com/numpy/numpy/issues/11096
unique_timedeltas = to_timedelta_unboxed(unique_timedeltas)
units = _infer_time_units_from_diff(unique_timedeltas)
return f"{units} since {reference_date}"

Expand Down
1 change: 1 addition & 0 deletions xarray/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def LooseVersion(vstring):
has_pseudonetcdf, requires_pseudonetcdf = _importorskip("PseudoNetCDF")
has_cftime, requires_cftime = _importorskip("cftime")
has_cftime_1_1_0, requires_cftime_1_1_0 = _importorskip("cftime", minversion="1.1.0.0")
has_cftime_1_4_1, requires_cftime_1_4_1 = _importorskip("cftime", minversion="1.4.1")
has_dask, requires_dask = _importorskip("dask")
has_bottleneck, requires_bottleneck = _importorskip("bottleneck")
has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis")
Expand Down
30 changes: 30 additions & 0 deletions xarray/tests/test_cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
BaseCFTimeOffset,
Day,
Hour,
Microsecond,
Millisecond,
Minute,
MonthBegin,
MonthEnd,
Expand Down Expand Up @@ -181,6 +183,14 @@ def test_to_offset_offset_input(offset):
("2min", Minute(n=2)),
("S", Second()),
("2S", Second(n=2)),
("L", Millisecond(n=1)),
("2L", Millisecond(n=2)),
("ms", Millisecond(n=1)),
("2ms", Millisecond(n=2)),
("U", Microsecond(n=1)),
("2U", Microsecond(n=2)),
("us", Microsecond(n=1)),
("2us", Microsecond(n=2)),
],
ids=_id_func,
)
Expand Down Expand Up @@ -299,6 +309,8 @@ def test_to_cftime_datetime_error_type_error():
Hour(),
Minute(),
Second(),
Millisecond(),
Microsecond(),
]
_EQ_TESTS_B = [
BaseCFTimeOffset(n=2),
Expand All @@ -316,6 +328,8 @@ def test_to_cftime_datetime_error_type_error():
Hour(n=2),
Minute(n=2),
Second(n=2),
Millisecond(n=2),
Microsecond(n=2),
]


Expand All @@ -340,6 +354,8 @@ def test_neq(a, b):
Hour(n=2),
Minute(n=2),
Second(n=2),
Millisecond(n=2),
Microsecond(n=2),
]


Expand All @@ -360,6 +376,8 @@ def test_eq(a, b):
(Hour(), Hour(n=3)),
(Minute(), Minute(n=3)),
(Second(), Second(n=3)),
(Millisecond(), Millisecond(n=3)),
(Microsecond(), Microsecond(n=3)),
]


Expand Down Expand Up @@ -387,6 +405,8 @@ def test_rmul(offset, expected):
(Hour(), Hour(n=-1)),
(Minute(), Minute(n=-1)),
(Second(), Second(n=-1)),
(Millisecond(), Millisecond(n=-1)),
(Microsecond(), Microsecond(n=-1)),
],
ids=_id_func,
)
Expand All @@ -399,6 +419,8 @@ def test_neg(offset, expected):
(Hour(n=2), (1, 1, 1, 2)),
(Minute(n=2), (1, 1, 1, 0, 2)),
(Second(n=2), (1, 1, 1, 0, 0, 2)),
(Millisecond(n=2), (1, 1, 1, 0, 0, 0, 2000)),
(Microsecond(n=2), (1, 1, 1, 0, 0, 0, 2)),
]


Expand Down Expand Up @@ -427,6 +449,8 @@ def test_radd_sub_monthly(offset, expected_date_args, calendar):
(Hour(n=2), (1, 1, 2, 22)),
(Minute(n=2), (1, 1, 2, 23, 58)),
(Second(n=2), (1, 1, 2, 23, 59, 58)),
(Millisecond(n=2), (1, 1, 2, 23, 59, 59, 998000)),
(Microsecond(n=2), (1, 1, 2, 23, 59, 59, 999998)),
],
ids=_id_func,
)
Expand Down Expand Up @@ -802,6 +826,8 @@ def test_add_quarter_end_onOffset(
((1, 1, 1), Hour(), True),
((1, 1, 1), Minute(), True),
((1, 1, 1), Second(), True),
((1, 1, 1), Millisecond(), True),
((1, 1, 1), Microsecond(), True),
],
ids=_id_func,
)
Expand Down Expand Up @@ -865,6 +891,8 @@ def test_onOffset_month_or_quarter_or_year_end(
(Hour(), (1, 3, 2, 1, 1), (1, 3, 2, 1, 1)),
(Minute(), (1, 3, 2, 1, 1, 1), (1, 3, 2, 1, 1, 1)),
(Second(), (1, 3, 2, 1, 1, 1, 1), (1, 3, 2, 1, 1, 1, 1)),
(Millisecond(), (1, 3, 2, 1, 1, 1, 1000), (1, 3, 2, 1, 1, 1, 1000)),
(Microsecond(), (1, 3, 2, 1, 1, 1, 1), (1, 3, 2, 1, 1, 1, 1)),
],
ids=_id_func,
)
Expand Down Expand Up @@ -914,6 +942,8 @@ def test_rollforward(calendar, offset, initial_date_args, partial_expected_date_
(Hour(), (1, 3, 2, 1, 1), (1, 3, 2, 1, 1)),
(Minute(), (1, 3, 2, 1, 1, 1), (1, 3, 2, 1, 1, 1)),
(Second(), (1, 3, 2, 1, 1, 1, 1), (1, 3, 2, 1, 1, 1, 1)),
(Millisecond(), (1, 3, 2, 1, 1, 1, 1000), (1, 3, 2, 1, 1, 1, 1000)),
(Microsecond(), (1, 3, 2, 1, 1, 1, 1), (1, 3, 2, 1, 1, 1, 1)),
],
ids=_id_func,
)
Expand Down
45 changes: 40 additions & 5 deletions xarray/tests/test_coding_times.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
import warnings
from datetime import timedelta
from itertools import product

import numpy as np
import pandas as pd
import pytest
from pandas.errors import OutOfBoundsDatetime

from xarray import DataArray, Dataset, Variable, coding, conventions, decode_cf
from xarray import (
DataArray,
Dataset,
Variable,
cftime_range,
coding,
conventions,
decode_cf,
)
from xarray.coding.times import (
_encode_datetime_with_cftime,
cftime_to_nptime,
Expand All @@ -19,7 +28,14 @@
from xarray.core.common import contains_cftime_datetimes
from xarray.testing import assert_equal

from . import arm_xfail, assert_array_equal, has_cftime, requires_cftime, requires_dask
from . import (
arm_xfail,
assert_array_equal,
has_cftime,
has_cftime_1_4_1,
requires_cftime,
requires_dask,
)

_NON_STANDARD_CALENDARS_SET = {
"noleap",
Expand Down Expand Up @@ -973,8 +989,13 @@ def test_decode_ambiguous_time_warns(calendar):

@pytest.mark.parametrize("encoding_units", FREQUENCIES_TO_ENCODING_UNITS.values())
@pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys())
def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, freq):
times = pd.date_range("2000", periods=3, freq=freq)
@pytest.mark.parametrize("date_range", [pd.date_range, cftime_range])
def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, freq, date_range):
if not has_cftime and date_range == cftime_range:
pytest.skip("Test requires cftime.")
if (freq == "N" or encoding_units == "nanoseconds") and date_range == cftime_range:
pytest.skip("Nanosecond frequency is not valid for cftime dates.")
times = date_range("2000", periods=3, freq=freq)
units = f"{encoding_units} since 2000-01-01"
encoded, _, _ = coding.times.encode_cf_datetime(times, units)

Expand All @@ -987,7 +1008,7 @@ def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, freq):


@pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys())
def test_encode_decode_roundtrip(freq):
def test_encode_decode_roundtrip_datetime64(freq):
# See GH 4045. Prior to GH 4684 this test would fail for frequencies of
# "S", "L", "U", and "N".
initial_time = pd.date_range("1678-01-01", periods=1)
Expand All @@ -998,6 +1019,20 @@ def test_encode_decode_roundtrip(freq):
assert_equal(variable, decoded)


@pytest.mark.parametrize("freq", ["U", "L", "S", "T", "H", "D"])
def test_encode_decode_roundtrip_cftime(freq):
    """Encoding then decoding cftime dates must reproduce them exactly."""
    if not has_cftime_1_4_1:
        pytest.skip("Exact roundtripping requires at least cftime 1.4.1.")
    base = cftime_range("0001", periods=1)
    far_offset = timedelta(days=291000 * 365)
    times = base.append(cftime_range("0001", periods=2, freq=freq) + far_offset)
    variable = Variable(["time"], times)
    roundtripped = conventions.decode_cf_variable(
        "time", conventions.encode_cf_variable(variable), use_cftime=True
    )
    assert_equal(variable, roundtripped)


@requires_cftime
def test__encode_datetime_with_cftime():
# See GH 4870. cftime versions > 1.4.0 required us to adapt the
Expand Down