diff --git a/cdflib/epochs.py b/cdflib/epochs.py index f6a560e..0c54af0 100644 --- a/cdflib/epochs.py +++ b/cdflib/epochs.py @@ -154,6 +154,7 @@ def breakdown(epochs: epoch_types) -> np.ndarray: @staticmethod def _compose_date( + nat_positions: npt.NDArray, years: npt.NDArray, months: npt.NDArray, days: npt.NDArray, @@ -174,18 +175,29 @@ def _compose_date( vals = (v for v in (years, months, days, hours, minutes, seconds, milliseconds, microseconds, nanoseconds) if v is not None) arrays: List[npt.NDArray[np.datetime64]] = [np.array(v, dtype=t) for t, v in zip(types, vals)] - return np.array(sum(arrays)) + total_datetime = np.array(sum(arrays)) + total_datetime = np.where(nat_positions, np.datetime64("NaT"), total_datetime) + return total_datetime @classmethod def to_datetime(cls, cdf_time: epoch_types) -> npt.NDArray[np.datetime64]: """ - Converts CDF epoch argument to numpy.datetime64. This method - converts a scalar, or array-like. Precision is only kept to the - nearest nanosecond. + Converts CDF epoch argument to numpy.datetime64. + + Parameters: + cdf_time: NumPy scalar/arrays to convert. np.int64 will be converted to cdf_tt2000, np.complex128 will be converted to cdf_epoch16, and floats will be converted to cdf_epoch. + + Notes: + Because of datetime64 limitations, CDF_EPOCH16 precision is only kept to the nearest nanosecond. 
+ """ times = cls.breakdown(cdf_time) times = np.atleast_2d(times) - return cls._compose_date(*times.T[:9]).astype("datetime64[ns]") + + fillval_locations = np.all((times[:, 0:7] == [9999, 12, 31, 23, 59, 59, 999]), axis=1) + padval_locations = np.all((times[:, 0:7] == [0, 1, 1, 0, 0, 0, 0]), axis=1) + nan_locations = np.logical_or(fillval_locations, padval_locations) + return cls._compose_date(nan_locations, *times.T[:9]).astype("datetime64[ns]") @staticmethod def unixtime(cdf_time: npt.ArrayLike) -> Union[float, npt.NDArray]: @@ -455,18 +467,26 @@ def breakdown_tt2000(tt2000: cdf_tt2000_type) -> np.ndarray: """ Breaks down the epoch(s) into UTC components. - For CDF_EPOCH: - they are 7 date/time components: year, month, day, - hour, minute, second, and millisecond - For CDF_EPOCH16: - they are 10 date/time components: year, month, day, - hour, minute, second, and millisecond, microsecond, - nanosecond, and picosecond. - For TT2000: - they are 9 date/time components: year, month, day, - hour, minute, second, millisecond, microsecond, - nanosecond. + Calculate date and time from cdf_time_tt2000 integers + + Parameters + ---------- + epochs : array-like + Single, list, tuple, or np.array of tt2000 values + + Returns + ------- + components : ndarray + List or array of date and time values. The last axis contains + (in order): year, month, day, hour, minute, second, millisecond, + microsecond, and nanosecond + + Notes + ----- + If a bad epoch is supplied, a fill date of 9999-12-31 23:59:59 and 999 ms, 999 us, and + 999 ns is returned. 
""" + new_tt2000 = np.atleast_1d(tt2000).astype(np.longlong) count = len(new_tt2000) toutcs = np.zeros((9, count), dtype=int) @@ -576,7 +596,14 @@ def breakdown_tt2000(tt2000: cdf_tt2000_type) -> np.ndarray: toutcs[7, :] = ma1 toutcs[8, :] = na1 - return np.squeeze(toutcs.T) + # Check standard fill and pad values + cdf_epoch_time_tt2000 = np.atleast_2d(toutcs.T) + fillval_locations = np.all(cdf_epoch_time_tt2000 == [1707, 9, 22, 12, 12, 10, 961, 224, 192], axis=1) + cdf_epoch_time_tt2000[fillval_locations] = [9999, 12, 31, 23, 59, 59, 999, 999, 999] + padval_locations = np.all(cdf_epoch_time_tt2000 == [1707, 9, 22, 12, 12, 10, 961, 224, 193], axis=1) + cdf_epoch_time_tt2000[padval_locations] = [0, 1, 1, 0, 0, 0, 0, 0, 0] + + return np.squeeze(cdf_epoch_time_tt2000) @staticmethod def compute_tt2000(datetimes: npt.ArrayLike) -> Union[int, npt.NDArray[np.int64]]: @@ -1187,7 +1214,10 @@ def breakdown_epoch16(epochs: cdf_epoch16_type) -> npt.NDArray: components = np.full(shape=cshape, fill_value=[9999, 12, 31, 23, 59, 59, 999, 999, 999, 999]) for i, epoch16 in enumerate(new_epochs): # Ignore fill values - if (epoch16.real != -1.0e31) or (epoch16.imag != -1.0e31): + if (epoch16.real != -1.0e31) or (epoch16.imag != -1.0e31) or np.isnan(epoch16): + if (epoch16.real == -1.0e30) or (epoch16.imag == -1.0e30): + components[i] = [0, 1, 1, 0, 0, 0, 0, 0, 0, 0] + continue esec = -epoch16.real if epoch16.real < 0.0 else epoch16.real efra = -epoch16.imag if epoch16.imag < 0.0 else epoch16.imag @@ -1518,8 +1548,8 @@ def breakdown_epoch(epochs: cdf_epoch_type) -> np.ndarray: cshape.append(7) components = np.full(shape=cshape, fill_value=[9999, 12, 31, 23, 59, 59, 999]) for i, epoch in enumerate(new_epochs): - # Ignore fill values - if epoch != -1.0e31: + # Ignore fill values and NaNs + if (epoch != -1.0e31) and not np.isnan(epoch): esec = -epoch / 1000.0 if epoch < 0.0 else epoch / 1000.0 date_time = CDFepoch._calc_from_julian(esec, 0.0) @@ -1530,6 +1560,8 @@ def 
breakdown_epoch(epochs: cdf_epoch_type) -> np.ndarray: components = date_time[..., :7] else: components[i] = date_time[..., :7] + elif epoch == 0: + components[i] = [0, 1, 1, 0, 0, 0, 0] return np.squeeze(components) diff --git a/cdflib/xarray/cdf_to_xarray.py b/cdflib/xarray/cdf_to_xarray.py index 5b77a18..5c8d84a 100644 --- a/cdflib/xarray/cdf_to_xarray.py +++ b/cdflib/xarray/cdf_to_xarray.py @@ -303,10 +303,16 @@ def _convert_fillvals_to_nan(var_data: npt.NDArray, var_atts: Dict[str, Any], va ): if new_data.size > 1: if new_data[new_data == var_atts["FILLVAL"]].size != 0: - new_data[new_data == var_atts["FILLVAL"]] = np.nan + if new_data.dtype.type == np.datetime64: + new_data[new_data == var_atts["FILLVAL"]] = np.datetime64("nat") + else: + new_data[new_data == var_atts["FILLVAL"]] = np.nan else: if new_data == var_atts["FILLVAL"]: - new_data = np.array(np.nan) + if new_data.dtype.type == np.datetime64: + new_data[new_data == var_atts["FILLVAL"]] = np.array(np.datetime64("nat")) + else: + new_data[new_data == var_atts["FILLVAL"]] = np.array(np.nan) return new_data diff --git a/cdflib/xarray/xarray_to_cdf.py b/cdflib/xarray/xarray_to_cdf.py index 166b835..5175591 100644 --- a/cdflib/xarray/xarray_to_cdf.py +++ b/cdflib/xarray/xarray_to_cdf.py @@ -60,14 +60,14 @@ 11: np.uint8(255), 12: np.uint16(65535), 14: np.uint32(4294967295), - 21: np.float32(-1e30), - 22: np.float64(-1e30), - 31: np.float64(-1e30), - 32: np.complex128(complex(-1e30, -1e30)), + 21: np.float32(-1e31), + 22: np.float64(-1e31), + 31: np.float64(-1e31), + 32: np.complex128(complex(-1e31, -1e31)), 33: np.datetime64(-9223372036854775808, "ns"), 41: np.int8(-128), - 44: np.float32(-1e30), - 45: np.float64(-1e30), + 44: np.float32(-1e31), + 45: np.float64(-1e31), 51: np.str_(" "), 52: np.str_(" "), } @@ -126,7 +126,7 @@ def _is_istp_epoch_variable(var_name: str) -> Union[re.Match, None]: return epoch_regex_1.match(var_name.lower()) or epoch_regex_2.match(var_name.lower()) -def _dtype_to_cdf_type(var: 
xr.Dataset, terminate_on_warning: bool = False) -> Tuple[int, int]: +def _dtype_to_cdf_type(var: xr.DataArray, terminate_on_warning: bool = False) -> Tuple[int, int]: # Determines which CDF types to cast the xarray.Dataset to # Set some defaults @@ -140,7 +140,7 @@ def _dtype_to_cdf_type(var: xr.Dataset, terminate_on_warning: bool = False) -> T return STRINGS_TO_DATATYPES[cdf_data_type], element_size # Everything named "epoch" should be cast to a CDF_TIME_TT2000 - if _is_istp_epoch_variable(var.name.lower()): + if _is_istp_epoch_variable(var.name.lower()): # type: ignore return STRINGS_TO_DATATYPES["CDF_TIME_TT2000"], element_size numpy_data_type = var.dtype @@ -191,14 +191,52 @@ def _dtype_to_cdf_type(var: xr.Dataset, terminate_on_warning: bool = False) -> T return STRINGS_TO_DATATYPES[cdf_data_type], element_size -def _dtype_to_fillval(var: xr.Dataset, terminate_on_warning: bool = False) -> Union[np.number, np.str_, np.datetime64, object]: +def _dtype_to_fillval(var: xr.DataArray, terminate_on_warning: bool = False) -> Union[np.number, np.str_, np.datetime64]: datatype, _ = _dtype_to_cdf_type(var, terminate_on_warning=terminate_on_warning) if datatype in DATATYPE_FILLVALS: - return DATATYPE_FILLVALS[datatype] + return DATATYPE_FILLVALS[datatype] # type: ignore else: return np.str_(" ") +def _convert_nans_to_fillval(var_data: xr.Dataset, terminate_on_warning: bool = False) -> xr.Dataset: + new_data = var_data + + for var_name in new_data.data_vars: + data_array = new_data[var_name] + fill_value = _dtype_to_fillval(data_array) + if fill_value.dtype.type != np.datetime64: + try: + new_data[var_name] = new_data[var_name].fillna(fill_value) + except: + pass + var_att_dict = {} + for att in data_array.attrs: + try: + var_att_dict[att] = np.nan_to_num(data_array.attrs[att], nan=-1e31) # type: ignore + except: + var_att_dict[att] = data_array.attrs[att] + new_data[var_name].attrs = var_att_dict + + for var_name in new_data.coords: + data_array = new_data[var_name] + 
fill_value = _dtype_to_fillval(data_array) + if fill_value.dtype.type != np.datetime64: + try: + new_data[var_name] = new_data[var_name].fillna(fill_value) + except: + pass + var_att_dict = {} + for att in data_array.attrs: + try: + var_att_dict[att] = np.nan_to_num(data_array.attrs[att], nan=-1e31) # type: ignore + except: + var_att_dict[att] = data_array.attrs[att] + new_data[var_name].attrs = var_att_dict + + return new_data + + def _verify_depend_dimensions( dataset: xr.Dataset, dimension_number: int, @@ -684,13 +722,16 @@ def _unixtime_to_cdf_time(unixtime_data: xr.DataArray, cdf_epoch: bool = False, def _datetime_to_cdf_time( - datetime_array: xr.DataArray, cdf_epoch: bool = False, cdf_epoch16: bool = False, attribute_name: str = "" + datetime_array: xr.DataArray, + cdf_epoch: bool = False, + cdf_epoch16: bool = False, + attribute_name: str = "", ) -> object: if attribute_name: datetime_data = datetime_array.attrs[attribute_name] else: datetime_data = datetime_array.data - datetime64_data = np.array(datetime_data, dtype="datetime64[ns]") + datetime64_data = np.atleast_1d(np.array(datetime_data, dtype="datetime64[ns]")) cdf_epoch = False cdf_epoch16 = False if "CDF_DATA_TYPE" in datetime_array.attrs: @@ -704,29 +745,52 @@ def _datetime_to_cdf_time( else: dtype_for_time_data = np.int64 # type: ignore - cdf_time_data = np.array([], dtype=dtype_for_time_data) - for dd in datetime64_data: - year = dd.astype("datetime64[Y]").astype(int) + 1970 - month = dd.astype("datetime64[M]").astype(int) % 12 + 1 + years = datetime64_data.astype("datetime64[Y]").astype("int64") + 1970 + months = datetime64_data.astype("datetime64[M]").astype("int64") % 12 + 1 + days = np.zeros(len(datetime64_data), dtype=np.int64) + i = 0 + for i in range(len(datetime64_data)): + day = ((datetime64_data[i] - np.datetime64(f"{years[i]}-{months[i]:02d}", "M")) / 86400000000000).astype("int64") + 1 + days[i] = day.item() + i += 1 + hours = datetime64_data.astype("datetime64[h]").astype("int64") % 24 + 
minutes = datetime64_data.astype("datetime64[m]").astype("int64") % 60 + seconds = datetime64_data.astype("datetime64[s]").astype("int64") % 60 + milliseconds = datetime64_data.astype("datetime64[ms]").astype("int64") % 1000 + microseconds = datetime64_data.astype("datetime64[us]").astype("int64") % 1000 + nanoseconds = datetime64_data.astype("datetime64[ns]").astype("int64") % 1000 + picoseconds = 0 + + cdf_time_data = np.zeros(len(datetime64_data), dtype=dtype_for_time_data) + for i in range(len(datetime64_data)): dd_to_convert = [ - dd.astype("datetime64[Y]").astype("int64") + 1970, - dd.astype("datetime64[M]").astype("int64") % 12 + 1, - ((dd - np.datetime64(f"{year}-{month:02d}", "M")) / 86400000000000).astype("int64") + 1, - dd.astype("datetime64[h]").astype("int64") % 24, - dd.astype("datetime64[m]").astype("int64") % 60, - dd.astype("datetime64[s]").astype("int64") % 60, - dd.astype("datetime64[ms]").astype("int64") % 1000, - dd.astype("datetime64[us]").astype("int64") % 1000, - dd.astype("datetime64[ns]").astype("int64") % 1000, - 0, + years[i], + months[i], + days[i], + hours[i], + minutes[i], + seconds[i], + milliseconds[i], + microseconds[i], + nanoseconds[i], + picoseconds, ] - if cdf_epoch16: - converted_data = cdfepoch.compute(dd_to_convert) - elif cdf_epoch: - converted_data = cdfepoch.compute(dd_to_convert[0:7]) + if np.isnat(datetime64_data[i]): + if cdf_epoch16: + cdf_time_data[i] = np.complex128(complex(-1e30, -1e30)) + elif cdf_epoch: + cdf_time_data[i] = np.float64(-1e30) + else: + cdf_time_data[i] = np.int64(-9223372036854775808) else: - converted_data = cdfepoch.compute(dd_to_convert[0:9]) - cdf_time_data = np.append(cdf_time_data, converted_data) + if cdf_epoch16: + converted_data = cdfepoch.compute(dd_to_convert) + elif cdf_epoch: + converted_data = cdfepoch.compute(dd_to_convert[0:7]) + else: + converted_data = cdfepoch.compute(dd_to_convert[0:9]) + cdf_time_data[i] = converted_data + return cdf_time_data @@ -739,6 +803,7 @@ def 
xarray_to_cdf( auto_fix_depends: bool = True, record_dimensions: List[str] = ["record0"], compression: int = 0, + nan_to_fillval: bool = True, from_unixtime: bool = False, from_datetime: bool = False, unixtime_to_cdftt2000: bool = False, @@ -757,6 +822,7 @@ def xarray_to_cdf( auto_fix_depends (bool, optional): Whether or not to automatically add dependencies record_dimensions (list of str, optional): If the code cannot determine which dimensions should be made into CDF records, you may provide a list of them here compression (int, optional): The level of compression to gzip the data in the variables. Default is no compression, standard is 6. + nan_to_fillval (bool, optional): Convert all np.nan and np.datetime64('NaT') to the standard CDF FILLVALs. from_datetime (bool, optional, deprecated): Same as the datetime_to_cdftt2000 option from_unixtime (bool, optional, deprecated): Same as the unixtime_to_cdftt2000 option datetime_to_cdftt2000 (bool, optional, deprecated): Whether or not to convert variables named "epoch" or "epoch_X" to CDF_TT2000 from datetime objects @@ -922,6 +988,9 @@ def xarray_to_cdf( # Make a deep copy of the data before continuing dataset = xarray_dataset.copy() + if nan_to_fillval: + _convert_nans_to_fillval(dataset) + if istp: # This creates a list of suspected or confirmed label variables _label_checker(dataset, terminate_on_warning)