From 2dd01c25e9f01c03979c61e71d3c5cd9f0bd4c96 Mon Sep 17 00:00:00 2001 From: Henry Solberg Date: Thu, 21 Mar 2024 13:48:16 -0700 Subject: [PATCH] feat: support Series.dt.floor (#493) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- bigframes/core/compile/scalar_op_compiler.py | 20 +++++++++++++ bigframes/operations/__init__.py | 9 ++++++ bigframes/operations/datetimes.py | 3 ++ .../system/small/operations/test_datetimes.py | 21 +++++++++++++ .../pandas/core/arrays/datetimelike.py | 30 +++++++++++++++++++ 5 files changed, 83 insertions(+) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index a52264be17..4d43545efe 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -622,6 +622,26 @@ def strftime_op_impl(x: ibis_types.Value, op: ops.StrftimeOp): ) +@scalar_op_compiler.register_unary_op(ops.FloorDtOp, pass_op=True) +def floor_dt_op_impl(x: ibis_types.Value, op: ops.FloorDtOp): + supported_freqs = ["Y", "Q", "M", "W", "D", "h", "min", "s", "ms", "us", "ns"] + pandas_to_ibis_freqs = {"min": "m"} + if op.freq not in supported_freqs: + raise NotImplementedError( + f"Unsupported freq paramater: {op.freq}" + + " Supported freq parameters are: " + + ",".join(supported_freqs) + ) + if op.freq in pandas_to_ibis_freqs: + ibis_freq = pandas_to_ibis_freqs[op.freq] + else: + ibis_freq = 
op.freq + result_type = x.type() + result = typing.cast(ibis_types.TimestampValue, x) + result = result.truncate(ibis_freq) + return result.cast(result_type) + + @scalar_op_compiler.register_unary_op(ops.time_op) def time_op_impl(x: ibis_types.Value): return typing.cast(ibis_types.TimestampValue, x).time() diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index dbea6145e0..f2bcbd894a 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -441,6 +441,15 @@ def output_type(self, *input_types): return dtypes.STRING_DTYPE +@dataclasses.dataclass(frozen=True) +class FloorDtOp(UnaryOp): + name: typing.ClassVar[str] = "floor_dt" + freq: str + + def output_type(self, *input_types): + return input_types[0] + + # Binary Ops fillna_op = create_binary_op(name="fillna") cliplower_op = create_binary_op(name="clip_lower") diff --git a/bigframes/operations/datetimes.py b/bigframes/operations/datetimes.py index 1b4a2fe0e6..7d25ac3622 100644 --- a/bigframes/operations/datetimes.py +++ b/bigframes/operations/datetimes.py @@ -97,3 +97,6 @@ def strftime(self, date_format: str) -> series.Series: def normalize(self) -> series.Series: return self._apply_unary_op(ops.normalize_op) + + def floor(self, freq: str) -> series.Series: + return self._apply_unary_op(ops.FloorDtOp(freq=freq)) diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index d5100e7dc2..b952289a72 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -282,3 +282,24 @@ def test_dt_normalize(scalars_dfs, col_name): pd_result.astype(scalars_df[col_name].dtype), # normalize preserves type bf_result, ) + + +@pytest.mark.parametrize( + ("col_name", "freq"), + [ + ("timestamp_col", "D"), + ("timestamp_col", "min"), + ("datetime_col", "s"), + ("datetime_col", "us"), + ], +) +@skip_legacy_pandas +def test_dt_floor(scalars_dfs, col_name, freq): + 
scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[col_name].dt.floor(freq).to_pandas() + pd_result = scalars_pandas_df[col_name].dt.floor(freq) + + assert_series_equal( + pd_result.astype(scalars_df[col_name].dtype), # floor preserves type + bf_result, + ) diff --git a/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py b/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py index 60ac19b818..f706ae2560 100644 --- a/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py +++ b/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py @@ -67,3 +67,33 @@ def normalize(self): bigframes.series.Series of the same dtype as the data. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def floor(self, freq: str): + """ + Perform floor operation on the data to the specified freq. + + Supported freq arguments are: 'Y' (year), 'Q' (quarter), 'M' + (month), 'W' (week), 'D' (day), 'h' (hour), 'min' (minute), 's' + (second), 'ms' (millisecond), 'us' (microsecond), 'ns' (nanosecond) + + Behavior around clock changes (i.e. daylight savings) is determined + by the SQL engine, so "ambiguous" and "nonexistent" parameters are not + supported. Y, Q, M, and W freqs are not supported by pandas as of + version 2.2, but have been added here due to backend support. + + **Examples:** + + >>> import pandas as pd + >>> import bigframes.pandas as bpd + >>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min') + >>> bpd.Series(rng).dt.floor("h") + 0 2018-01-01 11:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 12:00:00 + dtype: timestamp[us][pyarrow] + + Args: + freq (str): + Frequency string (e.g. "D", "min", "s"). + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)