From 8f6e955fc946db97c95ea012659432355b0cd12c Mon Sep 17 00:00:00 2001 From: Lily Zhang <32233490+junyazhang@users.noreply.github.com> Date: Mon, 18 Mar 2024 12:56:50 -0700 Subject: [PATCH] feat: support Series.dt.strftime (#453) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: support Series.dt.strftime * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * address comments * fix imports * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- bigframes/core/compile/scalar_op_compiler.py | 9 ++++ bigframes/operations/__init__.py | 9 ++++ bigframes/operations/datetimes.py | 8 +++- .../system/small/operations/test_datetimes.py | 47 +++++++++++++++++++ .../pandas/core/arrays/datetimelike.py | 38 +++++++++++++++ 5 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 67761c0330..c95d1ca45e 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -613,6 +613,15 @@ def second_op_impl(x: ibis_types.Value): return typing.cast(ibis_types.TimestampValue, x).second().cast(ibis_dtypes.int64) +@scalar_op_compiler.register_unary_op(ops.StrftimeOp, pass_op=True) +def strftime_op_impl(x: ibis_types.Value, op: ops.StrftimeOp): + return ( + typing.cast(ibis_types.TimestampValue, x) + .strftime(op.date_format) + .cast(ibis_dtypes.str) + ) + + @scalar_op_compiler.register_unary_op(ops.time_op) def time_op_impl(x: ibis_types.Value): return typing.cast(ibis_types.TimestampValue, x).time() diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 37188e490e..c358d46ee1 100644 --- 
a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -415,6 +415,15 @@ def output_type(self, *input_types): return input_types[0] +@dataclasses.dataclass(frozen=True) +class StrftimeOp(UnaryOp): + name: typing.ClassVar[str] = "strftime" + date_format: str + + def output_type(self, *input_types): + return dtypes.STRING_DTYPE + + # Binary Ops fillna_op = create_binary_op(name="fillna") cliplower_op = create_binary_op(name="clip_lower") diff --git a/bigframes/operations/datetimes.py b/bigframes/operations/datetimes.py index 66ec347add..eb91bc0b20 100644 --- a/bigframes/operations/datetimes.py +++ b/bigframes/operations/datetimes.py @@ -17,6 +17,7 @@ import datetime as dt from typing import Optional +import bigframes_vendored.pandas.core.arrays.datetimelike as vendored_pandas_datetimelike import bigframes_vendored.pandas.core.indexes.accessor as vendordt from bigframes.core import log_adapter @@ -27,7 +28,9 @@ @log_adapter.class_logger class DatetimeMethods( - bigframes.operations.base.SeriesMethods, vendordt.DatetimeProperties + bigframes.operations.base.SeriesMethods, + vendordt.DatetimeProperties, + vendored_pandas_datetimelike.DatelikeOps, ): __doc__ = vendordt.DatetimeProperties.__doc__ @@ -88,3 +91,6 @@ def tz(self) -> Optional[dt.timezone]: def unit(self) -> str: # Assumption: pyarrow dtype return self._dtype.pyarrow_dtype.unit + + def strftime(self, date_format: str) -> series.Series: + return self._apply_unary_op(ops.StrftimeOp(date_format=date_format)) diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index 3882491ecb..854672585d 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -219,3 +219,50 @@ def test_dt_unit(scalars_dfs, col_name): pd_result = scalars_pandas_df[col_name].dt.unit assert bf_result == pd_result + + +@pytest.mark.parametrize( + ("column", "date_format"), + [ + ("timestamp_col", "%B %d, %Y, 
%r"), + ("timestamp_col", "%m-%d-%Y %H:%M"), + ("datetime_col", "%m-%d-%Y %H:%M"), + ("datetime_col", "%H:%M"), + ], +) +@skip_legacy_pandas +def test_dt_strftime(scalars_df_index, scalars_pandas_df_index, column, date_format): + bf_result = scalars_df_index[column].dt.strftime(date_format).to_pandas() + pd_result = scalars_pandas_df_index[column].dt.strftime(date_format) + pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) + assert bf_result.dtype == "string[pyarrow]" + + +def test_dt_strftime_date(): + bf_series = bigframes.series.Series( + ["2014-08-15", "2215-08-15", "2016-02-29"] + ).astype("date32[day][pyarrow]") + + expected_result = pd.Series(["08/15/2014", "08/15/2215", "02/29/2016"]) + bf_result = bf_series.dt.strftime("%m/%d/%Y").to_pandas() + + pd.testing.assert_series_equal( + bf_result, expected_result, check_index_type=False, check_dtype=False + ) + assert bf_result.dtype == "string[pyarrow]" + + +def test_dt_strftime_time(): + bf_series = bigframes.series.Series( + [143542314, 345234512341, 75543252344, 626546437654754, 8543523452345234] + ).astype("time64[us][pyarrow]") + + expected_result = pd.Series( + ["00:02:23", "23:53:54", "20:59:03", "16:40:37", "08:57:32"] + ) + bf_result = bf_series.dt.strftime("%X").to_pandas() + + pd.testing.assert_series_equal( + bf_result, expected_result, check_index_type=False, check_dtype=False + ) + assert bf_result.dtype == "string[pyarrow]" diff --git a/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py b/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py new file mode 100644 index 0000000000..4f7e33909e --- /dev/null +++ b/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py @@ -0,0 +1,38 @@ +# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/arrays/datetimelike.py + +from bigframes import constants + + +class DatelikeOps: + def strftime(self, date_format: str): + """ + Convert to string Series using specified 
date_format. + + Return a Series of formatted strings specified by date_format. Details + of the string format can be found in `BigQuery format elements doc + <https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements>`__. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.to_datetime( + ... ['2014-08-15 08:15:12', '2012-02-29 08:15:12+06:00', '2015-08-15 08:15:12+05:00'], + ... utc=True + ... ).astype("timestamp[us, tz=UTC][pyarrow]") + + >>> s.dt.strftime("%B %d, %Y, %r") + 0 August 15, 2014, 08:15:12 AM + 1 February 29, 2012, 02:15:12 AM + 2 August 15, 2015, 03:15:12 AM + Name: 0, dtype: string + + Args: + date_format (str): + Date format string (e.g. "%Y-%m-%d"). + + Returns: + bigframes.series.Series of formatted strings. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)