Skip to content

Commit

Permalink
feat: support Series.dt.strftime (#453)
Browse files Browse the repository at this point in the history
* feat: support Series.dt.strftime

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* address comments

* fix imports

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
junyazhang and gcf-owl-bot[bot] authored Mar 18, 2024
1 parent 1df0140 commit 8f6e955
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 1 deletion.
9 changes: 9 additions & 0 deletions bigframes/core/compile/scalar_op_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,15 @@ def second_op_impl(x: ibis_types.Value):
return typing.cast(ibis_types.TimestampValue, x).second().cast(ibis_dtypes.int64)


@scalar_op_compiler.register_unary_op(ops.StrftimeOp, pass_op=True)
def strftime_op_impl(x: ibis_types.Value, op: ops.StrftimeOp):
    """Compile ``StrftimeOp``: format ``x`` using ``op.date_format``.

    Args:
        x: The input expression; treated as a ``TimestampValue`` for
            static typing only (``typing.cast`` has no runtime effect).
        op: The op instance, passed in because the compiler registration
            uses ``pass_op=True``; supplies the ``date_format`` string.

    Returns:
        The formatted expression, cast to ``ibis_dtypes.str``.
    """
    timestamp_value = typing.cast(ibis_types.TimestampValue, x)
    formatted = timestamp_value.strftime(op.date_format)
    return formatted.cast(ibis_dtypes.str)


@scalar_op_compiler.register_unary_op(ops.time_op)
def time_op_impl(x: ibis_types.Value):
    """Compile ``time_op`` by calling ``.time()`` on the input expression.

    The ``typing.cast`` only narrows the static type to ``TimestampValue``;
    it performs no runtime conversion.
    """
    timestamp_value = typing.cast(ibis_types.TimestampValue, x)
    return timestamp_value.time()
Expand Down
9 changes: 9 additions & 0 deletions bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,15 @@ def output_type(self, *input_types):
return input_types[0]


# Unary op that carries the format string used to render values as text.
# Frozen so instances are immutable once constructed.
@dataclasses.dataclass(frozen=True)
class StrftimeOp(UnaryOp):
    # Class-level op name (not a dataclass field).
    name: typing.ClassVar[str] = "strftime"
    # Format string supplied by the caller (e.g. via Series.dt.strftime).
    date_format: str

    def output_type(self, *input_types):
        # The result dtype is the same whatever input types are passed.
        result_dtype = dtypes.STRING_DTYPE
        return result_dtype


# Binary Ops
fillna_op = create_binary_op(name="fillna")
cliplower_op = create_binary_op(name="clip_lower")
Expand Down
8 changes: 7 additions & 1 deletion bigframes/operations/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import datetime as dt
from typing import Optional

import bigframes_vendored.pandas.core.arrays.datetimelike as vendored_pandas_datetimelike
import bigframes_vendored.pandas.core.indexes.accessor as vendordt

from bigframes.core import log_adapter
Expand All @@ -27,7 +28,9 @@

@log_adapter.class_logger
class DatetimeMethods(
bigframes.operations.base.SeriesMethods, vendordt.DatetimeProperties
bigframes.operations.base.SeriesMethods,
vendordt.DatetimeProperties,
vendored_pandas_datetimelike.DatelikeOps,
):
__doc__ = vendordt.DatetimeProperties.__doc__

Expand Down Expand Up @@ -88,3 +91,6 @@ def tz(self) -> Optional[dt.timezone]:
def unit(self) -> str:
# Assumption: pyarrow dtype
return self._dtype.pyarrow_dtype.unit

def strftime(self, date_format: str) -> series.Series:
    # Wrap the caller's format string in a StrftimeOp and apply it
    # element-wise through the shared unary-op helper.
    strftime_op = ops.StrftimeOp(date_format=date_format)
    return self._apply_unary_op(strftime_op)
47 changes: 47 additions & 0 deletions tests/system/small/operations/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,3 +219,50 @@ def test_dt_unit(scalars_dfs, col_name):
pd_result = scalars_pandas_df[col_name].dt.unit

assert bf_result == pd_result


@pytest.mark.parametrize(
    ("column", "date_format"),
    [
        ("timestamp_col", "%B %d, %Y, %r"),
        ("timestamp_col", "%m-%d-%Y %H:%M"),
        ("datetime_col", "%m-%d-%Y %H:%M"),
        ("datetime_col", "%H:%M"),
    ],
)
@skip_legacy_pandas
def test_dt_strftime(scalars_df_index, scalars_pandas_df_index, column, date_format):
    """Check ``Series.dt.strftime`` against pandas for each column/format pair."""
    bf_formatted = scalars_df_index[column].dt.strftime(date_format)
    actual = bf_formatted.to_pandas()
    expected = scalars_pandas_df_index[column].dt.strftime(date_format)

    # Values must match; the dtype is checked separately below because the
    # comparison is run with check_dtype=False.
    pd.testing.assert_series_equal(actual, expected, check_dtype=False)
    assert actual.dtype == "string[pyarrow]"


def test_dt_strftime_date():
    """Check ``dt.strftime`` on a date32 series built from ISO date strings."""
    iso_dates = ["2014-08-15", "2215-08-15", "2016-02-29"]
    bf_series = bigframes.series.Series(iso_dates).astype("date32[day][pyarrow]")

    expected_result = pd.Series(["08/15/2014", "08/15/2215", "02/29/2016"])
    formatted = bf_series.dt.strftime("%m/%d/%Y")
    bf_result = formatted.to_pandas()

    # Only values are compared here; index type and dtype are excluded from
    # the comparison, with the dtype asserted explicitly afterwards.
    pd.testing.assert_series_equal(
        bf_result,
        expected_result,
        check_index_type=False,
        check_dtype=False,
    )
    assert bf_result.dtype == "string[pyarrow]"


def test_dt_strftime_time():
    """Check ``dt.strftime("%X")`` on a time64[us] series built from integers."""
    raw_values = [
        143542314,
        345234512341,
        75543252344,
        626546437654754,
        8543523452345234,
    ]
    bf_series = bigframes.series.Series(raw_values).astype("time64[us][pyarrow]")

    expected_times = ["00:02:23", "23:53:54", "20:59:03", "16:40:37", "08:57:32"]
    expected_result = pd.Series(expected_times)
    formatted = bf_series.dt.strftime("%X")
    bf_result = formatted.to_pandas()

    # Only values are compared here; index type and dtype are excluded from
    # the comparison, with the dtype asserted explicitly afterwards.
    pd.testing.assert_series_equal(
        bf_result,
        expected_result,
        check_index_type=False,
        check_dtype=False,
    )
    assert bf_result.dtype == "string[pyarrow]"
38 changes: 38 additions & 0 deletions third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/arrays/datetimelike.py

from bigframes import constants


class DatelikeOps:
    # Interface stub: the method here only carries documentation and raises
    # NotImplementedError; concrete classes are expected to override it.

    def strftime(self, date_format: str):
        """
        Convert to string Series using specified date_format.

        Return a Series of formatted strings specified by date_format. Details
        of the string format can be found in `BigQuery format elements doc
        <https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements>`__.

        **Examples:**

            >>> import bigframes.pandas as bpd
            >>> bpd.options.display.progress_bar = None

            >>> s = bpd.to_datetime(
            ...     ['2014-08-15 08:15:12', '2012-02-29 08:15:12+06:00', '2015-08-15 08:15:12+05:00'],
            ...     utc=True
            ... ).astype("timestamp[us, tz=UTC][pyarrow]")

            >>> s.dt.strftime("%B %d, %Y, %r")
            0      August 15, 2014, 08:15:12 AM
            1    February 29, 2012, 02:15:12 AM
            2      August 15, 2015, 03:15:12 AM
            Name: 0, dtype: string

        Args:
            date_format (str):
                Date format string (e.g. "%Y-%m-%d").

        Returns:
            bigframes.series.Series of formatted strings.
        """
        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

0 comments on commit 8f6e955

Please sign in to comment.