Skip to content

Commit

Permalink
feat: support Series.dt.floor (#493)
Browse files Browse the repository at this point in the history
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea.
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
  • Loading branch information
milkshakeiii authored Mar 21, 2024
1 parent 3e3329a commit 2dd01c2
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 0 deletions.
20 changes: 20 additions & 0 deletions bigframes/core/compile/scalar_op_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,26 @@ def strftime_op_impl(x: ibis_types.Value, op: ops.StrftimeOp):
)


@scalar_op_compiler.register_unary_op(ops.FloorDtOp, pass_op=True)
def floor_dt_op_impl(x: ibis_types.Value, op: ops.FloorDtOp):
    """Compile ``FloorDtOp`` by truncating ``x`` to the unit named by ``op.freq``.

    Args:
        x: Expression to floor; it is treated as a timestamp value.
        op: The operation instance; ``op.freq`` is the pandas-style
            frequency string.

    Returns:
        The truncated expression, cast back to the type ``x`` had on entry.

    Raises:
        NotImplementedError: If ``op.freq`` is not one of the supported
            frequency strings.
    """
    supported_freqs = ["Y", "Q", "M", "W", "D", "h", "min", "s", "ms", "us", "ns"]
    # pandas spells minutes "min" while truncate() is given "m"; every other
    # supported name is passed through unchanged.
    pandas_to_ibis_freqs = {"min": "m"}
    # NOTE(review): whether the backend's truncate accepts every unit listed
    # above (e.g. "ns") cannot be verified from this function -- confirm.
    if op.freq not in supported_freqs:
        raise NotImplementedError(
            f"Unsupported freq parameter: {op.freq}. "
            "Supported freq parameters are: " + ",".join(supported_freqs)
        )
    ibis_freq = pandas_to_ibis_freqs.get(op.freq, op.freq)
    # Remember the incoming type so the result can be cast back to it after
    # truncation, keeping the op dtype-preserving.
    result_type = x.type()
    # typing.cast only informs the type checker; x is not converted here.
    truncated = typing.cast(ibis_types.TimestampValue, x).truncate(ibis_freq)
    return truncated.cast(result_type)


@scalar_op_compiler.register_unary_op(ops.time_op)
def time_op_impl(x: ibis_types.Value):
    """Compile ``time_op`` by calling ``.time()`` on ``x`` viewed as a timestamp."""
    # typing.cast only informs the type checker; x itself is not converted.
    as_timestamp = typing.cast(ibis_types.TimestampValue, x)
    return as_timestamp.time()
Expand Down
9 changes: 9 additions & 0 deletions bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,15 @@ def output_type(self, *input_types):
return dtypes.STRING_DTYPE


@dataclasses.dataclass(frozen=True)
class FloorDtOp(UnaryOp):
    """Unary op that floors its input to the frequency held in ``freq``."""

    name: typing.ClassVar[str] = "floor_dt"
    # Frequency string the input should be floored to.
    freq: str

    def output_type(self, *input_types):
        """Return the first input type unchanged, so the op preserves dtype."""
        input_dtype = input_types[0]
        return input_dtype


# Binary Ops
fillna_op = create_binary_op(name="fillna")
cliplower_op = create_binary_op(name="clip_lower")
Expand Down
3 changes: 3 additions & 0 deletions bigframes/operations/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,6 @@ def strftime(self, date_format: str) -> series.Series:

def normalize(self) -> series.Series:
    # Delegate to the shared unary-op path with the module-level normalize op.
    normalized = self._apply_unary_op(ops.normalize_op)
    return normalized

def floor(self, freq: str) -> series.Series:
    # Build a FloorDtOp carrying the requested frequency, then apply it
    # through the shared unary-op path.
    floor_op = ops.FloorDtOp(freq=freq)
    return self._apply_unary_op(floor_op)
21 changes: 21 additions & 0 deletions tests/system/small/operations/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,3 +282,24 @@ def test_dt_normalize(scalars_dfs, col_name):
pd_result.astype(scalars_df[col_name].dtype), # normalize preserves type
bf_result,
)


@pytest.mark.parametrize(
    ("col_name", "freq"),
    [
        ("timestamp_col", "D"),
        ("timestamp_col", "min"),
        ("datetime_col", "s"),
        ("datetime_col", "us"),
    ],
)
@skip_legacy_pandas
def test_dt_floor(scalars_dfs, col_name, freq):
    """Check that ``Series.dt.floor`` agrees with pandas for each column/freq pair."""
    bf_df, pd_df = scalars_dfs
    bf_series = bf_df[col_name]

    actual = bf_series.dt.floor(freq).to_pandas()
    # floor preserves type, so align the pandas result to the bigframes dtype
    # before comparing.
    expected = pd_df[col_name].dt.floor(freq).astype(bf_series.dtype)

    assert_series_equal(expected, actual)
30 changes: 30 additions & 0 deletions third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,33 @@ def normalize(self):
bigframes.series.Series of the same dtype as the data.
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def floor(self, freq: str):
    """
    Perform floor operation on the data to the specified freq.

    Supported freq arguments are: 'Y' (year), 'Q' (quarter), 'M'
    (month), 'W' (week), 'D' (day), 'h' (hour), 'min' (minute), 's'
    (second), 'ms' (millisecond), 'us' (microsecond), 'ns' (nanosecond).

    Behavior around clock changes (i.e. daylight savings) is determined
    by the SQL engine, so "ambiguous" and "nonexistent" parameters are not
    supported. Y, Q, M, and W freqs are not supported by pandas as of
    version 2.2, but have been added here due to backend support.

    **Examples:**

        >>> import pandas as pd
        >>> import bigframes.pandas as bpd
        >>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min')
        >>> bpd.Series(rng).dt.floor("h")
        0    2018-01-01 11:00:00
        1    2018-01-01 12:00:00
        2    2018-01-01 12:00:00
        dtype: timestamp[us][pyarrow]

    Args:
        freq (str):
            Frequency string (e.g. "D", "min", "s").

    Returns:
        bigframes.series.Series of the same dtype as the data.
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

0 comments on commit 2dd01c2

Please sign in to comment.