From e24e333c0dc129c012587bc096de5ee2445f8e6f Mon Sep 17 00:00:00 2001 From: Lily Zhang Date: Wed, 20 Mar 2024 23:27:33 +0000 Subject: [PATCH 1/3] fix: Properly support format param for numerical input. --- bigframes/core/compile/scalar_op_compiler.py | 17 +++++-- bigframes/core/tools/datetimes.py | 10 +++++ tests/system/small/test_pandas.py | 47 ++++++++++++++++++++ 3 files changed, 70 insertions(+), 4 deletions(-) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 2a93f08e90..a209df0183 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -42,6 +42,8 @@ # Datetime constants UNIT_TO_US_CONVERSION_FACTORS = { + "W": 7 * 24 * 60 * 60 * 1000 * 1000, + "d": 24 * 60 * 60 * 1000 * 1000, "D": 24 * 60 * 60 * 1000 * 1000, "h": 60 * 60 * 1000 * 1000, "m": 60 * 1000 * 1000, @@ -726,12 +728,19 @@ def to_datetime_op_impl(x: ibis_types.Value, op: ops.ToDatetimeOp): if x.type() == ibis_dtypes.str: x = x.to_timestamp(op.format) if op.format else timestamp(x) elif x.type() == ibis_dtypes.Timestamp(timezone="UTC"): + if op.format: + raise NotImplementedError( + f"Format parameter is not supported for Timestamp input types. {constants.FEEDBACK_LINK}" + ) return x elif x.type() != ibis_dtypes.timestamp: - # The default unit is set to "ns" (nanoseconds) for consistency - # with pandas, where "ns" is the default unit for datetime operations. - unit = op.unit or "ns" - x = numeric_to_datatime(x, unit) + if op.format: + x = x.cast(ibis_dtypes.str).to_timestamp(op.format) + else: + # The default unit is set to "ns" (nanoseconds) for consistency + # with pandas, where "ns" is the default unit for datetime operations. + unit = op.unit or "ns" + x = numeric_to_datatime(x, unit) return x.cast(ibis_dtypes.Timestamp(timezone="UTC" if op.utc else None)) diff --git a/bigframes/core/tools/datetimes.py b/bigframes/core/tools/datetimes.py index 4aaf320c7a..9881670fed 100644 --- a/bigframes/core/tools/datetimes.py +++ b/bigframes/core/tools/datetimes.py @@ -73,6 +73,16 @@ def to_datetime( f"String and Timestamp requires utc=True. {constants.FEEDBACK_LINK}" ) + if format and unit and arg.dtype in ("Int64", "Float64"): # type: ignore + raise ValueError( + f"cannot specify both format and unit" + ) + + if unit and arg.dtype not in ("Int64", "Float64"): # type: ignore + raise NotImplementedError( + f"Unit parameter is not supported for non-numerical input types. {constants.FEEDBACK_LINK}" + ) + return arg._apply_unary_op( # type: ignore ops.ToDatetimeOp( utc=utc, diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index ec61329aa5..32effe11eb 100644 --- a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -539,3 +539,50 @@ def test_to_datetime_series(scalars_dfs): pd.testing.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False ) + +@pytest.mark.parametrize( + ("arg", "unit"), + [ + ([1, 2, 3], "W"), + ([1, 2, 3], "d"), + ([1, 2, 3], "D"), + ([1, 2, 3], "h"), + ([1, 2, 3], "m"), + ([20242330, 25244685, 34324234], "s"), + ([20242330000, 25244685000, 34324234000], "ms"), + ([20242330000000, 25244685000000, 34324234000000], "us"), + ([20242330000000000, 25244685000000000, 34324234000000000], "ns"), + ], +) +def test_to_datetime_unit_param(arg, unit): + bf_result = ( + bpd.to_datetime(arg, unit=unit) + .to_pandas() + .astype("datetime64[ns]") + ) + pd_result = pd.Series(pd.to_datetime(arg, unit=unit)).dt.floor("us") + pd.testing.assert_series_equal( + bf_result, pd_result, check_index_type=False, check_names=False + ) + +@pytest.mark.parametrize( + ("arg", "utc", "format"), + [ + ([20230110, 20230101, 20230101], False, "%Y%m%d"), + ([201301.01], False, "%Y%m.%d"), + (["2023-01-10", "2023-01-20", "2023-01-01"], True, "%Y-%m-%d"), + (['2014-08-15 07:19'], True, "%Y-%m-%d %H:%M"), + ], +) +def test_to_datetime_format_param(arg, utc, format): + bf_result = ( + bpd.to_datetime(arg, utc=utc, format=format) + .to_pandas() + .astype("datetime64[ns, UTC]" if utc else "datetime64[ns]") + ) + pd_result = pd.Series( + pd.to_datetime(arg, utc=utc, format=format) + ).dt.floor("us") + pd.testing.assert_series_equal( + bf_result, pd_result, check_index_type=False, check_names=False + ) From dabb3b7f6afc1b9676cb654fe502fc9878642c4f Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 20 Mar 2024 23:35:25 +0000 Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- bigframes/core/tools/datetimes.py | 4 +--- tests/system/small/test_pandas.py | 14 +++++--------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/bigframes/core/tools/datetimes.py b/bigframes/core/tools/datetimes.py index 9881670fed..8ae6be75e4 100644 --- a/bigframes/core/tools/datetimes.py +++ b/bigframes/core/tools/datetimes.py @@ -74,9 +74,7 @@ def to_datetime( ) if format and unit and arg.dtype in ("Int64", "Float64"): # type: ignore - raise ValueError( - f"cannot specify both format and unit" - ) + raise ValueError(f"cannot specify both format and unit") if unit and arg.dtype not in ("Int64", "Float64"): # type: ignore raise NotImplementedError( diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index 32effe11eb..a080a969c8 100644 --- a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -540,6 +540,7 @@ def test_to_datetime_series(scalars_dfs): bf_result, pd_result, check_index_type=False, check_names=False ) + @pytest.mark.parametrize( ("arg", "unit"), [ @@ -555,23 +556,20 @@ def test_to_datetime_series(scalars_dfs): ], ) def test_to_datetime_unit_param(arg, unit): - bf_result = ( - bpd.to_datetime(arg, unit=unit) - .to_pandas() - .astype("datetime64[ns]") - ) + bf_result = bpd.to_datetime(arg, unit=unit).to_pandas().astype("datetime64[ns]") pd_result = pd.Series(pd.to_datetime(arg, unit=unit)).dt.floor("us") pd.testing.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False ) + @pytest.mark.parametrize( ("arg", "utc", "format"), [ ([20230110, 20230101, 20230101], False, "%Y%m%d"), ([201301.01], False, "%Y%m.%d"), (["2023-01-10", "2023-01-20", "2023-01-01"], True, "%Y-%m-%d"), - (['2014-08-15 07:19'], True, "%Y-%m-%d %H:%M"), + (["2014-08-15 07:19"], True, "%Y-%m-%d %H:%M"), ], ) def test_to_datetime_format_param(arg, utc, format): @@ -580,9 +578,7 @@ def test_to_datetime_format_param(arg, utc, format): .to_pandas() .astype("datetime64[ns, UTC]" if utc else "datetime64[ns]") ) - pd_result = pd.Series( - pd.to_datetime(arg, utc=utc, format=format) - ).dt.floor("us") + pd_result = pd.Series(pd.to_datetime(arg, utc=utc, format=format)).dt.floor("us") pd.testing.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False ) From be809280135dbd220696b8260449a0164872b74f Mon Sep 17 00:00:00 2001 From: Lily Zhang Date: Wed, 20 Mar 2024 23:42:37 +0000 Subject: [PATCH 3/3] fix lint --- bigframes/core/tools/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/core/tools/datetimes.py b/bigframes/core/tools/datetimes.py index 8ae6be75e4..96bf556101 100644 --- a/bigframes/core/tools/datetimes.py +++ b/bigframes/core/tools/datetimes.py @@ -74,7 +74,7 @@ def to_datetime( ) if format and unit and arg.dtype in ("Int64", "Float64"): # type: ignore - raise ValueError(f"cannot specify both format and unit") + raise ValueError("cannot specify both format and unit") if unit and arg.dtype not in ("Int64", "Float64"): # type: ignore raise NotImplementedError(