From 93b02cd8bc620823563f8214b43bc5f2f35c155b Mon Sep 17 00:00:00 2001
From: chelsea-lin <124939984+chelsea-lin@users.noreply.github.com>
Date: Wed, 14 Feb 2024 09:45:39 -0800
Subject: [PATCH] fix: correct the numeric literal dtype (#365)

---
Review notes (free-text area of the patch, not part of the commit message):

* The dtypes.py hunk adds a decimal branch to literal_to_ibis_scalar: a
  decimal literal is re-created as decimal(38, 9) when its precision and
  scale are both unset or fit that type, as decimal(76, 38) when they fit
  that wider type, and a TypeError is raised otherwise.
* The two adjacent string literals in the new TypeError are concatenated by
  Python with nothing in between; a trailing space was added to the first
  one so the message no longer reads "...scale of 38.Current precision".
* The ValueError raised when validation fails now reports the coerced ibis
  type (scalar_expr.type()) instead of the literal value.
* NOTE(review): `precision and scale` is falsy when scale == 0, so a decimal
  whose type reports precision=N, scale=0 would reach the TypeError branch.
  Confirm whether ibis can report such a type for a literal; if so, compare
  against None explicitly.
* The test hunk adds test_numeric_literal, which subtracts the median from
  "numeric_col" and checks the result size; the dtype assertion is left
  commented out under TODO(b/323387826).
* Leading whitespace of the hunk lines was reconstructed from a collapsed
  copy of this patch; verify it against the target files before applying.

 bigframes/dtypes.py               | 20 +++++++++++++++++++-
 tests/system/small/test_series.py | 10 ++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index cb2210bec6..6e3bc25c47 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -378,11 +378,29 @@ def literal_to_ibis_scalar(
             scalar_expr = ibis.literal(literal, ibis_dtypes.float64)
         elif scalar_expr.type().is_integer():
             scalar_expr = ibis.literal(literal, ibis_dtypes.int64)
+        elif scalar_expr.type().is_decimal():
+            precision = scalar_expr.type().precision
+            scale = scalar_expr.type().scale
+            if (not precision and not scale) or (
+                precision and scale and scale <= 9 and precision + (9 - scale) <= 38
+            ):
+                scalar_expr = ibis.literal(
+                    literal, ibis_dtypes.decimal(precision=38, scale=9)
+                )
+            elif precision and scale and scale <= 38 and precision + (38 - scale) <= 76:
+                scalar_expr = ibis.literal(
+                    literal, ibis_dtypes.decimal(precision=76, scale=38)
+                )
+            else:
+                raise TypeError(
+                    "BigQuery's decimal data type supports a maximum precision of 76 and a maximum scale of 38. "
+                    f"Current precision: {precision}. Current scale: {scale}"
+                )
 
     # TODO(bmil): support other literals that can be coerced to compatible types
     if validate and (scalar_expr.type() not in BIGFRAMES_TO_IBIS.values()):
         raise ValueError(
-            f"Literal did not coerce to a supported data type: {literal}. {constants.FEEDBACK_LINK}"
+            f"Literal did not coerce to a supported data type: {scalar_expr.type()}. {constants.FEEDBACK_LINK}"
         )
 
     return scalar_expr
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 42651ed96f..37b4f8c1de 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -1228,6 +1228,16 @@ def test_median(scalars_dfs):
     assert pd_min < bf_result < pd_max
 
 
+def test_numeric_literal(scalars_dfs):
+    scalars_df, _ = scalars_dfs
+    col_name = "numeric_col"
+    assert scalars_df[col_name].dtype == pd.ArrowDtype(pa.decimal128(38, 9))
+    bf_result = scalars_df[col_name] - scalars_df[col_name].median()
+    assert bf_result.size == scalars_df[col_name].size
+    # TODO(b/323387826): The precision increased by 1 unexpectedly.
+    # assert bf_result.dtype == pd.ArrowDtype(pa.decimal128(38, 9))
+
+
 def test_repr(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     if scalars_pandas_df.index.name != "rowindex":