fix: correct the numeric literal dtype (#365)

googleapis · Feb 14, 2024 · 93b02cd · 93b02cd
1 parent 747e5f6
commit 93b02cd
Show file tree

Hide file tree

Showing 2 changed files with 29 additions and 1 deletion.
diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
@@ -378,11 +378,29 @@ def literal_to_ibis_scalar(
         scalar_expr = ibis.literal(literal, ibis_dtypes.float64)
     elif scalar_expr.type().is_integer():
         scalar_expr = ibis.literal(literal, ibis_dtypes.int64)
+    elif scalar_expr.type().is_decimal():
+        precision = scalar_expr.type().precision
+        scale = scalar_expr.type().scale
+        if (not precision and not scale) or (
+            precision and scale and scale <= 9 and precision + (9 - scale) <= 38
+        ):
+            scalar_expr = ibis.literal(
+                literal, ibis_dtypes.decimal(precision=38, scale=9)
+            )
+        elif precision and scale and scale <= 38 and precision + (38 - scale) <= 76:
+            scalar_expr = ibis.literal(
+                literal, ibis_dtypes.decimal(precision=76, scale=38)
+            )
+        else:
+            raise TypeError(
+                "BigQuery's decimal data type supports a maximum precision of 76 and a maximum scale of 38."
+                f"Current precision: {precision}. Current scale: {scale}"
+            )
 
     # TODO(bmil): support other literals that can be coerced to compatible types
     if validate and (scalar_expr.type() not in BIGFRAMES_TO_IBIS.values()):
         raise ValueError(
-            f"Literal did not coerce to a supported data type: {literal}. {constants.FEEDBACK_LINK}"
+            f"Literal did not coerce to a supported data type: {scalar_expr.type()}. {constants.FEEDBACK_LINK}"
         )
 
     return scalar_expr

diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
@@ -1228,6 +1228,16 @@ def test_median(scalars_dfs):
     assert pd_min < bf_result < pd_max
 
 
+def test_numeric_literal(scalars_dfs):
+    scalars_df, _ = scalars_dfs
+    col_name = "numeric_col"
+    assert scalars_df[col_name].dtype == pd.ArrowDtype(pa.decimal128(38, 9))
+    bf_result = scalars_df[col_name] - scalars_df[col_name].median()
+    assert bf_result.size == scalars_df[col_name].size
+    # TODO(b/323387826): The result's precision unexpectedly increases by 1, so the dtype check below stays disabled.
+    # assert bf_result.dtype == pd.ArrowDtype(pa.decimal128(38, 9))
+
+
 def test_repr(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     if scalars_pandas_df.index.name != "rowindex":