From 3b7cce09ebe440dcb26c5bb34647c55a2ed2aed5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 21 Aug 2024 15:57:41 -0700 Subject: [PATCH 1/3] Allow for binops between two differently sized DecimalDtypes --- python/cudf/cudf/core/column/decimal.py | 20 +++++++++++++++----- python/cudf/cudf/tests/test_decimal.py | 10 ++++++++++ 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 3b979ef2e97..5c52307bee6 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -4,7 +4,7 @@ import warnings from decimal import Decimal -from typing import TYPE_CHECKING, Sequence, cast +from typing import TYPE_CHECKING, Literal, Sequence, cast import cupy as cp import numpy as np @@ -135,9 +135,14 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str): # are computed outside of libcudf if op in {"__add__", "__sub__", "__mul__", "__div__"}: output_type = _get_decimal_type(lhs.dtype, rhs.dtype, op) - result = libcudf.binaryop.binaryop(lhs, rhs, op, output_type) - # TODO: Why is this necessary? Why isn't the result's - # precision already set correctly based on output_type? + result = libcudf.binaryop.binaryop( + lhs.astype(output_type), + rhs.astype(output_type), + op, + output_type, + ) + # libcudf doesn't support precision, so result.dtype doesn't + # maintain output_type.precision result.dtype.precision = output_type.precision elif op in { "__eq__", @@ -430,7 +435,11 @@ def _with_type_metadata( return self -def _get_decimal_type(lhs_dtype, rhs_dtype, op): +def _get_decimal_type( + lhs_dtype: DecimalDtype, + rhs_dtype: DecimalDtype, + op: Literal["__add__", "__sub__", "__mul__", "__div__"], +) -> DecimalDtype: """ Returns the resulting decimal type after calculating precision & scale when performing the binary operation @@ -441,6 +450,7 @@ def _get_decimal_type(lhs_dtype, rhs_dtype, op): # This should at some point be hooked up to libcudf's # binary_operation_fixed_point_scale + # Note: libcudf decimal types don't have a concept of precision p1, p2 = lhs_dtype.precision, rhs_dtype.precision s1, s2 = lhs_dtype.scale, rhs_dtype.scale diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py index b63788d20b7..048b3a656e3 100644 --- a/python/cudf/cudf/tests/test_decimal.py +++ b/python/cudf/cudf/tests/test_decimal.py @@ -398,3 +398,13 @@ def test_decimal_overflow(): s = cudf.Series([1, 2], dtype=cudf.Decimal128Dtype(precision=38, scale=0)) result = s * Decimal("1.0") assert_eq(cudf.Decimal128Dtype(precision=38, scale=1), result.dtype) + + +def test_decimal_binop_upcast_operands(): + ser1 = cudf.Series([0.51, 1.51, 2.51]).astype(cudf.Decimal64Dtype(18, 2)) + ser2 = cudf.Series([0.90, 0.96, 0.99]).astype(cudf.Decimal128Dtype(19, 2)) + result = ser1 + ser2 + expected = cudf.Series([1.41, 2.47, 3.50]).astype( + cudf.Decimal128Dtype(20, 2) + ) + assert_eq(result, expected) From 5765b6a2850478535270c2049639eb67278b62aa Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 21 Aug 2024 16:10:22 -0700 Subject: [PATCH 2/3] Remove Literal for just str --- python/cudf/cudf/core/column/decimal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 5c52307bee6..f4831eb7a4e 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -4,7 +4,7 @@ import warnings from decimal import Decimal -from typing import TYPE_CHECKING, Literal, Sequence, cast +from typing import TYPE_CHECKING, Sequence, cast import cupy as cp import numpy as np @@ -438,7 +438,7 @@ def _with_type_metadata( def _get_decimal_type( lhs_dtype: DecimalDtype, rhs_dtype: DecimalDtype, - op: Literal["__add__", "__sub__", "__mul__", "__div__"], + op: str, ) -> DecimalDtype: """ Returns the resulting decimal type after calculating From f74e76dad59a653e4eeeb30acb0c668c75b16166 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:24:13 -0700 Subject: [PATCH 3/3] Just change the dtype class --- python/cudf/cudf/core/column/decimal.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index f4831eb7a4e..8803ebd6791 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -135,12 +135,13 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str): # are computed outside of libcudf if op in {"__add__", "__sub__", "__mul__", "__div__"}: output_type = _get_decimal_type(lhs.dtype, rhs.dtype, op) - result = libcudf.binaryop.binaryop( - lhs.astype(output_type), - rhs.astype(output_type), - op, - output_type, + lhs = lhs.astype( + type(output_type)(lhs.dtype.precision, lhs.dtype.scale) ) + rhs = rhs.astype( + type(output_type)(rhs.dtype.precision, rhs.dtype.scale) + ) + result = libcudf.binaryop.binaryop(lhs, rhs, op, output_type) # libcudf doesn't support precision, so result.dtype doesn't # maintain output_type.precision result.dtype.precision = output_type.precision