From 5ee6bc994817beaad07cf8a21d553863daaa7bb7 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 24 Mar 2021 17:01:45 -0700 Subject: [PATCH 01/13] Initial --- python/cudf/cudf/core/column/decimal.py | 37 ++++--- python/cudf/cudf/tests/test_binops.py | 135 ++++++++++++++++++++++++ 2 files changed, 157 insertions(+), 15 deletions(-) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 7fbe602f07a..080e5dd75f2 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -1,23 +1,22 @@ # Copyright (c) 2021, NVIDIA CORPORATION. -import cudf +from typing import cast + import cupy as cp import numpy as np import pyarrow as pa from pandas.api.types import is_integer_dtype -from typing import cast +import cudf from cudf import _lib as libcudf -from cudf.core.buffer import Buffer -from cudf.core.column import ColumnBase -from cudf.core.dtypes import Decimal64Dtype -from cudf.utils.utils import pa_mask_buffer_to_mask - -from cudf._typing import Dtype from cudf._lib.strings.convert.convert_fixed_point import ( from_decimal as cpp_from_decimal, ) -from cudf.core.column import as_column +from cudf._typing import Dtype +from cudf.core.buffer import Buffer +from cudf.core.column import ColumnBase, as_column +from cudf.core.dtypes import Decimal64Dtype +from cudf.utils.utils import pa_mask_buffer_to_mask class DecimalColumn(ColumnBase): @@ -64,12 +63,20 @@ def to_arrow(self): def binary_operator(self, op, other, reflect=False): if reflect: self, other = other, self - scale = _binop_scale(self.dtype, other.dtype, op) - output_type = Decimal64Dtype( - scale=scale, precision=Decimal64Dtype.MAX_PRECISION - ) # precision will be ignored, libcudf has no notion of precision - result = libcudf.binaryop.binaryop(self, other, op, output_type) - result.dtype.precision = _binop_precision(self.dtype, other.dtype, op) + + # Binary Arithmatics between decimal columns. `Scale` and `precision` + # are computed outside of libcudf + if op in ("add", "sub", "mul"): + scale = _binop_scale(self.dtype, other.dtype, op) + output_type = Decimal64Dtype( + scale=scale, precision=Decimal64Dtype.MAX_PRECISION + ) # precision will be ignored, libcudf has no notion of precision + result = libcudf.binaryop.binaryop(self, other, op, output_type) + result.dtype.precision = _binop_precision( + self.dtype, other.dtype, op + ) + elif op in ("eq", "lt", "gt", "le", "ge"): + result = libcudf.binaryop.binaryop(self, other, op, bool) return result def as_decimal_column( diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 18f2d7e474b..e64f42f5c56 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1753,6 +1753,141 @@ def test_binops_with_NA_consistent(dtype, op): ["10.0", None], cudf.Decimal64Dtype(scale=1, precision=8), ), + ( + operator.eq, + ["0.18", "0.42"], + cudf.Decimal64Dtype(scale=2, precision=3), + ["0.18", "0.21"], + cudf.Decimal64Dtype(scale=2, precision=3), + [True, False], + bool, + ), + ( + operator.eq, + ["0.18", "0.42"], + cudf.Decimal64Dtype(scale=2, precision=3), + ["0.1800", "0.2100"], + cudf.Decimal64Dtype(scale=4, precision=5), + [True, False], + bool, + ), + ( + operator.eq, + ["100", None], + cudf.Decimal64Dtype(scale=-2, precision=3), + ["100", "200"], + cudf.Decimal64Dtype(scale=-1, precision=4), + [True, None], + bool, + ), + ( + operator.lt, + ["0.18", "0.42", "1.00"], + cudf.Decimal64Dtype(scale=2, precision=3), + ["0.10", "0.87", "1.00"], + cudf.Decimal64Dtype(scale=2, precision=3), + [False, True, False], + bool, + ), + ( + operator.lt, + ["0.18", "0.42", "1.00"], + cudf.Decimal64Dtype(scale=2, precision=3), + ["0.1000", "0.8700", "1.0000"], + cudf.Decimal64Dtype(scale=4, precision=5), + [False, True, False], + bool, + ), + ( + operator.lt, + ["200", None, "100"], + cudf.Decimal64Dtype(scale=-2, precision=3), + ["100", "200", "100"], + cudf.Decimal64Dtype(scale=-1, precision=4), + [False, None, False], + bool, + ), + ( + operator.gt, + ["0.18", "0.42", "1.00"], + cudf.Decimal64Dtype(scale=2, precision=3), + ["0.10", "0.87", "1.00"], + cudf.Decimal64Dtype(scale=2, precision=3), + [True, False, False], + bool, + ), + ( + operator.gt, + ["0.18", "0.42", "1.00"], + cudf.Decimal64Dtype(scale=2, precision=3), + ["0.1000", "0.8700", "1.0000"], + cudf.Decimal64Dtype(scale=4, precision=5), + [True, False, False], + bool, + ), + ( + operator.gt, + ["300", None, "150"], + cudf.Decimal64Dtype(scale=-2, precision=3), + ["100", "200", "150"], + cudf.Decimal64Dtype(scale=-1, precision=4), + [True, None, False], + bool, + ), + ( + operator.le, + ["0.18", "0.42", "1.00"], + cudf.Decimal64Dtype(scale=2, precision=3), + ["0.10", "0.87", "1.00"], + cudf.Decimal64Dtype(scale=2, precision=3), + [False, True, True], + bool, + ), + ( + operator.le, + ["0.18", "0.42", "1.00"], + cudf.Decimal64Dtype(scale=2, precision=3), + ["0.1000", "0.8700", "1.0000"], + cudf.Decimal64Dtype(scale=4, precision=5), + [False, True, True], + bool, + ), + ( + operator.le, + ["300", None, "150"], + cudf.Decimal64Dtype(scale=-2, precision=3), + ["100", "200", "150"], + cudf.Decimal64Dtype(scale=-1, precision=4), + [False, None, True], + bool, + ), + ( + operator.ge, + ["0.18", "0.42", "1.00"], + cudf.Decimal64Dtype(scale=2, precision=3), + ["0.10", "0.87", "1.00"], + cudf.Decimal64Dtype(scale=2, precision=3), + [True, False, True], + bool, + ), + ( + operator.ge, + ["0.18", "0.42", "1.00"], + cudf.Decimal64Dtype(scale=2, precision=3), + ["0.1000", "0.8700", "1.0000"], + cudf.Decimal64Dtype(scale=4, precision=5), + [True, False, True], + bool, + ), + ( + operator.ge, + ["300", None, "150"], + cudf.Decimal64Dtype(scale=-2, precision=3), + ["100", "200", "150"], + cudf.Decimal64Dtype(scale=-1, precision=4), + [True, None, True], + bool, + ), ], ) def test_binops_decimal(args): From aa3d0ed130cf1c6aee61a211440c503232fa60ed Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 24 Mar 2021 18:40:20 -0700 Subject: [PATCH 02/13] Passing tests --- python/cudf/cudf/tests/test_binops.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index e64f42f5c56..233e3673b98 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1827,9 +1827,9 @@ def test_binops_with_NA_consistent(dtype, op): ), ( operator.gt, - ["300", None, "150"], + ["300", None, "100"], cudf.Decimal64Dtype(scale=-2, precision=3), - ["100", "200", "150"], + ["100", "200", "100"], cudf.Decimal64Dtype(scale=-1, precision=4), [True, None, False], bool, @@ -1854,9 +1854,9 @@ def test_binops_with_NA_consistent(dtype, op): ), ( operator.le, - ["300", None, "150"], + ["300", None, "100"], cudf.Decimal64Dtype(scale=-2, precision=3), - ["100", "200", "150"], + ["100", "200", "100"], cudf.Decimal64Dtype(scale=-1, precision=4), [False, None, True], bool, @@ -1881,9 +1881,9 @@ def test_binops_with_NA_consistent(dtype, op): ), ( operator.ge, - ["300", None, "150"], + ["300", None, "100"], cudf.Decimal64Dtype(scale=-2, precision=3), - ["100", "200", "150"], + ["100", "200", "100"], cudf.Decimal64Dtype(scale=-1, precision=4), [True, None, True], bool, @@ -1901,7 +1901,11 @@ def decimal_series(input, dtype): a = decimal_series(lhs, l_dtype) b = decimal_series(rhs, r_dtype) - expect = decimal_series(expect, expect_dtype) + expect = ( + decimal_series(expect, expect_dtype) + if isinstance(expect_dtype, cudf.Decimal64Dtype) + else cudf.Series(expect, dtype=expect_dtype) + ) got = op(a, b) assert expect.dtype == got.dtype From 7ee96b56d96c234b05b7886bdc56b7fc53c1a9f9 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Fri, 26 Mar 2021 13:04:49 -0700 Subject: [PATCH 03/13] Initial integer decimal mixed tested --- python/cudf/cudf/tests/test_binops.py | 170 ++++++++++++++++++++++++-- 1 file changed, 161 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 233e3673b98..84566a7bcbf 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1614,6 +1614,11 @@ def test_binops_with_NA_consistent(dtype, op): elif dtype in DATETIME_TYPES & TIMEDELTA_TYPES: assert result._column.null_count == len(data) +def _decimal_series(input, dtype): + return cudf.Series( + [x if x is None else decimal.Decimal(x) for x in input], + dtype=dtype, + ) @pytest.mark.parametrize( "args", @@ -1893,16 +1898,10 @@ def test_binops_with_NA_consistent(dtype, op): def test_binops_decimal(args): op, lhs, l_dtype, rhs, r_dtype, expect, expect_dtype = args - def decimal_series(input, dtype): - return cudf.Series( - [x if x is None else decimal.Decimal(x) for x in input], - dtype=dtype, - ) - - a = decimal_series(lhs, l_dtype) - b = decimal_series(rhs, r_dtype) + a = _decimal_series(lhs, l_dtype) + b = _decimal_series(rhs, r_dtype) expect = ( - decimal_series(expect, expect_dtype) + _decimal_series(expect, expect_dtype) if isinstance(expect_dtype, cudf.Decimal64Dtype) else cudf.Series(expect, dtype=expect_dtype) ) @@ -1911,6 +1910,159 @@ def decimal_series(input, dtype): assert expect.dtype == got.dtype utils.assert_eq(expect, got) +@pytest.mark.parametrize( + "args", + [( + operator.eq, + ["100", "42", None], + cudf.Decimal64Dtype(scale=0, precision=5), + [100, 42, 12], + cudf.Series([True, False, None], dtype=bool), + cudf.Series([True, False, None], dtype=bool) + ), + ( + operator.eq, + ["100.000", "42.001", None], + cudf.Decimal64Dtype(scale=3, precision=6), + [100, 42, 12], + cudf.Series([True, False, None], dtype=bool), + cudf.Series([True, False, None], dtype=bool) + ), + ( + operator.eq, + ["100", "40", None], + cudf.Decimal64Dtype(scale=-1, precision=3), + [100, 42, 12], + cudf.Series([True, False, None], dtype=bool), + cudf.Series([True, False, None], dtype=bool) + ), + ( + operator.lt, + ["100", "40", "28", None], + cudf.Decimal64Dtype(scale=0, precision=3), + [100, 42, 24, 12], + cudf.Series([False, True, False, None], dtype=bool), + cudf.Series([False, False, True, None], dtype=bool) + ), + ( + operator.lt, + ["100.000", "42.002", "23.999", None], + cudf.Decimal64Dtype(scale=3, precision=6), + [100, 42, 24, 12], + cudf.Series([False, False, False, None], dtype=bool), + cudf.Series([False, False, False, None], dtype=bool) + ), + ( + operator.lt, + ["100", "40", "10", None], + cudf.Decimal64Dtype(scale=-1, precision=3), + [100, 42, 8, 12], + cudf.Series([False, True, False, None], dtype=bool), + cudf.Series([False, False, True, None], dtype=bool) + ), + ( + operator.gt, + ["100", "42", "20", None], + cudf.Decimal64Dtype(scale=0, precision=3), + [100, 40, 24, 12], + cudf.Series([False, True, False, None], dtype=bool), + cudf.Series([False, False, True, None], dtype=bool) + ), + ( + operator.gt, + ["100.000", "42.002", "23.999", None], + cudf.Decimal64Dtype(scale=3, precision=6), + [100, 42, 24, 12], + cudf.Series([False, True, False, None], dtype=bool), + cudf.Series([False, False, True, None], dtype=bool) + ), + ( + operator.gt, + ["100", "40", "10", None], + cudf.Decimal64Dtype(scale=-1, precision=3), + [100, 42, 8, 12], + cudf.Series([False, False, True, None], dtype=bool), + cudf.Series([False, True, False, None], dtype=bool) + ), + ( + operator.le, + ["100", "40", "28", None], + cudf.Decimal64Dtype(scale=0, precision=3), + [100, 42, 24, 12], + cudf.Series([True, True, False, None], dtype=bool), + cudf.Series([True, False, True, None], dtype=bool) + ), + ( + operator.le, + ["100.000", "42.002", "23.999", None], + cudf.Decimal64Dtype(scale=3, precision=6), + [100, 42, 24, 12], + cudf.Series([True, False, False, None], dtype=bool), + cudf.Series([True, False, False, None], dtype=bool) + ), + ( + operator.le, + ["100", "40", "10", None], + cudf.Decimal64Dtype(scale=-1, precision=3), + [100, 42, 8, 12], + cudf.Series([True, True, False, None], dtype=bool), + cudf.Series([True, False, True, None], dtype=bool) + ), + ( + operator.ge, + ["100", "42", "20", None], + cudf.Decimal64Dtype(scale=0, precision=3), + [100, 40, 24, 12], + cudf.Series([True, True, False, None], dtype=bool), + cudf.Series([True, False, True, None], dtype=bool) + ), + ( + operator.ge, + ["100.000", "42.002", "23.999", None], + cudf.Decimal64Dtype(scale=3, precision=6), + [100, 42, 24, 12], + cudf.Series([True, True, False, None], dtype=bool), + cudf.Series([True, False, True, None], dtype=bool) + ), + ( + operator.ge, + ["100", "40", "10", None], + cudf.Decimal64Dtype(scale=-1, precision=3), + [100, 42, 8, 12], + cudf.Series([True, False, True, None], dtype=bool), + cudf.Series([True, True, False, None], dtype=bool) + ), + ] +) +@pytest.mark.parametrize( + "integer_dtype", INTEGER_TYPES +) +@pytest.mark.parametrize( + "reflected", [True, False] +) +def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): + """ + Tested compare operations: + eq, lt, gt, le, ge + Each operation has 3 decimal data setups, with scale from {==0, >0, <0}, + decimal precisions are sufficient to contain the digits. + For each decimal data setup, there is at least one row that lead to one + of the following compare results: {True, False, None}. + """ + if not reflected: + op, ldata, ldtype, rdata, expected, _ = args + else: + op, ldata, ldtype, rdata, _, expected = args + + lhs = _decimal_series(ldata, ldtype) + rhs = cudf.Series(rdata, dtype=integer_dtype) + + if reflected: + rhs, lhs = lhs, rhs + + actual = op(lhs, rhs) + + utils.assert_eq(expected, actual) @pytest.mark.parametrize("fn", ["eq", "ne", "lt", "gt", "le", "ge"]) def test_equality_ops_index_mismatch(fn): From 25e563a7ac76d6c2b30157c2192df3d074caa51d Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Fri, 26 Mar 2021 20:44:38 -0700 Subject: [PATCH 04/13] Passing some test without reflect --- python/cudf/cudf/core/column/decimal.py | 27 +++ python/cudf/cudf/core/column/numerical.py | 5 - python/cudf/cudf/tests/test_binops.py | 264 +++++++++++----------- 3 files changed, 159 insertions(+), 137 deletions(-) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 080e5dd75f2..a32e4534460 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -15,6 +15,7 @@ from cudf._typing import Dtype from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase, as_column +from cudf.core.column.numerical import NumericalColumn from cudf.core.dtypes import Decimal64Dtype from cudf.utils.utils import pa_mask_buffer_to_mask @@ -76,6 +77,20 @@ def binary_operator(self, op, other, reflect=False): self.dtype, other.dtype, op ) elif op in ("eq", "lt", "gt", "le", "ge"): + if not isinstance(other, (DecimalColumn, NumericalColumn)): + raise TypeError( + f"Operator {op} not supported between" + f"{str(type(self))} and {str(type(other))}" + ) + if isinstance(other, NumericalColumn) and not is_integer_dtype( + other.dtype + ): + raise TypeError( + f"Only decimal and integer column is supported for {op}." + ) + else: + dtype = _infer_dtype_from_integer_column(other) + other = other.as_decimal_column(dtype) result = libcudf.binaryop.binaryop(self, other, op, bool) return result @@ -137,3 +152,15 @@ def _binop_precision(l_dtype, r_dtype, op): return p1 + p2 + 1 else: raise NotImplementedError() + + +def _infer_dtype_from_integer_column(col: NumericalColumn) -> int: + """ + Introspect the integer column, compute the maximum number of digits + as precision and construct Decimal64Dtype with scale 0. + """ + minv, maxv = libcudf.reduce.minmax(col) + mindigt, maxdigt = len(str(minv._host_value)), len(str(maxv._host_value)) + mindigt = mindigt - 1 if mindigt < 0 else mindigt + maxdigt = maxdigt - 1 if maxdigt < 0 else maxdigt + return Decimal64Dtype(max(mindigt, maxdigt), 0) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 7ad6eed65a8..9f55440cff4 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -208,11 +208,6 @@ def as_timedelta_column( def as_decimal_column( self, dtype: Dtype, **kwargs ) -> "cudf.core.column.DecimalColumn": - if is_integer_dtype(self.dtype): - raise NotImplementedError( - "Casting from integer types to decimal " - "types not currently supported" - ) result = libcudf.unary.cast(self, dtype) if isinstance(dtype, cudf.core.dtypes.Decimal64Dtype): result.dtype.precision = dtype.precision diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 84566a7bcbf..7a080bbaa49 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1614,12 +1614,13 @@ def test_binops_with_NA_consistent(dtype, op): elif dtype in DATETIME_TYPES & TIMEDELTA_TYPES: assert result._column.null_count == len(data) + def _decimal_series(input, dtype): return cudf.Series( - [x if x is None else decimal.Decimal(x) for x in input], - dtype=dtype, + [x if x is None else decimal.Decimal(x) for x in input], dtype=dtype, ) + @pytest.mark.parametrize( "args", [ @@ -1910,136 +1911,134 @@ def test_binops_decimal(args): assert expect.dtype == got.dtype utils.assert_eq(expect, got) + @pytest.mark.parametrize( "args", - [( - operator.eq, - ["100", "42", None], - cudf.Decimal64Dtype(scale=0, precision=5), - [100, 42, 12], - cudf.Series([True, False, None], dtype=bool), - cudf.Series([True, False, None], dtype=bool) - ), - ( - operator.eq, - ["100.000", "42.001", None], - cudf.Decimal64Dtype(scale=3, precision=6), - [100, 42, 12], - cudf.Series([True, False, None], dtype=bool), - cudf.Series([True, False, None], dtype=bool) - ), - ( - operator.eq, - ["100", "40", None], - cudf.Decimal64Dtype(scale=-1, precision=3), - [100, 42, 12], - cudf.Series([True, False, None], dtype=bool), - cudf.Series([True, False, None], dtype=bool) - ), - ( - operator.lt, - ["100", "40", "28", None], - cudf.Decimal64Dtype(scale=0, precision=3), - [100, 42, 24, 12], - cudf.Series([False, True, False, None], dtype=bool), - cudf.Series([False, False, True, None], dtype=bool) - ), - ( - operator.lt, - ["100.000", "42.002", "23.999", None], - cudf.Decimal64Dtype(scale=3, precision=6), - [100, 42, 24, 12], - cudf.Series([False, False, False, None], dtype=bool), - cudf.Series([False, False, False, None], dtype=bool) - ), - ( - operator.lt, - ["100", "40", "10", None], - cudf.Decimal64Dtype(scale=-1, precision=3), - [100, 42, 8, 12], - cudf.Series([False, True, False, None], dtype=bool), - cudf.Series([False, False, True, None], dtype=bool) - ), - ( - operator.gt, - ["100", "42", "20", None], - cudf.Decimal64Dtype(scale=0, precision=3), - [100, 40, 24, 12], - cudf.Series([False, True, False, None], dtype=bool), - cudf.Series([False, False, True, None], dtype=bool) - ), - ( - operator.gt, - ["100.000", "42.002", "23.999", None], - cudf.Decimal64Dtype(scale=3, precision=6), - [100, 42, 24, 12], - cudf.Series([False, True, False, None], dtype=bool), - cudf.Series([False, False, True, None], dtype=bool) - ), - ( - operator.gt, - ["100", "40", "10", None], - cudf.Decimal64Dtype(scale=-1, precision=3), - [100, 42, 8, 12], - cudf.Series([False, False, True, None], dtype=bool), - cudf.Series([False, True, False, None], dtype=bool) - ), - ( - operator.le, - ["100", "40", "28", None], - cudf.Decimal64Dtype(scale=0, precision=3), - [100, 42, 24, 12], - cudf.Series([True, True, False, None], dtype=bool), - cudf.Series([True, False, True, None], dtype=bool) - ), - ( - operator.le, - ["100.000", "42.002", "23.999", None], - cudf.Decimal64Dtype(scale=3, precision=6), - [100, 42, 24, 12], - cudf.Series([True, False, False, None], dtype=bool), - cudf.Series([True, False, False, None], dtype=bool) - ), - ( - operator.le, - ["100", "40", "10", None], - cudf.Decimal64Dtype(scale=-1, precision=3), - [100, 42, 8, 12], - cudf.Series([True, True, False, None], dtype=bool), - cudf.Series([True, False, True, None], dtype=bool) - ), - ( - operator.ge, - ["100", "42", "20", None], - cudf.Decimal64Dtype(scale=0, precision=3), - [100, 40, 24, 12], - cudf.Series([True, True, False, None], dtype=bool), - cudf.Series([True, False, True, None], dtype=bool) - ), - ( - operator.ge, - ["100.000", "42.002", "23.999", None], - cudf.Decimal64Dtype(scale=3, precision=6), - [100, 42, 24, 12], - cudf.Series([True, True, False, None], dtype=bool), - cudf.Series([True, False, True, None], dtype=bool) - ), - ( - operator.ge, - ["100", "40", "10", None], - cudf.Decimal64Dtype(scale=-1, precision=3), - [100, 42, 8, 12], - cudf.Series([True, False, True, None], dtype=bool), - cudf.Series([True, True, False, None], dtype=bool) - ), - ] -) -@pytest.mark.parametrize( - "integer_dtype", INTEGER_TYPES -) -@pytest.mark.parametrize( - "reflected", [True, False] + [ + ( + operator.eq, + ["100", "41", None], + cudf.Decimal64Dtype(scale=0, precision=5), + [100, 42, 12], + cudf.Series([True, False, None], dtype=bool), + cudf.Series([True, False, None], dtype=bool), + ), + ( + operator.eq, + ["100.000", "42.001", None], + cudf.Decimal64Dtype(scale=3, precision=6), + [100, 42, 12], + cudf.Series([True, False, None], dtype=bool), + cudf.Series([True, False, None], dtype=bool), + ), + ( + operator.eq, + ["100", "40", None], + cudf.Decimal64Dtype(scale=-1, precision=3), + [100, 42, 12], + cudf.Series([True, False, None], dtype=bool), + cudf.Series([True, False, None], dtype=bool), + ), + ( + operator.lt, + ["100", "40", "28", None], + cudf.Decimal64Dtype(scale=0, precision=3), + [100, 42, 24, 12], + cudf.Series([False, True, False, None], dtype=bool), + cudf.Series([False, False, True, None], dtype=bool), + ), + ( + operator.lt, + ["100.000", "42.002", "23.999", None], + cudf.Decimal64Dtype(scale=3, precision=6), + [100, 42, 24, 12], + cudf.Series([False, False, False, None], dtype=bool), + cudf.Series([False, False, False, None], dtype=bool), + ), + ( + operator.lt, + ["100", "40", "10", None], + cudf.Decimal64Dtype(scale=-1, precision=3), + [100, 42, 8, 12], + cudf.Series([False, True, False, None], dtype=bool), + cudf.Series([False, False, True, None], dtype=bool), + ), + ( + operator.gt, + ["100", "42", "20", None], + cudf.Decimal64Dtype(scale=0, precision=3), + [100, 40, 24, 12], + cudf.Series([False, True, False, None], dtype=bool), + cudf.Series([False, False, True, None], dtype=bool), + ), + ( + operator.gt, + ["100.000", "42.002", "23.999", None], + cudf.Decimal64Dtype(scale=3, precision=6), + [100, 42, 24, 12], + cudf.Series([False, True, False, None], dtype=bool), + cudf.Series([False, False, True, None], dtype=bool), + ), + ( + operator.gt, + ["100", "40", "10", None], + cudf.Decimal64Dtype(scale=-1, precision=3), + [100, 42, 8, 12], + cudf.Series([False, False, True, None], dtype=bool), + cudf.Series([False, True, False, None], dtype=bool), + ), + ( + operator.le, + ["100", "40", "28", None], + cudf.Decimal64Dtype(scale=0, precision=3), + [100, 42, 24, 12], + cudf.Series([True, True, False, None], dtype=bool), + cudf.Series([True, False, True, None], dtype=bool), + ), + ( + operator.le, + ["100.000", "42.002", "23.999", None], + cudf.Decimal64Dtype(scale=3, precision=6), + [100, 42, 24, 12], + cudf.Series([True, False, False, None], dtype=bool), + cudf.Series([True, False, False, None], dtype=bool), + ), + ( + operator.le, + ["100", "40", "10", None], + cudf.Decimal64Dtype(scale=-1, precision=3), + [100, 42, 8, 12], + cudf.Series([True, True, False, None], dtype=bool), + cudf.Series([True, False, True, None], dtype=bool), + ), + ( + operator.ge, + ["100", "42", "20", None], + cudf.Decimal64Dtype(scale=0, precision=3), + [100, 40, 24, 12], + cudf.Series([True, True, False, None], dtype=bool), + cudf.Series([True, False, True, None], dtype=bool), + ), + ( + operator.ge, + ["100.000", "42.002", "23.999", None], + cudf.Decimal64Dtype(scale=3, precision=6), + [100, 42, 24, 12], + cudf.Series([True, True, False, None], dtype=bool), + cudf.Series([True, False, True, None], dtype=bool), + ), + ( + operator.ge, + ["100", "40", "10", None], + cudf.Decimal64Dtype(scale=-1, precision=3), + [100, 42, 8, 12], + cudf.Series([True, False, True, None], dtype=bool), + cudf.Series([True, True, False, None], dtype=bool), + ), + ], ) +@pytest.mark.parametrize("integer_dtype", INTEGER_TYPES) +@pytest.mark.parametrize("reflected", [False]) def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): """ Tested compare operations: @@ -2053,17 +2052,18 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): op, ldata, ldtype, rdata, expected, _ = args else: op, ldata, ldtype, rdata, _, expected = args - + lhs = _decimal_series(ldata, ldtype) rhs = cudf.Series(rdata, dtype=integer_dtype) if reflected: rhs, lhs = lhs, rhs - + actual = op(lhs, rhs) - + utils.assert_eq(expected, actual) + @pytest.mark.parametrize("fn", ["eq", "ne", "lt", "gt", "le", "ge"]) def test_equality_ops_index_mismatch(fn): a = cudf.Series( From fd93ef4de874092ef291b9376589015da9e28cd3 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Sun, 28 Mar 2021 01:29:37 -0700 Subject: [PATCH 05/13] Pass tests w/o reflect --- python/cudf/cudf/tests/test_binops.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 7a080bbaa49..7f4294cc46e 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1952,8 +1952,8 @@ def test_binops_decimal(args): ["100.000", "42.002", "23.999", None], cudf.Decimal64Dtype(scale=3, precision=6), [100, 42, 24, 12], - cudf.Series([False, False, False, None], dtype=bool), - cudf.Series([False, False, False, None], dtype=bool), + cudf.Series([False, False, True, None], dtype=bool), + cudf.Series([False, True, False, None], dtype=bool), ), ( operator.lt, @@ -2000,7 +2000,7 @@ def test_binops_decimal(args): ["100.000", "42.002", "23.999", None], cudf.Decimal64Dtype(scale=3, precision=6), [100, 42, 24, 12], - cudf.Series([True, False, False, None], dtype=bool), + cudf.Series([True, False, True, None], dtype=bool), cudf.Series([True, False, False, None], dtype=bool), ), ( From 3a595df75194c27b652b652dd8c7185d3d81ec30 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Sun, 28 Mar 2021 03:31:34 -0700 Subject: [PATCH 06/13] Passing all tests for integer columns, removed prec infer --- python/cudf/cudf/core/column/decimal.py | 30 ++++++++--------------- python/cudf/cudf/core/column/numerical.py | 15 +++++++++++- python/cudf/cudf/tests/test_binops.py | 4 +-- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index a32e4534460..5c87451fc2d 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -15,7 +15,6 @@ from cudf._typing import Dtype from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase, as_column -from cudf.core.column.numerical import NumericalColumn from cudf.core.dtypes import Decimal64Dtype from cudf.utils.utils import pa_mask_buffer_to_mask @@ -77,20 +76,23 @@ def binary_operator(self, op, other, reflect=False): self.dtype, other.dtype, op ) elif op in ("eq", "lt", "gt", "le", "ge"): - if not isinstance(other, (DecimalColumn, NumericalColumn)): + if not isinstance( + other, (DecimalColumn, cudf.core.column.NumericalColumn) + ): raise TypeError( f"Operator {op} not supported between" f"{str(type(self))} and {str(type(other))}" ) - if isinstance(other, NumericalColumn) and not is_integer_dtype( - other.dtype - ): + if isinstance( + other, cudf.core.column.NumericalColumn + ) and not is_integer_dtype(other.dtype): raise TypeError( f"Only decimal and integer column is supported for {op}." ) - else: - dtype = _infer_dtype_from_integer_column(other) - other = other.as_decimal_column(dtype) + if isinstance(other, cudf.core.column.NumericalColumn): + other = other.as_decimal_column( + Decimal64Dtype(Decimal64Dtype.MAX_PRECISION, 0) + ) result = libcudf.binaryop.binaryop(self, other, op, bool) return result @@ -152,15 +154,3 @@ def _binop_precision(l_dtype, r_dtype, op): return p1 + p2 + 1 else: raise NotImplementedError() - - -def _infer_dtype_from_integer_column(col: NumericalColumn) -> int: - """ - Introspect the integer column, compute the maximum number of digits - as precision and construct Decimal64Dtype with scale 0. - """ - minv, maxv = libcudf.reduce.minmax(col) - mindigt, maxdigt = len(str(minv._host_value)), len(str(maxv._host_value)) - mindigt = mindigt - 1 if mindigt < 0 else mindigt - maxdigt = maxdigt - 1 if maxdigt < 0 else maxdigt - return Decimal64Dtype(max(mindigt, maxdigt), 0) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 9f55440cff4..169bfdd1c1d 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -22,6 +22,7 @@ column, string, ) +from cudf.core.dtypes import Decimal64Dtype from cudf.utils import cudautils, utils from cudf.utils.dtypes import ( min_column_type, @@ -103,11 +104,23 @@ def binary_operator( out_dtype = self.dtype else: if not ( - isinstance(rhs, (NumericalColumn, cudf.Scalar,),) + isinstance( + rhs, + ( + NumericalColumn, + cudf.Scalar, + cudf.core.column.DecimalColumn, + ), + ) or np.isscalar(rhs) ): msg = "{!r} operator not supported between {} and {}" raise TypeError(msg.format(binop, type(self), type(rhs))) + if isinstance(rhs, cudf.core.column.DecimalColumn): + lhs = self.as_decimal_column( + Decimal64Dtype(Decimal64Dtype.MAX_PRECISION, 0) + ) + return lhs.binary_operator(binop, rhs) out_dtype = np.result_type(self.dtype, rhs.dtype) if binop in ["mod", "floordiv"]: tmp = self if reflect else rhs diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 7f4294cc46e..900f68000d2 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -2001,7 +2001,7 @@ def test_binops_decimal(args): cudf.Decimal64Dtype(scale=3, precision=6), [100, 42, 24, 12], cudf.Series([True, False, True, None], dtype=bool), - cudf.Series([True, False, False, None], dtype=bool), + cudf.Series([True, True, False, None], dtype=bool), ), ( operator.le, @@ -2038,7 +2038,7 @@ def test_binops_decimal(args): ], ) @pytest.mark.parametrize("integer_dtype", INTEGER_TYPES) -@pytest.mark.parametrize("reflected", [False]) +@pytest.mark.parametrize("reflected", [True, False]) def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): """ Tested compare operations: From 1f8e24762a0aa2f8268c4b5ae135e32727b51fd3 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Sun, 28 Mar 2021 19:27:07 -0700 Subject: [PATCH 07/13] Use sorted INTEGER_TYPES for test setup --- python/cudf/cudf/tests/test_binops.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 423c14ef64d..9b8c7f9f8c8 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -2037,14 +2037,14 @@ def test_binops_decimal(args): ), ], ) -@pytest.mark.parametrize("integer_dtype", INTEGER_TYPES) +@pytest.mark.parametrize("integer_dtype", cudf.tests.utils.INTEGER_TYPES) @pytest.mark.parametrize("reflected", [True, False]) def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): """ Tested compare operations: eq, lt, gt, le, ge - Each operation has 3 decimal data setups, with scale from {==0, >0, <0}, - decimal precisions are sufficient to contain the digits. + Each operation has 3 decimal data setups, with scale from {==0, >0, <0}. + Decimal precisions are sufficient to hold the digits. For each decimal data setup, there is at least one row that lead to one of the following compare results: {True, False, None}. """ From f97d424da6901aba6158b04bd5ea63de93710480 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 29 Mar 2021 23:29:05 -0700 Subject: [PATCH 08/13] Updating api docs for decimal types --- docs/cudf/source/basics.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/cudf/source/basics.rst b/docs/cudf/source/basics.rst index e270708df90..15b4b43662b 100644 --- a/docs/cudf/source/basics.rst +++ b/docs/cudf/source/basics.rst @@ -34,6 +34,8 @@ The following table lists all of cudf types. For methods requiring dtype argumen +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ | Boolean | | np.bool_ | ``'bool'`` | +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ +| Decimal | Decimal64Dtype | (none) | (none) | ++------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ **Note: All dtypes above are Nullable** From 2e32c2f6d112cc6d5f033bd7b2a5bbda1187fdf3 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 30 Mar 2021 17:44:00 -0700 Subject: [PATCH 09/13] Adding support for integer and decimal scalars --- python/cudf/cudf/core/column/decimal.py | 3 +- python/cudf/cudf/tests/test_binops.py | 110 ++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 38d446978bc..a6869450770 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -79,7 +79,8 @@ def binary_operator(self, op, other, reflect=False): ) elif op in ("eq", "lt", "gt", "le", "ge"): if not isinstance( - other, (DecimalColumn, cudf.core.column.NumericalColumn) + other, + (DecimalColumn, cudf.core.column.NumericalColumn, cudf.Scalar), ): raise TypeError( f"Operator {op} not supported between" diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 41e70cfc13c..adbc76004a7 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -2251,6 +2251,116 @@ def decimal_series(input, dtype): utils.assert_eq(expect, got) +@pytest.mark.parametrize( + "args", + [ + ( + operator.eq, + ["100.00", "41", None], + cudf.Decimal64Dtype(scale=0, precision=5), + 100, + cudf.Series([True, False, None], dtype=bool), + cudf.Series([True, False, None], dtype=bool), + ), + ( + operator.eq, + ["100.123", "41", None], + cudf.Decimal64Dtype(scale=3, precision=6), + decimal.Decimal("100.123"), + cudf.Series([True, False, None], dtype=bool), + cudf.Series([True, False, None], dtype=bool), + ), + ( + operator.gt, + ["100.00", "41", "120.21", None], + cudf.Decimal64Dtype(scale=2, precision=5), + 100, + cudf.Series([False, False, True, None], dtype=bool), + cudf.Series([False, True, False, None], dtype=bool), + ), + ( + operator.gt, + ["100.123", "41", "120.21", None], + cudf.Decimal64Dtype(scale=3, precision=6), + decimal.Decimal("100.123"), + cudf.Series([False, False, True, None], dtype=bool), + cudf.Series([False, True, False, None], dtype=bool), + ), + ( + operator.ge, + ["100.00", "41", "120.21", None], + cudf.Decimal64Dtype(scale=2, precision=5), + 100, + cudf.Series([True, False, True, None], dtype=bool), + cudf.Series([True, True, False, None], dtype=bool), + ), + ( + operator.ge, + ["100.123", "41", "120.21", None], + cudf.Decimal64Dtype(scale=3, precision=6), + decimal.Decimal("100.123"), + cudf.Series([True, False, True, None], dtype=bool), + cudf.Series([True, True, False, None], dtype=bool), + ), + ( + operator.lt, + ["100.00", "41", "120.21", None], + cudf.Decimal64Dtype(scale=2, precision=5), + 100, + cudf.Series([False, True, False, None], dtype=bool), + cudf.Series([False, False, True, None], dtype=bool), + ), + ( + operator.lt, + ["100.123", "41", "120.21", None], + cudf.Decimal64Dtype(scale=3, precision=6), + decimal.Decimal("100.123"), + cudf.Series([False, True, False, None], dtype=bool), + cudf.Series([False, False, True, None], dtype=bool), + ), + ( + operator.le, + ["100.00", "41", "120.21", None], + cudf.Decimal64Dtype(scale=2, precision=5), + 100, + cudf.Series([True, True, False, None], dtype=bool), + cudf.Series([True, False, True, None], dtype=bool), + ), + ( + operator.le, + ["100.123", "41", "120.21", None], + cudf.Decimal64Dtype(scale=3, precision=6), + decimal.Decimal("100.123"), + cudf.Series([True, True, False, None], dtype=bool), + cudf.Series([True, False, True, None], dtype=bool), + ), + ], +) +@pytest.mark.parametrize("reflected", [True, False]) +def test_binops_decimal_scalar_compare(args, reflected): + """ + Tested compare operations: + eq, lt, gt, le, ge + Each operation has 2 data setups: integer and decimal.Decimal + For each data setup, there is at least one row that lead to one of the + following compare results: {True, False, None}. + """ + if not reflected: + op, ldata, ldtype, rdata, expected, _ = args + else: + op, ldata, ldtype, rdata, _, expected = args + + lhs = _decimal_series(ldata, ldtype) + rhs = rdata + + if reflected: + rhs, lhs = lhs, rhs + + actual = op(lhs, rhs) + + utils.assert_eq(expected, actual) + + @pytest.mark.parametrize( "dtype", [ From 77f4f474bbb4907965add927ae1e5906088c687f Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 31 Mar 2021 11:54:44 -0700 Subject: [PATCH 10/13] Comment for `decimals.pxd` decimal64 wrapper --- python/cudf/cudf/_lib/cpp/wrappers/decimals.pxd | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cudf/cudf/_lib/cpp/wrappers/decimals.pxd b/python/cudf/cudf/_lib/cpp/wrappers/decimals.pxd index a73e6e0151d..9de23fb2595 100644 --- a/python/cudf/cudf/_lib/cpp/wrappers/decimals.pxd +++ b/python/cudf/cudf/_lib/cpp/wrappers/decimals.pxd @@ -2,6 +2,7 @@ from libc.stdint cimport int64_t, int32_t cdef extern from "cudf/fixed_point/fixed_point.hpp" namespace "numeric" nogil: + # cython type stub to help resolve to numeric::decimal64 ctypedef int64_t decimal64 cdef cppclass scale_type: From 5e9da57b6a9dd875e316bf494e44d3d0fa68035f Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 31 Mar 2021 11:55:54 -0700 Subject: [PATCH 11/13] Decimal column v. Scalar with Decimal64Dtype binop --- python/cudf/cudf/core/column/decimal.py | 2 + python/cudf/cudf/tests/test_binops.py | 97 ++++++++++++++++++++++++- 2 files changed, 98 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index a6869450770..a8907c4380f 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -102,6 +102,8 @@ def binary_operator(self, op, other, reflect=False): def normalize_binop_value(self, other): if is_scalar(other) and isinstance(other, (int, np.int, Decimal)): return cudf.Scalar(Decimal(other)) + elif isinstance(other, cudf.Scalar) and isinstance(other.dtype, cudf.Decimal64Dtype): + return other else: raise TypeError(f"cannot normalize {type(other)}") diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index adbc76004a7..a1db396a5f4 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -2094,6 +2094,15 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): cudf.Decimal64Dtype(scale=1, precision=7), False, ), + ( + operator.add, + ["100", "200"], + cudf.Decimal64Dtype(scale=-2, precision=3), + cudf.Scalar(decimal.Decimal("1.5")), + ["101.5", "201.5"], + cudf.Decimal64Dtype(scale=1, precision=7), + False, + ), ( operator.add, ["100", "200"], @@ -2121,6 +2130,15 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): cudf.Decimal64Dtype(scale=1, precision=7), True, ), + ( + operator.add, + ["100", "200"], + cudf.Decimal64Dtype(scale=-2, precision=3), + cudf.Scalar(decimal.Decimal("1.5")), + ["101.5", "201.5"], + cudf.Decimal64Dtype(scale=1, precision=7), + True, + ), ( operator.mul, ["100", "200"], @@ -2148,6 +2166,15 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): cudf.Decimal64Dtype(scale=-1, precision=6), False, ), + ( + operator.mul, + ["100", "200"], + cudf.Decimal64Dtype(scale=-2, precision=3), + cudf.Scalar(decimal.Decimal("1.5")), + ["150", "300"], + cudf.Decimal64Dtype(scale=-1, precision=6), + False, + ), ( operator.mul, ["100", "200"], @@ -2175,6 +2202,15 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): cudf.Decimal64Dtype(scale=-1, precision=6), True, ), + ( + operator.mul, + ["100", "200"], + cudf.Decimal64Dtype(scale=-2, precision=3), + cudf.Scalar(decimal.Decimal("1.5")), + ["150", "300"], + cudf.Decimal64Dtype(scale=-1, precision=6), + True, + ), ( operator.sub, ["100", "200"], @@ -2202,6 +2238,15 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): cudf.Decimal64Dtype(scale=0, precision=6), False, ), + ( + operator.sub, + ["100", "200"], + cudf.Decimal64Dtype(scale=-2, precision=3), + cudf.Scalar(decimal.Decimal("2.5")), + ["97.5", "197.5"], + cudf.Decimal64Dtype(scale=1, precision=7), + False, + ), ( operator.sub, ["100", "200"], @@ -2229,6 +2274,15 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): cudf.Decimal64Dtype(scale=1, precision=7), True, ), + ( + operator.sub, + ["100", "200"], + cudf.Decimal64Dtype(scale=-2, precision=3), + cudf.Scalar(decimal.Decimal("2.5")), + ["-97.5", "-197.5"], + cudf.Decimal64Dtype(scale=1, precision=7), + True, + ), ], ) def test_binops_decimal_scalar(args): @@ -2270,6 +2324,14 @@ def decimal_series(input, dtype): cudf.Series([True, False, None], dtype=bool), cudf.Series([True, False, None], dtype=bool), ), + ( + operator.eq, + ["100.123", "41", None], + cudf.Decimal64Dtype(scale=3, precision=6), + cudf.Scalar(decimal.Decimal("100.123")), + cudf.Series([True, False, None], dtype=bool), + cudf.Series([True, False, None], dtype=bool), + ), ( operator.gt, ["100.00", "41", "120.21", None], @@ -2286,6 +2348,14 @@ def decimal_series(input, dtype): cudf.Series([False, False, True, None], dtype=bool), cudf.Series([False, True, False, None], dtype=bool), ), + ( + operator.gt, + ["100.123", "41", "120.21", None], + cudf.Decimal64Dtype(scale=3, precision=6), + cudf.Scalar(decimal.Decimal("100.123")), + cudf.Series([False, False, True, None], dtype=bool), + cudf.Series([False, True, False, None], dtype=bool), + ), ( operator.ge, ["100.00", "41", "120.21", None], @@ -2302,6 +2372,14 @@ def decimal_series(input, dtype): cudf.Series([True, False, True, None], dtype=bool), cudf.Series([True, True, False, None], dtype=bool), ), + ( + operator.ge, + ["100.123", "41", "120.21", None], + cudf.Decimal64Dtype(scale=3, precision=6), + cudf.Scalar(decimal.Decimal("100.123")), + cudf.Series([True, False, True, None], dtype=bool), + cudf.Series([True, True, False, None], dtype=bool), + ), ( operator.lt, ["100.00", "41", "120.21", None], @@ -2318,6 +2396,14 @@ def decimal_series(input, dtype): cudf.Series([False, True, False, None], dtype=bool), cudf.Series([False, False, True, None], dtype=bool), ), + ( + operator.lt, + ["100.123", "41", "120.21", None], + cudf.Decimal64Dtype(scale=3, precision=6), + cudf.Scalar(decimal.Decimal("100.123")), + cudf.Series([False, True, False, None], dtype=bool), + cudf.Series([False, False, True, None], dtype=bool), + ), ( operator.le, ["100.00", "41", "120.21", None], @@ -2334,6 +2420,14 @@ def decimal_series(input, dtype): cudf.Series([True, True, False, None], dtype=bool), cudf.Series([True, False, True, None], dtype=bool), ), + ( + operator.le, + ["100.123", "41", "120.21", None], + cudf.Decimal64Dtype(scale=3, precision=6), + cudf.Scalar(decimal.Decimal("100.123")), + cudf.Series([True, True, False, None], dtype=bool), + cudf.Series([True, False, True, None], dtype=bool), + ), ], ) @pytest.mark.parametrize("reflected", [True, False]) @@ -2341,7 +2435,8 @@ def test_binops_decimal_scalar_compare(args, reflected): """ Tested compare operations: eq, lt, gt, le, ge - Each operation has 2 data setups: integer and decimal.Decimal + Each operation has 3 data setups: pyints, Decimal, and + cudf.Scalar(dtype=cudf.Decimal64Dtype) For each data setup, there is at least one row that lead to one of the following compare results: {True, False, None}. """ From 978c9d3bed03c00d480b219e153779ec484fe075 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 31 Mar 2021 11:58:32 -0700 Subject: [PATCH 12/13] style --- python/cudf/cudf/core/column/decimal.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index a8907c4380f..e93c5824817 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -102,7 +102,9 @@ def binary_operator(self, op, other, reflect=False): def normalize_binop_value(self, other): if is_scalar(other) and isinstance(other, (int, np.int, Decimal)): return cudf.Scalar(Decimal(other)) - elif isinstance(other, cudf.Scalar) and isinstance(other.dtype, cudf.Decimal64Dtype): + elif isinstance(other, cudf.Scalar) and isinstance( + other.dtype, cudf.Decimal64Dtype + ): return other else: raise TypeError(f"cannot normalize {type(other)}") From e8bfde2c2ae6e88854c3b3bce473dcac4584feae Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 31 Mar 2021 15:34:30 -0700 Subject: [PATCH 13/13] refine docstring --- python/cudf/cudf/tests/test_binops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index a1db396a5f4..ac80071c8e4 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -2436,7 +2436,7 @@ def test_binops_decimal_scalar_compare(args, reflected): Tested compare operations: eq, lt, gt, le, ge Each operation has 3 data setups: pyints, Decimal, and - cudf.Scalar(dtype=cudf.Decimal64Dtype) + decimal cudf.Scalar For each data setup, there is at least one row that lead to one of the following compare results: {True, False, None}. """