Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add decimal column comparison operations #7716

Merged
merged 17 commits into from
Apr 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/cudf/source/basics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ The following table lists all of cudf types. For methods requiring dtype argumen
+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
| Boolean | | np.bool_ | ``'bool'`` |
+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
| Decimal | Decimal64Dtype | (none) | (none) |
+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+

**Note: All dtypes above are Nullable**

Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/cpp/wrappers/decimals.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from libc.stdint cimport int64_t, int32_t

cdef extern from "cudf/fixed_point/fixed_point.hpp" namespace "numeric" nogil:
# cython type stub to help resolve to numeric::decimal64
ctypedef int64_t decimal64

cdef cppclass scale_type:
Expand Down
62 changes: 46 additions & 16 deletions python/cudf/cudf/core/column/decimal.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
# Copyright (c) 2021, NVIDIA CORPORATION.

import cudf
from decimal import Decimal
from typing import cast

import cupy as cp
import numpy as np
import pyarrow as pa
from typing import cast
from pandas.api.types import is_integer_dtype

import cudf
from cudf import _lib as libcudf
from cudf.core.buffer import Buffer
from cudf.core.column import ColumnBase
from cudf.core.dtypes import Decimal64Dtype
from cudf.utils.utils import pa_mask_buffer_to_mask

from cudf._typing import Dtype
from cudf._lib.strings.convert.convert_fixed_point import (
from_decimal as cpp_from_decimal,
)
from cudf.core.column import as_column
from decimal import Decimal
from cudf._typing import Dtype
from cudf.core.buffer import Buffer
from cudf.core.column import ColumnBase, as_column
from cudf.core.dtypes import Decimal64Dtype
from cudf.utils.dtypes import is_scalar
from cudf.utils.utils import pa_mask_buffer_to_mask


class DecimalColumn(ColumnBase):
Expand Down Expand Up @@ -65,17 +65,47 @@ def to_arrow(self):
def binary_operator(self, op, other, reflect=False):
    """Apply the binary operation ``op`` between this column and ``other``.

    Parameters
    ----------
    op : str
        Name of the operation. ``"add"``, ``"sub"`` and ``"mul"`` are
        treated as decimal arithmetic; ``"eq"``, ``"lt"``, ``"gt"``,
        ``"le"`` and ``"ge"`` are treated as comparisons that request a
        ``bool`` output type from libcudf.
    other : DecimalColumn, NumericalColumn or cudf.Scalar
        The other operand. For comparisons, an integer ``NumericalColumn``
        is first converted to a decimal column with scale 0.
    reflect : bool, default False
        If True, the two operands are swapped before ``op`` is applied.

    Returns
    -------
    The column produced by ``libcudf.binaryop.binaryop``.

    Raises
    ------
    TypeError
        If ``op`` is not one of the operations listed above, if ``other``
        has an unsupported type for a comparison, or if ``other`` is a
        non-integer ``NumericalColumn`` in a comparison.
    """
    if reflect:
        self, other = other, self

    if op in ("add", "sub", "mul"):
        # Binary arithmetic between decimal columns. `scale` and
        # `precision` are computed outside of libcudf.
        scale = _binop_scale(self.dtype, other.dtype, op)
        output_type = Decimal64Dtype(
            scale=scale, precision=Decimal64Dtype.MAX_PRECISION
        )  # precision will be ignored, libcudf has no notion of precision
        result = libcudf.binaryop.binaryop(self, other, op, output_type)
        result.dtype.precision = _binop_precision(
            self.dtype, other.dtype, op
        )
    elif op in ("eq", "lt", "gt", "le", "ge"):
        # NOTE(review): "ne" is not in this list -- confirm whether that
        # omission is intentional.
        if not isinstance(
            other,
            (DecimalColumn, cudf.core.column.NumericalColumn, cudf.Scalar),
        ):
            raise TypeError(
                f"Operator {op} not supported between "
                f"{str(type(self))} and {str(type(other))}"
            )
        if isinstance(
            other, cudf.core.column.NumericalColumn
        ) and not is_integer_dtype(other.dtype):
            raise TypeError(
                f"Only decimal and integer column is supported for {op}."
            )
        if isinstance(other, cudf.core.column.NumericalColumn):
            # Integers are compared as decimals with scale 0.
            other = other.as_decimal_column(
                Decimal64Dtype(Decimal64Dtype.MAX_PRECISION, 0)
            )
        result = libcudf.binaryop.binaryop(self, other, op, bool)
    else:
        # Without this branch `result` would be unbound and the caller
        # would see an unrelated UnboundLocalError.
        raise TypeError(
            f"Operator {op} not supported between "
            f"{str(type(self))} and {str(type(other))}"
        )
    return result

def normalize_binop_value(self, other):
    """Normalize ``other`` into an operand usable in a decimal binary op.

    Parameters
    ----------
    other : int, decimal.Decimal or cudf.Scalar
        A scalar ``int`` or ``Decimal`` is wrapped as
        ``cudf.Scalar(Decimal(other))``. A ``cudf.Scalar`` whose dtype is a
        ``cudf.Decimal64Dtype`` is returned unchanged.

    Returns
    -------
    cudf.Scalar

    Raises
    ------
    TypeError
        If ``other`` is none of the supported kinds.
    """
    # `np.int` was only an alias of the builtin `int` and has been removed
    # from NumPy, so checking against `int` alone is equivalent and does not
    # raise AttributeError on current NumPy releases.
    if is_scalar(other) and isinstance(other, (int, Decimal)):
        return cudf.Scalar(Decimal(other))
    elif isinstance(other, cudf.Scalar) and isinstance(
        other.dtype, cudf.Decimal64Dtype
    ):
        return other
    else:
        raise TypeError(f"cannot normalize {type(other)}")

Expand Down
15 changes: 14 additions & 1 deletion python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
column,
string,
)
from cudf.core.dtypes import Decimal64Dtype
from cudf.utils import cudautils, utils
from cudf.utils.dtypes import (
min_column_type,
Expand Down Expand Up @@ -103,11 +104,23 @@ def binary_operator(
out_dtype = self.dtype
else:
if not (
isinstance(rhs, (NumericalColumn, cudf.Scalar,),)
isinstance(
rhs,
(
NumericalColumn,
cudf.Scalar,
cudf.core.column.DecimalColumn,
),
)
or np.isscalar(rhs)
):
msg = "{!r} operator not supported between {} and {}"
raise TypeError(msg.format(binop, type(self), type(rhs)))
if isinstance(rhs, cudf.core.column.DecimalColumn):
lhs = self.as_decimal_column(
Decimal64Dtype(Decimal64Dtype.MAX_PRECISION, 0)
)
return lhs.binary_operator(binop, rhs)
out_dtype = np.result_type(self.dtype, rhs.dtype)
if binop in ["mod", "floordiv"]:
tmp = self if reflect else rhs
Expand Down
Loading