Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Enable typecasting between decimal and int #7691

Merged
merged 3 commits into from
Mar 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 6 additions & 19 deletions python/cudf/cudf/_lib/unary.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ from cudf._lib.cpp.unary cimport (
unary_operator,
)

from cudf._lib.types cimport underlying_type_t_type_id
from cudf._lib.types cimport underlying_type_t_type_id, dtype_to_data_type

cimport cudf._lib.cpp.unary as libcudf_unary
cimport cudf._lib.cpp.types as libcudf_types
Expand Down Expand Up @@ -95,30 +95,17 @@ def is_valid(Column input):

def cast(Column input, object dtype=np.float64):
cdef column_view c_input = input.view()
cdef type_id tid
cdef data_type c_dtype

# TODO: Use dtype_to_data_type when it becomes available
# to simplify this conversion
if is_decimal_dtype(dtype):
tid = libcudf_types.type_id.DECIMAL64
c_dtype = data_type(tid, -dtype.scale)
else:
tid = (
<type_id> (
<underlying_type_t_type_id> (
np_to_cudf_types[np.dtype(dtype)]
)
)
)
c_dtype = data_type(tid)
cdef data_type c_dtype = dtype_to_data_type(dtype)

cdef unique_ptr[column] c_result

with nogil:
c_result = move(libcudf_unary.cast(c_input, c_dtype))

return Column.from_unique_ptr(move(c_result))
result = Column.from_unique_ptr(move(c_result))
if is_decimal_dtype(result.dtype):
result.dtype.precision = dtype.precision
return result


def is_nan(Column input):
Expand Down
11 changes: 1 addition & 10 deletions python/cudf/cudf/core/column/decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import cupy as cp
import numpy as np
import pyarrow as pa
from pandas.api.types import is_integer_dtype
from typing import cast

from cudf import _lib as libcudf
Expand Down Expand Up @@ -80,19 +79,11 @@ def as_decimal_column(
) -> "cudf.core.column.DecimalColumn":
if dtype == self.dtype:
return self
result = libcudf.unary.cast(self, dtype)
if isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
result.dtype.precision = dtype.precision
return result
return libcudf.unary.cast(self, dtype)
Comment on lines -83 to +82
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a quick side note: this change is needed in #7716.


def as_numerical_column(
self, dtype: Dtype
) -> "cudf.core.column.NumericalColumn":
if is_integer_dtype(dtype):
raise NotImplementedError(
"Casting from decimal types to integer "
"types not currently supported"
)
return libcudf.unary.cast(self, dtype)

def as_string_column(
Expand Down
10 changes: 1 addition & 9 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,15 +208,7 @@ def as_timedelta_column(
def as_decimal_column(
self, dtype: Dtype, **kwargs
) -> "cudf.core.column.DecimalColumn":
if is_integer_dtype(self.dtype):
raise NotImplementedError(
"Casting from integer types to decimal "
"types not currently supported"
)
result = libcudf.unary.cast(self, dtype)
if isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
result.dtype.precision = dtype.precision
return result
return libcudf.unary.cast(self, dtype)

def as_numerical_column(self, dtype: Dtype) -> NumericalColumn:
dtype = np.dtype(dtype)
Expand Down
82 changes: 63 additions & 19 deletions python/cudf/cudf/tests/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
from cudf.core.column import DecimalColumn, NumericalColumn

from cudf.tests.utils import (
NUMERIC_TYPES,
FLOAT_TYPES,
INTEGER_TYPES,
assert_eq,
)

Expand Down Expand Up @@ -75,18 +77,59 @@ def test_from_arrow_max_precision():
"to_dtype",
[Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)],
)
def test_typecast_to_decimal(data, from_dtype, to_dtype):
actual = data.astype(from_dtype)
expected = actual
def test_typecast_from_float_to_decimal(data, from_dtype, to_dtype):
got = data.astype(from_dtype)

actual = actual.astype(to_dtype)
pa_arr = expected.to_arrow().cast(
pa_arr = got.to_arrow().cast(
pa.decimal128(to_dtype.precision, to_dtype.scale)
)
expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))

assert_eq(actual, expected)
assert_eq(actual.dtype, expected.dtype)
got = got.astype(to_dtype)

assert_eq(got, expected)
assert_eq(got.dtype, expected.dtype)


@pytest.mark.parametrize(
"data",
[
cudf.Series(
[
14.12302,
38.2,
np.nan,
0.0,
-8.302014,
np.nan,
94.31304,
np.nan,
-112.2314,
0.3333333,
np.nan,
]
),
],
)
@pytest.mark.parametrize("from_dtype", INTEGER_TYPES)
@pytest.mark.parametrize(
"to_dtype",
[Decimal64Dtype(9, 3), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)],
)
def test_typecast_from_int_to_decimal(data, from_dtype, to_dtype):
got = data.astype(from_dtype)

pa_arr = (
got.to_arrow()
.cast("float64")
.cast(pa.decimal128(to_dtype.precision, to_dtype.scale))
)
expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))

got = got.astype(to_dtype)

assert_eq(got, expected)
assert_eq(got.dtype, expected.dtype)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -117,17 +160,17 @@ def test_typecast_to_decimal(data, from_dtype, to_dtype):
[Decimal64Dtype(7, 2), Decimal64Dtype(18, 10), Decimal64Dtype(11, 4)],
)
def test_typecast_to_from_decimal(data, from_dtype, to_dtype):
actual = data.astype(from_dtype)
expected = actual
got = data.astype(from_dtype)

actual = actual.astype(to_dtype)
pa_arr = expected.to_arrow().cast(
pa_arr = got.to_arrow().cast(
pa.decimal128(to_dtype.precision, to_dtype.scale), safe=False
)
expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))

assert_eq(actual, expected)
assert_eq(actual.dtype, expected.dtype)
got = got.astype(to_dtype)

assert_eq(got, expected)
assert_eq(got.dtype, expected.dtype)


@pytest.mark.parametrize(
Expand All @@ -151,14 +194,15 @@ def test_typecast_to_from_decimal(data, from_dtype, to_dtype):
)
@pytest.mark.parametrize(
"from_dtype",
[Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 10)],
[Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(17, 10)],
)
@pytest.mark.parametrize("to_dtype", FLOAT_TYPES)
@pytest.mark.parametrize("to_dtype", NUMERIC_TYPES)
def test_typecast_from_decimal(data, from_dtype, to_dtype):
actual = data.astype(from_dtype)
pa_arr = actual.to_arrow().cast(to_dtype, safe=False)
got = data.astype(from_dtype)
pa_arr = got.to_arrow().cast(to_dtype, safe=False)

actual = actual.astype(to_dtype)
got = got.astype(to_dtype)
expected = cudf.Series(NumericalColumn.from_arrow(pa_arr))

assert_eq(actual, expected)
assert_eq(got, expected)
assert_eq(got.dtype, expected.dtype)
galipremsagar marked this conversation as resolved.
Show resolved Hide resolved