diff --git a/cpp/include/cudf/strings/char_types/char_types.hpp b/cpp/include/cudf/strings/char_types/char_types.hpp index 300722920f4..1f5b6241850 100644 --- a/cpp/include/cudf/strings/char_types/char_types.hpp +++ b/cpp/include/cudf/strings/char_types/char_types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -146,82 +146,6 @@ std::unique_ptr filter_characters_of_type( string_character_types types_to_keep = string_character_types::ALL_TYPES, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Returns a boolean column identifying strings in which all - * characters are valid for conversion to integers. - * - * The output row entry will be set to `true` if the corresponding string element - * has at least one character in [-+0-9]. - * - * @code{.pseudo} - * Example: - * s = ['123', '-456', '', 'A', '+7'] - * b = s.is_integer(s) - * b is [true, true, false, false, true] - * @endcode - * - * Any null row results in a null entry for that row in the output column. - * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. - */ -std::unique_ptr is_integer( - strings_column_view const& strings, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Returns `true` if all strings contain - * characters that are valid for conversion to integers. - * - * This function will return `true` if all string elements - * has at least one character in [-+0-9]. - * - * Any null entry or empty string will cause this function to return `false`. - * - * @param strings Strings instance for this operation. 
- * @return true if all string are valid - */ -bool all_integer(strings_column_view const& strings); - -/** - * @brief Returns a boolean column identifying strings in which all - * characters are valid for conversion to floats. - * - * The output row entry will be set to `true` if the corresponding string element - * has at least one character in [-+0-9eE.]. - * - * @code{.pseudo} - * Example: - * s = ['123', '-456', '', 'A', '+7', '8.9' '3.7e+5'] - * b = s.is_float(s) - * b is [true, true, false, false, true, true, true] - * @endcode - * - * Any null row results in a null entry for that row in the output column. - * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. - */ -std::unique_ptr is_float( - strings_column_view const& strings, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Returns `true` if all strings contain - * characters that are valid for conversion to floats. - * - * This function will return `true` if all string elements - * has at least one character in [-+0-9eE.]. - * - * Any null entry or empty string will cause this function to return `false`. - * - * @param strings Strings instance for this operation. - * @return true if all string are valid - */ -bool all_float(strings_column_view const& strings); - /** @} */ // end of doxygen group } // namespace strings } // namespace cudf diff --git a/cpp/include/cudf/strings/convert/convert_floats.hpp b/cpp/include/cudf/strings/convert/convert_floats.hpp index cb4746dbf40..d1e00b36f6f 100644 --- a/cpp/include/cudf/strings/convert/convert_floats.hpp +++ b/cpp/include/cudf/strings/convert/convert_floats.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -68,6 +68,30 @@ std::unique_ptr from_floats( column_view const& floats, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns a boolean column identifying strings in which all + * characters are valid for conversion to floats. + * + * The output row entry will be set to `true` if the corresponding string element + * has at least one character in [-+0-9eE.]. + * + * @code{.pseudo} + * Example: + * s = ['123', '-456', '', 'A', '+7', '8.9', '3.7e+5'] + * b = s.is_float(s) + * b is [true, true, false, false, true, true, true] + * @endcode + * + * Any null row results in a null entry for that row in the output column. + * + * @param strings Strings instance for this operation. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return New column of boolean results for each string. + */ +std::unique_ptr is_float( + strings_column_view const& strings, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of doxygen group } // namespace strings } // namespace cudf diff --git a/cpp/include/cudf/strings/convert/convert_integers.hpp b/cpp/include/cudf/strings/convert/convert_integers.hpp index 8f42deb380d..1e2fa80b129 100644 --- a/cpp/include/cudf/strings/convert/convert_integers.hpp +++ b/cpp/include/cudf/strings/convert/convert_integers.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
@@ -73,6 +73,30 @@ std::unique_ptr from_integers( column_view const& integers, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns a boolean column identifying strings in which all + * characters are valid for conversion to integers. + * + * The output row entry will be set to `true` if the corresponding string element + * has at least one character in [-+0-9]. + * + * @code{.pseudo} + * Example: + * s = ['123', '-456', '', 'A', '+7'] + * b = s.is_integer(s) + * b is [true, true, false, false, true] + * @endcode + * + * Any null row results in a null entry for that row in the output column. + * + * @param strings Strings instance for this operation. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return New column of boolean results for each string. + */ +std::unique_ptr is_integer( + strings_column_view const& strings, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Returns a new integer numeric column parsing hexadecimal values from the * provided strings column. diff --git a/cpp/src/strings/char_types/char_types.cu b/cpp/src/strings/char_types/char_types.cu index 10496b89328..0b384ad0631 100644 --- a/cpp/src/strings/char_types/char_types.cu +++ b/cpp/src/strings/char_types/char_types.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -186,91 +186,6 @@ std::unique_ptr filter_characters_of_type(strings_column_view const& str mr); } -std::unique_ptr is_integer( - strings_column_view const& strings, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) -{ - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - // create output column - auto results = make_numeric_column(data_type{type_id::BOOL8}, - strings.size(), - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), - stream, - mr); - auto d_results = results->mutable_view().data(); - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings.size()), - d_results, - [d_column] __device__(size_type idx) { - if (d_column.is_null(idx)) return false; - return string::is_integer(d_column.element(idx)); - }); - results->set_null_count(strings.null_count()); - return results; -} - -bool all_integer(strings_column_view const& strings, rmm::cuda_stream_view stream) -{ - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - auto transformer_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), [d_column] __device__(size_type idx) { - if (d_column.is_null(idx)) return false; - return string::is_integer(d_column.element(idx)); - }); - return thrust::all_of(rmm::exec_policy(stream), - transformer_itr, - transformer_itr + strings.size(), - thrust::identity()); -} - -std::unique_ptr is_float( - strings_column_view const& strings, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) -{ - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - // create output column - auto results = make_numeric_column(data_type{type_id::BOOL8}, - strings.size(), - 
cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), - stream, - mr); - auto d_results = results->mutable_view().data(); - // check strings for valid float chars - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings.size()), - d_results, - [d_column] __device__(size_type idx) { - if (d_column.is_null(idx)) return false; - return string::is_float(d_column.element(idx)); - }); - results->set_null_count(strings.null_count()); - return results; -} - -bool all_float(strings_column_view const& strings, rmm::cuda_stream_view stream) -{ - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - auto transformer_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), [d_column] __device__(size_type idx) { - if (d_column.is_null(idx)) return false; - return string::is_float(d_column.element(idx)); - }); - return thrust::all_of(rmm::exec_policy(stream), - transformer_itr, - transformer_itr + strings.size(), - thrust::identity()); -} - } // namespace detail // external API @@ -295,31 +210,5 @@ std::unique_ptr filter_characters_of_type(strings_column_view const& str strings, types_to_remove, replacement, types_to_keep, rmm::cuda_stream_default, mr); } -std::unique_ptr is_integer(strings_column_view const& strings, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::is_integer(strings, rmm::cuda_stream_default, mr); -} - -std::unique_ptr is_float(strings_column_view const& strings, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::is_float(strings, rmm::cuda_stream_default, mr); -} - -bool all_integer(strings_column_view const& strings) -{ - CUDF_FUNC_RANGE(); - return detail::all_integer(strings, rmm::cuda_stream_default); -} - -bool all_float(strings_column_view const& strings) -{ - CUDF_FUNC_RANGE(); - return detail::all_float(strings, 
rmm::cuda_stream_default); -} - } // namespace strings } // namespace cudf diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 2bf65976986..b6d99efd51f 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -536,12 +537,50 @@ std::unique_ptr from_floats(column_view const& floats, } // namespace detail // external API - std::unique_ptr from_floats(column_view const& floats, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); return detail::from_floats(floats, rmm::cuda_stream_default, mr); } +namespace detail { +std::unique_ptr is_float( + strings_column_view const& strings, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + auto strings_column = column_device_view::create(strings.parent(), stream); + auto d_column = *strings_column; + // create output column + auto results = make_numeric_column(data_type{type_id::BOOL8}, + strings.size(), + cudf::detail::copy_bitmask(strings.parent(), stream, mr), + strings.null_count(), + stream, + mr); + auto d_results = results->mutable_view().data(); + // check strings for valid float chars + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(strings.size()), + d_results, + [d_column] __device__(size_type idx) { + if (d_column.is_null(idx)) return false; + return string::is_float(d_column.element(idx)); + }); + results->set_null_count(strings.null_count()); + return results; +} + +} // namespace detail + +// external API +std::unique_ptr is_float(strings_column_view const& strings, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::is_float(strings, rmm::cuda_stream_default, mr); +} + } // namespace strings } // namespace cudf diff --git a/cpp/src/strings/convert/convert_integers.cu 
b/cpp/src/strings/convert/convert_integers.cu index 112550fc25b..5c5032b5c87 100644 --- a/cpp/src/strings/convert/convert_integers.cu +++ b/cpp/src/strings/convert/convert_integers.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -245,7 +246,6 @@ std::unique_ptr from_integers(column_view const& integers, } // namespace detail // external API - std::unique_ptr from_integers(column_view const& integers, rmm::mr::device_memory_resource* mr) { @@ -253,5 +253,42 @@ std::unique_ptr from_integers(column_view const& integers, return detail::from_integers(integers, rmm::cuda_stream_default, mr); } +namespace detail { +std::unique_ptr is_integer( + strings_column_view const& strings, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + auto strings_column = column_device_view::create(strings.parent(), stream); + auto d_column = *strings_column; + // create output column + auto results = make_numeric_column(data_type{type_id::BOOL8}, + strings.size(), + cudf::detail::copy_bitmask(strings.parent(), stream, mr), + strings.null_count(), + stream, + mr); + auto d_results = results->mutable_view().data(); + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(strings.size()), + d_results, + [d_column] __device__(size_type idx) { + if (d_column.is_null(idx)) return false; + return string::is_integer(d_column.element(idx)); + }); + results->set_null_count(strings.null_count()); + return results; +} +} // namespace detail + +// external API +std::unique_ptr is_integer(strings_column_view const& strings, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return 
detail::is_integer(strings, rmm::cuda_stream_default, mr); +} + } // namespace strings } // namespace cudf diff --git a/cpp/tests/strings/chars_types_tests.cpp b/cpp/tests/strings/chars_types_tests.cpp index 803a9b01b07..702329edaba 100644 --- a/cpp/tests/strings/chars_types_tests.cpp +++ b/cpp/tests/strings/chars_types_tests.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include @@ -228,54 +227,6 @@ TEST_F(StringsCharsTest, Numerics) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } -TEST_F(StringsCharsTest, Integers) -{ - cudf::test::strings_column_wrapper strings1( - {"+175", "-34", "9.8", "17+2", "+-14", "1234567890", "67de", "", "1e10", "-", "++", ""}); - auto results = cudf::strings::is_integer(cudf::strings_column_view(strings1)); - cudf::test::fixed_width_column_wrapper expected1({1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected1); - EXPECT_FALSE(cudf::strings::all_integer(cudf::strings_column_view(strings1))); - - cudf::test::strings_column_wrapper strings2( - {"0", "+0", "-0", "1234567890", "-27341132", "+012", "023", "-045"}); - results = cudf::strings::is_integer(cudf::strings_column_view(strings2)); - cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1, 1, 1}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2); - EXPECT_TRUE(cudf::strings::all_integer(cudf::strings_column_view(strings2))); -} - -TEST_F(StringsCharsTest, Floats) -{ - cudf::test::strings_column_wrapper strings1({"+175", - "-9.8", - "7+2", - "+-4", - "6.7e17", - "-1.2e-5", - "e", - ".e", - "1.e+-2", - "00.00", - "1.0e+1.0", - "1.2.3", - "+", - "--", - ""}); - auto results = cudf::strings::is_float(cudf::strings_column_view(strings1)); - cudf::test::fixed_width_column_wrapper expected1( - {1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected1); - EXPECT_FALSE(cudf::strings::all_float(cudf::strings_column_view(strings1))); - - 
cudf::test::strings_column_wrapper strings2( - {"+175", "-34", "9.8", "1234567890", "6.7e17", "-917.2e5"}); - results = cudf::strings::is_float(cudf::strings_column_view(strings2)); - cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2); - EXPECT_TRUE(cudf::strings::all_float(cudf::strings_column_view(strings2))); -} - TEST_F(StringsCharsTest, EmptyStrings) { cudf::test::strings_column_wrapper strings({"", "", ""}); @@ -284,12 +235,6 @@ TEST_F(StringsCharsTest, EmptyStrings) auto results = cudf::strings::all_characters_of_type( strings_view, cudf::strings::string_character_types::ALPHANUM); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); - results = cudf::strings::is_integer(strings_view); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); - EXPECT_FALSE(cudf::strings::all_integer(strings_view)); - results = cudf::strings::is_float(strings_view); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); - EXPECT_FALSE(cudf::strings::all_float(strings_view)); } TEST_F(StringsCharsTest, FilterCharTypes) @@ -379,14 +324,6 @@ TEST_F(StringsCharsTest, EmptyStringsColumn) EXPECT_EQ(cudf::type_id::BOOL8, results->view().type().id()); EXPECT_EQ(0, results->view().size()); - results = cudf::strings::is_integer(strings_view); - EXPECT_EQ(cudf::type_id::BOOL8, results->view().type().id()); - EXPECT_EQ(0, results->view().size()); - - results = cudf::strings::is_float(strings_view); - EXPECT_EQ(cudf::type_id::BOOL8, results->view().type().id()); - EXPECT_EQ(0, results->view().size()); - results = cudf::strings::filter_characters_of_type( strings_view, cudf::strings::string_character_types::NUMERIC); EXPECT_EQ(cudf::type_id::STRING, results->view().type().id()); diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp index b98416d9edd..f7151363d83 100644 --- a/cpp/tests/strings/floats_tests.cpp +++ b/cpp/tests/strings/floats_tests.cpp @@ -27,6 +27,41 @@ struct 
StringsConvertTest : public cudf::test::BaseFixture { }; +TEST_F(StringsConvertTest, IsFloat) +{ + cudf::test::strings_column_wrapper strings; + auto strings_view = cudf::strings_column_view(strings); + auto results = cudf::strings::is_float(strings_view); + EXPECT_EQ(cudf::type_id::BOOL8, results->view().type().id()); + EXPECT_EQ(0, results->view().size()); + + cudf::test::strings_column_wrapper strings1({"+175", + "-9.8", + "7+2", + "+-4", + "6.7e17", + "-1.2e-5", + "e", + ".e", + "1.e+-2", + "00.00", + "1.0e+1.0", + "1.2.3", + "+", + "--", + ""}); + results = cudf::strings::is_float(cudf::strings_column_view(strings1)); + cudf::test::fixed_width_column_wrapper expected1( + {1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected1); + + cudf::test::strings_column_wrapper strings2( + {"+175", "-34", "9.8", "1234567890", "6.7e17", "-917.2e5"}); + results = cudf::strings::is_float(cudf::strings_column_view(strings2)); + cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2); +} + TEST_F(StringsConvertTest, ToFloats32) { std::vector h_strings{"1234", diff --git a/cpp/tests/strings/integers_tests.cu b/cpp/tests/strings/integers_tests.cu index 9e2b9809b26..d6bf03b3f76 100644 --- a/cpp/tests/strings/integers_tests.cu +++ b/cpp/tests/strings/integers_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,6 +29,27 @@ struct StringsConvertTest : public cudf::test::BaseFixture { }; +TEST_F(StringsConvertTest, IsInteger) +{ + cudf::test::strings_column_wrapper strings; + auto strings_view = cudf::strings_column_view(strings); + auto results = cudf::strings::is_integer(strings_view); + EXPECT_EQ(cudf::type_id::BOOL8, results->view().type().id()); + EXPECT_EQ(0, results->view().size()); + + cudf::test::strings_column_wrapper strings1( + {"+175", "-34", "9.8", "17+2", "+-14", "1234567890", "67de", "", "1e10", "-", "++", ""}); + results = cudf::strings::is_integer(cudf::strings_column_view(strings1)); + cudf::test::fixed_width_column_wrapper expected1({1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected1); + + cudf::test::strings_column_wrapper strings2( + {"0", "+0", "-0", "1234567890", "-27341132", "+012", "023", "-045"}); + results = cudf::strings::is_integer(cudf::strings_column_view(strings2)); + cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1, 1, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2); +} + TEST_F(StringsConvertTest, ToInteger) { std::vector h_strings{ diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index e8474bda1be..1db45e3c4ba 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include diff --git a/python/cudf/cudf/_lib/cpp/strings/char_types.pxd b/python/cudf/cudf/_lib/cpp/strings/char_types.pxd index ad675027c10..934269c6f25 100644 --- a/python/cudf/cudf/_lib/cpp/strings/char_types.pxd +++ b/python/cudf/cudf/_lib/cpp/strings/char_types.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2021, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr from cudf._lib.cpp.column.column_view cimport column_view @@ -33,11 +33,3 @@ cdef extern from "cudf/strings/char_types/char_types.hpp" \ string_character_types types_to_remove, string_scalar replacement, string_character_types types_to_keep) except + - - cdef unique_ptr[column] is_integer( - column_view source_strings - ) except + - - cdef unique_ptr[column] is_float( - column_view source_strings - ) except + diff --git a/python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd b/python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd index baee01b8f99..55a84b60efd 100644 --- a/python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd +++ b/python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2021, NVIDIA CORPORATION. from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view @@ -14,3 +14,7 @@ cdef extern from "cudf/strings/convert/convert_floats.hpp" namespace \ cdef unique_ptr[column] from_floats( column_view input_col) except + + + cdef unique_ptr[column] is_float( + column_view source_strings + ) except + diff --git a/python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd b/python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd index 92f99a2f5cb..6e45d4ba869 100644 --- a/python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd +++ b/python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2021, NVIDIA CORPORATION. 
from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view @@ -15,6 +15,10 @@ cdef extern from "cudf/strings/convert/convert_integers.hpp" namespace \ cdef unique_ptr[column] from_integers( column_view input_col) except + + cdef unique_ptr[column] is_integer( + column_view source_strings + ) except + + cdef unique_ptr[column] hex_to_integers( column_view input_col, data_type output_type) except + diff --git a/python/cudf/cudf/_lib/strings/char_types.pyx b/python/cudf/cudf/_lib/strings/char_types.pyx index 5d8d1522418..1890e98f956 100644 --- a/python/cudf/cudf/_lib/strings/char_types.pyx +++ b/python/cudf/cudf/_lib/strings/char_types.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2021, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -14,8 +14,6 @@ from cudf._lib.cpp.strings.char_types cimport ( all_characters_of_type as cpp_all_characters_of_type, filter_characters_of_type as cpp_filter_characters_of_type, string_character_types as string_character_types, - is_integer as cpp_is_integer, - is_float as cpp_is_float, ) @@ -191,35 +189,3 @@ def is_space(Column source_strings): )) return Column.from_unique_ptr(move(c_result)) - - -def is_integer(Column source_strings): - """ - Returns a Column of boolean values with True for `source_strings` - that have intergers. - """ - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - with nogil: - c_result = move(cpp_is_integer( - source_view - )) - - return Column.from_unique_ptr(move(c_result)) - - -def is_float(Column source_strings): - """ - Returns a Column of boolean values with True for `source_strings` - that have floats. 
- """ - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - with nogil: - c_result = move(cpp_is_float( - source_view - )) - - return Column.from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx b/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx new file mode 100644 index 00000000000..195d9b71f6e --- /dev/null +++ b/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx @@ -0,0 +1,29 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + +from libcpp cimport bool +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move + +from cudf._lib.cpp.column.column_view cimport column_view +from cudf._lib.column cimport Column +from cudf._lib.cpp.column.column cimport column + +from cudf._lib.cpp.strings.convert.convert_floats cimport ( + is_float as cpp_is_float, +) + + +def is_float(Column source_strings): + """ + Returns a Column of boolean values with True for `source_strings` + that have floats. + """ + cdef unique_ptr[column] c_result + cdef column_view source_view = source_strings.view() + + with nogil: + c_result = move(cpp_is_float( + source_view + )) + + return Column.from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx b/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx new file mode 100644 index 00000000000..d1bae1edd37 --- /dev/null +++ b/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx @@ -0,0 +1,29 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. 
+ +from libcpp cimport bool +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move + +from cudf._lib.cpp.column.column_view cimport column_view +from cudf._lib.column cimport Column +from cudf._lib.cpp.column.column cimport column + +from cudf._lib.cpp.strings.convert.convert_integers cimport ( + is_integer as cpp_is_integer, +) + + +def is_integer(Column source_strings): + """ + Returns a Column of boolean values with True for `source_strings` + that have integers. + """ + cdef unique_ptr[column] c_result + cdef column_view source_view = source_strings.view() + + with nogil: + c_result = move(cpp_is_integer( + source_view + )) + + return Column.from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 81abdd3f66a..3e6db01ea66 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -70,13 +70,15 @@ is_alpha as cpp_is_alpha, is_decimal as cpp_is_decimal, is_digit as cpp_is_digit, - is_float as cpp_is_float, - is_integer as cpp_is_integer, is_lower as cpp_is_lower, is_numeric as cpp_is_numeric, is_space as cpp_isspace, is_upper as cpp_is_upper, ) +from cudf._lib.strings.convert.convert_integers import ( + is_integer as cpp_is_integer, +) +from cudf._lib.strings.convert.convert_floats import is_float as cpp_is_float from cudf._lib.strings.combine import ( concatenate as cpp_concatenate, join as cpp_join, diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 4e5e4ce1987..535e497e8dc 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -8,7 +8,9 @@ from pandas.core.tools.datetimes import _unit_map import cudf -from cudf._lib.strings.char_types import is_integer as cpp_is_integer +from cudf._lib.strings.convert.convert_integers import ( + is_integer as cpp_is_integer, +) from cudf.core import column from cudf.core.index import as_index
from cudf.utils.dtypes import is_scalar