diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index eb4e8735638..1bd1bb04b57 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -351,7 +351,7 @@ ConfigureTest(DISPATCHER_TEST types/type_dispatcher_test.cu) ################################################################################################### # - strings test ---------------------------------------------------------------------------------- ConfigureTest(STRINGS_TEST - strings/array_tests.cu + strings/array_tests.cpp strings/attrs_tests.cpp strings/booleans_tests.cpp strings/case_tests.cpp @@ -371,7 +371,6 @@ ConfigureTest(STRINGS_TEST strings/find_multiple_tests.cpp strings/fixed_point_tests.cpp strings/floats_tests.cpp - strings/hash_string.cu strings/integers_tests.cpp strings/ipv4_tests.cpp strings/json_tests.cpp diff --git a/cpp/tests/strings/array_tests.cu b/cpp/tests/strings/array_tests.cpp similarity index 61% rename from cpp/tests/strings/array_tests.cu rename to cpp/tests/strings/array_tests.cpp index a4d8ecb2bec..8b61999f93e 100644 --- a/cpp/tests/strings/array_tests.cu +++ b/cpp/tests/strings/array_tests.cpp @@ -15,25 +15,17 @@ */ #include + #include #include #include -#include -#include #include -#include -#include #include #include #include -#include -#include #include #include -#include - -#include #include @@ -120,7 +112,7 @@ TEST_P(SliceParmsTest, SliceAllEmpty) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } -INSTANTIATE_TEST_CASE_P(SliceParms, +INSTANTIATE_TEST_CASE_P(StringsColumnTest, SliceParmsTest, testing::ValuesIn(std::array{5, 6, 7})); @@ -161,98 +153,62 @@ TEST_F(StringsColumnTest, GatherZeroSizeStringsColumn) cudf::test::expect_strings_empty(results.front()->view()); } -struct column_to_string_view_vector { - cudf::column_device_view const d_strings; - __device__ cudf::string_view operator()(cudf::size_type idx) const - { - cudf::string_view d_str{nullptr, 0}; - if (d_strings.is_valid(idx)) d_str = d_strings.element(idx); - return d_str; - } -}; - TEST_F(StringsColumnTest, GatherTooBig) { - cudf::test::strings_column_wrapper strings({"0123456789012345678901234567890123456789"}); + std::vector h_chars(3000000); + cudf::test::fixed_width_column_wrapper chars(h_chars.begin(), h_chars.end()); + cudf::test::fixed_width_column_wrapper offsets({0, 3000000}); + auto input = cudf::column_view( + cudf::data_type{cudf::type_id::STRING}, 1, nullptr, nullptr, 0, 0, {offsets, chars}); auto map = thrust::constant_iterator(0); - cudf::test::fixed_width_column_wrapper gather_map( - map, map + std::numeric_limits::max() / 20); - EXPECT_THROW(cudf::gather(cudf::table_view{{strings}}, gather_map), cudf::logic_error); + cudf::test::fixed_width_column_wrapper gather_map(map, map + 1000); + EXPECT_THROW(cudf::gather(cudf::table_view{{input}}, gather_map), cudf::logic_error); } TEST_F(StringsColumnTest, Scatter) { - std::vector h_strings1{"eee", "bb", nullptr, "", "aa", "bbb", "ééé"}; - cudf::test::strings_column_wrapper strings1( - h_strings1.begin(), - h_strings1.end(), - thrust::make_transform_iterator(h_strings1.begin(), [](auto str) { return str != nullptr; })); - auto target = cudf::strings_column_view(strings1); - std::vector h_strings2{"1", "22"}; - cudf::test::strings_column_wrapper strings2( - h_strings2.begin(), - h_strings2.end(), - thrust::make_transform_iterator(h_strings2.begin(), [](auto str) { return str != nullptr; })); - auto source = cudf::strings_column_view(strings2); - - std::vector h_scatter_map({4, 1}); - auto scatter_map = cudf::detail::make_device_uvector_sync(h_scatter_map); - - auto source_column = cudf::column_device_view::create(source.parent()); - auto begin = - cudf::detail::make_counting_transform_iterator(0, column_to_string_view_vector{*source_column}); + cudf::test::strings_column_wrapper target({"eee", "bb", "", "", "aa", "bbb", "ééé"}, + {1, 1, 0, 1, 1, 1, 1}); + cudf::test::strings_column_wrapper source({"1", "22"}); - auto results = - cudf::strings::detail::scatter(begin, begin + source.size(), scatter_map.begin(), target); + cudf::test::fixed_width_column_wrapper scatter_map({4, 1}); - std::vector h_expected{"eee", "22", nullptr, "", "1", "bbb", "ééé"}; - cudf::test::strings_column_wrapper expected( - h_expected.begin(), - h_expected.end(), - thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + auto results = cudf::scatter(cudf::table_view({source}), scatter_map, cudf::table_view({target})); + + cudf::test::strings_column_wrapper expected({"eee", "22", "", "", "1", "bbb", "ééé"}, + {1, 1, 0, 1, 1, 1, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view().column(0), expected); } TEST_F(StringsColumnTest, ScatterScalar) { - std::vector h_strings1{"eee", "bb", nullptr, "", "aa", "bbb", "ééé"}; - cudf::test::strings_column_wrapper strings1( - h_strings1.begin(), - h_strings1.end(), - thrust::make_transform_iterator(h_strings1.begin(), [](auto str) { return str != nullptr; })); - auto target = cudf::strings_column_view(strings1); + cudf::test::strings_column_wrapper target({"eee", "bb", "", "", "aa", "bbb", "ééé"}, + {1, 1, 0, 1, 1, 1, 1}); - std::vector h_scatter_map({0, 5}); - auto scatter_map = cudf::detail::make_device_uvector_sync(h_scatter_map); + cudf::test::fixed_width_column_wrapper scatter_map({0, 5}); cudf::string_scalar scalar("__"); - auto begin = thrust::make_constant_iterator(cudf::string_view(scalar.data(), scalar.size())); - - auto results = - cudf::strings::detail::scatter(begin, begin + scatter_map.size(), scatter_map.begin(), target); + auto source = std::vector>({scalar}); + auto results = cudf::scatter(source, scatter_map, cudf::table_view({target})); - std::vector h_expected{"__", "bb", nullptr, "", "aa", "__", "ééé"}; - cudf::test::strings_column_wrapper expected( - h_expected.begin(), - h_expected.end(), - thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + cudf::test::strings_column_wrapper expected({"__", "bb", "", "", "aa", "__", "ééé"}, + {1, 1, 0, 1, 1, 1, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view().column(0), expected); } TEST_F(StringsColumnTest, ScatterZeroSizeStringsColumn) { - cudf::column_view zero_size_strings_column( - cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); - auto source = cudf::strings_column_view(zero_size_strings_column); - cudf::column_view values(cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); - auto target = cudf::strings_column_view(values); + cudf::column_view source(cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); + cudf::column_view target(cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); + cudf::column_view scatter_map(cudf::data_type{cudf::type_id::INT8}, 0, nullptr, nullptr, 0); - rmm::device_uvector scatter_map(0, rmm::cuda_stream_default); - cudf::string_scalar scalar(""); - auto begin = thrust::make_constant_iterator(cudf::string_view(scalar.data(), scalar.size())); + auto results = cudf::scatter(cudf::table_view({source}), scatter_map, cudf::table_view({target})); + cudf::test::expect_strings_empty(results->view().column(0)); - auto results = cudf::strings::detail::scatter(begin, begin, scatter_map.begin(), target); - cudf::test::expect_strings_empty(results->view()); + cudf::string_scalar scalar(""); + auto scalar_source = std::vector>({scalar}); + results = cudf::scatter(scalar_source, scatter_map, cudf::table_view({target})); + cudf::test::expect_strings_empty(results->view().column(0)); } CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/strings/hash_string.cu b/cpp/tests/strings/hash_string.cu deleted file mode 100644 index b5298d39bda..00000000000 --- a/cpp/tests/strings/hash_string.cu +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "./utilities.h" -#include "rmm/exec_policy.hpp" - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -struct StringsHashTest : public cudf::test::BaseFixture { -}; - -struct hash_string_fn { - cudf::column_device_view d_strings; - uint32_t __device__ operator()(uint32_t idx) - { - if (d_strings.is_null(idx)) return 0; - auto item = d_strings.element(idx); - return MurmurHash3_32{}(item); - } -}; - -TEST_F(StringsHashTest, HashTest) -{ - std::vector h_strings{"abcdefghijklmnopqrstuvwxyz", - "abcdefghijklmnopqrstuvwxyz", - "ABCDEFGHIJKLMNOPQRSTUVWXYZ", - "0123456789", - "4", - "", - nullptr, - "last one"}; - cudf::test::strings_column_wrapper strings( - h_strings.begin(), - h_strings.end(), - thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); - - auto strings_view = cudf::strings_column_view(strings); - auto strings_column = cudf::column_device_view::create(strings_view.parent()); - auto d_view = *strings_column; - - rmm::device_uvector d_values(strings_view.size(), rmm::cuda_stream_default); - thrust::transform(rmm::exec_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_view.size()), - d_values.begin(), - hash_string_fn{d_view}); - - uint32_t h_expected[] = { - 2739798893, 2739798893, 3506676360, 1891213601, 3778137224, 0, 0, 1551088011}; - auto h_values = cudf::detail::make_host_vector_sync(d_values); - for (uint32_t idx = 0; idx < h_values.size(); ++idx) - EXPECT_EQ(h_values[idx], h_expected[idx]); -}