Skip to content

Commit

Permalink
Rename strings/array_tests.cu to strings/array_tests.cpp (#9480)
Browse files Browse the repository at this point in the history
This PR renames `cpp/tests/strings/array_tests.cu` to a `.cpp` file and cleans up several test cases. The gtests that required a `.cu` extension called the `cudf::strings::detail::scatter` function directly. These tests were created when `cudf::scatter` did not yet support strings columns. Changing them to just call `cudf::scatter` allows removing the device code dependencies.

Also, the `GatherTooBig` test case was taking over 16s to run. Too much time was spent building fake data just to verify an exception. This PR fixes the fake data (which is never actually read) to speed up its creation.

Finally, this PR removes `hash_string.cu` since it is redundant with `hashing/hash_test.cpp`, which already covers strings inputs. `hash_string.cu` was created before strings support was added to `cudf::hash`.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Mark Harris (https://github.com/harrism)

URL: #9480
  • Loading branch information
davidwendt authored Oct 20, 2021
1 parent 919fedf commit 52b7a9e
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 160 deletions.
3 changes: 1 addition & 2 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ ConfigureTest(DISPATCHER_TEST types/type_dispatcher_test.cu)
###################################################################################################
# - strings test ----------------------------------------------------------------------------------
ConfigureTest(STRINGS_TEST
strings/array_tests.cu
strings/array_tests.cpp
strings/attrs_tests.cpp
strings/booleans_tests.cpp
strings/case_tests.cpp
Expand All @@ -371,7 +371,6 @@ ConfigureTest(STRINGS_TEST
strings/find_multiple_tests.cpp
strings/fixed_point_tests.cpp
strings/floats_tests.cpp
strings/hash_string.cu
strings/integers_tests.cpp
strings/ipv4_tests.cpp
strings/json_tests.cpp
Expand Down
114 changes: 35 additions & 79 deletions cpp/tests/strings/array_tests.cu → cpp/tests/strings/array_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,17 @@
*/

#include <tests/strings/utilities.h>

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <cudf/column/column.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/copying.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/sorting.hpp>
#include <cudf/strings/detail/copying.hpp>
#include <cudf/strings/detail/scatter.cuh>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <thrust/iterator/constant_iterator.h>

Expand Down Expand Up @@ -120,7 +112,7 @@ TEST_P(SliceParmsTest, SliceAllEmpty)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
}

INSTANTIATE_TEST_CASE_P(SliceParms,
INSTANTIATE_TEST_CASE_P(StringsColumnTest,
SliceParmsTest,
testing::ValuesIn(std::array<cudf::size_type, 3>{5, 6, 7}));

Expand Down Expand Up @@ -161,98 +153,62 @@ TEST_F(StringsColumnTest, GatherZeroSizeStringsColumn)
cudf::test::expect_strings_empty(results.front()->view());
}

struct column_to_string_view_vector {
cudf::column_device_view const d_strings;
__device__ cudf::string_view operator()(cudf::size_type idx) const
{
cudf::string_view d_str{nullptr, 0};
if (d_strings.is_valid(idx)) d_str = d_strings.element<cudf::string_view>(idx);
return d_str;
}
};

TEST_F(StringsColumnTest, GatherTooBig)
{
cudf::test::strings_column_wrapper strings({"0123456789012345678901234567890123456789"});
std::vector<int8_t> h_chars(3000000);
cudf::test::fixed_width_column_wrapper<int8_t> chars(h_chars.begin(), h_chars.end());
cudf::test::fixed_width_column_wrapper<cudf::offset_type> offsets({0, 3000000});
auto input = cudf::column_view(
cudf::data_type{cudf::type_id::STRING}, 1, nullptr, nullptr, 0, 0, {offsets, chars});
auto map = thrust::constant_iterator<int8_t>(0);
cudf::test::fixed_width_column_wrapper<int8_t> gather_map(
map, map + std::numeric_limits<cudf::size_type>::max() / 20);
EXPECT_THROW(cudf::gather(cudf::table_view{{strings}}, gather_map), cudf::logic_error);
cudf::test::fixed_width_column_wrapper<int8_t> gather_map(map, map + 1000);
EXPECT_THROW(cudf::gather(cudf::table_view{{input}}, gather_map), cudf::logic_error);
}

TEST_F(StringsColumnTest, Scatter)
{
std::vector<const char*> h_strings1{"eee", "bb", nullptr, "", "aa", "bbb", "ééé"};
cudf::test::strings_column_wrapper strings1(
h_strings1.begin(),
h_strings1.end(),
thrust::make_transform_iterator(h_strings1.begin(), [](auto str) { return str != nullptr; }));
auto target = cudf::strings_column_view(strings1);
std::vector<const char*> h_strings2{"1", "22"};
cudf::test::strings_column_wrapper strings2(
h_strings2.begin(),
h_strings2.end(),
thrust::make_transform_iterator(h_strings2.begin(), [](auto str) { return str != nullptr; }));
auto source = cudf::strings_column_view(strings2);

std::vector<int32_t> h_scatter_map({4, 1});
auto scatter_map = cudf::detail::make_device_uvector_sync(h_scatter_map);

auto source_column = cudf::column_device_view::create(source.parent());
auto begin =
cudf::detail::make_counting_transform_iterator(0, column_to_string_view_vector{*source_column});
cudf::test::strings_column_wrapper target({"eee", "bb", "", "", "aa", "bbb", "ééé"},
{1, 1, 0, 1, 1, 1, 1});
cudf::test::strings_column_wrapper source({"1", "22"});

auto results =
cudf::strings::detail::scatter(begin, begin + source.size(), scatter_map.begin(), target);
cudf::test::fixed_width_column_wrapper<int32_t> scatter_map({4, 1});

std::vector<const char*> h_expected{"eee", "22", nullptr, "", "1", "bbb", "ééé"};
cudf::test::strings_column_wrapper expected(
h_expected.begin(),
h_expected.end(),
thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
auto results = cudf::scatter(cudf::table_view({source}), scatter_map, cudf::table_view({target}));

cudf::test::strings_column_wrapper expected({"eee", "22", "", "", "1", "bbb", "ééé"},
{1, 1, 0, 1, 1, 1, 1});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view().column(0), expected);
}

TEST_F(StringsColumnTest, ScatterScalar)
{
std::vector<const char*> h_strings1{"eee", "bb", nullptr, "", "aa", "bbb", "ééé"};
cudf::test::strings_column_wrapper strings1(
h_strings1.begin(),
h_strings1.end(),
thrust::make_transform_iterator(h_strings1.begin(), [](auto str) { return str != nullptr; }));
auto target = cudf::strings_column_view(strings1);
cudf::test::strings_column_wrapper target({"eee", "bb", "", "", "aa", "bbb", "ééé"},
{1, 1, 0, 1, 1, 1, 1});

std::vector<int32_t> h_scatter_map({0, 5});
auto scatter_map = cudf::detail::make_device_uvector_sync(h_scatter_map);
cudf::test::fixed_width_column_wrapper<int32_t> scatter_map({0, 5});

cudf::string_scalar scalar("__");
auto begin = thrust::make_constant_iterator(cudf::string_view(scalar.data(), scalar.size()));

auto results =
cudf::strings::detail::scatter(begin, begin + scatter_map.size(), scatter_map.begin(), target);
auto source = std::vector<std::reference_wrapper<const cudf::scalar>>({scalar});
auto results = cudf::scatter(source, scatter_map, cudf::table_view({target}));

std::vector<const char*> h_expected{"__", "bb", nullptr, "", "aa", "__", "ééé"};
cudf::test::strings_column_wrapper expected(
h_expected.begin(),
h_expected.end(),
thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
cudf::test::strings_column_wrapper expected({"__", "bb", "", "", "aa", "__", "ééé"},
{1, 1, 0, 1, 1, 1, 1});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view().column(0), expected);
}

TEST_F(StringsColumnTest, ScatterZeroSizeStringsColumn)
{
cudf::column_view zero_size_strings_column(
cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
auto source = cudf::strings_column_view(zero_size_strings_column);
cudf::column_view values(cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
auto target = cudf::strings_column_view(values);
cudf::column_view source(cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
cudf::column_view target(cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
cudf::column_view scatter_map(cudf::data_type{cudf::type_id::INT8}, 0, nullptr, nullptr, 0);

rmm::device_uvector<int32_t> scatter_map(0, rmm::cuda_stream_default);
cudf::string_scalar scalar("");
auto begin = thrust::make_constant_iterator(cudf::string_view(scalar.data(), scalar.size()));
auto results = cudf::scatter(cudf::table_view({source}), scatter_map, cudf::table_view({target}));
cudf::test::expect_strings_empty(results->view().column(0));

auto results = cudf::strings::detail::scatter(begin, begin, scatter_map.begin(), target);
cudf::test::expect_strings_empty(results->view());
cudf::string_scalar scalar("");
auto scalar_source = std::vector<std::reference_wrapper<const cudf::scalar>>({scalar});
results = cudf::scatter(scalar_source, scatter_map, cudf::table_view({target}));
cudf::test::expect_strings_empty(results->view().column(0));
}

CUDF_TEST_PROGRAM_MAIN()
79 changes: 0 additions & 79 deletions cpp/tests/strings/hash_string.cu

This file was deleted.

0 comments on commit 52b7a9e

Please sign in to comment.