diff --git a/cpp/include/cudf/lists/list_device_view.cuh b/cpp/include/cudf/lists/list_device_view.cuh index 1653a03ce37..07346e78261 100644 --- a/cpp/include/cudf/lists/list_device_view.cuh +++ b/cpp/include/cudf/lists/list_device_view.cuh @@ -129,6 +129,13 @@ class list_device_view { */ [[nodiscard]] __device__ inline size_type size() const { return _size; } + /** + * @brief Returns the row index of this list in the original lists column. + * + * @return The row index of this list + */ + [[nodiscard]] __device__ inline size_type row_index() const { return _row_index; } + /** * @brief Fetches the lists_column_device_view that contains this list. * diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 16e155e9e6c..b4223a1c0c1 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -30,7 +31,6 @@ #include #include -#include #include #include #include @@ -42,7 +42,6 @@ #include namespace cudf::lists { - namespace { /** @@ -54,13 +53,13 @@ auto constexpr __device__ NOT_FOUND_SENTINEL = size_type{-1}; /** * @brief A sentinel value used for marking that a given output row should be null. + * + * This value should be different from `NOT_FOUND_SENTINEL`. */ auto constexpr __device__ NULL_SENTINEL = std::numeric_limits::min(); /** - * @brief Indicate the current supported types in `cudf::lists::contains`. - * - * TODO: Add supported nested types. + * @brief Check if the given type is a supported non-nested type in `cudf::lists::contains`. */ template static auto constexpr is_supported_non_nested_type() @@ -69,13 +68,52 @@ static auto constexpr is_supported_non_nested_type() } /** - * @brief Functor to perform searching for index of a key element in a given list. + * @brief Check if the given type is supported in `cudf::lists::contains`. 
+ */ +template +auto constexpr is_supported_type() +{ + return is_supported_non_nested_type() || cudf::is_nested(); +} + +/** + * @brief Return a pair of index iterators {begin, end} to loop through elements within a + * list. + * + * Depending on the value of `forward`, a pair of forward or reverse iterators will be + * returned, allowing to loop through elements in the list in first-to-last or last-to-first + * order. + * + * Note that the element indices always restart to `0` at the first position in each list. + * + * @tparam forward A boolean value indicating whether we want to iterate elements in the list + * by forward or reverse order. + * @param size The number of elements in the list. + * @return A pair of {begin, end} iterators to iterate through the range `[0, size)`. + */ +template +__device__ auto element_index_pair_iter(size_type const size) +{ + auto const begin = thrust::make_counting_iterator(0); + auto const end = thrust::make_counting_iterator(size); + + if constexpr (forward) { + return thrust::pair{begin, end}; + } else { + return thrust::pair{thrust::make_reverse_iterator(end), thrust::make_reverse_iterator(begin)}; + } +} + +/** + * @brief Functor to perform searching for index of a key element in a given list, specialized + * for non-nested types. */ -struct search_list_fn { +struct search_list_non_nested_types_fn { duplicate_find_option const find_option; template ())> - __device__ size_type operator()(list_device_view list, thrust::optional key_opt) const + __device__ size_type operator()(list_device_view const list, + thrust::optional const key_opt) const { // A null list or null key will result in a null output row. 
if (list.is_null() || !key_opt) { return NULL_SENTINEL; } @@ -86,7 +124,7 @@ struct search_list_fn { } template ())> - __device__ size_type operator()(list_device_view, thrust::optional) const + __device__ size_type operator()(list_device_view const, thrust::optional const) const { CUDF_UNREACHABLE("Unsupported type."); } @@ -98,55 +136,179 @@ struct search_list_fn { { auto const [begin, end] = element_index_pair_iter(list.size()); auto const found_iter = - thrust::find_if(thrust::seq, begin, end, [&] __device__(auto const idx) { + thrust::find_if(thrust::seq, begin, end, [=] __device__(auto const idx) { return !list.is_null(idx) && cudf::equality_compare(list.template element(idx), search_key); }); // If the key is found, return its found position in the list from `found_iter`. return found_iter == end ? NOT_FOUND_SENTINEL : *found_iter; } +}; + +/** + * @brief Functor to perform searching for index of a key element in a given list, specialized + * for nested types. + */ +template +struct search_list_nested_types_fn { + duplicate_find_option const find_option; + KeyValidityIter const key_validity_iter; + EqComparator const d_comp; + bool const search_key_is_scalar; + + search_list_nested_types_fn(duplicate_find_option const find_option, + KeyValidityIter const key_validity_iter, + EqComparator const& d_comp, + bool search_key_is_scalar) + : find_option(find_option), + key_validity_iter(key_validity_iter), + d_comp(d_comp), + search_key_is_scalar(search_key_is_scalar) + { + } + + __device__ size_type operator()(list_device_view const list) const + { + // A null list or null key will result in a null output row. + if (list.is_null() || !key_validity_iter[list.row_index()]) { return NULL_SENTINEL; } + + return find_option == duplicate_find_option::FIND_FIRST ? search_list(list) + : search_list(list); + } - /** - * @brief Return a pair of index iterators {begin, end} to loop through elements within a list. 
- * - * Depending on the value of `forward`, a pair of forward or reverse iterators will be - * returned, allowing to loop through elements in the list in first-to-last or last-to-first - * order. - * - * Note that the element indices always restart to `0` at the first position in each list. - * - * @tparam forward A boolean value indicating whether we want to iterate elements in the list by - * forward or reverse order. - * @param size The number of elements in the list. - * @return A pair of {begin, end} iterators to iterate through the range `[0, size)`. - */ + private: template - static __device__ auto element_index_pair_iter(size_type const size) + __device__ inline size_type search_list(list_device_view const list) const { - if constexpr (forward) { - return thrust::pair(thrust::make_counting_iterator(0), thrust::make_counting_iterator(size)); + using cudf::experimental::row::lhs_index_type; + using cudf::experimental::row::rhs_index_type; + + auto const [begin, end] = element_index_pair_iter(list.size()); + auto const found_iter = + thrust::find_if(thrust::seq, begin, end, [=] __device__(auto const idx) { + return !list.is_null(idx) && + d_comp(static_cast(list.element_offset(idx)), + static_cast(search_key_is_scalar ? 0 : list.row_index())); + }); + // If the key is found, return its found position in the list from `found_iter`. + return found_iter == end ? NOT_FOUND_SENTINEL : *found_iter; + } +}; + +/** + * @brief Function to search for key element(s) in the corresponding rows of a lists column, + * specialized for non-nested types. 
+ */ +template +void index_of_non_nested_types(InputIterator input_it, + size_type num_rows, + OutputIterator output_it, + SearchKeyType const& search_keys, + bool search_keys_have_nulls, + duplicate_find_option find_option, + rmm::cuda_stream_view stream) +{ + auto const do_search = [=](auto const keys_iter) { + thrust::transform(rmm::exec_policy(stream), + input_it, + input_it + num_rows, + keys_iter, + output_it, + search_list_non_nested_types_fn{find_option}); + }; + + if constexpr (search_key_is_scalar) { + auto const keys_iter = cudf::detail::make_optional_iterator( + search_keys, nullate::DYNAMIC{search_keys_have_nulls}); + do_search(keys_iter); + } else { + auto const keys_cdv_ptr = column_device_view::create(search_keys, stream); + auto const keys_iter = cudf::detail::make_optional_iterator( + *keys_cdv_ptr, nullate::DYNAMIC{search_keys_have_nulls}); + do_search(keys_iter); + } +} + +/** + * @brief Function to search for index of key element(s) in the corresponding rows of a lists + * column, specialized for nested types. + */ +template +void index_of_nested_types(InputIterator input_it, + size_type num_rows, + OutputIterator output_it, + column_view const& child, + SearchKeyType const& search_keys, + duplicate_find_option find_option, + rmm::cuda_stream_view stream) +{ + // Create a `table_view` from the search key(s). + // If the input search key is a (nested type) scalar, a new column is materialized from that + // scalar before a `table_view` is generated from it. As such, the new created column will also be + // returned to keep the result `table_view` valid. 
+ [[maybe_unused]] auto const [keys_tview, unused_column] = + [&]() -> std::pair> { + if constexpr (std::is_same_v) { + auto tmp_column = make_column_from_scalar(search_keys, 1, stream); + auto const keys_tview = tmp_column->view(); + return {table_view{{keys_tview}}, std::move(tmp_column)}; } else { - return thrust::pair(thrust::make_reverse_iterator(thrust::make_counting_iterator(size)), - thrust::make_reverse_iterator(thrust::make_counting_iterator(0))); + return {table_view{{search_keys}}, nullptr}; } + }(); + auto const child_tview = table_view{{child}}; + auto const has_nulls = has_nested_nulls(child_tview) || has_nested_nulls(keys_tview); + auto const comparator = + cudf::experimental::row::equality::two_table_comparator(child_tview, keys_tview, stream); + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); + + auto const do_search = [=](auto const key_validity_iter) { + thrust::transform( + rmm::exec_policy(stream), + input_it, + input_it + num_rows, + output_it, + search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar}); + }; + + if constexpr (search_key_is_scalar) { + auto const key_validity_iter = cudf::detail::make_validity_iterator(search_keys); + do_search(key_validity_iter); + } else { + auto const keys_dv_ptr = column_device_view::create(search_keys, stream); + auto const key_validity_iter = cudf::detail::make_validity_iterator(*keys_dv_ptr); + do_search(key_validity_iter); } -}; +} /** - * @brief Dispatch functor to search for key element(s) in the corresponding rows of a lists column. + * @brief Dispatch functor to search for index of key element(s) in the corresponding rows of a + * lists column. */ struct dispatch_index_of { + // SFINAE with conditional return type because we need to support device lambda in this function. + // This is required due to a limitation of nvcc. 
template - std::enable_if_t(), std::unique_ptr> operator()( + std::enable_if_t(), std::unique_ptr> operator()( lists_column_view const& lists, SearchKeyType const& search_keys, duplicate_find_option find_option, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { - CUDF_EXPECTS(!cudf::is_nested(lists.child().type()), - "Nested types not supported in list search operations."); - CUDF_EXPECTS(lists.child().type() == search_keys.type(), + // Access the child column through `child()` method, not `get_sliced_child()`. + // This is because slicing offset has already been taken into account during row + // comparisons. + auto const child = lists.child(); + + CUDF_EXPECTS(child.type() == search_keys.type(), "Type/Scale of search key does not match list column element type."); CUDF_EXPECTS(search_keys.type().id() != type_id::EMPTY, "Type cannot be empty."); @@ -159,12 +321,14 @@ struct dispatch_index_of { } }(); + auto const num_rows = lists.size(); + if (search_key_is_scalar && search_keys_have_nulls) { // If the scalar key is invalid/null, the entire output column will be all nulls. 
return make_numeric_column(data_type{cudf::type_to_id()}, - lists.size(), - cudf::create_null_mask(lists.size(), mask_state::ALL_NULL, mr), - lists.size(), + num_rows, + cudf::create_null_mask(num_rows, mask_state::ALL_NULL, mr), + num_rows, stream, mr); } @@ -177,33 +341,21 @@ struct dispatch_index_of { }); auto out_positions = make_numeric_column( - data_type{type_to_id()}, lists.size(), cudf::mask_state::UNALLOCATED, stream, mr); - auto const out_begin = out_positions->mutable_view().template begin(); - - auto const do_search = [&](auto const keys_iter) { - thrust::transform(rmm::exec_policy(stream), - input_it, - input_it + lists.size(), - keys_iter, - out_begin, - search_list_fn{find_option}); - }; - - if constexpr (search_key_is_scalar) { - auto const keys_iter = cudf::detail::make_optional_iterator( - search_keys, nullate::DYNAMIC{search_keys_have_nulls}); - do_search(keys_iter); - } else { - auto const keys_cdv_ptr = column_device_view::create(search_keys, stream); - auto const keys_iter = cudf::detail::make_optional_iterator( - *keys_cdv_ptr, nullate::DYNAMIC{search_keys_have_nulls}); - do_search(keys_iter); + data_type{type_to_id()}, num_rows, cudf::mask_state::UNALLOCATED, stream, mr); + auto const output_it = out_positions->mutable_view().template begin(); + + if constexpr (not cudf::is_nested()) { + index_of_non_nested_types( + input_it, num_rows, output_it, search_keys, search_keys_have_nulls, find_option, stream); + } else { // list + struct + index_of_nested_types( + input_it, num_rows, output_it, child, search_keys, find_option, stream); } if (search_keys_have_nulls || lists.has_nulls()) { auto [null_mask, null_count] = cudf::detail::valid_if( - out_begin, - out_begin + lists.size(), + output_it, + output_it + num_rows, [] __device__(auto const idx) { return idx != NULL_SENTINEL; }, stream, mr); @@ -213,7 +365,7 @@ struct dispatch_index_of { } template - std::enable_if_t(), std::unique_ptr> operator()( + std::enable_if_t(), std::unique_ptr> 
operator()( lists_column_view const&, SearchKeyType const&, duplicate_find_option, @@ -226,7 +378,7 @@ struct dispatch_index_of { /** * @brief Converts key-positions vector (from `index_of()`) to a BOOL8 vector, indicating if - * the search key(s) were found. + * the search key(s) were found. */ std::unique_ptr to_contains(std::unique_ptr&& key_positions, rmm::cuda_stream_view stream, @@ -282,8 +434,12 @@ std::unique_ptr contains(lists_column_view const& lists, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return to_contains( - index_of(lists, search_key, duplicate_find_option::FIND_FIRST, stream), stream, mr); + auto key_indices = index_of(lists, + search_key, + duplicate_find_option::FIND_FIRST, + stream, + rmm::mr::get_current_device_resource()); + return to_contains(std::move(key_indices), stream, mr); } std::unique_ptr contains(lists_column_view const& lists, @@ -294,8 +450,12 @@ std::unique_ptr contains(lists_column_view const& lists, CUDF_EXPECTS(search_keys.size() == lists.size(), "Number of search keys must match list column size."); - return to_contains( - index_of(lists, search_keys, duplicate_find_option::FIND_FIRST, stream), stream, mr); + auto key_indices = index_of(lists, + search_keys, + duplicate_find_option::FIND_FIRST, + stream, + rmm::mr::get_current_device_resource()); + return to_contains(std::move(key_indices), stream, mr); } std::unique_ptr contains_nulls(lists_column_view const& lists, @@ -305,7 +465,7 @@ std::unique_ptr contains_nulls(lists_column_view const& lists, auto const lists_cv = lists.parent(); auto output = make_numeric_column(data_type{type_to_id()}, lists.size(), - copy_bitmask(lists_cv), + copy_bitmask(lists_cv, stream, mr), lists_cv.null_count(), stream, mr); diff --git a/cpp/tests/lists/contains_tests.cpp b/cpp/tests/lists/contains_tests.cpp index 4cc0c4155b8..a93ef4f8b1d 100644 --- a/cpp/tests/lists/contains_tests.cpp +++ b/cpp/tests/lists/contains_tests.cpp @@ -17,10 +17,8 @@ #include #include 
-#include #include #include -#include #include #include @@ -31,24 +29,7 @@ namespace cudf { namespace test { -struct ContainsTest : public BaseFixture { -}; - -using ContainsTestTypes = Concat; - -template -struct TypedContainsTest : public ContainsTest { -}; - -TYPED_TEST_SUITE(TypedContainsTest, ContainsTestTypes); - namespace { - -auto constexpr x = int32_t{-1}; // Placeholder for nulls. -auto constexpr absent = size_type{-1}; // Index when key is not found in a list. -auto constexpr FIND_FIRST = lists::duplicate_find_option::FIND_FIRST; -auto constexpr FIND_LAST = lists::duplicate_find_option::FIND_LAST; - template (), void>* = nullptr> auto create_scalar_search_key(T const& value) { @@ -82,6 +63,12 @@ auto create_scalar_search_key(typename T::rep const& value) return search_key; } +template +auto make_struct_scalar(Args&&... args) +{ + return cudf::struct_scalar(std::vector{std::forward(args)...}); +} + template (), void>* = nullptr> auto create_null_search_key() { @@ -108,11 +95,30 @@ auto create_null_search_key() } // namespace +auto constexpr X = int32_t{0}; // Placeholder for nulls. +auto constexpr ABSENT = size_type{-1}; // Index when key is not found in a list. 
+auto constexpr FIND_FIRST = lists::duplicate_find_option::FIND_FIRST; +auto constexpr FIND_LAST = lists::duplicate_find_option::FIND_LAST; + +using bools_col = cudf::test::fixed_width_column_wrapper; +using indices_col = cudf::test::fixed_width_column_wrapper; +using structs_col = cudf::test::structs_column_wrapper; +using strings_col = cudf::test::strings_column_wrapper; + using iterators::all_nulls; using iterators::null_at; using iterators::nulls_at; -using bools = fixed_width_column_wrapper; -using indices = fixed_width_column_wrapper; + +using ContainsTestTypes = Concat; + +struct ContainsTest : public BaseFixture { +}; + +template +struct TypedContainsTest : public ContainsTest { +}; + +TYPED_TEST_SUITE(TypedContainsTest, ContainsTestTypes); TYPED_TEST(TypedContainsTest, ScalarKeyWithNoNulls) { @@ -134,25 +140,25 @@ TYPED_TEST(TypedContainsTest, ScalarKeyWithNoNulls) { // CONTAINS auto result = lists::contains(search_space, *search_key_one); - auto expected = bools{1, 0, 0, 1, 0, 0, 0, 0, 1, 0}; + auto expected = bools_col{1, 0, 0, 1, 0, 0, 0, 0, 1, 0}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // CONTAINS NULLS auto result = lists::contains_nulls(search_space); - auto expected = bools{0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + auto expected = bools_col{0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST auto result = lists::index_of(search_space, *search_key_one, FIND_FIRST); - auto expected = indices{1, absent, absent, 2, absent, absent, absent, absent, 0, absent}; + auto expected = indices_col{1, ABSENT, ABSENT, 2, ABSENT, ABSENT, ABSENT, ABSENT, 0, ABSENT}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST auto result = lists::index_of(search_space, *search_key_one, FIND_LAST); - auto expected = indices{3, absent, absent, 4, absent, absent, absent, absent, 2, absent}; + auto expected = indices_col{3, ABSENT, ABSENT, 4, ABSENT, ABSENT, ABSENT, ABSENT, 2, ABSENT}; 
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -179,27 +185,27 @@ TYPED_TEST(TypedContainsTest, ScalarKeyWithNullLists) { // CONTAINS auto result = lists::contains(search_space, *search_key_one); - auto expected = bools{{1, 0, 0, x, 1, 0, 0, 0, 0, 1, x}, nulls_at({3, 10})}; + auto expected = bools_col{{1, 0, 0, X, 1, 0, 0, 0, 0, 1, X}, nulls_at({3, 10})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // CONTAINS NULLS auto result = lists::contains_nulls(search_space); - auto expected = bools{{0, 0, 0, x, 0, 0, 0, 0, 0, 0, x}, nulls_at({3, 10})}; + auto expected = bools_col{{0, 0, 0, X, 0, 0, 0, 0, 0, 0, X}, nulls_at({3, 10})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST - auto result = lists::index_of(search_space, *search_key_one, FIND_FIRST); - auto expected = - indices{{1, absent, absent, x, 2, absent, absent, absent, absent, 0, x}, nulls_at({3, 10})}; + auto result = lists::index_of(search_space, *search_key_one, FIND_FIRST); + auto expected = indices_col{{1, ABSENT, ABSENT, X, 2, ABSENT, ABSENT, ABSENT, ABSENT, 0, X}, + nulls_at({3, 10})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST - auto result = lists::index_of(search_space, *search_key_one, FIND_LAST); - auto expected = - indices{{3, absent, absent, x, 4, absent, absent, absent, absent, 0, x}, nulls_at({3, 10})}; + auto result = lists::index_of(search_space, *search_key_one, FIND_LAST); + auto expected = indices_col{{3, ABSENT, ABSENT, X, 4, ABSENT, ABSENT, ABSENT, ABSENT, 0, X}, + nulls_at({3, 10})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -230,25 +236,27 @@ TYPED_TEST(TypedContainsTest, SlicedLists) { // CONTAINS auto result = lists::contains(sliced_column_1, *search_key_one); - auto expected_result = bools{{0, 0, x, 1, 0, 0, 0}, null_at(2)}; + auto expected_result = bools_col{{0, 0, X, 1, 0, 0, 0}, null_at(2)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view()); } { // 
CONTAINS NULLS auto result = lists::contains_nulls(sliced_column_1); - auto expected_result = bools{{0, 0, x, 0, 0, 0, 0}, null_at(2)}; + auto expected_result = bools_col{{0, 0, X, 0, 0, 0, 0}, null_at(2)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view()); } { // FIND_FIRST - auto result = lists::index_of(sliced_column_1, *search_key_one, FIND_FIRST); - auto expected_result = indices{{absent, absent, 0, 2, absent, absent, absent}, null_at(2)}; + auto result = lists::index_of(sliced_column_1, *search_key_one, FIND_FIRST); + auto expected_result = + indices_col{{ABSENT, ABSENT, 0, 2, ABSENT, ABSENT, ABSENT}, null_at(2)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view()); } { // FIND_LAST - auto result = lists::index_of(sliced_column_1, *search_key_one, FIND_LAST); - auto expected_result = indices{{absent, absent, 0, 4, absent, absent, absent}, null_at(2)}; + auto result = lists::index_of(sliced_column_1, *search_key_one, FIND_LAST); + auto expected_result = + indices_col{{ABSENT, ABSENT, 0, 4, ABSENT, ABSENT, ABSENT}, null_at(2)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view()); } } @@ -260,25 +268,25 @@ TYPED_TEST(TypedContainsTest, SlicedLists) { // CONTAINS auto result = lists::contains(sliced_column_2, *search_key_one); - auto expected_result = bools{{x, 1, 0, 0, 0, 0, 1}, null_at(0)}; + auto expected_result = bools_col{{X, 1, 0, 0, 0, 0, 1}, null_at(0)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view()); } { // CONTAINS NULLS auto result = lists::contains_nulls(sliced_column_2); - auto expected_result = bools{{x, 0, 0, 0, 0, 0, 0}, null_at(0)}; + auto expected_result = bools_col{{X, 0, 0, 0, 0, 0, 0}, null_at(0)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view()); } { // FIND_FIRST auto result = lists::index_of(sliced_column_2, *search_key_one, FIND_FIRST); - auto expected_result = indices{{0, 2, absent, absent, absent, absent, 0}, null_at(0)}; + auto 
expected_result = indices_col{{0, 2, ABSENT, ABSENT, ABSENT, ABSENT, 0}, null_at(0)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view()); } { // FIND_LAST auto result = lists::index_of(sliced_column_2, *search_key_one, FIND_LAST); - auto expected_result = indices{{0, 4, absent, absent, absent, absent, 2}, null_at(0)}; + auto expected_result = indices_col{{0, 4, ABSENT, ABSENT, ABSENT, ABSENT, 2}, null_at(0)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view()); } } @@ -289,34 +297,34 @@ TYPED_TEST(TypedContainsTest, ScalarKeyNonNullListsWithNullValues) // Test List columns that have no NULL list rows, but NULL elements in some list rows. using T = TypeParam; - auto numerals = fixed_width_column_wrapper{{x, 1, 2, x, 4, 5, x, 7, 8, x, x, 1, 2, x, 1}, + auto numerals = fixed_width_column_wrapper{{X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})}; auto search_space = make_lists_column( - 8, indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 0, {}); + 8, indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 0, {}); // Search space: [ [x], [1,2], [x,4,5,x], [], [], [7,8,x], [x], [1,2,x,1] ] auto search_key_one = create_scalar_search_key(1); { // CONTAINS auto result = lists::contains(search_space->view(), *search_key_one); - auto expected = bools{0, 1, 0, 0, 0, 0, 0, 1}; + auto expected = bools_col{0, 1, 0, 0, 0, 0, 0, 1}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // CONTAINS NULLS auto result = lists::contains_nulls(search_space->view()); - auto expected = bools{1, 0, 1, 0, 0, 1, 1, 1}; + auto expected = bools_col{1, 0, 1, 0, 0, 1, 1, 1}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST auto result = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST); - auto expected = indices{absent, 0, absent, absent, absent, absent, absent, 0}; + auto expected = indices_col{ABSENT, 0, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 0}; 
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST auto result = lists::index_of(search_space->view(), *search_key_one, FIND_LAST); - auto expected = indices{absent, 0, absent, absent, absent, absent, absent, 3}; + auto expected = indices_col{ABSENT, 0, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 3}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -325,13 +333,13 @@ TYPED_TEST(TypedContainsTest, ScalarKeysWithNullsInLists) { using T = TypeParam; - auto numerals = fixed_width_column_wrapper{{x, 1, 2, x, 4, 5, x, 7, 8, x, x, 1, 2, x, 1}, + auto numerals = fixed_width_column_wrapper{{X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})}; auto input_null_mask_iter = null_at(4); auto search_space = make_lists_column( 8, - indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), + indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 1, cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); @@ -341,25 +349,25 @@ TYPED_TEST(TypedContainsTest, ScalarKeysWithNullsInLists) { // CONTAINS. auto result = lists::contains(search_space->view(), *search_key_one); - auto expected = bools{{0, 1, 0, 0, x, 0, 0, 1}, null_at(4)}; + auto expected = bools_col{{0, 1, 0, 0, X, 0, 0, 1}, null_at(4)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // CONTAINS NULLS. auto result = lists::contains_nulls(search_space->view()); - auto expected = bools{{1, 0, 1, 0, x, 1, 1, 1}, null_at(4)}; + auto expected = bools_col{{1, 0, 1, 0, X, 1, 1, 1}, null_at(4)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST. auto result = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST); - auto expected = indices{{absent, 0, absent, absent, x, absent, absent, 0}, null_at(4)}; + auto expected = indices_col{{ABSENT, 0, ABSENT, ABSENT, X, ABSENT, ABSENT, 0}, null_at(4)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST. 
auto result = lists::index_of(search_space->view(), *search_key_one, FIND_LAST); - auto expected = indices{{absent, 0, absent, absent, x, absent, absent, 3}, null_at(4)}; + auto expected = indices_col{{ABSENT, 0, ABSENT, ABSENT, X, ABSENT, ABSENT, 3}, null_at(4)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -368,7 +376,7 @@ TEST_F(ContainsTest, BoolScalarWithNullsInLists) { using T = bool; - auto numerals = fixed_width_column_wrapper{{x, 1, 1, x, 1, 1, x, 1, 1, x, x, 1, 1, x, 1}, + auto numerals = fixed_width_column_wrapper{{X, 1, 1, X, 1, 1, X, 1, 1, X, X, 1, 1, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})}; auto input_null_mask_iter = null_at(4); auto search_space = make_lists_column( @@ -383,25 +391,25 @@ TEST_F(ContainsTest, BoolScalarWithNullsInLists) { // CONTAINS auto result = lists::contains(search_space->view(), *search_key_one); - auto expected = bools{{0, 1, 1, 0, x, 1, 0, 1}, null_at(4)}; + auto expected = bools_col{{0, 1, 1, 0, X, 1, 0, 1}, null_at(4)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // CONTAINS NULLS auto result = lists::contains_nulls(search_space->view()); - auto expected = bools{{1, 0, 1, 0, x, 1, 1, 1}, null_at(4)}; + auto expected = bools_col{{1, 0, 1, 0, X, 1, 1, 1}, null_at(4)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST. auto result = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST); - auto expected = indices{{absent, 0, 1, absent, x, 0, absent, 0}, null_at(4)}; + auto expected = indices_col{{ABSENT, 0, 1, ABSENT, X, 0, ABSENT, 0}, null_at(4)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST. 
auto result = lists::index_of(search_space->view(), *search_key_one, FIND_LAST); - auto expected = indices{{absent, 1, 2, absent, x, 1, absent, 3}, null_at(4)}; + auto expected = indices_col{{ABSENT, 1, 2, ABSENT, X, 1, ABSENT, 3}, null_at(4)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -416,7 +424,7 @@ TEST_F(ContainsTest, StringScalarWithNullsInLists) auto input_null_mask_iter = null_at(4); auto search_space = make_lists_column( 8, - indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), + indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), strings.release(), 1, cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); @@ -426,25 +434,25 @@ TEST_F(ContainsTest, StringScalarWithNullsInLists) { // CONTAINS auto result = lists::contains(search_space->view(), *search_key_one); - auto expected = bools{{0, 1, 0, 0, x, 0, 0, 1}, null_at(4)}; + auto expected = bools_col{{0, 1, 0, 0, X, 0, 0, 1}, null_at(4)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // CONTAINS NULLS auto result = lists::contains_nulls(search_space->view()); - auto expected = bools{{1, 0, 1, 0, x, 1, 1, 1}, null_at(4)}; + auto expected = bools_col{{1, 0, 1, 0, X, 1, 1, 1}, null_at(4)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST. auto result = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST); - auto expected = indices{{absent, 0, absent, absent, x, absent, absent, 0}, null_at(4)}; + auto expected = indices_col{{ABSENT, 0, ABSENT, ABSENT, X, ABSENT, ABSENT, 0}, null_at(4)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST. 
auto result = lists::index_of(search_space->view(), *search_key_one, FIND_LAST); - auto expected = indices{{absent, 0, absent, absent, x, absent, absent, 3}, null_at(4)}; + auto expected = indices_col{{ABSENT, 0, ABSENT, ABSENT, X, ABSENT, ABSENT, 3}, null_at(4)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -470,19 +478,19 @@ TYPED_TEST(TypedContainsTest, ScalarNullSearchKey) { // CONTAINS auto result = lists::contains(search_space->view(), *search_key_null); - auto expected = bools{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, all_nulls()}; + auto expected = bools_col{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, all_nulls()}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST auto result = lists::index_of(search_space->view(), *search_key_null, FIND_FIRST); - auto expected = indices{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, all_nulls()}; + auto expected = indices_col{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, all_nulls()}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST auto result = lists::index_of(search_space->view(), *search_key_null, FIND_LAST); - auto expected = indices{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, all_nulls()}; + auto expected = indices_col{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, all_nulls()}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -497,12 +505,9 @@ TEST_F(ContainsTest, ScalarTypeRelatedExceptions) {{1, 2, 3}, {4, 5, 6}}}.release(); auto skey = create_scalar_search_key(10); - CUDF_EXPECT_THROW_MESSAGE(lists::contains(list_of_lists->view(), *skey), - "Nested types not supported in list search operations."); - CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_lists->view(), *skey, FIND_FIRST), - "Nested types not supported in list search operations."); - CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_lists->view(), *skey, FIND_LAST), - "Nested types not supported in list search operations."); + EXPECT_THROW(lists::contains(list_of_lists->view(), *skey), cudf::logic_error); + 
EXPECT_THROW(lists::index_of(list_of_lists->view(), *skey, FIND_FIRST), cudf::logic_error); + EXPECT_THROW(lists::index_of(list_of_lists->view(), *skey, FIND_LAST), cudf::logic_error); } { // Search key must match list elements in type. @@ -513,12 +518,9 @@ TEST_F(ContainsTest, ScalarTypeRelatedExceptions) } .release(); auto skey = create_scalar_search_key("Hello, World!"); - CUDF_EXPECT_THROW_MESSAGE(lists::contains(list_of_ints->view(), *skey), - "Type/Scale of search key does not match list column element type."); - CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), *skey, FIND_FIRST), - "Type/Scale of search key does not match list column element type."); - CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), *skey, FIND_LAST), - "Type/Scale of search key does not match list column element type."); + EXPECT_THROW(lists::contains(list_of_ints->view(), *skey), cudf::logic_error); + EXPECT_THROW(lists::index_of(list_of_ints->view(), *skey, FIND_FIRST), cudf::logic_error); + EXPECT_THROW(lists::index_of(list_of_ints->view(), *skey, FIND_LAST), cudf::logic_error); } } @@ -551,19 +553,19 @@ TYPED_TEST(TypedVectorContainsTest, VectorKeysWithNoNulls) { // CONTAINS auto result = lists::contains(search_space->view(), search_key); - auto expected = bools{1, 0, 0, 1, 1, 0, 0, 0, 1, 0}; + auto expected = bools_col{1, 0, 0, 1, 1, 0, 0, 0, 1, 0}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST auto result = lists::index_of(search_space->view(), search_key, FIND_FIRST); - auto expected = indices{1, absent, absent, 2, 0, absent, absent, absent, 2, absent}; + auto expected = indices_col{1, ABSENT, ABSENT, 2, 0, ABSENT, ABSENT, ABSENT, 2, ABSENT}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST auto result = lists::index_of(search_space->view(), search_key, FIND_LAST); - auto expected = indices{3, absent, absent, 4, 0, absent, absent, absent, 3, absent}; + auto expected = indices_col{3, ABSENT, ABSENT, 4, 
0, ABSENT, ABSENT, ABSENT, 3, ABSENT}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -593,21 +595,21 @@ TYPED_TEST(TypedVectorContainsTest, VectorWithNullLists) { // CONTAINS auto result = lists::contains(search_space->view(), search_keys); - auto expected = bools{{1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0}, nulls_at({3, 10})}; + auto expected = bools_col{{1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0}, nulls_at({3, 10})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST - auto result = lists::index_of(search_space->view(), search_keys, FIND_FIRST); - auto expected = - indices{{1, absent, absent, x, absent, 1, absent, absent, absent, 0, x}, nulls_at({3, 10})}; + auto result = lists::index_of(search_space->view(), search_keys, FIND_FIRST); + auto expected = indices_col{{1, ABSENT, ABSENT, X, ABSENT, 1, ABSENT, ABSENT, ABSENT, 0, X}, + nulls_at({3, 10})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST - auto result = lists::index_of(search_space->view(), search_keys, FIND_LAST); - auto expected = - indices{{3, absent, absent, x, absent, 1, absent, absent, absent, 0, x}, nulls_at({3, 10})}; + auto result = lists::index_of(search_space->view(), search_keys, FIND_LAST); + auto expected = indices_col{{3, ABSENT, ABSENT, X, ABSENT, 1, ABSENT, ABSENT, ABSENT, 0, X}, + nulls_at({3, 10})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -617,29 +619,29 @@ TYPED_TEST(TypedVectorContainsTest, VectorNonNullListsWithNullValues) // Test List columns that have no NULL list rows, but NULL elements in some list rows. 
using T = TypeParam; - auto numerals = fixed_width_column_wrapper{{x, 1, 2, x, 4, 5, x, 7, 8, x, x, 1, 2, x, 1}, + auto numerals = fixed_width_column_wrapper{{X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})}; auto search_space = make_lists_column( - 8, indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 0, {}); + 8, indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 0, {}); // Search space: [ [x], [1,2], [x,4,5,x], [], [], [7,8,x], [x], [1,2,x,1] ] auto search_keys = fixed_width_column_wrapper{1, 2, 3, 1, 2, 3, 1, 1}; { // CONTAINS auto result = lists::contains(search_space->view(), search_keys); - auto expected = bools{0, 1, 0, 0, 0, 0, 0, 1}; + auto expected = bools_col{0, 1, 0, 0, 0, 0, 0, 1}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST auto result = lists::index_of(search_space->view(), search_keys, FIND_FIRST); - auto expected = indices{absent, 1, absent, absent, absent, absent, absent, 0}; + auto expected = indices_col{ABSENT, 1, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 0}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST auto result = lists::index_of(search_space->view(), search_keys, FIND_LAST); - auto expected = indices{absent, 1, absent, absent, absent, absent, absent, 3}; + auto expected = indices_col{ABSENT, 1, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 3}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -648,14 +650,14 @@ TYPED_TEST(TypedVectorContainsTest, VectorWithNullsInLists) { using T = TypeParam; - auto numerals = fixed_width_column_wrapper{{x, 1, 2, x, 4, 5, x, 7, 8, x, x, 1, 2, x, 1}, + auto numerals = fixed_width_column_wrapper{{X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})}; auto input_null_mask_iter = null_at(4); auto search_space = make_lists_column( 8, - indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), + indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 
1, cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); @@ -665,19 +667,19 @@ TYPED_TEST(TypedVectorContainsTest, VectorWithNullsInLists) { // CONTAINS auto result = lists::contains(search_space->view(), search_keys); - auto expected = bools{{0, 1, 0, 0, x, 0, 0, 1}, null_at(4)}; + auto expected = bools_col{{0, 1, 0, 0, X, 0, 0, 1}, null_at(4)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST auto result = lists::index_of(search_space->view(), search_keys, FIND_FIRST); - auto expected = indices{{absent, 1, absent, absent, x, absent, absent, 0}, null_at(4)}; + auto expected = indices_col{{ABSENT, 1, ABSENT, ABSENT, X, ABSENT, ABSENT, 0}, null_at(4)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST auto result = lists::index_of(search_space->view(), search_keys, FIND_LAST); - auto expected = indices{{absent, 1, absent, absent, x, absent, absent, 3}, null_at(4)}; + auto expected = indices_col{{ABSENT, 1, ABSENT, ABSENT, X, ABSENT, ABSENT, 3}, null_at(4)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -686,36 +688,36 @@ TYPED_TEST(TypedVectorContainsTest, ListContainsVectorWithNullsInListsAndInSearc { using T = TypeParam; - auto numerals = fixed_width_column_wrapper{{x, 1, 2, x, 4, 5, x, 7, 8, x, x, 1, 2, x, 1}, + auto numerals = fixed_width_column_wrapper{{X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})}; auto input_null_mask_iter = null_at(4); auto search_space = make_lists_column( 8, - indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), + indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 1, cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ] - auto search_keys = fixed_width_column_wrapper{{1, 2, 3, x, 2, 3, 1, 1}, null_at(3)}; + auto search_keys = fixed_width_column_wrapper{{1, 2, 3, X, 2, 3, 1, 1}, null_at(3)}; { 
// CONTAINS auto result = lists::contains(search_space->view(), search_keys); - auto expected = bools{{0, 1, 0, x, x, 0, 0, 1}, nulls_at({3, 4})}; + auto expected = bools_col{{0, 1, 0, X, X, 0, 0, 1}, nulls_at({3, 4})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST auto result = lists::index_of(search_space->view(), search_keys, FIND_FIRST); - auto expected = indices{{absent, 1, absent, x, x, absent, absent, 0}, nulls_at({3, 4})}; + auto expected = indices_col{{ABSENT, 1, ABSENT, X, X, ABSENT, ABSENT, 0}, nulls_at({3, 4})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST auto result = lists::index_of(search_space->view(), search_keys, FIND_LAST); - auto expected = indices{{absent, 1, absent, x, x, absent, absent, 3}, nulls_at({3, 4})}; + auto expected = indices_col{{ABSENT, 1, ABSENT, X, X, ABSENT, ABSENT, 3}, nulls_at({3, 4})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -724,37 +726,37 @@ TEST_F(ContainsTest, BoolKeyVectorWithNullsInListsAndInSearchKeys) { using T = bool; - auto numerals = fixed_width_column_wrapper{{x, 0, 1, x, 1, 1, x, 1, 1, x, x, 0, 1, x, 1}, + auto numerals = fixed_width_column_wrapper{{X, 0, 1, X, 1, 1, X, 1, 1, X, X, 0, 1, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})}; auto input_null_mask_iter = null_at(4); auto search_space = make_lists_column( 8, - indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), + indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 1, cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); - auto search_keys = fixed_width_column_wrapper{{0, 1, 0, x, 0, 0, 1, 1}, null_at(3)}; + auto search_keys = fixed_width_column_wrapper{{0, 1, 0, X, 0, 0, 1, 1}, null_at(3)}; // Search space: [ [x], [0,1], [x,1,1,x], [], x, [1,1,x], [x], [0,1,x,1] ] // Search keys : [ 0, 1, 0, x, 0, 0, 1, 1 ] { // CONTAINS auto result = lists::contains(search_space->view(), search_keys); - auto expected = bools{{0, 1, 0, x, x, 0, 0, 1}, 
nulls_at({3, 4})}; + auto expected = bools_col{{0, 1, 0, X, X, 0, 0, 1}, nulls_at({3, 4})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST auto result = lists::index_of(search_space->view(), search_keys, FIND_FIRST); - auto expected = indices{{absent, 1, absent, x, x, absent, absent, 1}, nulls_at({3, 4})}; + auto expected = indices_col{{ABSENT, 1, ABSENT, X, X, ABSENT, ABSENT, 1}, nulls_at({3, 4})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST auto result = lists::index_of(search_space->view(), search_keys, FIND_LAST); - auto expected = indices{{absent, 1, absent, x, x, absent, absent, 3}, nulls_at({3, 4})}; + auto expected = indices_col{{ABSENT, 1, ABSENT, X, X, ABSENT, ABSENT, 3}, nulls_at({3, 4})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -780,19 +782,19 @@ TEST_F(ContainsTest, StringKeyVectorWithNullsInListsAndInSearchKeys) { // CONTAINS auto result = lists::contains(search_space->view(), search_keys); - auto expected = bools{{0, 1, 0, x, x, 0, 0, 1}, nulls_at({3, 4})}; + auto expected = bools_col{{0, 1, 0, X, X, 0, 0, 1}, nulls_at({3, 4})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST auto result = lists::index_of(search_space->view(), search_keys, FIND_FIRST); - auto expected = indices{{absent, 1, absent, x, x, absent, absent, 0}, nulls_at({3, 4})}; + auto expected = indices_col{{ABSENT, 1, ABSENT, X, X, ABSENT, ABSENT, 0}, nulls_at({3, 4})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST auto result = lists::index_of(search_space->view(), search_keys, FIND_LAST); - auto expected = indices{{absent, 1, absent, x, x, absent, absent, 3}, nulls_at({3, 4})}; + auto expected = indices_col{{ABSENT, 1, ABSENT, X, X, ABSENT, ABSENT, 3}, nulls_at({3, 4})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -807,12 +809,9 @@ TEST_F(ContainsTest, VectorTypeRelatedExceptions) {{1, 2, 3}, {4, 5, 6}}}.release(); auto skey = 
fixed_width_column_wrapper{0, 1, 2}; - CUDF_EXPECT_THROW_MESSAGE(lists::contains(list_of_lists->view(), skey), - "Nested types not supported in list search operations."); - CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_lists->view(), skey, FIND_FIRST), - "Nested types not supported in list search operations."); - CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_lists->view(), skey, FIND_LAST), - "Nested types not supported in list search operations."); + EXPECT_THROW(lists::contains(list_of_lists->view(), skey), cudf::logic_error); + EXPECT_THROW(lists::index_of(list_of_lists->view(), skey, FIND_FIRST), cudf::logic_error); + EXPECT_THROW(lists::index_of(list_of_lists->view(), skey, FIND_LAST), cudf::logic_error); } { // Search key must match list elements in type. @@ -823,23 +822,17 @@ TEST_F(ContainsTest, VectorTypeRelatedExceptions) } .release(); auto skey = strings_column_wrapper{"Hello", "World"}; - CUDF_EXPECT_THROW_MESSAGE(lists::contains(list_of_ints->view(), skey), - "Type/Scale of search key does not match list column element type."); - CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), skey, FIND_FIRST), - "Type/Scale of search key does not match list column element type."); - CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), skey, FIND_LAST), - "Type/Scale of search key does not match list column element type."); + EXPECT_THROW(lists::contains(list_of_ints->view(), skey), cudf::logic_error); + EXPECT_THROW(lists::index_of(list_of_ints->view(), skey, FIND_FIRST), cudf::logic_error); + EXPECT_THROW(lists::index_of(list_of_ints->view(), skey, FIND_LAST), cudf::logic_error); } { // Search key column size must match lists column size. 
auto list_of_ints = lists_column_wrapper{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}}.release(); auto skey = fixed_width_column_wrapper{0, 1, 2, 3}; - CUDF_EXPECT_THROW_MESSAGE(lists::contains(list_of_ints->view(), skey), - "Number of search keys must match list column size."); - CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), skey, FIND_FIRST), - "Number of search keys must match list column size."); - CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), skey, FIND_LAST), - "Number of search keys must match list column size."); + EXPECT_THROW(lists::contains(list_of_ints->view(), skey), cudf::logic_error); + EXPECT_THROW(lists::index_of(list_of_ints->view(), skey, FIND_FIRST), cudf::logic_error); + EXPECT_THROW(lists::index_of(list_of_ints->view(), skey, FIND_LAST), cudf::logic_error); } } @@ -887,19 +880,21 @@ TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsScalar) { // CONTAINS auto result = lists::contains(search_space->view(), *search_key_nan); - auto expected = bools{0, 0, 0, 0, 1, 0, 1, 0, 0, 0}; + auto expected = bools_col{0, 0, 0, 0, 1, 0, 1, 0, 0, 0}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST - auto result = lists::index_of(search_space->view(), *search_key_nan, FIND_FIRST); - auto expected = indices{absent, absent, absent, absent, 0, absent, 1, absent, absent, absent}; + auto result = lists::index_of(search_space->view(), *search_key_nan, FIND_FIRST); + auto expected = + indices_col{ABSENT, ABSENT, ABSENT, ABSENT, 0, ABSENT, 1, ABSENT, ABSENT, ABSENT}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST - auto result = lists::index_of(search_space->view(), *search_key_nan, FIND_LAST); - auto expected = indices{absent, absent, absent, absent, 0, absent, 1, absent, absent, absent}; + auto result = lists::index_of(search_space->view(), *search_key_nan, FIND_LAST); + auto expected = + indices_col{ABSENT, ABSENT, ABSENT, ABSENT, 0, ABSENT, 1, ABSENT, ABSENT, ABSENT}; 
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -944,21 +939,21 @@ TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsContainsVector) { // CONTAINS auto result = lists::contains(search_space->view(), search_keys->view()); - auto expected = bools{{1, 0, 0, 0, 1, 0, 1, 0, 1, 0}, null_at(2)}; + auto expected = bools_col{{1, 0, 0, 0, 1, 0, 1, 0, 1, 0}, null_at(2)}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST auto result = lists::index_of(search_space->view(), search_keys->view(), FIND_FIRST); auto expected = - indices{{1, absent, x, absent, 0, absent, 2, absent, 1, absent}, nulls_at({2})}; + indices_col{{1, ABSENT, X, ABSENT, 0, ABSENT, 2, ABSENT, 1, ABSENT}, nulls_at({2})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST auto result = lists::index_of(search_space->view(), search_keys->view(), FIND_LAST); auto expected = - indices{{1, absent, x, absent, 0, absent, 2, absent, 1, absent}, nulls_at({2})}; + indices_col{{1, ABSENT, X, ABSENT, 0, ABSENT, 2, ABSENT, 1, ABSENT}, nulls_at({2})}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -969,19 +964,19 @@ TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsContainsVector) { // CONTAINS auto result = lists::contains(search_space->view(), search_keys->view()); - auto expected = bools{1, 0, 0, 0, 1, 0, 1, 0, 1, 0}; + auto expected = bools_col{1, 0, 0, 0, 1, 0, 1, 0, 1, 0}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST auto result = lists::index_of(search_space->view(), search_keys->view(), FIND_FIRST); - auto expected = indices{1, absent, absent, absent, 0, absent, 2, absent, 1, absent}; + auto expected = indices_col{1, ABSENT, ABSENT, ABSENT, 0, ABSENT, 2, ABSENT, 1, ABSENT}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST auto result = lists::index_of(search_space->view(), search_keys->view(), FIND_LAST); - auto expected = indices{1, absent, absent, absent, 0, absent, 2, absent, 1, absent}; + auto 
expected = indices_col{1, ABSENT, ABSENT, ABSENT, 0, ABSENT, 2, ABSENT, 1, ABSENT}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -1002,7 +997,7 @@ TYPED_TEST(TypedContainsDecimalsTest, ScalarKey) 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3}; auto decimals = fixed_point_column_wrapper{ values.begin(), values.end(), numeric::scale_type{0}}; - auto list_offsets = indices{0, 3, 6, 9, 12, 15, 18, 21, 21, 24, 24}; + auto list_offsets = indices_col{0, 3, 6, 9, 12, 15, 18, 21, 21, 24, 24}; return make_lists_column(10, list_offsets.release(), decimals.release(), 0, {}); }(); auto search_key_one = make_fixed_point_scalar(typename T::rep{1}, numeric::scale_type{0}); @@ -1011,19 +1006,19 @@ TYPED_TEST(TypedContainsDecimalsTest, ScalarKey) { // CONTAINS auto result = lists::contains(search_space->view(), *search_key_one); - auto expected = bools{1, 0, 0, 1, 0, 0, 0, 0, 1, 0}; + auto expected = bools_col{1, 0, 0, 1, 0, 0, 0, 0, 1, 0}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST auto result = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST); - auto expected = indices{1, absent, absent, 2, absent, absent, absent, absent, 0, absent}; + auto expected = indices_col{1, ABSENT, ABSENT, 2, ABSENT, ABSENT, ABSENT, ABSENT, 0, ABSENT}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST auto result = lists::index_of(search_space->view(), *search_key_one, FIND_LAST); - auto expected = indices{1, absent, absent, 2, absent, absent, absent, absent, 0, absent}; + auto expected = indices_col{1, ABSENT, ABSENT, 2, ABSENT, ABSENT, ABSENT, ABSENT, 0, ABSENT}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } } @@ -1037,7 +1032,7 @@ TYPED_TEST(TypedContainsDecimalsTest, VectorKey) 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3}; auto decimals = fixed_point_column_wrapper{ values.begin(), values.end(), numeric::scale_type{0}}; - auto list_offsets = indices{0, 3, 6, 9, 12, 15, 18, 21, 21, 24, 24}; + auto list_offsets = 
indices_col{0, 3, 6, 9, 12, 15, 18, 21, 21, 24, 24}; return make_lists_column(10, list_offsets.release(), decimals.release(), 0, {}); }(); @@ -1051,19 +1046,781 @@ TYPED_TEST(TypedContainsDecimalsTest, VectorKey) { // CONTAINS auto result = lists::contains(search_space->view(), search_key->view()); - auto expected = bools{1, 0, 0, 1, 1, 0, 0, 0, 1, 0}; + auto expected = bools_col{1, 0, 0, 1, 1, 0, 0, 0, 1, 0}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_FIRST auto result = lists::index_of(search_space->view(), search_key->view(), FIND_FIRST); - auto expected = indices{1, absent, absent, 2, 0, absent, absent, absent, 2, absent}; + auto expected = indices_col{1, ABSENT, ABSENT, 2, 0, ABSENT, ABSENT, ABSENT, 2, ABSENT}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } { // FIND_LAST auto result = lists::index_of(search_space->view(), search_key->view(), FIND_LAST); - auto expected = indices{1, absent, absent, 2, 0, absent, absent, absent, 2, absent}; + auto expected = indices_col{1, ABSENT, ABSENT, 2, 0, ABSENT, ABSENT, ABSENT, 2, ABSENT}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } +} + +template +struct TypedStructContainsTest : public ContainsTest { +}; +TYPED_TEST_SUITE(TypedStructContainsTest, ContainsTestTypes); + +TYPED_TEST(TypedStructContainsTest, EmptyInputTest) +{ + using tdata_col = cudf::test::fixed_width_column_wrapper; + + auto const lists = [] { + auto offsets = indices_col{}; + auto data = tdata_col{}; + auto child = structs_col{{data}}; + return make_lists_column(0, offsets.release(), child.release(), 0, {}); + }(); + + auto const scalar_key = [] { + auto child = tdata_col{0}; + return make_struct_scalar(child); + }(); + auto const column_key = [] { + auto child = tdata_col{}; + return structs_col{{child}}; + }(); + + auto const result1 = lists::contains(lists->view(), scalar_key); + auto const result2 = lists::contains(lists->view(), column_key); + auto const expected = bools_col{}; + 
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result1); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result2); +} + +TYPED_TEST(TypedStructContainsTest, ScalarKeyNoNullLists) +{ + using tdata_col = cudf::test::fixed_width_column_wrapper; + + auto const lists = [] { + auto offsets = indices_col{0, 4, 7, 10, 15, 18, 21, 24, 24, 28, 28}; + // clang-format off + auto data1 = tdata_col{0, 1, 2, 1, + 3, 4, 5, + 6, 7, 8, + 9, 0, 1, 3, 1, + 2, 3, 4, + 5, 6, 7, + 8, 9, 0, + 1, 2, 1, 3 + }; + auto data2 = tdata_col{0, 1, 2, 3, + 0, 1, 2, + 0, 1, 2, + 1, 1, 2, 2, 2, + 0, 1, 2, + 0, 1, 2, + 0, 1, 2, + 1, 0, 1, 1 + }; + // clang-format on + auto child = structs_col{{data1, data2}}; + return make_lists_column(10, offsets.release(), child.release(), 0, {}); + }(); + + auto const key = [] { + auto child1 = tdata_col{1}; + auto child2 = tdata_col{1}; + return make_struct_scalar(child1, child2); + }(); + + { + // CONTAINS + auto const result = lists::contains(lists->view(), key); + auto const expected = bools_col{1, 0, 0, 0, 0, 0, 0, 0, 1, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // CONTAINS NULLS + auto const result = lists::contains_nulls(lists->view()); + auto const expected = bools_col{0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto const result = lists::index_of(lists->view(), key, FIND_FIRST); + auto const expected = + indices_col{1, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 0, ABSENT}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto const result = lists::index_of(lists->view(), key, FIND_LAST); + auto const expected = + indices_col{1, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 2, ABSENT}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } +} + +TYPED_TEST(TypedStructContainsTest, ScalarKeyWithNullLists) +{ + using tdata_col = cudf::test::fixed_width_column_wrapper; + + auto const lists = [] { 
+ auto offsets = indices_col{0, 4, 7, 10, 10, 15, 18, 21, 24, 24, 28, 28}; + // clang-format off + auto data1 = tdata_col{0, 1, 2, 1, + 3, 4, 5, + 6, 7, 8, + 9, 0, 1, 3, 1, + 2, 3, 4, + 5, 6, 7, + 8, 9, 0, + 1, 2, 1, 3 + }; + auto data2 = tdata_col{0, 1, 2, 3, + 0, 1, 2, + 0, 1, 2, + 1, 1, 2, 2, 2, + 0, 1, 2, + 0, 1, 2, + 0, 1, 2, + 1, 0, 1, 1 + }; + // clang-format on + auto child = structs_col{{data1, data2}}; + auto const validity_iter = nulls_at({3, 10}); + return make_lists_column(11, + offsets.release(), + child.release(), + 2, + detail::make_null_mask(validity_iter, validity_iter + 11)); + }(); + + auto const key = [] { + auto child1 = tdata_col{1}; + auto child2 = tdata_col{1}; + return make_struct_scalar(child1, child2); + }(); + + { + // CONTAINS + auto const result = lists::contains(lists->view(), key); + auto const expected = bools_col{{1, 0, 0, X, 0, 0, 0, 0, 0, 1, X}, nulls_at({3, 10})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // CONTAINS NULLS + auto const result = lists::contains_nulls(lists->view()); + auto const expected = bools_col{{0, 0, 0, X, 0, 0, 0, 0, 0, 0, X}, nulls_at({3, 10})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto const result = lists::index_of(lists->view(), key, FIND_FIRST); + auto const expected = indices_col{ + {1, ABSENT, ABSENT, X, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 0, X}, nulls_at({3, 10})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto const result = lists::index_of(lists->view(), key, FIND_LAST); + auto const expected = indices_col{ + {1, ABSENT, ABSENT, X, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 2, X}, nulls_at({3, 10})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } +} + +TYPED_TEST(TypedStructContainsTest, SlicedListsColumnNoNulls) +{ + using tdata_col = cudf::test::fixed_width_column_wrapper; + + auto const lists_original = [] { + auto offsets = indices_col{0, 4, 7, 10, 15, 18, 21, 24, 
24, 28, 28}; + // clang-format off + auto data1 = tdata_col{0, 1, 2, 1, + 3, 4, 5, + 6, 7, 8, + 9, 0, 1, 3, 1, + 2, 3, 4, + 5, 6, 7, + 8, 9, 0, + 1, 2, 1, 3 + }; + auto data2 = tdata_col{0, 1, 2, 3, + 0, 1, 2, + 0, 1, 2, + 1, 1, 2, 2, 2, + 0, 1, 2, + 0, 1, 2, + 0, 1, 2, + 1, 0, 1, 1 + }; + // clang-format on + auto child = structs_col{{data1, data2}}; + return make_lists_column(10, offsets.release(), child.release(), 0, {}); + }(); + auto const lists = cudf::slice(lists_original->view(), {3, 10})[0]; + + auto const key = [] { + auto child1 = tdata_col{1}; + auto child2 = tdata_col{1}; + return make_struct_scalar(child1, child2); + }(); + + { + // CONTAINS + auto const result = lists::contains(lists, key); + auto const expected = bools_col{0, 0, 0, 0, 0, 1, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // CONTAINS NULLS + auto const result = lists::contains_nulls(lists); + auto const expected = bools_col{0, 0, 0, 0, 0, 0, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto const result = lists::index_of(lists, key, FIND_FIRST); + auto const expected = indices_col{ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 0, ABSENT}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto const result = lists::index_of(lists, key, FIND_LAST); + auto const expected = indices_col{ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 2, ABSENT}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } +} + /* Struct elements, scalar key: no list row is null, but four child struct rows are null (nulls_at({1, 10, 15, 24})). Every list is searched for the struct scalar (1, 1); the test checks contains, contains_nulls, and index_of with FIND_FIRST and FIND_LAST. */ +TYPED_TEST(TypedStructContainsTest, ScalarKeyNoNullListsWithNullStructs) +{ + using tdata_col = cudf::test::fixed_width_column_wrapper; + + auto const lists = [] { + auto offsets = indices_col{0, 4, 7, 10, 15, 18, 21, 24, 24, 28, 28}; + // clang-format off + auto data1 = tdata_col{0, X, 2, 1, + 3, 4, 5, + 6, 7, 8, + X, 0, 1, 3, 1, + X, 3, 4, + 5, 6, 7, + 8, 9, 0, + X, 2, 1, 3 + }; + auto data2 = tdata_col{0, X, 2, 1, + 0, 1, 2, + 0, 1, 2, + X, 1, 2, 2, 2, + X, 1, 2, + 0, 1, 2, + 0, 1, 2, + X, 0,
1, 1 + }; + // clang-format on + auto child = structs_col{{data1, data2}, nulls_at({1, 10, 15, 24})}; + return make_lists_column(10, offsets.release(), child.release(), 0, {}); + }(); + + auto const key = [] { + auto child1 = tdata_col{1}; + auto child2 = tdata_col{1}; + return make_struct_scalar(child1, child2); + }(); + + { + // CONTAINS + auto const result = lists::contains(lists->view(), key); + auto const expected = bools_col{1, 0, 0, 0, 0, 0, 0, 0, 1, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // CONTAINS NULLS + auto const result = lists::contains_nulls(lists->view()); + auto const expected = bools_col{1, 0, 0, 1, 1, 0, 0, 0, 1, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto const result = lists::index_of(lists->view(), key, FIND_FIRST); + auto const expected = + indices_col{3, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 2, ABSENT}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto const result = lists::index_of(lists->view(), key, FIND_LAST); + auto const expected = + indices_col{3, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 2, ABSENT}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } +} + /* Struct elements, column key: each of the 10 list rows is searched for the struct in the same row of `keys`. No nulls are present; the test checks contains and index_of with FIND_FIRST and FIND_LAST. */ +TYPED_TEST(TypedStructContainsTest, ColumnKeyNoNullLists) +{ + using tdata_col = cudf::test::fixed_width_column_wrapper; + + auto const lists = [] { + auto offsets = indices_col{0, 4, 7, 10, 15, 18, 21, 24, 24, 28, 28}; + // clang-format off + auto data1 = tdata_col{0, 1, 2, 1, + 3, 4, 3, + 6, 7, 8, + 9, 0, 1, 3, 1, + 2, 3, 4, + 5, 6, 7, + 8, 9, 0, + 1, 2, 1, 3 + }; + auto data2 = tdata_col{0, 1, 2, 3, + 0, 0, 0, + 0, 1, 2, + 1, 1, 2, 2, 2, + 0, 1, 2, + 0, 1, 2, + 0, 1, 2, + 1, 0, 1, 1 + }; + // clang-format on + auto child = structs_col{{data1, data2}}; + return make_lists_column(10, offsets.release(), child.release(), 0, {}); + }(); + + auto const keys = [] { + auto child1 = tdata_col{1, 3, 1, 1, 2, 1, 0, 0, 1, 0}; +
auto child2 = tdata_col{1, 0, 1, 1, 2, 1, 0, 0, 1, 0}; + return structs_col{{child1, child2}}; + }(); + + { + // CONTAINS + auto const result = lists::contains(lists->view(), keys); + auto const expected = bools_col{1, 1, 0, 0, 0, 0, 0, 0, 1, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto const result = lists::index_of(lists->view(), keys, FIND_FIRST); + auto const expected = + indices_col{1, 0, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 0, ABSENT}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto const result = lists::index_of(lists->view(), keys, FIND_LAST); + auto const expected = + indices_col{1, 2, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 2, ABSENT}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } +} + /* Struct elements, column key, sliced inputs without nulls: the lists are sliced with {3, 7} and the keys with {1, 5}, so the two inputs start at different row positions of their parent columns. Four output rows are expected. */ +TYPED_TEST(TypedStructContainsTest, ColumnKeyWithSlicedListsNoNulls) +{ + using tdata_col = cudf::test::fixed_width_column_wrapper; + + auto const lists_original = [] { + auto offsets = indices_col{0, 4, 7, 10, 15, 18, 21, 24, 24, 28, 28}; + // clang-format off + auto data1 = tdata_col{0, 1, 2, 1, + 3, 4, 3, + 6, 7, 8, + 9, 0, 1, 3, 1, + 2, 3, 4, + 5, 6, 7, + 8, 9, 0, + 1, 2, 1, 3 + }; + auto data2 = tdata_col{0, 1, 2, 3, + 0, 0, 0, + 0, 1, 2, + 1, 1, 2, 2, 2, + 0, 1, 2, + 0, 1, 2, + 0, 1, 2, + 1, 0, 1, 1 + }; + // clang-format on + auto child = structs_col{{data1, data2}}; + return make_lists_column(10, offsets.release(), child.release(), 0, {}); + }(); + + auto const keys_original = [] { + auto child1 = tdata_col{1, 9, 1, 6, 2, 1, 0, 0, 1, 0}; + auto child2 = tdata_col{1, 1, 1, 1, 2, 1, 0, 0, 1, 0}; + return structs_col{{child1, child2}}; + }(); + + auto const lists = cudf::slice(lists_original->view(), {3, 7})[0]; + auto const keys = cudf::slice(keys_original, {1, 5})[0]; + + { + // CONTAINS + auto const result = lists::contains(lists, keys); + auto const expected = bools_col{1, 0, 1, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + }
+ { + // FIND_FIRST + auto const result = lists::index_of(lists, keys, FIND_FIRST); + auto const expected = indices_col{0, ABSENT, 1, ABSENT}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto const result = lists::index_of(lists, keys, FIND_LAST); + auto const expected = indices_col{0, ABSENT, 1, ABSENT}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } +} + /* Struct elements, column key, sliced inputs with nulls: the 11-row lists column gets a validity mask built from nulls_at({3, 10}) and has null struct elements; the keys have a null struct row (null_at(1)) and null child values (null_at(4)). Lists are sliced with {4, 8} and keys with {1, 5}; the first output row is expected to be null. */ +TYPED_TEST(TypedStructContainsTest, ColumnKeyWithSlicedListsHavingNulls) +{ + using tdata_col = cudf::test::fixed_width_column_wrapper; + + auto const lists_original = [] { + auto offsets = indices_col{0, 4, 7, 10, 10, 15, 18, 21, 24, 24, 28, 28}; + // clang-format off + auto data1 = tdata_col{0, X, 2, 1, + 3, 4, 5, + 6, 7, 8, + X, 0, 1, 3, 1, + X, 3, 4, + 5, 6, 6, + 8, 9, 0, + X, 2, 1, 3 + }; + auto data2 = tdata_col{0, X, 2, 1, + 0, 1, 2, + 0, 1, 2, + X, 1, 2, 2, 2, + X, 1, 2, + 0, 1, 1, + 0, 1, 2, + X, 0, 1, 1 + }; + // clang-format on + auto child = structs_col{{data1, data2}, nulls_at({1, 10, 15, 24})}; + auto const validity_iter = nulls_at({3, 10}); + return make_lists_column(11, + offsets.release(), + child.release(), + 2, + detail::make_null_mask(validity_iter, validity_iter + 11)); + }(); + + auto const keys_original = [] { + auto child1 = tdata_col{{1, X, 1, 6, X, 1, 0, 0, 1, 0, 1}, null_at(4)}; + auto child2 = tdata_col{{1, X, 1, 1, X, 1, 0, 0, 1, 0, 1}, null_at(4)}; + return structs_col{{child1, child2}, null_at(1)}; + }(); + + auto const lists = cudf::slice(lists_original->view(), {4, 8})[0]; + auto const keys = cudf::slice(keys_original, {1, 5})[0]; + + { + // CONTAINS + auto const result = lists::contains(lists, keys); + auto const expected = bools_col{{X, 0, 1, 0}, null_at(0)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto const result = lists::index_of(lists, keys, FIND_FIRST); + auto const expected = indices_col{{X, ABSENT, 1, ABSENT}, null_at(0)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected,
*result); + } + { + // FIND_LAST + auto const result = lists::index_of(lists, keys, FIND_LAST); + auto const expected = indices_col{{X, ABSENT, 2, ABSENT}, null_at(0)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } +} + /* Typed fixture for the tests that search lists whose elements are themselves lists; registered over ContainsTestTypes. NOTE(review): `template` is followed directly by `struct` here, so a template parameter list seems to be missing from this text - confirm against the original patch. */ +template +struct TypedListContainsTest : public ContainsTest { +}; +TYPED_TEST_SUITE(TypedListContainsTest, ContainsTestTypes); + /* List elements, scalar key: searches lists of lists for the list scalar {0, 1, 2}. The shared do_test lambda runs the same checks on a column without nulls and on one with null inner lists; only the contains_nulls expectation depends on has_nulls. */ +TYPED_TEST(TypedListContainsTest, ScalarKeyLists) +{ + using tdata_col = cudf::test::fixed_width_column_wrapper; + using lists_col = cudf::test::lists_column_wrapper; + + auto const lists_no_nulls = lists_col{lists_col{{0, 1, 2}, // list0 + {3, 4, 5}, + {0, 1, 2}, + {9, 0, 1, 3, 1}}, + lists_col{{2, 3, 4}, // list1 + {3, 4, 5}, + {8, 9, 0}, + {}}, + lists_col{{0, 2, 1}, // list2 + {}}}; + + auto const lists_have_nulls = lists_col{lists_col{{{0, 1, 2}, // list0 + {} /*NULL*/, + {0, 1, 2}, + {9, 0, 1, 3, 1}}, + null_at(1)}, + lists_col{{{} /*NULL*/, // list1 + {3, 4, 5}, + {8, 9, 0}, + {}}, + null_at(0)}, + lists_col{{0, 2, 1}, // list2 + {}}}; + + auto const key = [] { + auto const child = tdata_col{0, 1, 2}; + return list_scalar(child); + }(); + + auto const do_test = [&](auto const& lists, bool has_nulls) { + { + // CONTAINS + auto const result = lists::contains(lists_column_view{lists}, key); + auto const expected = bools_col{1, 0, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // CONTAINS NULLS + auto const result = lists::contains_nulls(lists_column_view{lists}); + auto const expected = has_nulls ?
bools_col{1, 1, 0} : bools_col{0, 0, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto const result = lists::index_of(lists_column_view{lists}, key, FIND_FIRST); + auto const expected = indices_col{0, ABSENT, ABSENT}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto const result = lists::index_of(lists_column_view{lists}, key, FIND_LAST); + auto const expected = indices_col{2, ABSENT, ABSENT}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + }; + + do_test(lists_no_nulls, false); + do_test(lists_have_nulls, true); +} + /* List elements, scalar key, sliced inputs: the searched columns are sliced out of larger ones ({2, 5} and {1, 4}) whose surrounding rows are marked "don't care". The expected results match the three rows labelled list0..list2. */ +TYPED_TEST(TypedListContainsTest, SlicedListsColumn) +{ + using tdata_col = cudf::test::fixed_width_column_wrapper; + using lists_col = cudf::test::lists_column_wrapper; + + auto const lists_no_nulls_original = lists_col{lists_col{{0, 0, 0}, // list-2 (don't care) + {0, 1, 2}, + {0, 1, 2}, + {0, 0, 0}}, + lists_col{{0, 0, 0}, // list-1 (don't care) + {0, 1, 2}, + {0, 1, 2}, + {0, 0, 0}}, + lists_col{{0, 1, 2}, // list0 + {3, 4, 5}, + {0, 1, 2}, + {9, 0, 1, 3, 1}}, + lists_col{{2, 3, 4}, // list1 + {3, 4, 5}, + {8, 9, 0}, + {}}, + lists_col{{0, 2, 1}, // list2 + {}}, + lists_col{{0, 0, 0}, // list3 (don't care) + {0, 1, 2}, + {0, 1, 2}, + {0, 0, 0}}, + lists_col{{0, 0, 0}, // list4 (don't care) + {0, 1, 2}, + {0, 1, 2}, + {0, 0, 0}}}; + + auto const lists_have_nulls_original = lists_col{lists_col{{0, 0, 0}, // list-1 (don't care) + {0, 1, 2}, + {0, 1, 2}, + {0, 0, 0}}, + lists_col{{{0, 1, 2}, // list0 + {} /*NULL*/, + {0, 1, 2}, + {9, 0, 1, 3, 1}}, + null_at(1)}, + lists_col{{{} /*NULL*/, // list1 + {3, 4, 5}, + {8, 9, 0}, + {}}, + null_at(0)}, + lists_col{{0, 2, 1}, // list2 + {}}, + lists_col{{0, 0, 0}, // list3 (don't care) + {0, 1, 2}, + {0, 1, 2}, + {0, 0, 0}}}; + + auto const lists_no_nulls = cudf::slice(lists_no_nulls_original, {2, 5})[0]; + auto const lists_have_nulls = cudf::slice(lists_have_nulls_original, {1, 4})[0]; + + auto const key = [] { +
auto const child = tdata_col{0, 1, 2}; + return list_scalar(child); + }(); + + auto const do_test = [&](auto const& lists, bool has_nulls) { + { + // CONTAINS + auto const result = lists::contains(lists_column_view{lists}, key); + auto const expected = bools_col{1, 0, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // CONTAINS NULLS + auto const result = lists::contains_nulls(lists_column_view{lists}); + auto const expected = has_nulls ? bools_col{1, 1, 0} : bools_col{0, 0, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto const result = lists::index_of(lists_column_view{lists}, key, FIND_FIRST); + auto const expected = indices_col{0, ABSENT, ABSENT}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto const result = lists::index_of(lists_column_view{lists}, key, FIND_LAST); + auto const expected = indices_col{2, ABSENT, ABSENT}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + }; + + do_test(lists_no_nulls, false); + do_test(lists_have_nulls, true); +} + /* List elements, column key: each row is searched for the list in the same row of `key`; two of the three key lists contain a null element (null_at(1) and null_at(2)). The contains and index_of expectations differ between the no-null and has-null inputs. */ +TYPED_TEST(TypedListContainsTest, ColumnKeyLists) +{ + using lists_col = cudf::test::lists_column_wrapper; + auto constexpr null = int32_t{0}; + + auto const lists_no_nulls = lists_col{lists_col{{0, 0, 2}, // list0 + {3, 4, 5}, + {0, 0, 2}, + {9, 0, 1, 3, 1}}, + lists_col{{2, 3, 4}, // list1 + {3, 4, 5}, + {2, 3, 4}, + {}}, + lists_col{{0, 2, 0}, // list2 + {0, 2, 0}, + {3, 4, 5}, + {}}}; + + auto const lists_have_nulls = lists_col{lists_col{{lists_col{{0, null, 2}, null_at(1)}, // list0 + lists_col{} /*NULL*/, + lists_col{{0, null, 2}, null_at(1)}, + lists_col{9, 0, 1, 3, 1}}, + null_at(1)}, + lists_col{{lists_col{} /*NULL*/, // list1 + lists_col{3, 4, 5}, + lists_col{2, 3, 4}, + lists_col{}}, + null_at(0)}, + lists_col{lists_col{0, 2, 1}, // list2 + lists_col{{0, 2, null}, null_at(2)}, + lists_col{3, 4, 5}, + lists_col{}}}; + + auto const key = lists_col{ + lists_col{{0, null, 2}, null_at(1)},
lists_col{2, 3, 4}, lists_col{{0, 2, null}, null_at(2)}}; + + auto const do_test = [&](auto const& lists, bool has_nulls) { + { + // CONTAINS + auto const result = lists::contains(lists_column_view{lists}, key); + auto const expected = has_nulls ? bools_col{1, 1, 1} : bools_col{0, 1, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // CONTAINS NULLS + auto const result = lists::contains_nulls(lists_column_view{lists}); + auto const expected = has_nulls ? bools_col{1, 1, 0} : bools_col{0, 0, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto const result = lists::index_of(lists_column_view{lists}, key, FIND_FIRST); + auto const expected = has_nulls ? indices_col{0, 2, 1} : indices_col{ABSENT, 0, ABSENT}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto const result = lists::index_of(lists_column_view{lists}, key, FIND_LAST); + auto const expected = has_nulls ? indices_col{2, 2, 1} : indices_col{ABSENT, 2, ABSENT}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + }; + + do_test(lists_no_nulls, false); + do_test(lists_have_nulls, true); +} + /* Lists of lists of structs, no nulls: two outer rows (offsets {0, 4, 8}) over eight inner lists of two-field structs. NOTE(review): the test name says ColumnKey, but the key is built with list_scalar - confirm the intended name. */ +TYPED_TEST(TypedListContainsTest, ColumnKeyWithListsOfStructsNoNulls) +{ + using tdata_col = cudf::test::fixed_width_column_wrapper; + + auto const lists = [] { + auto child_offsets = indices_col{0, 3, 6, 9, 14, 17, 20, 23, 23}; + // clang-format off + auto data1 = tdata_col{0, 0, 2, + 3, 4, 5, + 0, 0, 2, + 9, 0, 1, 3, 1, + 0, 2, 0, + 0, 0, 2, + 3, 4, 5 + + }; + auto data2 = tdata_col{10, 10, 12, + 13, 14, 15, + 10, 10, 12, + 19, 10, 11, 13, 11, + 10, 12, 10, + 10, 10, 12, + 13, 14, 15 + + }; + // clang-format on + auto structs = structs_col{{data1, data2}}; + auto child = make_lists_column(8, child_offsets.release(), structs.release(), 0, {}); + + auto offsets = indices_col{0, 4, 8}; + return make_lists_column(2, offsets.release(), std::move(child), 0, {}); + }(); + + auto const key = [] { + auto data1 =
tdata_col{0, 0, 2}; + auto data2 = tdata_col{10, 10, 12}; + auto const child = structs_col{{data1, data2}}; + return list_scalar(child); + }(); + + { + // CONTAINS + auto const result = lists::contains(lists_column_view{lists->view()}, key); + auto const expected = bools_col{1, 1}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto const result = lists::index_of(lists_column_view{lists->view()}, key, FIND_FIRST); + auto const expected = indices_col{0, 1}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto const result = lists::index_of(lists_column_view{lists->view()}, key, FIND_LAST); + auto const expected = indices_col{2, 1}; CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); } }