From ff480d62942754103cded3489e3c168e9c30073f Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 19 Jul 2024 12:15:25 -0700 Subject: [PATCH 1/3] Add missing stream param to dict factory apis --- .../cudf/dictionary/dictionary_factories.hpp | 13 +++++++++---- cpp/src/dictionary/dictionary_factories.cu | 13 +++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/cpp/include/cudf/dictionary/dictionary_factories.hpp b/cpp/include/cudf/dictionary/dictionary_factories.hpp index 7cdfa3bf9e5..21f593e1aec 100644 --- a/cpp/include/cudf/dictionary/dictionary_factories.hpp +++ b/cpp/include/cudf/dictionary/dictionary_factories.hpp @@ -87,12 +87,17 @@ std::unique_ptr make_dictionary_column( * @param indices_column Indices to use for the new dictionary column. * @param null_mask Null mask for the output column. * @param null_count Number of nulls for the output column. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. * @return New dictionary column. */ -std::unique_ptr make_dictionary_column(std::unique_ptr keys_column, - std::unique_ptr indices_column, - rmm::device_buffer&& null_mask, - size_type null_count); +std::unique_ptr make_dictionary_column( + std::unique_ptr keys_column, + std::unique_ptr indices_column, + rmm::device_buffer&& null_mask, + size_type null_count, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a dictionary column by taking ownership of the provided keys diff --git a/cpp/src/dictionary/dictionary_factories.cu b/cpp/src/dictionary/dictionary_factories.cu index 37f8fa7a05b..0617d71fa51 100644 --- a/cpp/src/dictionary/dictionary_factories.cu +++ b/cpp/src/dictionary/dictionary_factories.cu @@ -77,7 +77,9 @@ std::unique_ptr make_dictionary_column(column_view const& keys_column, std::unique_ptr make_dictionary_column(std::unique_ptr keys_column, std::unique_ptr indices_column, rmm::device_buffer&& null_mask, - size_type null_count) + size_type null_count, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(!keys_column->has_nulls(), "keys column must not have nulls"); CUDF_EXPECTS(!indices_column->has_nulls(), "indices column must not have nulls"); @@ -89,7 +91,7 @@ std::unique_ptr make_dictionary_column(std::unique_ptr keys_colu children.emplace_back(std::move(keys_column)); return std::make_unique(data_type{type_id::DICTIONARY32}, count, - rmm::device_buffer{}, + rmm::device_buffer{0, stream, mr}, std::move(null_mask), null_count, std::move(children)); @@ -134,8 +136,11 @@ std::unique_ptr make_dictionary_column(std::unique_ptr keys, auto indices_column = [&] { // If the types match, then just commandeer the column's data buffer. if (new_type.id() == indices_type) { - return std::make_unique( - new_type, indices_size, std::move(*(contents.data.release())), rmm::device_buffer{}, 0); + return std::make_unique(new_type, + indices_size, + std::move(*(contents.data.release())), + rmm::device_buffer{0, stream, mr}, + 0); } // If the new type does not match, then convert the data. cudf::column_view cast_view{ From b2c9e43c77008b711a1e11710aed26258d13c57d Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 19 Jul 2024 18:53:15 -0700 Subject: [PATCH 2/3] Add stream tests for dict factory --- cpp/tests/streams/dictionary_test.cpp | 30 +++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/cpp/tests/streams/dictionary_test.cpp b/cpp/tests/streams/dictionary_test.cpp index 9e81c8574b8..a25acba43f9 100644 --- a/cpp/tests/streams/dictionary_test.cpp +++ b/cpp/tests/streams/dictionary_test.cpp @@ -26,6 +26,36 @@ class DictionaryTest : public cudf::test::BaseFixture {}; +TEST_F(DictionaryTest, FactoryColumnViews) +{ + cudf::test::strings_column_wrapper keys({"aaa", "ccc", "ddd", "www"}); + cudf::test::fixed_width_column_wrapper values{2, 0, 3, 1, 2, 2, 2, 3, 0}; + + auto dictionary = cudf::make_dictionary_column(keys, values, cudf::test::get_default_stream()); + cudf::dictionary_column_view view(dictionary->view()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(view.keys(), keys); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(view.indices(), values); +} + +TEST_F(DictionaryTest, FactoryColumnsNullMaskCount) +{ + std::vector h_keys{"aaa", "ccc", "ddd", "www"}; + cudf::test::strings_column_wrapper keys(h_keys.begin(), h_keys.end()); + std::vector h_values{2, 0, 3, 1, 2, 2, 2, 3, 0}; + cudf::test::fixed_width_column_wrapper values(h_values.begin(), h_values.end()); + + auto dictionary = cudf::make_dictionary_column( + keys.release(), values.release(), rmm::device_buffer{}, 0, cudf::test::get_default_stream()); + cudf::dictionary_column_view view(dictionary->view()); + + cudf::test::strings_column_wrapper keys_expected(h_keys.begin(), h_keys.end()); + cudf::test::fixed_width_column_wrapper values_expected(h_values.begin(), + h_values.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(view.keys(), keys_expected); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(view.indices(), values_expected); +} + TEST_F(DictionaryTest, Encode) { cudf::test::fixed_width_column_wrapper col({1, 2, 3, 4, 5}); From 20cddb1fd146d1feebe29ddf752f22f246b22b7b Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 19 Jul 2024 19:08:38 -0700 Subject: [PATCH 3/3] Add more dictionary factory stream test --- cpp/tests/streams/dictionary_test.cpp | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/cpp/tests/streams/dictionary_test.cpp b/cpp/tests/streams/dictionary_test.cpp index a25acba43f9..03e4cf47470 100644 --- a/cpp/tests/streams/dictionary_test.cpp +++ b/cpp/tests/streams/dictionary_test.cpp @@ -29,7 +29,7 @@ class DictionaryTest : public cudf::test::BaseFixture {}; TEST_F(DictionaryTest, FactoryColumnViews) { cudf::test::strings_column_wrapper keys({"aaa", "ccc", "ddd", "www"}); - cudf::test::fixed_width_column_wrapper values{2, 0, 3, 1, 2, 2, 2, 3, 0}; + cudf::test::fixed_width_column_wrapper values{2, 0, 3, 1, 2, 2, 2, 3, 0}; auto dictionary = cudf::make_dictionary_column(keys, values, cudf::test::get_default_stream()); cudf::dictionary_column_view view(dictionary->view()); @@ -38,20 +38,36 @@ TEST_F(DictionaryTest, FactoryColumnViews) CUDF_TEST_EXPECT_COLUMNS_EQUAL(view.indices(), values); } +TEST_F(DictionaryTest, FactoryColumns) +{ + std::vector h_keys{"aaa", "ccc", "ddd", "www"}; + cudf::test::strings_column_wrapper keys(h_keys.begin(), h_keys.end()); + std::vector h_values{2, 0, 3, 1, 2, 2, 2, 3, 0}; + cudf::test::fixed_width_column_wrapper values(h_values.begin(), h_values.end()); + + auto dictionary = cudf::make_dictionary_column( + keys.release(), values.release(), cudf::test::get_default_stream()); + cudf::dictionary_column_view view(dictionary->view()); + + cudf::test::strings_column_wrapper keys_expected(h_keys.begin(), h_keys.end()); + cudf::test::fixed_width_column_wrapper values_expected(h_values.begin(), h_values.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(view.keys(), keys_expected); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(view.indices(), values_expected); +} + TEST_F(DictionaryTest, FactoryColumnsNullMaskCount) { std::vector h_keys{"aaa", "ccc", "ddd", "www"}; cudf::test::strings_column_wrapper keys(h_keys.begin(), h_keys.end()); - std::vector h_values{2, 0, 3, 1, 2, 2, 2, 3, 0}; - cudf::test::fixed_width_column_wrapper values(h_values.begin(), h_values.end()); + std::vector h_values{2, 0, 3, 1, 2, 2, 2, 3, 0}; + cudf::test::fixed_width_column_wrapper values(h_values.begin(), h_values.end()); auto dictionary = cudf::make_dictionary_column( keys.release(), values.release(), rmm::device_buffer{}, 0, cudf::test::get_default_stream()); cudf::dictionary_column_view view(dictionary->view()); cudf::test::strings_column_wrapper keys_expected(h_keys.begin(), h_keys.end()); - cudf::test::fixed_width_column_wrapper values_expected(h_values.begin(), - h_values.end()); + cudf::test::fixed_width_column_wrapper values_expected(h_values.begin(), h_values.end()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(view.keys(), keys_expected); CUDF_TEST_EXPECT_COLUMNS_EQUAL(view.indices(), values_expected); }