Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle empty results with nested types in copy_if_else #8359

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cpp/include/cudf/copying.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,14 @@ enum class mask_allocation_policy {
*/
std::unique_ptr<column> empty_like(column_view const& input);

/**
* @brief Initializes and returns an empty column of the same type as the `input`.
*
* @param input Scalar to emulate
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need [in].

* @return std::unique_ptr<column> An empty column of the same type as `input`
*/
std::unique_ptr<column> empty_like(scalar const& input);

/**
* @brief Creates an uninitialized new column of the same size and type as the `input`.
* Supports only fixed-width types.
Expand Down
83 changes: 83 additions & 0 deletions cpp/src/copying/copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/table/table.hpp>
#include <cudf/utilities/traits.hpp>

Expand All @@ -44,6 +45,79 @@ inline mask_state should_allocate_mask(mask_allocation_policy mask_alloc, bool m
}
}

/**
 * @brief Builds an empty column whose type matches that of the input scalar.
 *
 * This primary template forwards the scalar's `data_type` to
 * `cudf::make_empty_column()`. Explicit specializations of this struct cover
 * `string_view`, `list_view`, `struct_view` and `dictionary32`.
 *
 * @tparam T Type selected by the type dispatcher for the scalar
 */
template <typename T>
struct scalar_empty_like_functor_impl {
  /**
   * @param input Scalar whose type the returned column takes
   * @return Column produced by `cudf::make_empty_column()` for `input.type()`
   */
  std::unique_ptr<column> operator()(scalar const& input)
  {
    auto const type = input.type();
    return cudf::make_empty_column(type);
  }
};

/**
 * @brief String specialization: returns an empty strings column.
 *
 * The scalar argument is not inspected; the column is created by
 * `cudf::strings::detail::make_empty_strings_column()` on the default stream
 * with the current device memory resource.
 */
template <>
struct scalar_empty_like_functor_impl<cudf::string_view> {
  std::unique_ptr<column> operator()(scalar const& input)
  {
    auto const stream = rmm::cuda_stream_default;
    auto* const mr    = rmm::mr::get_current_device_resource();
    return cudf::strings::detail::make_empty_strings_column(stream, mr);
  }
};

template <>
struct scalar_empty_like_functor_impl<cudf::list_view> {
std::unique_ptr<column> operator()(scalar const& input)
{
auto ls = static_cast<list_scalar const*>(&input);

// TODO: add a manual constructor for lists_column_view.
column_view offsets{cudf::data_type{cudf::type_id::INT32}, 0, nullptr};
std::vector<column_view> children;
children.push_back(offsets);
children.push_back(ls->view());
column_view lcv{cudf::data_type{cudf::type_id::LIST}, 0, nullptr, nullptr, 0, 0, children};

return empty_like(lcv);
}
};

template <>
struct scalar_empty_like_functor_impl<cudf::struct_view> {
std::unique_ptr<column> operator()(scalar const& input)
{
auto ss = static_cast<struct_scalar const*>(&input);

// TODO: add a manual constructor for structs_column_view
// TODO: add cudf::get_element() support for structs
cudf::table_view tbl = ss->view();
std::vector<column_view> children(tbl.begin(), tbl.end());
column_view scv{cudf::data_type{cudf::type_id::STRUCT}, 0, nullptr, nullptr, 0, 0, children};

return empty_like(scv);
}
};

/**
 * @brief Dictionary specialization: not supported.
 *
 * Every call reports "Dictionary scalars not supported" through `CUDF_FAIL`;
 * no column is built.
 */
template <>
struct scalar_empty_like_functor_impl<cudf::dictionary32> {
std::unique_ptr<column> operator()(scalar const& input)
{
CUDF_FAIL("Dictionary scalars not supported");
}
};

/**
 * @brief Type-dispatched entry point that produces an empty column matching a scalar.
 *
 * The dispatched type `T` selects the matching
 * `scalar_empty_like_functor_impl<T>` specialization, which does the work.
 */
struct scalar_empty_like_functor {
  /**
   * @tparam T Type chosen by the type dispatcher for `input`
   * @param input Scalar whose type the returned column takes
   * @return Result of `scalar_empty_like_functor_impl<T>` applied to `input`
   */
  template <typename T>
  std::unique_ptr<column> operator()(scalar const& input)
  {
    scalar_empty_like_functor_impl<T> func;
    return func(input);
  }
};

} // namespace

/*
Expand Down Expand Up @@ -91,6 +165,15 @@ std::unique_ptr<column> empty_like(column_view const& input)
input.type(), 0, rmm::device_buffer{}, rmm::device_buffer{}, 0, std::move(children));
}

/*
 * Initializes and returns an empty column of the same type as the `input`.
 *
 * Dispatches on the scalar's type to detail::scalar_empty_like_functor, which
 * picks the implementation for that type.
 */
std::unique_ptr<column> empty_like(scalar const& input)
{
  CUDF_FUNC_RANGE();
  return type_dispatcher(input.type(), detail::scalar_empty_like_functor{}, input);
}

/*
* Creates a table of empty columns with the same types as the `input_table`
*/
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/copying/copy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ std::unique_ptr<column> copy_if_else(Left const& lhs,
CUDF_EXPECTS(boolean_mask.type() == data_type(type_id::BOOL8),
"Boolean mask column must be of type type_id::BOOL8");

if (boolean_mask.is_empty()) { return cudf::make_empty_column(lhs.type()); }
if (boolean_mask.is_empty()) { return cudf::empty_like(lhs); }

auto bool_mask_device_p = column_device_view::create(boolean_mask);
column_device_view bool_mask_device = *bool_mask_device_p;
Expand Down
79 changes: 79 additions & 0 deletions cpp/tests/copying/copy_tests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,85 @@ TYPED_TEST(CopyTest, CopyIfElseBadInputLength)
}
}

// Fixture for copy_if_else() tests on empty inputs of nested (list/struct) type.
struct CopyEmptyNested : cudf::test::BaseFixture {};

// copy_if_else() on empty nested columns with an empty mask must return a column
// equal to cudf::empty_like() of the nested input.
TEST_F(CopyEmptyNested, CopyIfElseTestEmptyNestedColumns)
{
  // lists
  {
    cudf::test::lists_column_wrapper<cudf::string_view> col{{{"abc", "def"}, {"xyz"}}};
    auto lhs = cudf::empty_like(col);
    auto rhs = cudf::empty_like(col);
    cudf::test::fixed_width_column_wrapper<bool> mask{};

    // Qualified explicitly so the call does not rely on argument-dependent lookup.
    auto expected = cudf::empty_like(col);

    auto out = cudf::copy_if_else(*lhs, *rhs, mask);
    CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), *expected);
  }

  // structs
  {
    cudf::test::lists_column_wrapper<cudf::string_view> _col0{{{"abc", "def"}, {"xyz"}}};
    auto col0 = cudf::empty_like(_col0);
    cudf::test::fixed_width_column_wrapper<int> col1;

    // Struct column whose children are an empty list column and an empty int column.
    std::vector<std::unique_ptr<cudf::column>> cols;
    cols.push_back(std::move(col0));
    cols.push_back(col1.release());
    cudf::test::structs_column_wrapper struct_col(std::move(cols));
    auto lhs = cudf::empty_like(struct_col);
    auto rhs = cudf::empty_like(struct_col);

    cudf::test::fixed_width_column_wrapper<bool> mask{};

    auto expected = cudf::empty_like(struct_col);

    auto out = cudf::copy_if_else(*lhs, *rhs, mask);
    CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), *expected);
  }
}

// copy_if_else() on nested scalars with an empty mask must return a column equal
// to cudf::empty_like() of a column with the same nested type.
TEST_F(CopyEmptyNested, CopyIfElseTestEmptyNestedScalars)
{
  // lists
  {
    cudf::test::lists_column_wrapper<cudf::string_view> _col{{{"abc", "def"}, {"xyz"}}};
    std::unique_ptr<cudf::scalar> lhs = cudf::get_element(_col, 0);
    std::unique_ptr<cudf::scalar> rhs = cudf::get_element(_col, 0);

    cudf::test::fixed_width_column_wrapper<bool> mask{};

    // Qualified explicitly so the call does not rely on argument-dependent lookup.
    auto expected = cudf::empty_like(_col);

    auto out = cudf::copy_if_else(*lhs, *rhs, mask);
    CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), *expected);
  }

  // structs
  {
    cudf::test::lists_column_wrapper<cudf::string_view> col0{{{"abc", "def"}, {"xyz"}}};
    cudf::test::fixed_width_column_wrapper<int> col1{1};

    // The struct scalars are built from the table view before the wrappers are released below.
    cudf::table_view tbl({col0, col1});
    cudf::struct_scalar lhs(tbl);
    cudf::struct_scalar rhs(tbl);

    std::vector<std::unique_ptr<cudf::column>> cols;
    cols.push_back(col0.release());
    cols.push_back(col1.release());
    cudf::test::structs_column_wrapper struct_col(std::move(cols));

    cudf::test::fixed_width_column_wrapper<bool> mask{};

    auto expected = cudf::empty_like(struct_col);

    auto out = cudf::copy_if_else(lhs, rhs, mask);
    CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), *expected);
  }
}

template <typename T>
struct CopyTestNumeric : public cudf::test::BaseFixture {
};
Expand Down
78 changes: 78 additions & 0 deletions cpp/tests/copying/utility_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,84 @@ TEST_F(EmptyLikeStringTest, ColumnStringTest)
check_empty_string_columns(got->view(), strings);
}

// Typed fixture for empty_like(scalar) tests; instantiated over the fixed-width types.
template <typename T>
struct EmptyLikeScalarTest : cudf::test::BaseFixture {};

TYPED_TEST_CASE(EmptyLikeScalarTest, cudf::test::FixedWidthTypes);

// empty_like(scalar) must be equivalent to empty_like(column) for each fixed-width type.
TYPED_TEST(EmptyLikeScalarTest, FixedWidth)
{
  // One-row column of the type under test, used as the source of the scalar.
  cudf::data_type const dtype{cudf::type_to_id<TypeParam>()};
  auto source = make_fixed_width_column(dtype, 1, rmm::device_buffer{});

  // Pull the single element out as a scalar.
  auto const element = cudf::get_element(source->view(), 0);

  // Reference result comes from the column overload; result under test from the scalar overload.
  auto const from_column = cudf::empty_like(source->view());
  auto const from_scalar = cudf::empty_like(*element);
  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(from_column->view(), from_scalar->view());
}

// Fixture for the string-scalar case of empty_like(scalar).
struct EmptyLikeScalarStringTest : EmptyLikeScalarTest<std::string> {};

// empty_like(scalar) must be equivalent to empty_like(column) for strings.
TEST_F(EmptyLikeScalarStringTest, String)
{
  // One-row strings column, used as the source of the scalar.
  cudf::test::strings_column_wrapper source{"abc"};

  // Pull the single element out as a scalar.
  auto const element = cudf::get_element(source, 0);

  // Reference result comes from the column overload; result under test from the scalar overload.
  auto const from_column = cudf::empty_like(source);
  auto const from_scalar = cudf::empty_like(*element);
  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(from_column->view(), from_scalar->view());
}

// Fixture for the list-scalar case of empty_like(scalar).
struct EmptyLikeScalarListTest : EmptyLikeScalarTest<cudf::list_view> {};

// empty_like(scalar) must be equivalent to empty_like(column) for nested lists.
TEST_F(EmptyLikeScalarListTest, List)
{
  // Two-row column of lists of lists of strings, used as the source of the scalar.
  cudf::test::lists_column_wrapper<cudf::string_view> source{{{"abc", "def"}, {"h", "ijk"}},
                                                             {{"123", "456"}, {"78"}}};

  // Pull the first row out as a scalar.
  auto const element = cudf::get_element(source, 0);

  // Reference result comes from the column overload; result under test from the scalar overload.
  auto const from_column = cudf::empty_like(source);
  auto const from_scalar = cudf::empty_like(*element);
  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(from_column->view(), from_scalar->view());
}

// Fixture for the struct-scalar case of empty_like(scalar).
struct EmptyLikeScalarStructTest : EmptyLikeScalarTest<cudf::struct_view> {};

// empty_like(scalar) must be equivalent to empty_like(column) for structs.
TEST_F(EmptyLikeScalarStructTest, Struct)
{
  // Member columns: a nested list, a string and a float, one row each.
  cudf::test::lists_column_wrapper<cudf::string_view> list_member{
    {{"abc", "def"}, {"h", "ijk"}}};
  cudf::test::strings_column_wrapper string_member{"abc"};
  cudf::test::fixed_width_column_wrapper<float> float_member{1.0f};

  // Scalar built directly from the members.
  // TODO: make cudf::get_element() work for struct scalars
  cudf::table_view members({list_member, string_member, float_member});
  cudf::struct_scalar element(members);

  // Struct column built from the same members, used for the reference result.
  cudf::test::structs_column_wrapper source({list_member, string_member, float_member});

  // Reference result comes from the column overload; result under test from the scalar overload.
  auto const from_column = cudf::empty_like(source);
  auto const from_scalar = cudf::empty_like(element);
  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(from_column->view(), from_scalar->view());
}

std::unique_ptr<cudf::table> create_table(cudf::size_type size, cudf::mask_state state)
{
auto num_column_1 = make_numeric_column(cudf::data_type{cudf::type_id::INT64}, size, state);
Expand Down