Skip to content

Commit

Permalink
Handle empty results with nested types in copy_if_else (#8359)
Browse files Browse the repository at this point in the history
Fixes:  #8322

The code was calling `make_empty_column()` to produce empty results, which does not work for nested types. The fix is to use `empty_like()` instead.

As part of this, I implemented a new public function:

`std::unique_ptr<column> empty_like(scalar const& input);`

Authors:
  - https://github.com/nvdbaranec

Approvers:
  - Jake Hemstad (https://github.com/jrhemstad)
  - Nghia Truong (https://github.com/ttnghia)
  - Paul Taylor (https://github.com/trxcllnt)
  - MithunR (https://github.com/mythrocks)

URL: #8359
  • Loading branch information
nvdbaranec authored May 27, 2021
1 parent 4d1a62e commit 3ee8893
Show file tree
Hide file tree
Showing 5 changed files with 249 additions and 1 deletion.
8 changes: 8 additions & 0 deletions cpp/include/cudf/copying.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,14 @@ enum class mask_allocation_policy {
*/
std::unique_ptr<column> empty_like(column_view const& input);

/**
 * @brief Initializes and returns an empty column of the same type as the `input`.
 *
 * For nested types (lists, structs) the full column hierarchy of the scalar is
 * preserved in the returned column. Dictionary scalars are not supported and
 * cause the call to fail.
 *
 * @param[in] input Scalar to emulate
 * @return std::unique_ptr<column> An empty column of same type as `input`
 */
std::unique_ptr<column> empty_like(scalar const& input);

/**
* @brief Creates an uninitialized new column of the same size and type as the `input`.
* Supports only fixed-width types.
Expand Down
83 changes: 83 additions & 0 deletions cpp/src/copying/copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/table/table.hpp>
#include <cudf/utilities/traits.hpp>

Expand All @@ -44,6 +45,79 @@ inline mask_state should_allocate_mask(mask_allocation_policy mask_alloc, bool m
}
}

/**
 * @brief Per-type implementation that builds an empty column matching the
 * type of a scalar.
 *
 * This primary template is the fallback used when no specialization exists
 * for `T`: it asks `make_empty_column()` for an empty column of the scalar's
 * `data_type`. Specializations handle types that need more than a bare
 * `data_type` (e.g. nested types, whose column hierarchy must be preserved).
 *
 * @tparam T Element type selected by the type dispatcher
 */
template <typename T>
struct scalar_empty_like_functor_impl {
  /**
   * @param input Scalar whose type the result should have
   * @return An empty column of type `input.type()`
   */
  std::unique_ptr<column> operator()(scalar const& input)
  {
    auto const dtype = input.type();
    return cudf::make_empty_column(dtype);
  }
};

/**
 * @brief String specialization: produces an empty strings column.
 *
 * The scalar itself is not inspected; the result is built on the default
 * stream using the current device memory resource.
 */
template <>
struct scalar_empty_like_functor_impl<cudf::string_view> {
  std::unique_ptr<column> operator()(scalar const& input)
  {
    auto const stream = rmm::cuda_stream_default;
    auto* const mr    = rmm::mr::get_current_device_resource();
    return cudf::strings::detail::make_empty_strings_column(stream, mr);
  }
};

template <>
struct scalar_empty_like_functor_impl<cudf::list_view> {
std::unique_ptr<column> operator()(scalar const& input)
{
auto ls = static_cast<list_scalar const*>(&input);

// TODO: add a manual constructor for lists_column_view.
column_view offsets{cudf::data_type{cudf::type_id::INT32}, 0, nullptr};
std::vector<column_view> children;
children.push_back(offsets);
children.push_back(ls->view());
column_view lcv{cudf::data_type{cudf::type_id::LIST}, 0, nullptr, nullptr, 0, 0, children};

return empty_like(lcv);
}
};

template <>
struct scalar_empty_like_functor_impl<cudf::struct_view> {
std::unique_ptr<column> operator()(scalar const& input)
{
auto ss = static_cast<struct_scalar const*>(&input);

// TODO: add a manual constructor for structs_column_view
// TODO: add cudf::get_element() support for structs
cudf::table_view tbl = ss->view();
std::vector<column_view> children(tbl.begin(), tbl.end());
column_view scv{cudf::data_type{cudf::type_id::STRUCT}, 0, nullptr, nullptr, 0, 0, children};

return empty_like(scv);
}
};

/**
 * @brief Dictionary specialization: always fails.
 *
 * Dictionary scalars are not supported, so the call is rejected outright
 * through `CUDF_FAIL` rather than returning a column.
 */
template <>
struct scalar_empty_like_functor_impl<cudf::dictionary32> {
  std::unique_ptr<column> operator()(scalar const& input)
  {
    // Unconditional failure; `input` is intentionally not examined.
    CUDF_FAIL("Dictionary scalars not supported");
  }
};

struct scalar_empty_like_functor {
template <typename T>
std::unique_ptr<column> operator()(scalar const& input)
{
scalar_empty_like_functor_impl<T> func;
return func(input);
}
};

} // namespace

/*
Expand Down Expand Up @@ -91,6 +165,15 @@ std::unique_ptr<column> empty_like(column_view const& input)
input.type(), 0, rmm::device_buffer{}, rmm::device_buffer{}, 0, std::move(children));
}

/*
 * Initializes and returns an empty column of the same type as the `input`.
 *
 * Dispatches on the scalar's data type to `detail::scalar_empty_like_functor`,
 * which builds the matching empty column for that type.
 */
std::unique_ptr<column> empty_like(scalar const& input)
{
  CUDF_FUNC_RANGE();
  return type_dispatcher(input.type(), detail::scalar_empty_like_functor{}, input);
}

/*
* Creates a table of empty columns with the same types as the `input_table`
*/
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/copying/copy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ std::unique_ptr<column> copy_if_else(Left const& lhs,
CUDF_EXPECTS(boolean_mask.type() == data_type(type_id::BOOL8),
"Boolean mask column must be of type type_id::BOOL8");

if (boolean_mask.is_empty()) { return cudf::make_empty_column(lhs.type()); }
if (boolean_mask.is_empty()) { return cudf::empty_like(lhs); }

auto bool_mask_device_p = column_device_view::create(boolean_mask);
column_device_view bool_mask_device = *bool_mask_device_p;
Expand Down
79 changes: 79 additions & 0 deletions cpp/tests/copying/copy_tests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,85 @@ TYPED_TEST(CopyTest, CopyIfElseBadInputLength)
}
}

// Fixture for the copy_if_else tests that use an empty boolean mask with nested-type inputs.
struct CopyEmptyNested : public cudf::test::BaseFixture {};

// With an empty boolean mask and empty nested-type column inputs, copy_if_else() is expected to
// return a column equal to empty_like() of the nested source column.
TEST_F(CopyEmptyNested, CopyIfElseTestEmptyNestedColumns)
{
  // lists
  {
    cudf::test::lists_column_wrapper<cudf::string_view> col{{{"abc", "def"}, {"xyz"}}};
    auto lhs = cudf::empty_like(col);
    auto rhs = cudf::empty_like(col);
    cudf::test::fixed_width_column_wrapper<bool> mask{};

    // Explicitly qualified, like every other call here, instead of relying on ADL.
    auto expected = cudf::empty_like(col);

    auto out = cudf::copy_if_else(*lhs, *rhs, mask);
    CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), *expected);
  }

  // structs
  {
    // Children of the struct: an empty list-of-strings column and an empty int column.
    cudf::test::lists_column_wrapper<cudf::string_view> _col0{{{"abc", "def"}, {"xyz"}}};
    auto col0 = cudf::empty_like(_col0);
    cudf::test::fixed_width_column_wrapper<int> col1;

    std::vector<std::unique_ptr<cudf::column>> cols;
    cols.push_back(std::move(col0));
    cols.push_back(col1.release());
    cudf::test::structs_column_wrapper struct_col(std::move(cols));
    auto lhs = cudf::empty_like(struct_col);
    auto rhs = cudf::empty_like(struct_col);

    cudf::test::fixed_width_column_wrapper<bool> mask{};

    auto expected = cudf::empty_like(struct_col);

    auto out = cudf::copy_if_else(*lhs, *rhs, mask);
    CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), *expected);
  }
}

// With an empty boolean mask and nested-type scalar inputs, copy_if_else() is expected to return
// a column equal to empty_like() of a column with the same nested type.
TEST_F(CopyEmptyNested, CopyIfElseTestEmptyNestedScalars)
{
  // lists
  {
    cudf::test::lists_column_wrapper<cudf::string_view> _col{{{"abc", "def"}, {"xyz"}}};
    std::unique_ptr<cudf::scalar> lhs = cudf::get_element(_col, 0);
    std::unique_ptr<cudf::scalar> rhs = cudf::get_element(_col, 0);

    cudf::test::fixed_width_column_wrapper<bool> mask{};

    // Explicitly qualified, like every other call here, instead of relying on ADL.
    auto expected = cudf::empty_like(_col);

    auto out = cudf::copy_if_else(*lhs, *rhs, mask);
    CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), *expected);
  }

  // structs
  {
    cudf::test::lists_column_wrapper<cudf::string_view> col0{{{"abc", "def"}, {"xyz"}}};
    cudf::test::fixed_width_column_wrapper<int> col1{1};

    // The struct scalars are built from views of col0/col1 before those wrappers are released
    // below to form the reference struct column.
    cudf::table_view tbl({col0, col1});
    cudf::struct_scalar lhs(tbl);
    cudf::struct_scalar rhs(tbl);

    std::vector<std::unique_ptr<cudf::column>> cols;
    cols.push_back(col0.release());
    cols.push_back(col1.release());
    cudf::test::structs_column_wrapper struct_col(std::move(cols));

    cudf::test::fixed_width_column_wrapper<bool> mask{};

    auto expected = cudf::empty_like(struct_col);

    auto out = cudf::copy_if_else(lhs, rhs, mask);
    CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), *expected);
  }
}

// Typed test fixture templated on the element type T; derives from the shared cudf test base.
// NOTE(review): presumably instantiated for numeric types only (per its name) — confirm.
template <typename T>
struct CopyTestNumeric : public cudf::test::BaseFixture {};
Expand Down
78 changes: 78 additions & 0 deletions cpp/tests/copying/utility_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,84 @@ TEST_F(EmptyLikeStringTest, ColumnStringTest)
check_empty_string_columns(got->view(), strings);
}

// Typed fixture for the empty_like(scalar) tests; registered below so that each typed test runs
// once per type in cudf::test::FixedWidthTypes.
template <typename T>
struct EmptyLikeScalarTest : public cudf::test::BaseFixture {};

TYPED_TEST_CASE(EmptyLikeScalarTest, cudf::test::FixedWidthTypes);

// empty_like(scalar) must be equivalent to empty_like(column) for every fixed-width type.
TYPED_TEST(EmptyLikeScalarTest, FixedWidth)
{
  // A one-row column of the type under test ...
  auto const dtype = cudf::data_type{cudf::type_to_id<TypeParam>()};
  auto source      = make_fixed_width_column(dtype, 1, rmm::device_buffer{});
  // ... and a scalar taken from its only row.
  std::unique_ptr<cudf::scalar> elem = cudf::get_element(*source, 0);

  // Reference result: empty_like applied to the column.
  auto from_column = cudf::empty_like(*source);
  // Result under test: empty_like applied to the scalar.
  auto from_scalar = cudf::empty_like(*elem);
  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*from_column, *from_scalar);
}

struct EmptyLikeScalarStringTest : public EmptyLikeScalarTest<std::string> {
};

TEST_F(EmptyLikeScalarStringTest, String)
{
// make a column
cudf::test::strings_column_wrapper input{"abc"};

// get a scalar out of it
std::unique_ptr<cudf::scalar> sc = cudf::get_element(input, 0);

// empty_like(column) -> column
auto expected = cudf::empty_like(input);
// empty_like(scalar) -> column
auto result = cudf::empty_like(*sc);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *result);
}

struct EmptyLikeScalarListTest : public EmptyLikeScalarTest<cudf::list_view> {
};

TEST_F(EmptyLikeScalarListTest, List)
{
// make a column
cudf::test::lists_column_wrapper<cudf::string_view> input{{{"abc", "def"}, {"h", "ijk"}},
{{"123", "456"}, {"78"}}};
// get a scalar out of it
std::unique_ptr<cudf::scalar> sc = cudf::get_element(input, 0);

// empty_like(column) -> column
auto expected = cudf::empty_like(input);
// empty_like(scalar) -> column
auto result = cudf::empty_like(*sc);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *result);
}

struct EmptyLikeScalarStructTest : public EmptyLikeScalarTest<cudf::struct_view> {
};

TEST_F(EmptyLikeScalarStructTest, Struct)
{
cudf::test::lists_column_wrapper<cudf::string_view> col0{{{"abc", "def"}, {"h", "ijk"}}};
cudf::test::strings_column_wrapper col1{"abc"};
cudf::test::fixed_width_column_wrapper<float> col2{1.0f};
// scalar. TODO: make cudf::get_element() work for struct scalars
cudf::table_view tbl({col0, col1, col2});
cudf::struct_scalar sc(tbl);
// column
cudf::test::structs_column_wrapper input({col0, col1, col2});

// empty_like(column) -> column
auto expected = cudf::empty_like(input);
// empty_like(scalar) -> column
auto result = cudf::empty_like(sc);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *result);
}

std::unique_ptr<cudf::table> create_table(cudf::size_type size, cudf::mask_state state)
{
auto num_column_1 = make_numeric_column(cudf::data_type{cudf::type_id::INT64}, size, state);
Expand Down

0 comments on commit 3ee8893

Please sign in to comment.