Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nested scalar support for copy if else #8588

Merged
merged 43 commits into from
Jul 20, 2021
Merged
Show file tree
Hide file tree
Changes from 42 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
eab79ba
wip
gerashegalov Jun 18, 2021
e8e3745
Merge remote-tracking branch 'origin/branch-21.08' into copy_if_else_…
gerashegalov Jun 18, 2021
02d8798
Merge remote-tracking branch 'origin/branch-21.08' into copy_if_else_…
gerashegalov Jun 19, 2021
beb787d
wip
gerashegalov Jun 19, 2021
3c5134e
fix debug build
gerashegalov Jun 19, 2021
8afd0b9
Merge branch 'assert-8564' into copy_if_else_8361
gerashegalov Jun 19, 2021
dd736bd
wip
gerashegalov Jun 21, 2021
171a40d
wip
gerashegalov Jun 21, 2021
f954da7
wip
gerashegalov Jun 21, 2021
f418fb7
wip
gerashegalov Jun 21, 2021
e643550
Merge remote-tracking branch 'origin/branch-21.08' into copy_if_else_…
gerashegalov Jun 21, 2021
432690f
wip
gerashegalov Jun 21, 2021
a1c2888
wip
gerashegalov Jun 22, 2021
addc3b5
wip
gerashegalov Jun 22, 2021
9190192
wip
gerashegalov Jun 22, 2021
c296a9c
wip
gerashegalov Jun 22, 2021
c1dd5aa
wip
gerashegalov Jun 22, 2021
d74e0df
wip
gerashegalov Jun 22, 2021
77ec81c
left scalar test
gerashegalov Jun 22, 2021
02347a4
right scalar test
gerashegalov Jun 23, 2021
d09b968
scalar init debug
gerashegalov Jun 23, 2021
4e22735
stop using freed memory
gerashegalov Jun 23, 2021
de526bb
list scalar test
gerashegalov Jun 24, 2021
3c0681e
more list scalar tests
gerashegalov Jun 24, 2021
bfcfc0a
review
gerashegalov Jun 26, 2021
c2a74bc
remove nop variable references
gerashegalov Jun 29, 2021
ae8f9c3
struct cols with nulls test
gerashegalov Jun 30, 2021
848fec0
ScalarListLeft nulls
gerashegalov Jun 30, 2021
3220b3b
ScalarListLeft nulls
gerashegalov Jun 30, 2021
3243144
ScalarListRight nulls
gerashegalov Jun 30, 2021
d983e93
ScalarListBothInvalid
gerashegalov Jun 30, 2021
c2d8dbd
review feedback
gerashegalov Jul 2, 2021
f349392
Merge remote-tracking branch 'origin/branch-21.08' into copy_if_else_…
gerashegalov Jul 8, 2021
79e58c9
repro
gerashegalov Jul 8, 2021
6ead5d3
repro2
gerashegalov Jul 8, 2021
11c4eba
wip
gerashegalov Jul 8, 2021
f072c09
test fix
gerashegalov Jul 8, 2021
6d1cff9
scalar scatter for copy_if_else (#1)
gerashegalov Jul 9, 2021
e44eef8
Merge remote-tracking branch 'origin/branch-21.08' into copy_if_else_…
gerashegalov Jul 9, 2021
a3062a3
all_nulls
gerashegalov Jul 9, 2021
4d88ec8
review
gerashegalov Jul 14, 2021
ec88013
review
gerashegalov Jul 15, 2021
40cfcf4
return doxygen
gerashegalov Jul 15, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 88 additions & 66 deletions cpp/src/copying/copy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -143,37 +143,13 @@ struct copy_if_else_functor_impl<string_view> {
}
};

/**
* @brief Functor to generate gather-map for LHS column
*
* If specified `Predicate` evaluates to `true` for index `i`,
* gather map must contain `i` (to select LHS[i]).
* If false, gather map must have `null_index`, so that a null
* is gathered in its place.
*/
template <typename Predicate>
class lhs_gather_map_functor {
public:
lhs_gather_map_functor(Predicate predicate, size_type null_index)
: _pred(predicate), _null_index(null_index)
{
}

size_type __device__ operator()(size_type i) const { return _pred(i) ? i : _null_index; }

private:
Predicate _pred;
size_type _null_index;
};

/**
* @brief Adapter to negate predicates.
*/
template <typename Predicate>
class logical_not {
public:
explicit logical_not(Predicate predicate) : _pred{predicate} {}

bool __device__ operator()(size_type i) const { return not _pred(i); }

private:
Expand All @@ -183,55 +159,101 @@ class logical_not {
/**
* @brief Implementation of copy_if_else() with gather()/scatter().
*
* Currently supports only nested-type column_views. Scalars are not supported.
* Handles nested-typed column views. Uses the iterator `is_left` to decide what row to pick for
* the output column.
*
* Uses `rhs` as the destination for scatter. First gathers indices of rows to copy from lhs.
*
* @tparam Filter Bool iterator producing `true` for indices of output rows to copy from `lhs` and
* `false` for indices of output rows to copy from `rhs`
* @param lhs Left-hand side input column view
* @param rhs Right-hand side input column view
* @param size The size of the output column; input rows are iterated from 0 to `size - 1`
* @param is_left Predicate for picking rows from `lhs` on `true` or `rhs` on `false`
* @param stream The stream on which to perform the allocation
* @param mr The resource used to allocate the device storage
gerashegalov marked this conversation as resolved.
Show resolved Hide resolved
*/
template <typename Left, typename Right, typename Filter>
std::unique_ptr<column> scatter_gather_based_if_else(Left const& lhs,
Right const& rhs,
template <typename Filter>
std::unique_ptr<column> scatter_gather_based_if_else(cudf::column_view const& lhs,
gerashegalov marked this conversation as resolved.
Show resolved Hide resolved
cudf::column_view const& rhs,
size_type size,
Filter is_left,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
if constexpr (std::is_same<Left, cudf::column_view>::value &&
std::is_same<Right, cudf::column_view>::value) {
auto scatter_map_rhs = rmm::device_uvector<size_type>{static_cast<std::size_t>(size), stream};
auto const scatter_map_end = thrust::copy_if(rmm::exec_policy(stream),
thrust::make_counting_iterator(size_type{0}),
thrust::make_counting_iterator(size_type{size}),
scatter_map_rhs.begin(),
logical_not{is_left});

auto const scatter_src_rhs = cudf::detail::gather(table_view{std::vector<column_view>{rhs}},
scatter_map_rhs.begin(),
scatter_map_end,
out_of_bounds_policy::DONT_CHECK,
stream);

auto result = cudf::detail::scatter(
table_view{std::vector<column_view>{scatter_src_rhs->get_column(0).view()}},
scatter_map_rhs.begin(),
scatter_map_end,
table_view{std::vector<column_view>{lhs}},
false,
stream,
mr);

return std::move(result->release()[0]);
}
auto scatter_map = rmm::device_uvector<size_type>{static_cast<std::size_t>(size), stream};
auto const scatter_map_end = thrust::copy_if(rmm::exec_policy(stream),
thrust::make_counting_iterator(size_type{0}),
thrust::make_counting_iterator(size_type{size}),
scatter_map.begin(),
is_left);

auto const scatter_src_lhs = cudf::detail::gather(table_view{std::vector<column_view>{lhs}},
gerashegalov marked this conversation as resolved.
Show resolved Hide resolved
scatter_map.begin(),
scatter_map_end,
out_of_bounds_policy::DONT_CHECK,
stream);

auto result = cudf::detail::scatter(
table_view{std::vector<column_view>{scatter_src_lhs->get_column(0).view()}},
scatter_map.begin(),
scatter_map_end,
table_view{std::vector<column_view>{rhs}},
false,
stream,
mr);

return std::move(result->release()[0]);
nvdbaranec marked this conversation as resolved.
Show resolved Hide resolved
}

// Bail out for Scalars.
// For nested types, scatter/gather-based copy_if_else() is not currently supported
// if either `lhs` or `rhs` is a scalar, partially because:
// 1. Struct scalars are not yet available.
// 2. List scalars do not yet support explosion to a full column.
CUDF_FAIL("Scalars of nested types are not currently supported!");
(void)lhs;
(void)rhs;
(void)size;
(void)is_left;
(void)stream;
(void)mr;
/**
 * @brief Implementation of copy_if_else() with scatter() for a scalar `lhs`
 * and a column `rhs`.
 *
 * Collects every index in `[0, size)` for which `is_left` is `true` into a
 * scatter map, then scatters `lhs` into those rows using `rhs` as the scatter
 * target.
 *
 * @tparam Filter Predicate producing `true` for indices of output rows to take
 * from `lhs` and `false` for indices of output rows to take from `rhs`
 * @param lhs Left-hand side scalar
 * @param rhs Right-hand side input column view, used as the scatter target
 * @param size The size of the output column; indices 0 to `size - 1` are tested
 * @param is_left Predicate for picking `lhs` on `true` or the `rhs` row on `false`
 * @param stream The stream on which all device work is enqueued
 * @param mr The resource used to allocate the returned column's device storage
 * @return The first (and only) column of the scatter result
 */
template <typename Filter>
std::unique_ptr<column> scatter_gather_based_if_else(cudf::scalar const& lhs,
                                                     cudf::column_view const& rhs,
                                                     size_type size,
                                                     Filter is_left,
                                                     rmm::cuda_stream_view stream,
                                                     rmm::mr::device_memory_resource* mr)
{
  // Worst case every row is taken from `lhs`, so reserve `size` entries.
  auto scatter_map = rmm::device_uvector<size_type>{static_cast<std::size_t>(size), stream};
  auto const scatter_map_end = thrust::copy_if(rmm::exec_policy(stream),
                                               thrust::make_counting_iterator(size_type{0}),
                                               thrust::make_counting_iterator(size_type{size}),
                                               scatter_map.begin(),
                                               is_left);

  auto const scatter_map_size = std::distance(scatter_map.begin(), scatter_map_end);
  auto scatter_source = std::vector<std::reference_wrapper<const scalar>>{std::ref(lhs)};

  // Non-owning view over the populated prefix of `scatter_map`; the buffer must
  // outlive the scatter call below.
  // NOTE(review): assumes `size_type` has the INT32 layout -- confirm.
  auto scatter_map_column_view = cudf::column_view{cudf::data_type{cudf::type_id::INT32},
                                                   static_cast<cudf::size_type>(scatter_map_size),
                                                   scatter_map.begin()};

  // Use the detail API so the scatter is enqueued on `stream`, the same stream
  // that produced `scatter_map`, instead of whatever stream the public API uses.
  auto result = cudf::detail::scatter(scatter_source,
                                      scatter_map_column_view,
                                      table_view{std::vector<column_view>{rhs}},
                                      false,
                                      stream,
                                      mr);

  return std::move(result->release()[0]);
}

/**
 * @brief Implementation of copy_if_else() for a column `lhs` and a scalar `rhs`.
 *
 * Delegates to the (scalar, column) overload with the operands swapped and the
 * filter negated, so the scalar is written wherever `is_left` is `false`.
 *
 * @tparam Filter Predicate producing `true` for indices of output rows to take
 * from `lhs` and `false` for indices of output rows to take from `rhs`
 * @param lhs Left-hand side input column view
 * @param rhs Right-hand side scalar
 * @param size The size of the output column
 * @param is_left Predicate for picking the `lhs` row on `true` or `rhs` on `false`
 * @param stream Forwarded unchanged to the delegated overload
 * @param mr Forwarded unchanged to the delegated overload
 * @return The column produced by the delegated overload
 */
template <typename Filter>
std::unique_ptr<column> scatter_gather_based_if_else(cudf::column_view const& lhs,
                                                     cudf::scalar const& rhs,
                                                     size_type size,
                                                     Filter is_left,
                                                     rmm::cuda_stream_view stream,
                                                     rmm::mr::device_memory_resource* mr)
{
  // After swapping the operands the scalar plays the "left" role, hence the negation.
  auto const is_right = logical_not{is_left};
  return scatter_gather_based_if_else(rhs, lhs, size, is_right, stream, mr);
}

/**
 * @brief Implementation of copy_if_else() for a scalar `lhs` and a scalar `rhs`.
 *
 * Expands `rhs` into a column of `size` rows and delegates to the
 * (scalar, column) overload.
 *
 * @tparam Filter Predicate producing `true` for indices of output rows to take
 * from `lhs` and `false` for indices of output rows to take from `rhs`
 * @param lhs Left-hand side scalar
 * @param rhs Right-hand side scalar
 * @param size The size of the output column
 * @param is_left Predicate for picking `lhs` on `true` or `rhs` on `false`
 * @param stream Passed to both the column expansion and the delegated overload
 * @param mr Passed to both the column expansion and the delegated overload
 * @return The column produced by the delegated overload
 */
template <typename Filter>
std::unique_ptr<column> scatter_gather_based_if_else(cudf::scalar const& lhs,
                                                     cudf::scalar const& rhs,
                                                     size_type size,
                                                     Filter is_left,
                                                     rmm::cuda_stream_view stream,
                                                     rmm::mr::device_memory_resource* mr)
{
  // NOTE(review): this temporary column is allocated from `mr`, the same
  // resource used for the returned column -- confirm that is intended.
  auto const rhs_column = cudf::make_column_from_scalar(rhs, size, stream, mr);
  auto const rhs_view   = rhs_column->view();
  return scatter_gather_based_if_else(lhs, rhs_view, size, is_left, stream, mr);
}

/**
Expand Down
Loading