Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Superimpose null masks for STRUCT columns. #9144

Merged
merged 7 commits into from
Sep 2, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 115 additions & 0 deletions cpp/src/structs/utilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@
#include <cudf/unary.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/span.hpp>
#include <iterator>
#include <structs/utilities.hpp>
#include "cudf/types.hpp"
#include "cudf/utilities/traits.hpp"

#include <bitset>
mythrocks marked this conversation as resolved.
Show resolved Hide resolved

namespace cudf {
namespace structs {
Expand Down Expand Up @@ -337,6 +342,116 @@ void superimpose_parent_nulls(bitmask_type const* parent_null_mask,
}
}

namespace {

/**
 * @brief Type-dispatched functor that hands back a column_view's `head()` pointer as `void*`.
 *
 * `column_view::head<T>()` is a function template, so the element type must be resolved
 * through type dispatch before it can be invoked.
 */
struct head_pointer_getter {
  template <typename T>
  void* operator()(cudf::column_view const& col) const
  {
    if constexpr (is_rep_layout_compatible<T>()) {
      // Erase the element type first, then strip constness to match the `void*` return type.
      auto const* typed_head = col.head<T>();
      return const_cast<void*>(static_cast<void const*>(typed_head));
    } else if constexpr (is_fixed_point<T>()) {
      // Fixed-point types are re-dispatched on their underlying representation type.
      using rep_type = typename T::rep;
      return operator()<rep_type>(col);
    } else {
      // List/Struct don't have data themselves.
      return nullptr;
    }
  }
};

/**
 * @brief Returns the `head()` pointer of the given column_view, type-erased to `void*`.
 *
 * Dispatches on the column's data type and delegates to `head_pointer_getter`.
 */
void* get_head_pointer(cudf::column_view const& col)
{
  auto const dtype = col.type();
  return cudf::type_dispatcher(dtype, head_pointer_getter{}, col);
}
}; // namespace

// Pushes a STRUCT column's null mask down into its members, recursively, by building new
// column_views rather than modifying the input. Returns the resulting view together with the
// device_buffers that own any null masks allocated along the way. Non-STRUCT input is returned
// as-is with no buffers.
std::tuple<cudf::column_view, std::vector<rmm::device_buffer>> superimpose_parent_nulls(
  column_view const& parent, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)
{
  // Only STRUCT columns have members to push nulls into; anything else passes straight through.
  if (parent.type().id() != type_id::STRUCT) {
    return std::make_tuple(parent, std::vector<rmm::device_buffer>{});
  }

  auto struct_view = structs_column_view{parent};

  // Owners of every null mask allocated here and in the recursive calls below.
  auto mask_owners = std::vector<rmm::device_buffer>{};

  // Produces the view of member `idx` with the struct's null mask applied to it.
  auto member_with_parent_nulls = [&](size_type idx) -> cudf::column_view {
    auto member = struct_view.get_sliced_child(idx);

    // A non-nullable struct has nothing to contribute: the member is used unchanged.
    if (not struct_view.nullable()) { return member; }

    // Default: a non-nullable member simply adopts the struct's null mask.
    bitmask_type const* merged_mask = struct_view.null_mask();

    if (member.nullable()) {
      // Both are nullable, so the member's new mask is the AND of the two.
      //
      // The AND deliberately spans bits [0, offset() + size()) instead of only
      // [offset(), offset() + size()): a mask built from the narrower range would start at
      // bit 0, whereas the column_view applies its offset() to both its data and its null mask.
      // ANDing from the beginning lets offset() be applied uniformly.
      // (An alternative would be a big enough buffer combined with inplace_bitwise_and.)
      auto both_masks =
        std::vector<cudf::bitmask_type const*>{struct_view.null_mask(), member.null_mask()};
      mask_owners.push_back(cudf::detail::bitmask_and(
        both_masks, std::vector<size_type>{0, 0}, member.offset() + member.size(), stream, mr));
      merged_mask = reinterpret_cast<bitmask_type const*>(mask_owners.back().data());
    }

    // Same type, size, data, offset and children as the member; only the mask differs, so the
    // null count is left to be recomputed.
    return cudf::column_view(
      member.type(),
      member.size(),
      get_head_pointer(member),
      merged_mask,
      cudf::UNKNOWN_NULL_COUNT,
      member.offset(),
      std::vector<cudf::column_view>{member.child_begin(), member.child_end()});
  };

  auto new_children = std::vector<cudf::column_view>{};

  auto const member_count = struct_view.num_children();
  for (size_type idx = 0; idx < member_count; ++idx) {
    // Recurse so that the (possibly rewritten) member pushes its own nulls further down.
    auto [member_view, member_buffers] =
      superimpose_parent_nulls(member_with_parent_nulls(idx), stream, mr);
    new_children.push_back(member_view);

    // Take over ownership of the masks allocated by the recursive call.
    for (auto& buffer : member_buffers) {
      mask_owners.push_back(std::move(buffer));
    }
  }

  // Reassemble the parent around the rewritten children; its own mask is left as it was.
  return std::make_tuple(column_view(parent.type(),
                                     parent.size(),
                                     nullptr,
                                     parent.null_mask(),
                                     parent.null_count(),  // Alternatively, postpone.
                                     parent.offset(),
                                     new_children),
                         std::move(mask_owners));
}

} // namespace detail
} // namespace structs
} // namespace cudf
22 changes: 22 additions & 0 deletions cpp/src/structs/utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <cudf/structs/structs_column_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>
#include <rmm/cuda_stream_view.hpp>
mythrocks marked this conversation as resolved.
Show resolved Hide resolved

namespace cudf {
namespace structs {
Expand Down Expand Up @@ -123,6 +124,27 @@ void superimpose_parent_nulls(bitmask_type const* parent_null_mask,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @brief Pushdown nulls from a parent mask into a child column, using AND.
mythrocks marked this conversation as resolved.
Show resolved Hide resolved
*
* Rather than modify the argument column, this function constructs new equivalent column_view
* instances, with new null mask values. This function returns both a (possibly new) column,
* and the device_buffer instances to support any modified null masks.
* 1. If the specified column is not STRUCT, the column is returned unmodified, with no new
* supporting device_buffer instances.
* 2. If the column is STRUCT, the null masks of the parent and child are bitwise-ANDed, and a
* modified column_view is returned. This applies recursively to support nested STRUCT children.
*
* @param parent The parent (possibly STRUCT) column whose nulls need to be pushed to its members.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate new device memory.
* @return A tuple of the (possibly new) column_view, and the device_buffer instances that
* support any modified null masks.
mythrocks marked this conversation as resolved.
Show resolved Hide resolved
*/
std::tuple<cudf::column_view, std::vector<rmm::device_buffer>> superimpose_parent_nulls(
mythrocks marked this conversation as resolved.
Show resolved Hide resolved
column_view const& parent,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace detail
} // namespace structs
} // namespace cudf
Loading