Skip to content

Commit

Permalink
Remove make strings children with null mask (#8830)
Browse files Browse the repository at this point in the history
Closes #8580 

The `cudf::strings::detail::make_strings_children_with_null_mask` utility was created temporarily to help build the output column validities bitmask for `join_lists_elements` (for strings) and `lists::interleave_columns` (for strings). But it used a temporary `int8_t` device vector to hold single-bit values. It would then convert the `int8` column into a bitmask with a kernel call. This PR removes the utility in favor of executing a kernel using the `cudf::detail::valid_if` utility to build the bitmask directly without requiring a temporary buffer. Removing the temporary buffer from the `join_list_elements` strings API was not difficult. The temporary buffer is still used in the `lists::interleave_columns` for now.

 A follow on PR should change this to utilize the output bitmask and directly set the bits rather than using a temporary `int8` buffer that gets converted to a bitmask. This approach could also be used in the `join_lists_element` to ultimately avoid the `valid_if` call.

Removing this utility simplifies the code a bit and should speed up compiling any source file that includes `cudf/strings/detail/utilities.cuh` (~160 files right now).

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Robert Maynard (https://github.com/robertmaynard)
  - Conor Hoekstra (https://github.com/codereport)

URL: #8830
  • Loading branch information
davidwendt authored Aug 4, 2021
1 parent 3b391ae commit 2c4d984
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 124 deletions.
11 changes: 11 additions & 0 deletions cpp/include/cudf/detail/reshape.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,16 @@ std::unique_ptr<table> tile(
size_type count,
rmm::cuda_stream_view = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::interleave_columns
*
* @param stream CUDA stream used for device memory operations and kernel launches
*/
std::unique_ptr<column> interleave_columns(
table_view const& input,
rmm::cuda_stream_view = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace detail
} // namespace cudf
76 changes: 0 additions & 76 deletions cpp/include/cudf/strings/detail/utilities.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include <cudf/column/column.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/get_value.cuh>
#include <cudf/detail/valid_if.cuh>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/string_view.cuh>

Expand Down Expand Up @@ -205,81 +204,6 @@ auto make_strings_children(
return make_strings_children(size_and_exec_fn, strings_count, strings_count, stream, mr);
}

/**
* @brief Creates child offsets, chars columns and null mask, null count of a strings column by
* applying the template function that can be used for computing the output size of each string as
* well as create the output.
*
* @tparam SizeAndExecuteFunction Function must accept an index and return a size.
* It must have members `d_offsets`, `d_chars`, and `d_validities` which are set to memory
* containing the offsets column, chars column and string validities during write.
*
* @param size_and_exec_fn This is called twice. Once for the output size of each string, which is
* written into the `d_offsets` array. After that, `d_chars` is set and this
* is called again to fill in the chars memory. The `d_validities` array may
* be modified to set the value `0` for the corresponding rows that contain
* null string elements.
* @param exec_size Range for executing the function `size_and_exec_fn`.
* @param strings_count Number of strings.
* @param mr Device memory resource used to allocate the returned columns' device memory.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @return offsets child column, chars child column, null_mask, and null_count for a strings column.
*/
template <typename SizeAndExecuteFunction>
std::tuple<std::unique_ptr<column>, std::unique_ptr<column>, rmm::device_buffer, size_type>
make_strings_children_with_null_mask(
SizeAndExecuteFunction size_and_exec_fn,
size_type exec_size,
size_type strings_count,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
auto offsets_column = make_numeric_column(
data_type{type_id::INT32}, strings_count + 1, mask_state::UNALLOCATED, stream, mr);
auto offsets_view = offsets_column->mutable_view();
auto d_offsets = offsets_view.template data<int32_t>();
size_and_exec_fn.d_offsets = d_offsets;

auto validities = rmm::device_uvector<int8_t>(strings_count, stream);
size_and_exec_fn.d_validities = validities.begin();

// This is called twice: once for offsets and validities, and once for chars
auto for_each_fn = [exec_size, stream](SizeAndExecuteFunction& size_and_exec_fn) {
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
exec_size,
size_and_exec_fn);
};

// Compute the string sizes (storing in `d_offsets`) and string validities
for_each_fn(size_and_exec_fn);

// Compute the offsets from string sizes
thrust::exclusive_scan(
rmm::exec_policy(stream), d_offsets, d_offsets + strings_count + 1, d_offsets);

// Now build the chars column
auto const bytes = cudf::detail::get_value<int32_t>(offsets_view, strings_count, stream);
auto chars_column = create_chars_child_column(bytes, stream, mr);

// Execute the function fn again to fill the chars column.
// Note that if the output chars column has zero size, the function fn should not be called to
// avoid accidentally overwriting the offsets.
if (bytes > 0) {
size_and_exec_fn.d_chars = chars_column->mutable_view().template data<char>();
for_each_fn(size_and_exec_fn);
}

// Finally compute null mask and null count from the validities array
auto [null_mask, null_count] = cudf::detail::valid_if(
validities.begin(), validities.end(), thrust::identity<int8_t>{}, stream, mr);

return std::make_tuple(std::move(offsets_column),
std::move(chars_column),
null_count > 0 ? std::move(null_mask) : rmm::device_buffer{},
null_count);
}

// This template is a thin wrapper around per-context singleton objects.
// It maintains a single object for each CUDA context.
template <typename TableType>
Expand Down
25 changes: 10 additions & 15 deletions cpp/src/lists/interleave_columns.cu
Original file line number Diff line number Diff line change
Expand Up @@ -172,29 +172,24 @@ struct interleave_list_entries_fn {
rmm::mr::device_memory_resource* mr) const noexcept
{
auto const table_dv_ptr = table_device_view::create(input);
auto const comp_fn = compute_string_sizes_and_interleave_lists_fn{
auto comp_fn = compute_string_sizes_and_interleave_lists_fn{
*table_dv_ptr, output_list_offsets.template begin<offset_type>(), data_has_null_mask};

if (data_has_null_mask) {
auto [offsets_column, chars_column, null_mask, null_count] =
cudf::strings::detail::make_strings_children_with_null_mask(
comp_fn, num_output_lists, num_output_entries, stream, mr);
return make_strings_column(num_output_entries,
std::move(offsets_column),
std::move(chars_column),
null_count,
std::move(null_mask),
stream,
mr);
}
auto validities =
rmm::device_uvector<int8_t>(data_has_null_mask ? num_output_entries : 0, stream);
comp_fn.d_validities = validities.data();

auto [offsets_column, chars_column] = cudf::strings::detail::make_strings_children(
comp_fn, num_output_lists, num_output_entries, stream, mr);

auto [null_mask, null_count] = cudf::detail::valid_if(
validities.begin(), validities.end(), thrust::identity<int8_t>{}, stream, mr);

return make_strings_column(num_output_entries,
std::move(offsets_column),
std::move(chars_column),
0,
rmm::device_buffer{},
null_count,
std::move(null_mask),
stream,
mr);
}
Expand Down
21 changes: 13 additions & 8 deletions cpp/src/reshape/interleave_columns.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <cudf/copying.hpp>
#include <cudf/detail/gather.cuh>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/reshape.hpp>
#include <cudf/lists/detail/interleave_columns.hpp>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/table/table_device_view.cuh>
Expand Down Expand Up @@ -184,12 +185,11 @@ struct interleave_columns_functor {
};

} // anonymous namespace
} // namespace detail

std::unique_ptr<column> interleave_columns(table_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
CUDF_EXPECTS(input.num_columns() > 0, "input must have at least one column to determine dtype.");

auto const dtype = input.column(0).type();
Expand All @@ -202,12 +202,17 @@ std::unique_ptr<column> interleave_columns(table_view const& input,
auto const output_needs_mask = std::any_of(
std::cbegin(input), std::cend(input), [](auto const& col) { return col.nullable(); });

return type_dispatcher<dispatch_storage_type>(dtype,
detail::interleave_columns_functor{},
input,
output_needs_mask,
rmm::cuda_stream_default,
mr);
return type_dispatcher<dispatch_storage_type>(
dtype, detail::interleave_columns_functor{}, input, output_needs_mask, stream, mr);
}

} // namespace detail

std::unique_ptr<column> interleave_columns(table_view const& input,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::interleave_columns(input, rmm::cuda_stream_default, mr);
}

} // namespace cudf
73 changes: 52 additions & 21 deletions cpp/src/strings/combine/join_list_elements.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/get_value.cuh>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/valid_if.cuh>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/scalar/scalar_device_view.cuh>
#include <cudf/strings/combine.hpp>
Expand Down Expand Up @@ -60,9 +61,6 @@ struct compute_size_and_concatenate_fn {
// If d_chars != nullptr: only concatenate strings.
char* d_chars{nullptr};

// We need to set `1` or `0` for the validities of the output strings.
int8_t* d_validities{nullptr};

__device__ bool output_is_null(size_type const idx,
size_type const start_idx,
size_type const end_idx) const noexcept
Expand All @@ -73,33 +71,31 @@ struct compute_size_and_concatenate_fn {

__device__ void operator()(size_type const idx) const noexcept
{
// If this is the second pass, and the row `idx` is known to be a null string
if (d_chars && !d_validities[idx]) { return; }
// If this is the second pass, and the row `idx` is known to be a null or empty string
if (d_chars && (d_offsets[idx] == d_offsets[idx + 1])) { return; }

// Indices of the strings within the list row
auto const start_idx = list_offsets[idx];
auto const end_idx = list_offsets[idx + 1];

if (!d_chars && output_is_null(idx, start_idx, end_idx)) {
d_offsets[idx] = 0;
d_validities[idx] = false;
d_offsets[idx] = 0;
return;
}

auto const separator = func.separator(idx);
auto size_bytes = size_type{0};
char* output_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr;
bool has_valid_element = false;
bool write_separator = false;
auto size_bytes = size_type{0};
bool has_valid_element = false;

for (size_type str_idx = start_idx; str_idx < end_idx; ++str_idx) {
bool null_element = strings_dv.is_null(str_idx);
has_valid_element = has_valid_element || !null_element;

if (!d_chars && (null_element && !string_narep_dv.is_valid())) {
d_offsets[idx] = 0;
d_validities[idx] = false;
return; // early termination: the entire list of strings will result in a null string
size_bytes = 0;
break;
}

if (write_separator && (separate_nulls == separator_on_nulls::YES || !null_element)) {
Expand All @@ -119,11 +115,7 @@ struct compute_size_and_concatenate_fn {

// If there are all null elements, the output should be the same as having an empty list input:
// a null or an empty string
if (!d_chars) {
d_offsets[idx] = has_valid_element ? size_bytes : 0;
d_validities[idx] =
has_valid_element || empty_list_policy == output_if_empty_list::EMPTY_STRING;
}
if (!d_chars) { d_offsets[idx] = has_valid_element ? size_bytes : 0; }
}
};

Expand All @@ -144,6 +136,33 @@ struct scalar_separator_fn {
__device__ string_view separator(size_type const) const noexcept { return d_separator.value(); }
};

template <typename CompFn>
struct validities_fn {
CompFn comp_fn;

validities_fn(CompFn comp_fn) : comp_fn(comp_fn) {}

__device__ bool operator()(size_type idx)
{
auto const start_idx = comp_fn.list_offsets[idx];
auto const end_idx = comp_fn.list_offsets[idx + 1];
bool valid_output = !comp_fn.output_is_null(idx, start_idx, end_idx);
if (valid_output) {
bool check_elements = false;
for (size_type str_idx = start_idx; str_idx < end_idx; ++str_idx) {
bool const valid_element = comp_fn.strings_dv.is_valid(str_idx);
check_elements = check_elements || valid_element;
// if an element is null and narep is invalid, the output row is null
if (!valid_element && !comp_fn.string_narep_dv.is_valid()) { return false; }
}
// handle empty-list-as-null output policy setting
valid_output =
check_elements || comp_fn.empty_list_policy == output_if_empty_list::EMPTY_STRING;
}
return valid_output;
}
};

} // namespace

std::unique_ptr<column> join_list_elements(lists_column_view const& lists_strings_column,
Expand Down Expand Up @@ -180,8 +199,14 @@ std::unique_ptr<column> join_list_elements(lists_column_view const& lists_string
string_narep_dv,
separate_nulls,
empty_list_policy};
auto [offsets_column, chars_column, null_mask, null_count] =
make_strings_children_with_null_mask(comp_fn, num_rows, num_rows, stream, mr);

auto [offsets_column, chars_column] = make_strings_children(comp_fn, num_rows, stream, mr);
auto [null_mask, null_count] =
cudf::detail::valid_if(thrust::counting_iterator<size_type>(0),
thrust::counting_iterator<size_type>(num_rows),
validities_fn{comp_fn},
stream,
mr);

return make_strings_column(num_rows,
std::move(offsets_column),
Expand Down Expand Up @@ -254,8 +279,14 @@ std::unique_ptr<column> join_list_elements(lists_column_view const& lists_string
string_narep_dv,
separate_nulls,
empty_list_policy};
auto [offsets_column, chars_column, null_mask, null_count] =
make_strings_children_with_null_mask(comp_fn, num_rows, num_rows, stream, mr);

auto [offsets_column, chars_column] = make_strings_children(comp_fn, num_rows, stream, mr);
auto [null_mask, null_count] =
cudf::detail::valid_if(thrust::counting_iterator<size_type>(0),
thrust::counting_iterator<size_type>(num_rows),
validities_fn{comp_fn},
stream,
mr);

return make_strings_column(num_rows,
std::move(offsets_column),
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/transpose/transpose.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
#include <cudf/copying.hpp>
#include <cudf/detail/copy.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/reshape.hpp>
#include <cudf/detail/transpose.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/reshape.hpp>
#include <cudf/table/table_device_view.cuh>
#include <cudf/transpose.hpp>
#include <cudf/utilities/traits.hpp>
Expand All @@ -44,7 +44,7 @@ std::pair<std::unique_ptr<column>, table_view> transpose(table_view const& input
input.begin(), input.end(), [dtype](auto const& col) { return dtype == col.type(); }),
"Column type mismatch");

auto output_column = cudf::interleave_columns(input, mr);
auto output_column = cudf::detail::interleave_columns(input, stream, mr);
auto one_iter = thrust::make_counting_iterator<size_type>(1);
auto splits_iter = thrust::make_transform_iterator(
one_iter, [width = input.num_columns()](size_type idx) { return idx * width; });
Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/strings/combine/join_list_elements_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,11 @@ TEST_F(StringsListsConcatenateTest, ZeroSizeStringsInput)
auto const expected = STR_COL{"", "", "", ""};

auto results = cudf::strings::join_list_elements(string_lv);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, verbosity);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity);

auto const separators = STR_COL{"", "", "", ""}.release();
results = cudf::strings::join_list_elements(string_lv, separators->view());
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, verbosity);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity);
}

// Empty list results in null
Expand Down

0 comments on commit 2c4d984

Please sign in to comment.