Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace make_empty_strings_column with make_empty_column #8435

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions cpp/include/cudf/strings/detail/copy_if_else.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <cudf/detail/get_value.cuh>
#include <cudf/detail/valid_if.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/strings_column_view.hpp>

#include <rmm/cuda_stream_view.hpp>
Expand Down Expand Up @@ -62,7 +61,7 @@ std::unique_ptr<cudf::column> copy_if_else(
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
auto strings_count = std::distance(lhs_begin, lhs_end);
if (strings_count == 0) return make_empty_strings_column(stream, mr);
if (strings_count == 0) return make_empty_column(data_type{type_id::STRING});

// create null mask
auto valid_mask = cudf::detail::valid_if(
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/strings/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ std::unique_ptr<cudf::column> gather(
{
auto const output_count = std::distance(begin, end);
auto const strings_count = strings.size();
if (output_count == 0) return make_empty_strings_column(stream, mr);
if (output_count == 0) return make_empty_column(data_type{type_id::STRING});
// allocate offsets column and use memory to compute string size in each output row
auto out_offsets_column = make_numeric_column(
Expand Down
3 changes: 1 addition & 2 deletions cpp/include/cudf/strings/detail/merge.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <cudf/detail/null_mask.hpp>
#include <cudf/merge.hpp>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>

Expand Down Expand Up @@ -54,7 +53,7 @@ std::unique_ptr<column> merge(strings_column_view const& lhs,
{
using cudf::detail::side;
size_type strings_count = static_cast<size_type>(std::distance(begin, end));
if (strings_count == 0) return make_empty_strings_column(stream, mr);
if (strings_count == 0) return make_empty_column(data_type{type_id::STRING});

auto lhs_column = column_device_view::create(lhs.parent(), stream);
auto d_lhs = *lhs_column;
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/strings/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ std::unique_ptr<column> scatter(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
if (target.is_empty()) return make_empty_strings_column(stream, mr);
if (target.is_empty()) return make_empty_column(data_type{type_id::STRING});

// create vector of string_view's to scatter into
rmm::device_uvector<string_view> target_vector = create_string_vector_from_column(target, stream);
Expand Down
5 changes: 2 additions & 3 deletions cpp/include/cudf/strings/detail/strings_column_factories.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <cudf/detail/valid_if.cuh>
#include <cudf/strings/detail/gather.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>
Expand Down Expand Up @@ -63,7 +62,7 @@ std::unique_ptr<column> make_strings_column(IndexPairIterator begin,
{
CUDF_FUNC_RANGE();
size_type strings_count = thrust::distance(begin, end);
if (strings_count == 0) return strings::detail::make_empty_strings_column(stream, mr);
if (strings_count == 0) return make_empty_column(data_type{type_id::STRING});

using string_index_pair = thrust::pair<const char*, size_type>;

Expand Down Expand Up @@ -167,7 +166,7 @@ std::unique_ptr<column> make_strings_column(CharIterator chars_begin,
CUDF_FUNC_RANGE();
size_type strings_count = thrust::distance(offsets_begin, offsets_end) - 1;
size_type bytes = std::distance(chars_begin, chars_end) * sizeof(char);
if (strings_count == 0) return strings::detail::make_empty_strings_column(stream, mr);
if (strings_count == 0) return make_empty_column(data_type{type_id::STRING});

CUDF_EXPECTS(null_count < strings_count, "null strings column not yet supported");
CUDF_EXPECTS(bytes >= 0, "invalid offsets data");
Expand Down
11 changes: 0 additions & 11 deletions cpp/include/cudf/strings/detail/utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,6 @@ std::unique_ptr<column> create_chars_child_column(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Create a strings column with no strings.
*
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return Empty strings column
*/
std::unique_ptr<column> make_empty_strings_column(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Creates a string_view vector from a strings column.
*
Expand Down
10 changes: 0 additions & 10 deletions cpp/src/copying/copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/table/table.hpp>
#include <cudf/utilities/traits.hpp>

Expand Down Expand Up @@ -59,15 +58,6 @@ struct scalar_empty_like_functor_impl {
}
};

template <>
struct scalar_empty_like_functor_impl<cudf::string_view> {
std::unique_ptr<column> operator()(scalar const& input)
{
return cudf::strings::detail::make_empty_strings_column(rmm::cuda_stream_default,
rmm::mr::get_current_device_resource());
}
};

template <>
struct scalar_empty_like_functor_impl<cudf::list_view> {
std::unique_ptr<column> operator()(scalar const& input)
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/interop/from_arrow.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
#include <cudf/dictionary/dictionary_factories.hpp>
#include <cudf/interop.hpp>
#include <cudf/null_mask.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/traits.hpp>
Expand Down Expand Up @@ -272,7 +271,7 @@ std::unique_ptr<column> dispatch_to_cudf_column::operator()<cudf::string_view>(
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
if (array.length() == 0) { return cudf::strings::detail::make_empty_strings_column(stream, mr); }
if (array.length() == 0) { return make_empty_column(data_type{type_id::STRING}); }
auto str_array = static_cast<arrow::StringArray const*>(&array);
auto offset_array = std::make_unique<arrow::Int32Array>(
str_array->value_offsets()->size() / sizeof(int32_t), str_array->value_offsets(), nullptr);
Expand Down
3 changes: 0 additions & 3 deletions cpp/src/io/utilities/column_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

#include "column_buffer.hpp"
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/strings/detail/utilities.hpp>

namespace cudf {
namespace io {
Expand Down Expand Up @@ -191,8 +190,6 @@ std::unique_ptr<column> empty_like(column_buffer& buffer,
mr);
} break;

case type_id::STRING: return cudf::strings::detail::make_empty_strings_column(stream, mr);

default: return cudf::make_empty_column(buffer.type);
}
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/reshape/interleave_columns.cu
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ struct interleave_columns_functor {

auto strings_count = strings_columns.num_rows();
if (strings_count == 0) // All columns have 0 rows
return strings::detail::make_empty_strings_column(stream, mr);
return make_empty_column(data_type{type_id::STRING});

// Create device views from the strings columns.
auto table = table_device_view::create(strings_columns, stream);
Expand Down
5 changes: 2 additions & 3 deletions cpp/src/strings/capitalize.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/strings/capitalize.hpp>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>

Expand Down Expand Up @@ -172,7 +171,7 @@ std::unique_ptr<column> capitalize(strings_column_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
if (input.is_empty()) return detail::make_empty_strings_column(stream, mr);
if (input.is_empty()) return make_empty_column(data_type{type_id::STRING});
auto d_column = column_device_view::create(input.parent(), stream);
return capitalize_utility(capitalize_fn{*d_column}, input, stream, mr);
}
Expand All @@ -181,7 +180,7 @@ std::unique_ptr<column> title(strings_column_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
if (input.is_empty()) return detail::make_empty_strings_column(stream, mr);
if (input.is_empty()) return make_empty_column(data_type{type_id::STRING});
auto d_column = column_device_view::create(input.parent(), stream);
return capitalize_utility(title_fn{*d_column}, input, stream, mr);
}
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/strings/case.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/strings/case.hpp>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/error.hpp>
Expand Down Expand Up @@ -126,7 +125,7 @@ std::unique_ptr<column> convert_case(strings_column_view const& strings,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
if (strings.is_empty()) return detail::make_empty_strings_column(stream, mr);
if (strings.is_empty()) return make_empty_column(data_type{type_id::STRING});

auto strings_column = column_device_view::create(strings.parent(), stream);
auto d_column = *strings_column;
Expand Down
5 changes: 2 additions & 3 deletions cpp/src/strings/combine/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#include <cudf/strings/combine.hpp>
#include <cudf/strings/detail/combine.hpp>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/table/table_device_view.cuh>
Expand Down Expand Up @@ -130,7 +129,7 @@ std::unique_ptr<column> concatenate(table_view const& strings_columns,
"All columns must be of type string");
auto const strings_count = strings_columns.num_rows();
if (strings_count == 0) // empty begets empty
return detail::make_empty_strings_column(stream, mr);
return make_empty_column(data_type{type_id::STRING});

CUDF_EXPECTS(separator.is_valid(), "Parameter separator must be a valid string_scalar");
string_view d_separator(separator.data(), separator.size());
Expand Down Expand Up @@ -222,7 +221,7 @@ std::unique_ptr<column> concatenate(table_view const& strings_columns,
CUDF_EXPECTS(strings_count == separators.size(),
"Separators column should be the same size as the strings columns");
if (strings_count == 0) // Empty begets empty
return detail::make_empty_strings_column(stream, mr);
return make_empty_column(data_type{type_id::STRING});

// Invalid output column strings - null rows
string_view const invalid_str{nullptr, 0};
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/strings/combine/join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#include <cudf/strings/combine.hpp>
#include <cudf/strings/detail/combine.hpp>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/error.hpp>
Expand All @@ -44,7 +43,7 @@ std::unique_ptr<column> join_strings(strings_column_view const& strings,
rmm::mr::device_memory_resource* mr)
{
auto strings_count = strings.size();
if (strings_count == 0) return detail::make_empty_strings_column(stream, mr);
if (strings_count == 0) return make_empty_column(data_type{type_id::STRING});

CUDF_EXPECTS(separator.is_valid(), "Parameter separator must be a valid string_scalar");

Expand Down
5 changes: 2 additions & 3 deletions cpp/src/strings/combine/join_list_elements.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#include <cudf/scalar/scalar_device_view.cuh>
#include <cudf/strings/combine.hpp>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/error.hpp>

Expand Down Expand Up @@ -160,7 +159,7 @@ std::unique_ptr<column> join_list_elements(lists_column_view const& lists_string
CUDF_EXPECTS(separator.is_valid(), "Parameter separator must be a valid string_scalar");

auto const num_rows = lists_strings_column.size();
if (num_rows == 0) { return detail::make_empty_strings_column(stream, mr); }
if (num_rows == 0) { return make_empty_column(data_type{type_id::STRING}); }

// Accessing the child strings column of the lists column must be done by calling `child()` on the
// lists column, not `get_sliced_child()`. This is because calling to `offsets_begin()` on the
Expand Down Expand Up @@ -233,7 +232,7 @@ std::unique_ptr<column> join_list_elements(lists_column_view const& lists_string
"Separators column should be the same size as the lists columns");

auto const num_rows = lists_strings_column.size();
if (num_rows == 0) { return detail::make_empty_strings_column(stream, mr); }
if (num_rows == 0) { return make_empty_column(data_type{type_id::STRING}); }

// Accessing the child strings column of the lists column must be done by calling `child()` on the
// lists column, not `get_sliced_child()`. This is because calling to `offsets_begin()` on the
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_booleans.cu
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ std::unique_ptr<column> from_booleans(column_view const& booleans,
rmm::mr::device_memory_resource* mr)
{
size_type strings_count = booleans.size();
if (strings_count == 0) return make_empty_strings_column(stream, mr);
if (strings_count == 0) return make_empty_column(data_type{type_id::STRING});

CUDF_EXPECTS(booleans.type().id() == type_id::BOOL8, "Input column must be boolean type");
CUDF_EXPECTS(true_string.is_valid() && true_string.size() > 0,
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_datetime.cu
Original file line number Diff line number Diff line change
Expand Up @@ -933,7 +933,7 @@ std::unique_ptr<column> from_timestamps(column_view const& timestamps,
rmm::mr::device_memory_resource* mr)
{
size_type strings_count = timestamps.size();
if (strings_count == 0) return make_empty_strings_column(stream, mr);
if (strings_count == 0) return make_empty_column(data_type{type_id::STRING});

CUDF_EXPECTS(!format.empty(), "Format parameter must not be empty.");
timestamp_units units =
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_durations.cu
Original file line number Diff line number Diff line change
Expand Up @@ -705,7 +705,7 @@ std::unique_ptr<column> from_durations(column_view const& durations,
rmm::mr::device_memory_resource* mr)
{
size_type strings_count = durations.size();
if (strings_count == 0) return make_empty_strings_column(stream, mr);
if (strings_count == 0) return make_empty_column(data_type{type_id::STRING});

return type_dispatcher(
durations.type(), dispatch_from_durations_fn{}, durations, format, stream, mr);
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_fixed_point.cu
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ std::unique_ptr<column> from_fixed_point(column_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
if (input.is_empty()) return detail::make_empty_strings_column(stream, mr);
if (input.is_empty()) return make_empty_column(data_type{type_id::STRING});
return type_dispatcher(input.type(), dispatch_from_fixed_point_fn{}, input, stream, mr);
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_floats.cu
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,7 @@ std::unique_ptr<column> from_floats(column_view const& floats,
rmm::mr::device_memory_resource* mr)
{
size_type strings_count = floats.size();
if (strings_count == 0) return detail::make_empty_strings_column(stream, mr);
if (strings_count == 0) return make_empty_column(data_type{type_id::STRING});

return type_dispatcher(floats.type(), dispatch_from_floats_fn{}, floats, stream, mr);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_integers.cu
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ std::unique_ptr<column> from_integers(column_view const& integers,
rmm::mr::device_memory_resource* mr)
{
size_type strings_count = integers.size();
if (strings_count == 0) return detail::make_empty_strings_column(stream, mr);
if (strings_count == 0) return make_empty_column(data_type{type_id::STRING});

return type_dispatcher(integers.type(), dispatch_from_integers_fn{}, integers, stream, mr);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/convert/convert_ipv4.cu
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ std::unique_ptr<column> integers_to_ipv4(
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
size_type strings_count = integers.size();
if (strings_count == 0) return make_empty_strings_column(stream, mr);
if (strings_count == 0) return make_empty_column(data_type{type_id::STRING});

CUDF_EXPECTS(integers.type().id() == type_id::INT64, "Input column must be type_id::INT64 type");

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/strings/convert/convert_urls.cu
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ std::unique_ptr<column> url_encode(
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
size_type strings_count = strings.size();
if (strings_count == 0) return make_empty_strings_column(stream, mr);
if (strings_count == 0) return make_empty_column(data_type{type_id::STRING});

auto strings_column = column_device_view::create(strings.parent(), stream);
auto d_strings = *strings_column;
Expand Down Expand Up @@ -326,7 +326,7 @@ std::unique_ptr<column> url_decode(
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
size_type strings_count = strings.size();
if (strings_count == 0) return make_empty_strings_column(stream, mr);
if (strings_count == 0) return make_empty_column(data_type{type_id::STRING});

auto offset_count = strings_count + 1;
auto d_offsets = strings.offsets().data<int32_t>() + strings.offset();
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/strings/copying/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/strings/detail/concatenate.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/table/table_device_view.cuh>

Expand Down Expand Up @@ -217,7 +216,7 @@ std::unique_ptr<column> concatenate(host_span<column_view const> columns,
auto const total_bytes = std::get<5>(device_views);
auto const offsets_count = strings_count + 1;

if (strings_count == 0) { return make_empty_strings_column(stream, mr); }
if (strings_count == 0) { return make_empty_column(data_type{type_id::STRING}); }

CUDF_EXPECTS(offsets_count <= static_cast<std::size_t>(std::numeric_limits<size_type>::max()),
"total number of strings is too large for cudf column");
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/strings/copying/copying.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include <cudf/detail/get_value.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/strings/detail/copying.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/strings_column_view.hpp>

#include <rmm/cuda_stream_view.hpp>
Expand All @@ -35,7 +34,7 @@ std::unique_ptr<cudf::column> copy_slice(strings_column_view const& strings,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
if (strings.is_empty()) return make_empty_strings_column(stream, mr);
if (strings.is_empty()) return make_empty_column(data_type{type_id::STRING});
if (end < 0 || end > strings.size()) end = strings.size();
CUDF_EXPECTS(((start >= 0) && (start < end)), "Invalid start parameter value.");
auto const strings_count = end - start;
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/filling/fill.cu
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ std::unique_ptr<column> fill(
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
auto strings_count = strings.size();
if (strings_count == 0) return detail::make_empty_strings_column(stream, mr);
if (strings_count == 0) return make_empty_column(data_type{type_id::STRING});
CUDF_EXPECTS((begin >= 0) && (end <= strings_count),
"Parameters [begin,end) are outside the range of the provided strings column");
CUDF_EXPECTS(begin <= end, "Parameters [begin,end) have invalid range values");
Expand Down
Loading