Skip to content

Commit

Permalink
Support for struct scalars. (#8220)
Browse files Browse the repository at this point in the history
Closes:  #7790

Creation of struct scalars.

Also in this PR:
- support for `struct_scalar` in `make_column_from_scalar`
- Refactored `superimpose_parent_nullmask` to be exposed as `structs::detail::superimpose_parent_nulls` in src/structs/utilities.hpp

Authors:
  - https://github.com/nvdbaranec

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Vukasin Milovanovic (https://github.com/vuule)
  - Robert Maynard (https://github.com/robertmaynard)

URL: #8220
  • Loading branch information
nvdbaranec authored May 18, 2021
1 parent 56513a8 commit 414e9bb
Show file tree
Hide file tree
Showing 14 changed files with 498 additions and 151 deletions.
3 changes: 2 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ add_library(cudf
src/column/column.cu
src/column/column_device_view.cu
src/column/column_factories.cpp
src/column/column_factories.cu
src/column/column_view.cpp
src/comms/ipc/ipc.cpp
src/copying/concatenate.cu
Expand Down Expand Up @@ -375,7 +376,7 @@ add_library(cudf
src/structs/copying/concatenate.cu
src/structs/structs_column_factories.cu
src/structs/structs_column_view.cpp
src/structs/utilities.cu
src/structs/utilities.cpp
src/table/table.cpp
src/table/table_device_view.cu
src/table/table_view.cpp
Expand Down
55 changes: 55 additions & 0 deletions cpp/include/cudf/scalar/scalar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#pragma once

#include <cudf/column/column.hpp>
#include <cudf/table/table.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/traits.hpp>

Expand Down Expand Up @@ -572,5 +573,59 @@ class list_scalar : public scalar {
cudf::column _data;
};

/**
* @brief An owning class to represent a struct value in device memory
*
* The struct's field data is held in an owned `table` (`_data`). Both data-taking
* constructors deep-copy the caller's columns. Copy and move construction are
* defaulted; copy and move assignment are explicitly deleted.
*/
class struct_scalar : public scalar {
public:
// Default constructor; declared here and defined out of line (definition not in this header).
struct_scalar();
~struct_scalar() = default;
struct_scalar(struct_scalar&& other) = default;
struct_scalar(struct_scalar const& other) = default;
// Assignment (copy and move) is not available on this type.
struct_scalar& operator=(struct_scalar const& other) = delete;
struct_scalar& operator=(struct_scalar&& other) = delete;

/**
* @brief Construct a new struct scalar object from table_view
*
* The input table_view is deep-copied.
*
* @param data The table data to copy.
* @param is_valid Whether the value held by the scalar is valid
* @param stream CUDA stream used for device memory operations.
* @param mr Device memory resource to use for device memory allocation
*/
struct_scalar(table_view const& data,
bool is_valid = true,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Construct a new struct scalar object from a host_span of column_views
*
* The input column_views are deep-copied.
*
* @param data The column_views to copy.
* @param is_valid Whether the value held by the scalar is valid
* @param stream CUDA stream used for device memory operations.
* @param mr Device memory resource to use for device memory allocation
*/
struct_scalar(host_span<column_view const> data,
bool is_valid = true,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a non-owning, immutable view to underlying device data
*/
table_view view() const;

private:
// Owned copy of the struct's columns (presumably what view() exposes — confirm in the .cpp).
table _data;

// Constructor helpers; their definitions are not part of this header.
// NOTE(review): superimpose_nulls presumably pushes an invalid scalar's null state down onto
// the child columns (cf. structs::detail::superimpose_parent_nulls) — confirm in the .cpp.
void init(bool is_valid, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr);
void superimpose_nulls(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr);
};

/** @} */ // end of group
} // namespace cudf
30 changes: 29 additions & 1 deletion cpp/include/cudf/scalar/scalar_factories.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -168,5 +168,33 @@ std::unique_ptr<scalar> make_list_scalar(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Construct a struct scalar using the given table_view.
*
* The columns must have 1 row.
*
* @param data The columnar data to store in the scalar object
* @param stream CUDA stream used for device memory operations.
* @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool.
* @return The new struct scalar, returned through a `scalar` base-class pointer
*/
std::unique_ptr<scalar> make_struct_scalar(
table_view const& data,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Construct a struct scalar using the given span of column views.
*
* The columns must have 1 row.
*
* @param data The columnar data to store in the scalar object
* @param stream CUDA stream used for device memory operations.
* @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool.
* @return The new struct scalar, returned through a `scalar` base-class pointer
*/
std::unique_ptr<scalar> make_struct_scalar(
host_span<column_view const> data,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
} // namespace cudf
3 changes: 1 addition & 2 deletions cpp/include/cudf/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class scalar;

// clang-format off
class list_scalar;
class struct_scalar;
class string_scalar;
template <typename T> class numeric_scalar;
template <typename T> class fixed_point_scalar;
Expand All @@ -74,8 +75,6 @@ template <typename T> class timestamp_scalar_device_view;
template <typename T> class duration_scalar_device_view;
// clang-format on

class struct_scalar;

class table;
class table_view;
class mutable_table_view;
Expand Down
85 changes: 0 additions & 85 deletions cpp/src/column/column_factories.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/dictionary/dictionary_factories.hpp>
#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/strings/detail/fill.hpp>
#include <cudf/utilities/error.hpp>
Expand Down Expand Up @@ -159,90 +158,6 @@ std::unique_ptr<column> make_fixed_width_column(data_type type,
/// clang-format on
}

/**
 * @brief Type-dispatched functor that expands a scalar into a column of `size` rows.
 *
 * The primary template builds its result with `make_fixed_width_column`; string, dictionary,
 * list and struct types are handled by explicit specializations of `operator()`.
 */
struct column_from_scalar_dispatch {
  template <typename T>
  std::unique_ptr<cudf::column> operator()(scalar const& value,
                                           size_type size,
                                           rmm::cuda_stream_view stream,
                                           rmm::mr::device_memory_resource* mr) const
  {
    if (value.is_valid()) {
      // Valid scalar: allocate without a null mask, then write the value into every row.
      auto filled =
        make_fixed_width_column(value.type(), size, mask_state::UNALLOCATED, stream, mr);
      auto target = filled->mutable_view();
      detail::fill_in_place(target, 0, size, value, stream);
      return filled;
    }
    // Invalid scalar: an all-null column of the same type.
    return make_fixed_width_column(value.type(), size, mask_state::ALL_NULL, stream, mr);
  }
};

/**
 * @brief Builds a strings column of `size` rows in which every row holds the scalar `value`.
 *
 * An invalid scalar produces a column with an empty data buffer, an all-null mask and a null
 * count of `size`. A valid scalar is expanded with `strings::detail::fill` and the resulting
 * column has its null mask cleared.
 *
 * @param value The scalar to replicate; cast to the string scalar type inside
 * @param size Number of rows in the output column
 * @param stream CUDA stream used for device memory operations
 * @param mr Device memory resource used for the returned column's allocations
 * @return The new strings column
 */
template <>
std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::string_view>(
  scalar const& value,
  size_type size,
  rmm::cuda_stream_view stream,
  rmm::mr::device_memory_resource* mr) const
{
  // Invalid scalar: pass the freshly created all-null mask as a temporary so the column takes
  // ownership of it instead of copying a named local buffer.
  if (!value.is_valid())
    return std::make_unique<column>(value.type(),
                                    size,
                                    rmm::device_buffer{0, stream, mr},
                                    detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr),
                                    size);

  auto null_mask = detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr);

  // Create a strings column_view with all nulls and no children.
  // Since we are setting every row to the scalar, the fill() never needs to access
  // any of the children in the strings column which would otherwise cause an exception.
  column_view sc{
    data_type{type_id::STRING}, size, nullptr, static_cast<bitmask_type*>(null_mask.data()), size};
  // Bind by reference: a plain `auto` here would copy-construct a second string scalar.
  auto const& sv = static_cast<scalar_type_t<cudf::string_view> const&>(value);
  // fill the column with the scalar
  auto output = strings::detail::fill(strings_column_view(sc), 0, size, sv, stream, mr);
  output->set_null_mask(rmm::device_buffer{0, stream, mr}, 0);  // should be no nulls
  return output;
}

// Dictionary specialization: this dispatch path does not build dictionary columns.
// It unconditionally fails through CUDF_FAIL; none of the arguments are used.
template <>
std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::dictionary32>(
scalar const& value,
size_type size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
CUDF_FAIL("dictionary not supported when creating from scalar");
}

// List specialization: not implemented. It unconditionally fails through CUDF_FAIL;
// none of the arguments are used.
// NOTE(review): the failure message is only "TODO" — consider a descriptive message.
template <>
std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::list_view>(
scalar const& value,
size_type size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
CUDF_FAIL("TODO");
}

// Struct specialization: not implemented in this version. It unconditionally fails through
// CUDF_FAIL; none of the arguments are used.
template <>
std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::struct_view>(
scalar const& value,
size_type size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
CUDF_FAIL("TODO. struct_view currently not supported.");
}

/**
 * @brief Creates a column of `size` rows from the scalar `s`.
 *
 * A request for zero rows returns an empty column of the scalar's type without dispatching;
 * otherwise the work is forwarded to `column_from_scalar_dispatch` for the scalar's type.
 */
std::unique_ptr<column> make_column_from_scalar(scalar const& s,
                                                size_type size,
                                                rmm::cuda_stream_view stream,
                                                rmm::mr::device_memory_resource* mr)
{
  auto const scalar_type = s.type();
  if (size == 0) { return make_empty_column(scalar_type); }
  return type_dispatcher(scalar_type, column_from_scalar_dispatch{}, s, size, stream, mr);
}

std::unique_ptr<column> make_dictionary_from_scalar(scalar const& s,
size_type size,
rmm::cuda_stream_view stream,
Expand Down
124 changes: 124 additions & 0 deletions cpp/src/column/column_factories.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/fill.hpp>
#include <cudf/detail/gather.cuh>
#include <cudf/dictionary/dictionary_factories.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/detail/fill.hpp>

namespace cudf {

namespace {

/**
 * @brief Type-dispatched functor that expands a scalar into a column of `size` rows.
 *
 * The primary template builds its result with `make_fixed_width_column`; string, dictionary,
 * list and struct types are handled by explicit specializations of `operator()`.
 */
struct column_from_scalar_dispatch {
  template <typename T>
  std::unique_ptr<cudf::column> operator()(scalar const& value,
                                           size_type size,
                                           rmm::cuda_stream_view stream,
                                           rmm::mr::device_memory_resource* mr) const
  {
    if (value.is_valid()) {
      // Valid scalar: allocate without a null mask, then write the value into every row.
      auto filled =
        make_fixed_width_column(value.type(), size, mask_state::UNALLOCATED, stream, mr);
      auto target = filled->mutable_view();
      detail::fill_in_place(target, 0, size, value, stream);
      return filled;
    }
    // Invalid scalar: an all-null column of the same type.
    return make_fixed_width_column(value.type(), size, mask_state::ALL_NULL, stream, mr);
  }
};

/**
 * @brief Builds a strings column of `size` rows in which every row holds the scalar `value`.
 *
 * An invalid scalar produces a column with an empty data buffer, an all-null mask and a null
 * count of `size`. A valid scalar is expanded with `strings::detail::fill` and the resulting
 * column has its null mask cleared.
 *
 * @param value The scalar to replicate; cast to the string scalar type inside
 * @param size Number of rows in the output column
 * @param stream CUDA stream used for device memory operations
 * @param mr Device memory resource used for the returned column's allocations
 * @return The new strings column
 */
template <>
std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::string_view>(
  scalar const& value,
  size_type size,
  rmm::cuda_stream_view stream,
  rmm::mr::device_memory_resource* mr) const
{
  // Invalid scalar: pass the freshly created all-null mask as a temporary so the column takes
  // ownership of it instead of copying a named local buffer.
  if (!value.is_valid())
    return std::make_unique<column>(value.type(),
                                    size,
                                    rmm::device_buffer{0, stream, mr},
                                    detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr),
                                    size);

  auto null_mask = detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr);

  // Create a strings column_view with all nulls and no children.
  // Since we are setting every row to the scalar, the fill() never needs to access
  // any of the children in the strings column which would otherwise cause an exception.
  column_view sc{
    data_type{type_id::STRING}, size, nullptr, static_cast<bitmask_type*>(null_mask.data()), size};
  // Bind by reference: a plain `auto` here would copy-construct a second string scalar.
  auto const& sv = static_cast<scalar_type_t<cudf::string_view> const&>(value);
  // fill the column with the scalar
  auto output = strings::detail::fill(strings_column_view(sc), 0, size, sv, stream, mr);
  output->set_null_mask(rmm::device_buffer{0, stream, mr}, 0);  // should be no nulls
  return output;
}

// Dictionary specialization: this dispatch path does not build dictionary columns.
// It unconditionally fails through CUDF_FAIL; none of the arguments are used.
template <>
std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::dictionary32>(
scalar const& value,
size_type size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
CUDF_FAIL("dictionary not supported when creating from scalar");
}

// List specialization: not implemented. It unconditionally fails through CUDF_FAIL;
// none of the arguments are used.
// NOTE(review): the failure message is only "TODO" — consider a descriptive message.
template <>
std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::list_view>(
scalar const& value,
size_type size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
CUDF_FAIL("TODO");
}

/**
 * @brief Builds a struct column of `size` rows in which every row repeats the struct scalar.
 *
 * The child columns are produced by gathering from the scalar's table view with a constant
 * index of 0. If the scalar is invalid the resulting struct column gets an all-null mask and
 * a null count of `size`; otherwise it gets an empty mask and a null count of 0.
 *
 * @param value The scalar to replicate; cast to the struct scalar type inside
 * @param size Number of rows in the output column
 * @param stream CUDA stream used for device memory operations
 * @param mr Device memory resource used for the returned column's allocations
 * @return The new struct column
 */
template <>
std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::struct_view>(
  scalar const& value,
  size_type size,
  rmm::cuda_stream_view stream,
  rmm::mr::device_memory_resource* mr) const
{
  // Bind by reference: a plain `auto` here would copy-construct an entire struct_scalar
  // (including the table it owns) just to read from it.
  auto const& ss = static_cast<scalar_type_t<cudf::struct_view> const&>(value);
  // Gather map that yields index 0 for every output row.
  auto iter = thrust::make_constant_iterator(0);

  auto children =
    detail::gather(ss.view(), iter, iter + size, out_of_bounds_policy::NULLIFY, stream, mr);
  auto const is_valid = ss.is_valid();
  // release() already returns a temporary, so no std::move is needed to hand it over.
  return make_structs_column(size,
                             children->release(),
                             is_valid ? 0 : size,
                             is_valid
                               ? rmm::device_buffer{}
                               : detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr),
                             stream,
                             mr);
}

} // anonymous namespace

/**
 * @brief Creates a column of `size` rows from the scalar `s`.
 *
 * A request for zero rows returns an empty column of the scalar's type without dispatching;
 * otherwise the work is forwarded to `column_from_scalar_dispatch` for the scalar's type.
 */
std::unique_ptr<column> make_column_from_scalar(scalar const& s,
                                                size_type size,
                                                rmm::cuda_stream_view stream,
                                                rmm::mr::device_memory_resource* mr)
{
  auto const scalar_type = s.type();
  if (size == 0) { return make_empty_column(scalar_type); }
  return type_dispatcher(scalar_type, column_from_scalar_dispatch{}, s, size, stream, mr);
}

} // namespace cudf
Loading

0 comments on commit 414e9bb

Please sign in to comment.