Skip to content

Commit

Permalink
Remove unused masked udf cython/c++ code (#9792)
Browse files Browse the repository at this point in the history
This PR removes the C++ side of the original masked UDF code introduced in #8213. These kernels had some limitations and are now superseded by the numba-generated versions we moved to in #9174. As far as I can tell, cuDF Python was the only consumer of this API for the short time it has existed. However, I am marking this as breaking just in case.

Authors:
  - https://github.com/brandon-b-miller

Approvers:
  - Mark Harris (https://github.com/harrism)
  - David Wendt (https://github.com/davidwendt)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #9792
  • Loading branch information
brandon-b-miller authored Dec 1, 2021
1 parent 1ceb8ab commit 11c3dfe
Show file tree
Hide file tree
Showing 6 changed files with 2 additions and 225 deletions.
4 changes: 2 additions & 2 deletions cpp/cmake/Modules/JitifyPreprocessKernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ function(jit_preprocess_files)
endfunction()

jit_preprocess_files(
SOURCE_DIRECTORY ${CUDF_SOURCE_DIR}/src FILES binaryop/jit/kernel.cu
transform/jit/masked_udf_kernel.cu transform/jit/kernel.cu rolling/jit/kernel.cu
SOURCE_DIRECTORY ${CUDF_SOURCE_DIR}/src FILES binaryop/jit/kernel.cu transform/jit/kernel.cu
rolling/jit/kernel.cu
)

add_custom_target(
Expand Down
6 changes: 0 additions & 6 deletions cpp/include/cudf/transform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,6 @@ std::unique_ptr<column> transform(
bool is_ptx,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Applies a user-defined function across the columns of `data_view`, producing a new
 * column of `output_type` together with a null mask.
 *
 * The result has `data_view.num_rows()` rows. The implementation hands the UDF text to the
 * JIT PTX parser, so the string is expected to contain PTX.
 *
 * @param data_view   Table whose columns are the inputs to the UDF
 * @param binary_udf  Source of the UDF to apply (parsed as PTX by the implementation)
 * @param output_type Type of the column to produce
 * @param mr          Device memory resource (defaults to the current device resource)
 * @return The output column with its null mask set from the UDF's validity results
 */
std::unique_ptr<column> generalized_masked_op(
table_view const& data_view,
std::string const& binary_udf,
data_type output_type,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Creates a null_mask from `input` by converting `NaN` to null and
* preserving existing null values and also returns new null_count.
Expand Down
85 changes: 0 additions & 85 deletions cpp/src/transform/jit/masked_udf_kernel.cu

This file was deleted.

102 changes: 0 additions & 102 deletions cpp/src/transform/transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,10 @@
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/transform.hpp>
#include <cudf/null_mask.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/traits.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <jit_preprocessed_files/transform/jit/kernel.cu.jit.hpp>
#include <jit_preprocessed_files/transform/jit/masked_udf_kernel.cu.jit.hpp>

#include <jit/cache.hpp>
#include <jit/parser.hpp>
Expand Down Expand Up @@ -65,80 +63,6 @@ void unary_operation(mutable_column_view output,
cudf::jit::get_data_ptr(input));
}

/**
 * @brief Builds the list of type names used to instantiate the masked UDF kernel template.
 *
 * The first entry is the type name of the output column. Every input column then
 * contributes three consecutive entries: a pointer to its element type, the bitmask
 * pointer type, and the offset type.
 *
 * @param outcol_view View of the output column (only its type is read)
 * @param data_view   Table of input columns
 * @return `3 * data_view.num_columns() + 1` type-name strings, in kernel template order
 */
std::vector<std::string> make_template_types(column_view outcol_view, table_view const& data_view)
{
  // Helper: JIT type name for a cudf type given as a C++ type.
  auto const name_of = [](cudf::type_id id) {
    return cudf::jit::get_type_name(cudf::data_type(id));
  };

  std::string const bitmask_ptr_name = name_of(cudf::type_to_id<cudf::bitmask_type>()) + "*";
  std::string const offset_name      = name_of(cudf::type_to_id<cudf::offset_type>());

  std::vector<std::string> names;
  names.reserve((3 * data_view.num_columns()) + 1);

  // Output type comes first ...
  names.emplace_back(cudf::jit::get_type_name(outcol_view.type()));

  // ... followed by a (data pointer, mask pointer, offset) triple per input column.
  for (auto const& input_col : data_view) {
    names.emplace_back(cudf::jit::get_type_name(input_col.type()) + "*");
    names.emplace_back(bitmask_ptr_name);
    names.emplace_back(offset_name);
  }

  return names;
}

/**
 * @brief JIT-compiles the masked UDF kernel for the given inputs and launches it.
 *
 * The kernel receives its arguments in this order: the output size, the output data
 * pointer, the output mask pointer, and then for every input column a
 * (data pointer, null-mask pointer, offset) triple.
 *
 * @param data_view   Table whose columns are the inputs to the UDF
 * @param udf         UDF source; parsed as PTX into a function named `GENERIC_OP`
 * @param output_type Result type of the UDF
 * @param outcol_view Column that receives the computed values
 * @param outmsk_view Column that receives the per-row validity results
 * @param stream      CUDA stream on which the kernel is launched
 * @param mr          Not used by this function
 */
void generalized_operation(table_view const& data_view,
                           std::string const& udf,
                           data_type output_type,
                           mutable_column_view outcol_view,
                           mutable_column_view outmsk_view,
                           rmm::cuda_stream_view stream,
                           rmm::mr::device_memory_resource* mr)
{
  auto const template_types = make_template_types(outcol_view, data_view);

  std::string generic_kernel_name =
    jitify2::reflection::Template("cudf::transformation::jit::generic_udf_kernel")
      .instantiate(template_types);

  std::string generic_cuda_source = cudf::jit::parse_single_function_ptx(
    udf, "GENERIC_OP", cudf::jit::get_type_name(output_type), {0});

  auto const num_columns = data_view.num_columns();

  cudf::size_type size   = outcol_view.size();
  const void* outcol_ptr = cudf::jit::get_data_ptr(outcol_view);
  const void* outmsk_ptr = cudf::jit::get_data_ptr(outmsk_view);

  // These vectors back the per-column kernel arguments. They must be *sized* (not merely
  // reserved): the launch receives the address of each element, and writing to or taking
  // the address of an element beyond size() is undefined behaviour. Sizing them up front
  // also guarantees no reallocation invalidates the stored addresses.
  std::vector<const void*> data_ptrs(num_columns);
  std::vector<cudf::bitmask_type const*> mask_ptrs(num_columns);
  std::vector<cudf::offset_type> offsets(num_columns);

  std::vector<void*> kernel_args;
  kernel_args.reserve((num_columns * 3) + 3);
  kernel_args.push_back(&size);
  kernel_args.push_back(&outcol_ptr);
  kernel_args.push_back(&outmsk_ptr);

  cudf::size_type idx = 0;
  for (auto const& col : data_view) {
    data_ptrs[idx] = cudf::jit::get_data_ptr(col);
    mask_ptrs[idx] = col.null_mask();
    offsets[idx]   = col.offset();

    kernel_args.push_back(&data_ptrs[idx]);
    kernel_args.push_back(&mask_ptrs[idx]);
    kernel_args.push_back(&offsets[idx]);
    ++idx;
  }

  cudf::jit::get_program_cache(*transform_jit_masked_udf_kernel_cu_jit)
    .get_kernel(generic_kernel_name,
                {},
                {{"transform/jit/operation-udf.hpp", generic_cuda_source}},
                {"-arch=sm_."})
    ->configure_1d_max_occupancy(0, 0, 0, stream.value())
    ->launch(kernel_args.data());
}

} // namespace jit
} // namespace transformation

Expand All @@ -165,24 +89,6 @@ std::unique_ptr<column> transform(column_view const& input,
return output;
}

/**
 * @brief Stream-aware implementation of `cudf::generalized_masked_op`.
 *
 * Allocates the output column and a temporary BOOL8 column with one validity value per
 * row, runs the JIT masked UDF kernel to fill both, then converts the BOOL8 column into a
 * bitmask and installs it as the output's null mask.
 *
 * @param data_view   Table whose columns are the inputs to the UDF
 * @param udf         UDF source, forwarded to the JIT kernel launcher
 * @param output_type Type of the column to produce
 * @param stream      CUDA stream used for allocations and the kernel launch
 * @param mr          Device memory resource used for the returned column's memory
 * @return Output column with `data_view.num_rows()` rows and its null mask set
 */
std::unique_ptr<column> generalized_masked_op(table_view const& data_view,
                                              std::string const& udf,
                                              data_type output_type,
                                              rmm::cuda_stream_view stream,
                                              rmm::mr::device_memory_resource* mr)
{
  auto const num_rows = data_view.num_rows();

  // The returned column must live on the caller's stream and memory resource.
  std::unique_ptr<column> output =
    make_fixed_width_column(output_type, num_rows, mask_state::UNALLOCATED, stream, mr);

  // Temporary per-row validity; it never escapes this function, so it uses the default
  // memory resource but still the caller's stream.
  std::unique_ptr<column> output_mask = make_fixed_width_column(
    cudf::data_type{cudf::type_id::BOOL8}, num_rows, mask_state::UNALLOCATED, stream);

  transformation::jit::generalized_operation(
    data_view, udf, output_type, *output, *output_mask, stream, mr);

  // bools_to_mask yields (bitmask buffer, null count); keep both so the null count does
  // not need to be recomputed later.
  auto final_output_mask = cudf::detail::bools_to_mask(*output_mask, stream, mr);
  output->set_null_mask(std::move(*(final_output_mask.first)), final_output_mask.second);
  return output;
}

} // namespace detail

std::unique_ptr<column> transform(column_view const& input,
Expand All @@ -195,12 +101,4 @@ std::unique_ptr<column> transform(column_view const& input,
return detail::transform(input, unary_udf, output_type, is_ptx, rmm::cuda_stream_default, mr);
}

/**
 * @brief Public entry point for the masked UDF transform.
 *
 * Forwards to `detail::generalized_masked_op` using the default CUDA stream.
 *
 * @param data_view   Table whose columns are the inputs to the UDF
 * @param udf         UDF source
 * @param output_type Type of the column to produce
 * @param mr          Device memory resource passed through to the implementation
 * @return The column produced by the detail implementation
 */
std::unique_ptr<column> generalized_masked_op(table_view const& data_view,
                                              std::string const& udf,
                                              data_type output_type,
                                              rmm::mr::device_memory_resource* mr)
{
  // The public API carries no stream parameter; it always runs on the default stream.
  auto const default_stream = rmm::cuda_stream_default;
  return detail::generalized_masked_op(data_view, udf, output_type, default_stream, mr);
}

} // namespace cudf
6 changes: 0 additions & 6 deletions python/cudf/cudf/_lib/cpp/transform.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,6 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil:
bool is_ptx
) except +

# Cython binding for `cudf::generalized_masked_op` from "cudf/transform.hpp".
# The C++ declaration's trailing memory-resource argument has a default value and is not
# exposed here, so calls through this binding always use that default.
# `except +` converts a C++ exception raised by the call into a Python exception.
cdef unique_ptr[column] generalized_masked_op(
const table_view& data_view,
string udf,
data_type output_type,
) except +

cdef pair[unique_ptr[table], unique_ptr[column]] encode(
table_view input
) except +
Expand Down
24 changes: 0 additions & 24 deletions python/cudf/cudf/_lib/transform.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -123,30 +123,6 @@ def transform(Column input, op):
return Column.from_unique_ptr(move(c_output))


def masked_udf(incols, op, output_type):
    """
    Apply a masked UDF to the columns of ``incols`` via libcudf.

    Parameters
    ----------
    incols
        Table-like object converted to a libcudf ``table_view``
        (its index is ignored).
    op : str
        UDF source; encoded as UTF-8 before being passed to libcudf.
    output_type
        Key into ``SUPPORTED_NUMPY_TO_LIBCUDF_TYPES`` selecting the
        libcudf type of the result.

    Returns
    -------
    Column
        The column returned by ``generalized_masked_op``.
    """
    cdef table_view c_inputs = table_view_from_table(
        incols, ignore_index=True)
    cdef string c_udf = op.encode("UTF-8")

    # Map the requested output type onto the libcudf type id, then wrap it.
    cdef type_id c_type_id = <type_id> (
        <underlying_type_t_type_id> SUPPORTED_NUMPY_TO_LIBCUDF_TYPES[
            output_type
        ]
    )
    cdef data_type c_out_type = data_type(c_type_id)

    # The libcudf call needs no Python objects, so release the GIL around it.
    with nogil:
        c_output = move(libcudf_transform.generalized_masked_op(
            c_inputs,
            c_udf,
            c_out_type,
        ))

    return Column.from_unique_ptr(move(c_output))


def table_encode(input):
cdef table_view c_input = table_view_from_table(
input, ignore_index=True)
Expand Down

0 comments on commit 11c3dfe

Please sign in to comment.