Skip to content

Commit

Permalink
Refactor cudf::contains (NVIDIA#10997)
Browse files Browse the repository at this point in the history
This is just a simple refactor to `cudf::contains`:
 * Remove `cudf/structs/detail/contains.hpp` and its corresponding source file `src/structs/search/contains.cu`, moving its (modified) implementation into `src/search/contains_nested.cu`.
 * Adopt `experimental::row::equality::two_table_comparator` for struct equality comparison.
 * Add `const` qualifier for the `operator()` functions.
 * Rename some variables, and reorganize code to make it cleaner.

No new feature is added in this PR, just modifying the existing functions and moving things around.

This PR is extracted from a larger PR for easier review. The original PR, rapidsai/cudf#10656, adds full support for nested types in `cudf::contains`; it depends on the changes here, so this PR blocks it.

Authors:
  - Nghia Truong (https://github.com/ttnghia)
  - Devavret Makkar (https://github.com/devavret)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Jordan Jacobelli (https://github.com/Ethyling)
  - Robert Maynard (https://github.com/robertmaynard)
  - Yunsong Wang (https://github.com/PointKernel)

URL: rapidsai/cudf#10997
  • Loading branch information
ttnghia authored May 31, 2022
1 parent 1c51d28 commit 164e28e
Show file tree
Hide file tree
Showing 9 changed files with 185 additions and 228 deletions.
1 change: 0 additions & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,6 @@ outputs:
- test -f $PREFIX/include/cudf/structs/structs_column_view.hpp
- test -f $PREFIX/include/cudf/structs/struct_view.hpp
- test -f $PREFIX/include/cudf/structs/detail/concatenate.hpp
- test -f $PREFIX/include/cudf/structs/detail/contains.hpp
- test -f $PREFIX/include/cudf/table/table.hpp
- test -f $PREFIX/include/cudf/table/table_view.hpp
- test -f $PREFIX/include/cudf/tdigest/tdigest_column_view.cuh
Expand Down
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ add_library(
src/scalar/scalar.cpp
src/scalar/scalar_factories.cpp
src/search/contains.cu
src/search/contains_nested.cu
src/search/search_ordered.cu
src/sort/is_sorted.cu
src/sort/rank.cu
Expand Down Expand Up @@ -514,7 +515,6 @@ add_library(
src/strings/utilities.cu
src/strings/wrap.cu
src/structs/copying/concatenate.cu
src/structs/search/contains.cu
src/structs/structs_column_factories.cu
src/structs/structs_column_view.cpp
src/structs/utilities.cpp
Expand Down
59 changes: 34 additions & 25 deletions cpp/include/cudf/detail/search.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,35 +23,30 @@

#include <rmm/cuda_stream_view.hpp>

#include <vector>

namespace cudf {
namespace detail {
namespace cudf::detail {
/**
* @copydoc cudf::lower_bound
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> lower_bound(
table_view const& haystack,
table_view const& needles,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<column> lower_bound(table_view const& haystack,
table_view const& needles,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @copydoc cudf::upper_bound
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> upper_bound(
table_view const& haystack,
table_view const& needles,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<column> upper_bound(table_view const& haystack,
table_view const& needles,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @copydoc cudf::contains(column_view const&, scalar const&, rmm::mr::device_memory_resource*)
Expand All @@ -65,11 +60,25 @@ bool contains(column_view const& haystack, scalar const& needle, rmm::cuda_strea
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> contains(
column_view const& haystack,
column_view const& needles,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<column> contains(column_view const& haystack,
column_view const& needles,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @brief Check if the (unique) row of the `needle` column is contained in the `haystack` column.
*
* If the input `needle` column has more than one row, only the first row will be considered.
*
* This function is designed for nested types only. It can also work with non-nested types
* but with lower performance due to the complexity of the implementation.
*
* @param haystack The column containing search space.
* @param needle A column whose first row is the value to check for existence in the search space.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @return true if the given `needle` value exists in the `haystack` column.
*/
bool contains_nested_element(column_view const& haystack,
column_view const& needle,
rmm::cuda_stream_view stream);

} // namespace detail
} // namespace cudf
} // namespace cudf::detail
41 changes: 0 additions & 41 deletions cpp/include/cudf/structs/detail/contains.hpp

This file was deleted.

2 changes: 1 addition & 1 deletion cpp/src/hash/unordered_multiset.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ class unordered_multiset {
return unordered_multiset(d_col.size(), std::move(hash_bins_start), std::move(hash_data));
}

unordered_multiset_device_view<Element, Hasher, Equality> to_device()
unordered_multiset_device_view<Element, Hasher, Equality> to_device() const
{
return unordered_multiset_device_view<Element, Hasher, Equality>(
size, hash_bins.data(), hash_data.data());
Expand Down
Loading

0 comments on commit 164e28e

Please sign in to comment.