rapidsai · harrism · Nov 21, 2019 · Oct 31, 2019 · Oct 31, 2019 · Nov 1, 2019
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -125,6 +125,7 @@
 - PR #3309 Add java and JNI bindings for search bounds
 - PR #3382 Add fill function for strings column
 - PR #3391 Move device_atomics_tests.cu files to legacy
+- PR #3303 Define and implement new stream compaction APIs `copy_if`, `drop_nulls`, `apply_boolean_mask`, `drop_duplicates` and `unique_count`.
 
 ## Bug Fixes
 

diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
@@ -76,6 +76,7 @@ test:
     - test -f $PREFIX/include/cudf/legacy/replace.hpp
     - test -f $PREFIX/include/cudf/legacy/rolling.hpp
     - test -f $PREFIX/include/cudf/legacy/search.hpp
+    - test -f $PREFIX/include/cudf/stream_compaction.hpp
     - test -f $PREFIX/include/cudf/legacy/stream_compaction.hpp
     - test -f $PREFIX/include/cudf/transpose.hpp
     - test -f $PREFIX/include/cudf/legacy/transform.hpp

@@ -394,6 +394,9 @@ add_library(cudf
             src/transform/legacy/nans_to_nulls.cu
             src/transform/transform.cpp
             src/bitmask/legacy/bitmask_ops.cu
+            src/stream_compaction/apply_boolean_mask.cu
+            src/stream_compaction/drop_nulls.cu
+            src/stream_compaction/drop_duplicates.cu
             src/stream_compaction/legacy/apply_boolean_mask.cu
             src/stream_compaction/legacy/drop_nulls.cu
             src/stream_compaction/legacy/drop_duplicates.cu

@@ -9,6 +9,7 @@
 #include <cudf/table/table_device_view.cuh>
 #include <cudf/detail/utilities/release_assert.cuh>
 #include <cudf/detail/utilities/cuda.cuh>
+#include <cudf/strings/detail/gather.cuh>
 
 #include <rmm/thrust_rmm_allocator.h>
 
@@ -50,24 +51,24 @@ struct bounds_checker {
 template <bool ignore_out_of_bounds, typename MapIterator>
 __global__ void gather_bitmask_kernel(table_device_view source_table,
                                       MapIterator gather_map,
-                                      mutable_table_device_view destination_table,
+                                      bitmask_type * masks[],
+                                      size_type destination_table_num_rows,
                                       size_type* valid_counts) {
 
   for (size_type i = 0; i < source_table.num_columns(); i++) {
 
     constexpr int warp_size = 32;
 
     column_device_view source_col = source_table.column(i);
-    mutable_column_device_view destination_col = destination_table.column(i);
 
     if (source_col.has_nulls()) {
       size_type destination_row_base = blockIdx.x * blockDim.x;
       cudf::size_type valid_count_accumulate = 0;
 
-      while (destination_row_base < destination_table.num_rows()) {
+      while (destination_row_base < destination_table_num_rows) {
         size_type destination_row = destination_row_base + threadIdx.x;
 
-        const bool thread_active = destination_row < destination_col.size();
+        const bool thread_active = destination_row < destination_table_num_rows;
         size_type source_row =
           thread_active ? gather_map[destination_row] : 0;
 
@@ -84,7 +85,7 @@ __global__ void gather_bitmask_kernel(table_device_view source_table,
 
         // Only one thread writes output
         if (0 == threadIdx.x % warp_size) {
-          destination_col.set_mask_word(valid_index, valid_warp);
+          masks[i][valid_index] = valid_warp;
         }
         valid_count_accumulate += single_lane_block_popc_reduce(valid_warp);
         destination_row_base += blockDim.x * gridDim.x;
@@ -150,14 +151,24 @@ struct column_gatherer
   }
 
   template <typename Element, typename MapIterator,
-    std::enable_if_t<not is_fixed_width<Element>()>* = nullptr>
+    std::enable_if_t<std::is_same<Element, cudf::string_view>::value>* = nullptr>
   std::unique_ptr<column> operator()(column_view const& source_column,
                                      MapIterator gather_map_begin,
                                      MapIterator gather_map_end,
                                      bool ignore_out_of_bounds,
                                      rmm::mr::device_memory_resource *mr,
                                      cudaStream_t stream) {
-    CUDF_FAIL("Column type must be numeric");
+      if (true == ignore_out_of_bounds) {
+        return cudf::strings::detail::gather<true>(
+                       strings_column_view(source_column),
+                       gather_map_begin, gather_map_end,
+                       mr, stream);
+      } else {
+        return cudf::strings::detail::gather<false>(
+                       strings_column_view(source_column),
+                       gather_map_begin, gather_map_end,
+                       mr, stream);
+      }
   }
 
 };
@@ -263,13 +274,20 @@ gather(table_view const& source_table, MapIterator gather_map_begin,
   CUDA_TRY(cudaOccupancyMaxPotentialBlockSize(
                &gather_grid_size, &gather_block_size, bitmask_kernel));
 
-
   auto source_table_view = table_device_view::create(source_table);
-  auto destination_table_view = mutable_table_device_view::create(destination_table->mutable_view());
+  std::vector<bitmask_type*> host_masks(destination_table->num_columns());
+  auto mutable_destination_table = destination_table->mutable_view();
+  std::transform(mutable_destination_table.begin(), mutable_destination_table.end(),
+                    host_masks.begin(), [] (auto col){
+                        return  col.nullable()?col.null_mask():nullptr;
+                    });
+
+  rmm::device_vector<bitmask_type*> masks(host_masks);
 
   bitmask_kernel<<<gather_grid_size, gather_block_size, 0, stream>>>(*source_table_view,
                                                           gather_map_begin,
-                                                          *destination_table_view,
+                                                          masks.data().get(),
+                                                          destination_table->num_rows(),
                                                           valid_counts.data().get());
 
   thrust::host_vector<cudf::size_type> h_valid_counts(valid_counts);

@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cudf/types.hpp>
+
+#include <memory>
+#include <vector>
+
+namespace cudf {
+namespace experimental {
+namespace detail {
+
+/**---------------------------------------------------------------------------*
+ * @brief Computes the row indices that would produce `input` in a
+ * lexicographically sorted order.
+ *
+ * @param input The table to sort
+ * @param column_order The desired sort order for each column. Size must be
+ * equal to `input.num_columns()` or empty. If empty, all columns will be sorted
+ * in ascending order.
+ * @param null_precedence The desired order of null compared to other elements
+ * for each column.  Size must be equal to `input.num_columns()` or empty.
+ * If empty, all columns will be sorted in `null_order::BEFORE`.
+ * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] mr Optional resource to use for all allocations
+ * @return std::unique_ptr<column> A non-nullable column of INT32 elements
+ * containing the permuted row indices of `input` if it were sorted
+ *---------------------------------------------------------------------------**/
+std::unique_ptr<column> sorted_order(
+    table_view input, std::vector<order> const& column_order = {},
+    std::vector<null_order> const& null_precedence = {},
+    cudaStream_t stream = 0,
+    rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+
+}  // namespace detail
+}  // namespace experimental
+}  // namespace cudf