From 8a7af11f7da1fd43fde4d308cc6394371f884e05 Mon Sep 17 00:00:00 2001
From: Mike Wilson <hyperbolic2346@users.noreply.github.com>
Date: Wed, 24 Mar 2021 13:01:23 -0400
Subject: [PATCH 1/2] Fixing empty null lists throwing explode_outer for a
 loop. (#7649)

I found two issues, one was that we didn't build the correct number of null or empty offsets. We should build them for the exploded column and they are sized as such, but I was marching over it the size of the child data column. This didn't cause trouble as long as there was more data than nulls.

The second issue was the large loop. We have to go over that loop at least the number of nulls we have as that loop is doing two things at once.

1) writing the valid data rows to a gather map.
2) filling in the holes in the gather map for null and empty entries.

This was another case of things working fine as long as we ran the loop enough to cover all the null entries, which happens unless there are more nulls than entries. That wasn't tested and so it was never seen. Thankfully, @sperlingxx tested exactly that.

Added a test for this case.

Fixes #7636

Authors:
  - Mike Wilson (@hyperbolic2346)

Approvers:
  - Nghia Truong (@ttnghia)
  - MithunR (@mythrocks)

URL: https://github.com/rapidsai/cudf/pull/7649
---
 cpp/src/lists/explode.cu          |  77 +++---
 cpp/tests/lists/explode_tests.cpp | 374 +++++++++++++++++++++++-------
 2 files changed, 326 insertions(+), 125 deletions(-)
diff --git a/cpp/src/lists/explode.cu b/cpp/src/lists/explode.cu
index 8233635050e..2b495deb47f 100644
--- a/cpp/src/lists/explode.cu
+++ b/cpp/src/lists/explode.cu
@@ -188,7 +188,7 @@ std::unique_ptr<table> explode_outer(table_view const& input_table,
     });
   thrust::inclusive_scan(rmm::exec_policy(stream),
                          null_or_empty,
-                         null_or_empty + sliced_child.size(),
+                         null_or_empty + explode_col.size(),
                          null_or_empty_offset.begin());
 
   auto null_or_empty_count =
@@ -209,41 +209,48 @@ std::unique_ptr<table> explode_outer(table_view const& input_table,
   // offsets + 1 here to skip the 0th offset, which removes a - 1 operation later.
   auto offsets_minus_one = thrust::make_transform_iterator(
     thrust::next(offsets), [offsets] __device__(auto i) { return (i - offsets[0]) - 1; });
+
+  auto fill_gather_maps = [offsets_minus_one,
+                           gather_map_p             = gather_map.begin(),
+                           explode_col_gather_map_p = explode_col_gather_map.begin(),
+                           position_array           = pos.begin(),
+                           sliced_child_size        = sliced_child.size(),
+                           null_or_empty_offset_p   = null_or_empty_offset.begin(),
+                           include_position,
+                           offsets,
+                           null_or_empty,
+                           offset_size = explode_col.offsets().size() - 1] __device__(auto idx) {
+    if (idx < sliced_child_size) {
+      auto lb_idx =
+        thrust::distance(offsets_minus_one,
+                         thrust::lower_bound(
+                           thrust::seq, offsets_minus_one, offsets_minus_one + (offset_size), idx));
+      auto index_to_write                      = null_or_empty_offset_p[lb_idx] + idx;
+      gather_map_p[index_to_write]             = lb_idx;
+      explode_col_gather_map_p[index_to_write] = idx;
+      if (include_position) {
+        position_array[index_to_write] = idx - (offsets[lb_idx] - offsets[0]);
+      }
+    }
+    if (null_or_empty[idx]) {
+      auto invalid_index = null_or_empty_offset_p[idx] == 0
+                             ? offsets[idx]
+                             : offsets[idx] + null_or_empty_offset_p[idx] - 1;
+      gather_map_p[invalid_index] = idx;
+
+      // negative one to indicate a null value
+      explode_col_gather_map_p[invalid_index] = -1;
+      if (include_position) { position_array[invalid_index] = 0; }
+    }
+  };
+
+  // we need to do this loop at least explode_col times or we may not properly fill in null and
+  // empty entries.
+  auto loop_count = std::max(sliced_child.size(), explode_col.size());
+
   // Fill in gather map with all the child column's entries
-  thrust::for_each(rmm::exec_policy(stream),
-                   counting_iter,
-                   counting_iter + sliced_child.size(),
-                   [offsets_minus_one,
-                    gather_map             = gather_map.begin(),
-                    explode_col_gather_map = explode_col_gather_map.begin(),
-                    position_array         = pos.begin(),
-                    include_position,
-                    offsets,
-                    null_or_empty_offset = null_or_empty_offset.begin(),
-                    null_or_empty,
-                    offset_size = explode_col.offsets().size() - 1] __device__(auto idx) {
-                     auto lb_idx = thrust::distance(
-                       offsets_minus_one,
-                       thrust::lower_bound(
-                         thrust::seq, offsets_minus_one, offsets_minus_one + (offset_size), idx));
-                     auto index_to_write                    = null_or_empty_offset[lb_idx] + idx;
-                     gather_map[index_to_write]             = lb_idx;
-                     explode_col_gather_map[index_to_write] = idx;
-                     if (include_position) {
-                       position_array[index_to_write] = idx - (offsets[lb_idx] - offsets[0]);
-                     }
-                     if (null_or_empty[idx]) {
-                       auto invalid_index = null_or_empty_offset[idx] == 0
-                                              ? offsets[idx]
-                                              : offsets[idx] + null_or_empty_offset[idx] - 1;
-                       gather_map[invalid_index] = idx;
-
-                       // negative one to indicate a null value
-                       explode_col_gather_map[invalid_index] = -1;
-
-                       if (include_position) { position_array[invalid_index] = 0; }
-                     }
-                   });
+  thrust::for_each(
+    rmm::exec_policy(stream), counting_iter, counting_iter + loop_count, fill_gather_maps);
 
   return build_table(
     input_table,
diff --git a/cpp/tests/lists/explode_tests.cpp b/cpp/tests/lists/explode_tests.cpp
index 2ec9294d118..4c7ded0efd7 100644
--- a/cpp/tests/lists/explode_tests.cpp
+++ b/cpp/tests/lists/explode_tests.cpp
@@ -102,15 +102,17 @@ TEST_F(ExplodeTest, Basics)
 TEST_F(ExplodeTest, SingleNull)
 {
   //    a                   b
-  //    [1, 2, 7]           100
+  //    null                100
   //    [5, 6]              200
   //    []                  300
   //    [0, 3]              400
 
+  constexpr auto null = 0;
+
   auto first_invalid =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i == 0 ? false : true; });
 
-  LCW a({LCW{1, 2, 7}, LCW{5, 6}, LCW{}, LCW{0, 3}}, first_invalid);
+  LCW a({LCW{null}, LCW{5, 6}, LCW{}, LCW{0, 3}}, first_invalid);
   FCW b({100, 200, 300, 400});
 
   FCW expected_a{5, 6, 0, 3};
@@ -134,15 +136,17 @@ TEST_F(ExplodeTest, Nulls)
 {
   //    a                   b
   //    [1, 2, 7]           100
-  //    [5, 6]              200
+  //    null                200
   //    [0, 3]              300
 
+  constexpr auto null = 0;
+
   auto valids = cudf::detail::make_counting_transform_iterator(
     0, [](auto i) { return i % 2 == 0 ? true : false; });
   auto always_valid =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });
 
-  LCW a({LCW{1, 2, 7}, LCW{5, 6}, LCW{0, 3}}, valids);
+  LCW a({LCW{1, 2, 7}, LCW{null}, LCW{0, 3}}, valids);
   FCW b({100, 200, 300}, valids);
 
   FCW expected_a({1, 2, 7, 0, 3});
@@ -165,18 +169,21 @@ TEST_F(ExplodeTest, Nulls)
 TEST_F(ExplodeTest, NullsInList)
 {
   //    a                   b
-  //    [1, 2, 7]           100
-  //    [5, 6, 0, 9]        200
+  //    [1, null, 7]        100
+  //    [5, null, 0, null]  200
   //    []                  300
-  //    [0, 3, 8]           400
+  //    [0, null, 8]        400
+
+  constexpr auto null = 0;
 
   auto valids = cudf::detail::make_counting_transform_iterator(
     0, [](auto i) { return i % 2 == 0 ? true : false; });
 
-  LCW a{LCW({1, 2, 7}, valids), LCW({5, 6, 0, 9}, valids), LCW{}, LCW({0, 3, 8}, valids)};
+  LCW a{
+    LCW({1, null, 7}, valids), LCW({5, null, 0, null}, valids), LCW{}, LCW({0, null, 8}, valids)};
   FCW b{100, 200, 300, 400};
 
-  FCW expected_a({1, 2, 7, 5, 6, 0, 9, 0, 3, 8}, {1, 0, 1, 1, 0, 1, 0, 1, 0, 1});
+  FCW expected_a({1, null, 7, 5, null, 0, null, 0, null, 8}, {1, 0, 1, 1, 0, 1, 0, 1, 0, 1});
   FCW expected_b{100, 100, 100, 200, 200, 200, 200, 400, 400, 400};
 
   cudf::table_view t({a, b});
@@ -224,16 +231,18 @@ TEST_F(ExplodeTest, NestedNulls)
 {
   //    a                   b
   //    [[1, 2], [7, 6, 5]] 100
-  //    [[5, 6]]            200
+  //    null                null
   //    [[0, 3],[5],[2, 1]] 300
 
+  constexpr auto null = 0;
+
   auto valids = cudf::detail::make_counting_transform_iterator(
     0, [](auto i) { return i % 2 == 0 ? true : false; });
   auto always_valid =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });
 
-  LCW a({LCW{LCW{1, 2}, LCW{7, 6, 5}}, LCW{LCW{5, 6}}, LCW{LCW{0, 3}, LCW{5}, LCW{2, 1}}}, valids);
-  FCW b({100, 200, 300}, valids);
+  LCW a({LCW{LCW{1, 2}, LCW{7, 6, 5}}, LCW{LCW{null}}, LCW{LCW{0, 3}, LCW{5}, LCW{2, 1}}}, valids);
+  FCW b({100, null, 300}, valids);
 
   LCW expected_a{LCW{1, 2}, LCW{7, 6, 5}, LCW{0, 3}, LCW{5}, LCW{2, 1}};
   FCW expected_b({100, 100, 300, 300, 300}, always_valid);
@@ -254,21 +263,23 @@ TEST_F(ExplodeTest, NestedNulls)
 
 TEST_F(ExplodeTest, NullsInNested)
 {
-  //    a                   b
-  //    [[1, 2], [7, 6, 5]] 100
-  //    [[5, 6]]            200
-  //    [[0, 3],[5],[2, 1]] 300
+  //    a                      b
+  //    [[1, null], [7, 6, 5]] 100
+  //    [[5, 6]]               200
+  //    [[0, 3],[5],[2, null]] 300
+
+  constexpr auto null = 0;
 
   auto valids = cudf::detail::make_counting_transform_iterator(
     0, [](auto i) { return i % 2 == 0 ? true : false; });
 
-  LCW a({LCW{LCW({1, 2}, valids), LCW{7, 6, 5}},
+  LCW a({LCW{LCW({1, null}, valids), LCW{7, 6, 5}},
          LCW{LCW{5, 6}},
-         LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}});
+         LCW{LCW{0, 3}, LCW{5}, LCW({2, null}, valids)}});
   FCW b({100, 200, 300});
 
   LCW expected_a{
-    LCW({1, 2}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)};
+    LCW({1, null}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, null}, valids)};
   FCW expected_b{100, 100, 200, 300, 300, 300};
 
   cudf::table_view t({a, b});
@@ -287,20 +298,22 @@ TEST_F(ExplodeTest, NullsInNested)
 
 TEST_F(ExplodeTest, NullsInNestedDoubleExplode)
 {
-  //    a                       b
-  //    [[1, 2], [], [7, 6, 5]] 100
-  //    [[5, 6]]                200
-  //    [[0, 3],[5],[2, 1]]     300
+  //    a                          b
+  //    [[1, null], [], [7, 6, 5]] 100
+  //    [[5, 6]]                   200
+  //    [[0, 3],[5],[2, null]]     300
+
+  constexpr auto null = 0;
 
   auto valids = cudf::detail::make_counting_transform_iterator(
     0, [](auto i) { return i % 2 == 0 ? true : false; });
 
-  LCW a{LCW{LCW({1, 2}, valids), LCW{}, LCW{7, 6, 5}},
+  LCW a{LCW{LCW({1, null}, valids), LCW{}, LCW{7, 6, 5}},
         LCW{LCW{5, 6}},
-        LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}};
+        LCW{LCW{0, 3}, LCW{5}, LCW({2, null}, valids)}};
   FCW b{100, 200, 300};
 
-  FCW expected_a({1, 2, 7, 6, 5, 5, 6, 0, 3, 5, 2, 1}, {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0});
+  FCW expected_a({1, null, 7, 6, 5, 5, 6, 0, 3, 5, 2, null}, {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0});
   FCW expected_b{100, 100, 100, 100, 100, 200, 200, 300, 300, 300, 300, 300};
 
   cudf::table_view t({a, b});
@@ -320,23 +333,25 @@ TEST_F(ExplodeTest, NullsInNestedDoubleExplode)
 
 TEST_F(ExplodeTest, NestedStructs)
 {
-  //    a                   b
-  //    [[1, 2], [7, 6, 5]] {100, "100"}
-  //    [[5, 6]]            {200, "200"}
-  //    [[0, 3],[5],[2, 1]] {300, "300"}
+  //    a                      b
+  //    [[1, null], [7, 6, 5]] {100, "100"}
+  //    [[5, 6]]               {200, "200"}
+  //    [[0, 3],[5],[2, null]] {300, "300"}
+
+  constexpr auto null = 0;
 
   auto valids = cudf::detail::make_counting_transform_iterator(
     0, [](auto i) { return i % 2 == 0 ? true : false; });
 
-  LCW a({LCW{LCW({1, 2}, valids), LCW{7, 6, 5}},
+  LCW a({LCW{LCW({1, null}, valids), LCW{7, 6, 5}},
          LCW{LCW{5, 6}},
-         LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}});
+         LCW{LCW{0, 3}, LCW{5}, LCW({2, null}, valids)}});
   FCW b1({100, 200, 300});
   strings_column_wrapper b2{"100", "200", "300"};
   structs_column_wrapper b({b1, b2});
 
   LCW expected_a{
-    LCW({1, 2}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)};
+    LCW({1, null}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, null}, valids)};
   FCW expected_b1{100, 100, 200, 300, 300, 300};
   strings_column_wrapper expected_b2{"100", "100", "200", "300", "300", "300"};
   structs_column_wrapper expected_b({expected_b1, expected_b2});
@@ -397,15 +412,17 @@ TYPED_TEST(ExplodeTypedTest, ListOfStructs)
 
 TEST_F(ExplodeTest, SlicedList)
 {
-  //    a                        b
-  //    [[1, 2],[7, 6, 5]]       100
-  //    [[5, 6]]                 200
-  //    [[0, 3],[5],[2, 1]]      300
-  //    [[8, 3],[],[4, 3, 1, 2]] 400
-  //    [[2, 3, 4],[9, 8]]       500
+  //    a                              b
+  //    [[1, null],[7, 6, 5]]          100
+  //    [[5, 6]]                       200
+  //    [[0, 3],[5],[2, null]]         300
+  //    [[8, 3],[],[4, null, 1, null]] 400
+  //    [[2, 3, 4],[9, 8]]             500
 
   //    slicing the top 2 rows and the bottom row off
 
+  constexpr auto null = 0;
+
   auto valids = cudf::detail::make_counting_transform_iterator(
     0, [](auto i) { return i % 2 == 0 ? true : false; });
 
@@ -417,7 +434,7 @@ TEST_F(ExplodeTest, SlicedList)
   FCW b({100, 200, 300, 400, 500});
 
   LCW expected_a{
-    LCW{0, 3}, LCW{5}, LCW({2, 1}, valids), LCW{8, 3}, LCW{}, LCW({4, 3, 1, 2}, valids)};
+    LCW{0, 3}, LCW{5}, LCW({2, null}, valids), LCW{8, 3}, LCW{}, LCW({4, null, 1, null}, valids)};
   FCW expected_b{300, 300, 300, 400, 400, 400};
 
   cudf::table_view t({a, b});
@@ -490,19 +507,21 @@ TEST_F(ExplodeOuterTest, Basics)
 
 TEST_F(ExplodeOuterTest, SingleNull)
 {
-  //    a                   b
-  //    [1, 2, 7]           100
-  //    [5, 6]              200
-  //    []                  300
-  //    [0, 3]              400
+  //    a              b
+  //    null           100
+  //    [5, 6]         200
+  //    []             300
+  //    [0, 3]         400
+
+  constexpr auto null = 0;
 
   auto first_invalid =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i == 0 ? false : true; });
 
-  LCW a({LCW{1, 2, 7}, LCW{5, 6}, LCW{}, LCW{0, 3}}, first_invalid);
+  LCW a({LCW{null}, LCW{5, 6}, LCW{}, LCW{0, 3}}, first_invalid);
   FCW b({100, 200, 300, 400});
 
-  FCW expected_a{{0, 5, 6, 0, 0, 3}, {0, 1, 1, 0, 1, 1}};
+  FCW expected_a{{null, 5, 6, 0, 0, 3}, {0, 1, 1, 0, 1, 1}};
   FCW expected_b{100, 200, 200, 300, 400, 400};
 
   cudf::table_view t({a, b});
@@ -522,17 +541,19 @@ TEST_F(ExplodeOuterTest, Nulls)
 {
   //    a                   b
   //    [1, 2, 7]           100
-  //    [5, 6]              200
+  //    null                null
   //    [0, 3]              300
 
+  constexpr auto null = 0;
+
   auto valids = cudf::detail::make_counting_transform_iterator(
     0, [](auto i) { return i % 2 == 0 ? true : false; });
 
-  LCW a({LCW{1, 2, 7}, LCW{5, 6}, LCW{0, 3}}, valids);
-  FCW b({100, 200, 300}, valids);
+  LCW a({LCW{1, 2, 7}, LCW{null}, LCW{0, 3}}, valids);
+  FCW b({100, null, 300}, valids);
 
-  FCW expected_a({1, 2, 7, 0, 0, 3}, {1, 1, 1, 0, 1, 1});
-  FCW expected_b({100, 100, 100, 200, 300, 300}, {1, 1, 1, 0, 1, 1});
+  FCW expected_a({1, 2, 7, null, 0, 3}, {1, 1, 1, 0, 1, 1});
+  FCW expected_b({100, 100, 100, null, 300, 300}, {1, 1, 1, 0, 1, 1});
 
   cudf::table_view t({a, b});
   cudf::table_view expected({expected_a, expected_b});
@@ -547,21 +568,182 @@ TEST_F(ExplodeOuterTest, Nulls)
   CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected);
 }
 
+TEST_F(ExplodeOuterTest, AllNulls)
+{
+  //    a            b
+  //    null         100
+  //    null         200
+  //    null         300
+
+  constexpr auto null = 0;
+
+  auto non_valid = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return false; });
+
+  LCW a({LCW{null}, LCW{null}, LCW{null}}, non_valid);
+  FCW b({100, 200, 300});
+
+  FCW expected_a({null, null, null}, {0, 0, 0});
+  FCW expected_b({100, 200, 300});
+
+  cudf::table_view t({a, b});
+  cudf::table_view expected({expected_a, expected_b});
+
+  auto ret = cudf::explode_outer(t, 0);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected);
+
+  FCW expected_pos_col{0, 0, 0};
+  cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b});
+
+  auto pos_ret = cudf::explode_outer_position(t, 0);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected);
+}
+
+TEST_F(ExplodeOuterTest, SequentialNulls)
+{
+  //    a               b
+  //    [1, 2, null]    100
+  //    [3, 4]          200
+  //    []              300
+  //    []              400
+  //    [5, 6, 7]       500
+
+  constexpr auto null = 0;
+
+  auto third_invalid =
+    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i == 2 ? false : true; });
+
+  LCW a{LCW({1, 2, null}, third_invalid), LCW{3, 4}, LCW{}, LCW{}, LCW{5, 6, 7}};
+  FCW b{100, 200, 300, 400, 500};
+
+  FCW expected_a({1, 2, null, 3, 4, null, null, 5, 6, 7}, {1, 1, 0, 1, 1, 0, 0, 1, 1, 1});
+  FCW expected_b({100, 100, 100, 200, 200, 300, 400, 500, 500, 500});
+
+  cudf::table_view t({a, b});
+  cudf::table_view expected({expected_a, expected_b});
+
+  auto ret = cudf::explode_outer(t, 0);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected);
+
+  FCW expected_pos_col{0, 1, 2, 0, 1, 0, 0, 0, 1, 2};
+  cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b});
+
+  auto pos_ret = cudf::explode_outer_position(t, 0);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected);
+}
+
+TEST_F(ExplodeOuterTest, MoreEmptyThanData)
+{
+  //    a               b
+  //    [1, 2]          100
+  //    []              200
+  //    []              300
+  //    []              400
+  //    []              500
+  //    [3]             600
+
+  constexpr auto null = 0;
+
+  LCW a{LCW{1, 2}, LCW{}, LCW{}, LCW{}, LCW{}, LCW{3}};
+  FCW b{100, 200, 300, 400, 500, 600};
+
+  FCW expected_a({1, 2, null, null, null, null, 3}, {1, 1, 0, 0, 0, 0, 1});
+  FCW expected_b({100, 100, 200, 300, 400, 500, 600});
+
+  cudf::table_view t({a, b});
+  cudf::table_view expected({expected_a, expected_b});
+
+  auto ret = cudf::explode_outer(t, 0);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected);
+
+  FCW expected_pos_col{0, 1, 0, 0, 0, 0, 0};
+  cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b});
+
+  auto pos_ret = cudf::explode_outer_position(t, 0);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected);
+}
+
+TEST_F(ExplodeOuterTest, TrailingEmptys)
+{
+  //    a               b
+  //    [1, 2]          100
+  //    []              200
+  //    []              300
+  //    []              400
+  //    []              500
+
+  constexpr auto null = 0;
+
+  LCW a{LCW{1, 2}, LCW{}, LCW{}, LCW{}, LCW{}};
+  FCW b{100, 200, 300, 400, 500};
+
+  FCW expected_a({1, 2, null, null, null, null}, {1, 1, 0, 0, 0, 0});
+  FCW expected_b({100, 100, 200, 300, 400, 500});
+
+  cudf::table_view t({a, b});
+  cudf::table_view expected({expected_a, expected_b});
+
+  auto ret = cudf::explode_outer(t, 0);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected);
+
+  FCW expected_pos_col{0, 1, 0, 0, 0, 0};
+  cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b});
+
+  auto pos_ret = cudf::explode_outer_position(t, 0);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected);
+}
+
+TEST_F(ExplodeOuterTest, LeadingNulls)
+{
+  //    a               b
+  //    null            100
+  //    null            200
+  //    null            300
+  //    null            400
+  //    [1, 2]          500
+
+  constexpr auto null = 0;
+
+  auto valids =
+    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i == 4 ? true : false; });
+
+  LCW a({LCW{null}, LCW{null}, LCW{null}, LCW{null}, LCW{1, 2}}, valids);
+  FCW b{100, 200, 300, 400, 500};
+
+  FCW expected_a({null, null, null, null, 1, 2}, {0, 0, 0, 0, 1, 1});
+  FCW expected_b({100, 200, 300, 400, 500, 500});
+
+  cudf::table_view t({a, b});
+  cudf::table_view expected({expected_a, expected_b});
+
+  auto ret = cudf::explode_outer(t, 0);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected);
+
+  FCW expected_pos_col{0, 0, 0, 0, 0, 1};
+  cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b});
+
+  auto pos_ret = cudf::explode_outer_position(t, 0);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected);
+}
+
 TEST_F(ExplodeOuterTest, NullsInList)
 {
   //    a                   b
-  //    [1, 2, 7]           100
-  //    [5, 6, 0, 9]        200
+  //    [1, null, 7]        100
+  //    [5, null, 0, null]  200
   //    []                  300
-  //    [0, 3, 8]           400
+  //    [0, null, 8]        400
+
+  constexpr auto null = 0;
 
   auto valids = cudf::detail::make_counting_transform_iterator(
     0, [](auto i) { return i % 2 == 0 ? true : false; });
 
-  LCW a{LCW({1, 2, 7}, valids), LCW({5, 6, 0, 9}, valids), LCW{}, LCW({0, 3, 8}, valids)};
+  LCW a{
+    LCW({1, null, 7}, valids), LCW({5, null, 0, null}, valids), LCW{}, LCW({0, null, 8}, valids)};
   FCW b{100, 200, 300, 400};
 
-  FCW expected_a({1, 2, 7, 5, 6, 0, 9, 0, 0, 3, 8}, {1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1});
+  FCW expected_a({1, null, 7, 5, null, 0, null, null, 0, null, 8},
+                 {1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1});
   FCW expected_b{100, 100, 100, 200, 200, 200, 200, 300, 400, 400, 400};
 
   cudf::table_view t({a, b});
@@ -612,15 +794,18 @@ TEST_F(ExplodeOuterTest, NestedNulls)
   //    [[5, 6]]            200
   //    [[0, 3],[5],[2, 1]] 300
 
+  constexpr auto null = 0;
+
   auto valids = cudf::detail::make_counting_transform_iterator(
     0, [](auto i) { return i % 2 == 0 ? true : false; });
 
-  LCW a({LCW{LCW{1, 2}, LCW{7, 6, 5}}, LCW{LCW{5, 6}}, LCW{LCW{0, 3}, LCW{5}, LCW{2, 1}}}, valids);
+  LCW a({LCW{LCW{1, 2}, LCW{7, 6, 5}}, LCW{LCW{null}}, LCW{LCW{0, 3}, LCW{5}, LCW{2, 1}}}, valids);
   FCW b({100, 200, 300});
 
   auto expected_valids =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i == 2 ? false : true; });
-  LCW expected_a({LCW{1, 2}, LCW{7, 6, 5}, LCW{}, LCW{0, 3}, LCW{5}, LCW{2, 1}}, expected_valids);
+  LCW expected_a({LCW{1, 2}, LCW{7, 6, 5}, LCW{null}, LCW{0, 3}, LCW{5}, LCW{2, 1}},
+                 expected_valids);
   FCW expected_b({100, 100, 200, 300, 300, 300});
   cudf::table_view t({a, b});
   cudf::table_view expected({expected_a, expected_b});
@@ -637,21 +822,23 @@ TEST_F(ExplodeOuterTest, NestedNulls)
 
 TEST_F(ExplodeOuterTest, NullsInNested)
 {
-  //    a                   b
-  //    [[1, 2], [7, 6, 5]] 100
-  //    [[5, 6]]            200
-  //    [[0, 3],[5],[2, 1]] 300
+  //    a                      b
+  //    [[1, null], [7, 6, 5]] 100
+  //    [[5, 6]]               200
+  //    [[0, 3],[5],[2, null]] 300
+
+  constexpr auto null = 0;
 
   auto valids = cudf::detail::make_counting_transform_iterator(
     0, [](auto i) { return i % 2 == 0 ? true : false; });
 
-  LCW a({LCW{LCW({1, 2}, valids), LCW{7, 6, 5}},
+  LCW a({LCW{LCW({1, null}, valids), LCW{7, 6, 5}},
          LCW{LCW{5, 6}},
-         LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}});
+         LCW{LCW{0, 3}, LCW{5}, LCW({2, null}, valids)}});
   FCW b({100, 200, 300});
 
   LCW expected_a{
-    LCW({1, 2}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)};
+    LCW({1, null}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, null}, valids)};
   FCW expected_b{100, 100, 200, 300, 300, 300};
 
   cudf::table_view t({a, b});
@@ -670,20 +857,23 @@ TEST_F(ExplodeOuterTest, NullsInNested)
 
 TEST_F(ExplodeOuterTest, NullsInNestedDoubleExplode)
 {
-  //    a                       b
-  //    [[1, 2], [], [7, 6, 5]] 100
-  //    [[5, 6]]                200
-  //    [[0, 3],[5],[2, 1]]     300
+  //    a                          b
+  //    [[1, null], [], [7, 6, 5]] 100
+  //    [[5, 6]]                   200
+  //    [[0, 3],[5],[2, null]]     300
+
+  constexpr auto null = 0;
 
   auto valids = cudf::detail::make_counting_transform_iterator(
     0, [](auto i) { return i % 2 == 0 ? true : false; });
 
-  LCW a{LCW{LCW({1, 2}, valids), LCW{}, LCW{7, 6, 5}},
+  LCW a{LCW{LCW({1, null}, valids), LCW{}, LCW{7, 6, 5}},
         LCW{LCW{5, 6}},
-        LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}};
+        LCW{LCW{0, 3}, LCW{5}, LCW({2, null}, valids)}};
   FCW b{100, 200, 300};
 
-  FCW expected_a({1, 2, 0, 7, 6, 5, 5, 6, 0, 3, 5, 2, 1}, {1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0});
+  FCW expected_a({1, null, null, 7, 6, 5, 5, 6, 0, 3, 5, 2, null},
+                 {1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0});
   FCW expected_b{100, 100, 100, 100, 100, 100, 200, 200, 300, 300, 300, 300, 300};
 
   cudf::table_view t({a, b});
@@ -703,23 +893,25 @@ TEST_F(ExplodeOuterTest, NullsInNestedDoubleExplode)
 
 TEST_F(ExplodeOuterTest, NestedStructs)
 {
-  //    a                   b
-  //    [[1, 2], [7, 6, 5]] {100, "100"}
-  //    [[5, 6]]            {200, "200"}
-  //    [[0, 3],[5],[2, 1]] {300, "300"}
+  //    a                      b
+  //    [[1, null], [7, 6, 5]] {100, "100"}
+  //    [[5, 6]]               {200, "200"}
+  //    [[0, 3],[5],[2, null]] {300, "300"}
+
+  constexpr auto null = 0;
 
   auto valids = cudf::detail::make_counting_transform_iterator(
     0, [](auto i) { return i % 2 == 0 ? true : false; });
 
-  LCW a({LCW{LCW({1, 2}, valids), LCW{7, 6, 5}},
+  LCW a({LCW{LCW({1, null}, valids), LCW{7, 6, 5}},
          LCW{LCW{5, 6}},
-         LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}});
+         LCW{LCW{0, 3}, LCW{5}, LCW({2, null}, valids)}});
   FCW b1({100, 200, 300});
   strings_column_wrapper b2{"100", "200", "300"};
   structs_column_wrapper b({b1, b2});
 
   LCW expected_a{
-    LCW({1, 2}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)};
+    LCW({1, null}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, null}, valids)};
   FCW expected_b1{100, 100, 200, 300, 300, 300};
   strings_column_wrapper expected_b2{"100", "100", "200", "300", "300", "300"};
   structs_column_wrapper expected_b({expected_b1, expected_b2});
@@ -780,27 +972,29 @@ TYPED_TEST(ExplodeOuterTypedTest, ListOfStructs)
 
 TEST_F(ExplodeOuterTest, SlicedList)
 {
-  //    a                        b
-  //    [[1, 2],[7, 6, 5]]       100
-  //    [[5, 6]]                 200
-  //    [[0, 3],[5],[2, 1]]      300
-  //    [[8, 3],[],[4, 3, 1, 2]] 400
-  //    [[2, 3, 4],[9, 8]]       500
+  //    a                              b
+  //    [[1, null],[7, 6, 5]]          100
+  //    [[5, 6]]                       200
+  //    [[0, 3],[5],[2, null]]         300
+  //    [[8, 3],[],[4, null, 1, null]] 400
+  //    [[2, 3, 4],[9, 8]]             500
 
   //    slicing the top 2 rows and the bottom row off
 
+  constexpr auto null = 0;
+
   auto valids = cudf::detail::make_counting_transform_iterator(
     0, [](auto i) { return i % 2 == 0 ? true : false; });
 
-  LCW a({LCW{LCW({1, 2}, valids), LCW{7, 6, 5}},
+  LCW a({LCW{LCW({1, null}, valids), LCW{7, 6, 5}},
          LCW{LCW{5, 6}},
-         LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)},
-         LCW{LCW{8, 3}, LCW{}, LCW({4, 3, 1, 2}, valids)},
+         LCW{LCW{0, 3}, LCW{5}, LCW({2, null}, valids)},
+         LCW{LCW{8, 3}, LCW{}, LCW({4, null, 1, null}, valids)},
          LCW{LCW{2, 3, 4}, LCW{9, 8}}});
   FCW b({100, 200, 300, 400, 500});
 
   LCW expected_a{
-    LCW{0, 3}, LCW{5}, LCW({2, 1}, valids), LCW{8, 3}, LCW{}, LCW({4, 3, 1, 2}, valids)};
+    LCW{0, 3}, LCW{5}, LCW({2, null}, valids), LCW{8, 3}, LCW{}, LCW({4, null, 1, null}, valids)};
   FCW expected_b{300, 300, 300, 400, 400, 400};
 
   cudf::table_view t({a, b});

From e73fff0196d5b38b539068ea5e5bd8d6a2336afa Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <3190405+shwina@users.noreply.github.com>
Date: Wed, 24 Mar 2021 14:01:25 -0400
Subject: [PATCH 2/2] Misc Python/Cython optimizations (#7686)

This PR introduces various small optimizations that should generally improve various common Python overhead. See https://github.com/rapidsai/cudf/pull/7454#issuecomment-804483021 for the motivation behind these optimizations and some benchmarks.

Merge after: #7660

Summary:

* Adds a way to initialize a ColumnAccessor (_init_unsafe) without validating its input. This is useful when converting a `cudf::table` to a `Frame`, where we're guaranteed the columns are well formed
* Improved (faster) `is_numerical_dtype`
* Prioritize check for numeric dtypes in `astype()` and `build_column()`. Numeric types are presumably more common, and we can avoid expensive checks for other dtypes this way.

Authors:
  - Ashwin Srinath (@shwina)

Approvers:
  - Keith Kraus (@kkraus14)

URL: https://github.com/rapidsai/cudf/pull/7686
---
 python/cudf/cudf/_lib/table.pyx          | 32 +++++++++++++++---------
 python/cudf/cudf/core/buffer.py          |  4 +++
 python/cudf/cudf/core/column/column.py   | 24 ++++++++++--------
 python/cudf/cudf/core/column_accessor.py | 23 ++++++++++++-----
 python/cudf/cudf/core/frame.py           |  4 ++-
 python/cudf/cudf/utils/dtypes.py         | 15 +++++------
 6 files changed, 64 insertions(+), 38 deletions(-)

diff --git a/python/cudf/cudf/_lib/table.pyx b/python/cudf/cudf/_lib/table.pyx
index f97b45d8abf..93d79ba6843 100644
--- a/python/cudf/cudf/_lib/table.pyx
+++ b/python/cudf/cudf/_lib/table.pyx
@@ -99,22 +99,30 @@ cdef class Table:
         cdef vector[unique_ptr[column]].iterator it = columns.begin()
 
         # First construct the index, if any
+        cdef int i
+
         index = None
         if index_names is not None:
-            index_columns = []
-            for _ in index_names:
-                index_columns.append(Column.from_unique_ptr(
-                    move(dereference(it))
-                ))
-                it += 1
-            index = Table(dict(zip(index_names, index_columns)))
+            index_data = ColumnAccessor._create_unsafe(
+                {
+                    name: Column.from_unique_ptr(
+                        move(dereference(it + i))
+                    )
+                    for i, name in enumerate(index_names)
+                }
+            )
+            index = Table(data=index_data)
 
         # Construct the data dict
-        data_columns = []
-        for _ in column_names:
-            data_columns.append(Column.from_unique_ptr(move(dereference(it))))
-            it += 1
-        data = dict(zip(column_names, data_columns))
+        cdef int n_index_columns = len(index_names) if index_names else 0
+        data = ColumnAccessor._create_unsafe(
+            {
+                name: Column.from_unique_ptr(
+                    move(dereference(it + i + n_index_columns))
+                )
+                for i, name in enumerate(column_names)
+            }
+        )
 
         return Table(data=data, index=index)
 
diff --git a/python/cudf/cudf/core/buffer.py b/python/cudf/cudf/core/buffer.py
index 350346a87f9..9fc5570e35a 100644
--- a/python/cudf/cudf/core/buffer.py
+++ b/python/cudf/cudf/core/buffer.py
@@ -42,6 +42,10 @@ def __init__(
             self.ptr = data.ptr
             self.size = data.size
             self._owner = owner or data._owner
+        elif isinstance(data, rmm.DeviceBuffer):
+            self.ptr = data.ptr
+            self.size = data.size
+            self._owner = data
         elif hasattr(data, "__array_interface__") or hasattr(
             data, "__cuda_array_interface__"
         ):
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index b2b2874eeb4..dd06d97d105 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1017,7 +1017,9 @@ def distinct_count(
         return cpp_distinct_count(self, ignore_nulls=dropna)
 
     def astype(self, dtype: Dtype, **kwargs) -> ColumnBase:
-        if is_categorical_dtype(dtype):
+        if is_numerical_dtype(dtype):
+            return self.as_numerical_column(dtype)
+        elif is_categorical_dtype(dtype):
             return self.as_categorical_column(dtype, **kwargs)
         elif pd.api.types.pandas_dtype(dtype).type in {
             np.str_,
@@ -1548,6 +1550,16 @@ def build_column(
     """
     dtype = pd.api.types.pandas_dtype(dtype)
 
+    if is_numerical_dtype(dtype):
+        assert data is not None
+        return cudf.core.column.NumericalColumn(
+            data=data,
+            dtype=dtype,
+            mask=mask,
+            size=size,
+            offset=offset,
+            null_count=null_count,
+        )
     if is_categorical_dtype(dtype):
         if not len(children) == 1:
             raise ValueError(
@@ -1634,15 +1646,7 @@ def build_column(
             children=children,
         )
     else:
-        assert data is not None
-        return cudf.core.column.NumericalColumn(
-            data=data,
-            dtype=dtype,
-            mask=mask,
-            size=size,
-            offset=offset,
-            null_count=null_count,
-        )
+        raise TypeError(f"Unrecognized dtype: {dtype}")
 
 
 def build_categorical_column(
diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index 0c580132290..33bae5c1328 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -19,11 +19,7 @@
 
 import cudf
 from cudf.core import column
-from cudf.utils.utils import (
-    cached_property,
-    to_flat_dict,
-    to_nested_dict,
-)
+from cudf.utils.utils import cached_property, to_flat_dict, to_nested_dict
 
 if TYPE_CHECKING:
     from cudf.core.column import ColumnBase
@@ -84,6 +80,21 @@ def __init__(
             self.multiindex = multiindex
             self._level_names = level_names
 
+    @classmethod
+    def _create_unsafe(
+        cls,
+        data: Dict[Any, ColumnBase],
+        multiindex: bool = False,
+        level_names=None,
+    ) -> ColumnAccessor:
+        # create a ColumnAccessor without verifying column
+        # type or size
+        obj = cls()
+        obj._data = data
+        obj.multiindex = multiindex
+        obj._level_names = level_names
+        return obj
+
     def __iter__(self):
         return self._data.__iter__()
 
@@ -167,7 +178,7 @@ def _column_length(self):
             return 0
 
     def _clear_cache(self):
-        cached_properties = "columns", "names", "_grouped_data"
+        cached_properties = ("columns", "names", "_grouped_data")
         for attr in cached_properties:
             try:
                 self.__delattr__(attr)
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index e6898b8c606..ecff3dee573 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2408,7 +2408,9 @@ def _copy_type_metadata(
         for name, col, other_col in zip(
             self._data.keys(), self._data.values(), other._data.values()
         ):
-            self._data[name] = other_col._copy_type_metadata(col)
+            self._data.set_by_label(
+                name, other_col._copy_type_metadata(col), validate=False
+            )
 
         if include_index:
             if self._index is not None and other._index is not None:
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 1438421bb12..8875a36dba8 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -144,16 +144,13 @@ def numeric_normalize_types(*args):
 
 
 def is_numerical_dtype(obj):
-    if is_categorical_dtype(obj):
+    # TODO: we should handle objects with a `.dtype` attribute,
+    # e.g., arrays, here.
+    try:
+        dtype = np.dtype(obj)
+    except TypeError:
         return False
-    if is_list_dtype(obj):
-        return False
-    return (
-        np.issubdtype(obj, np.bool_)
-        or np.issubdtype(obj, np.floating)
-        or np.issubdtype(obj, np.signedinteger)
-        or np.issubdtype(obj, np.unsignedinteger)
-    )
+    return dtype.kind in "biuf"
 
 
 def is_string_dtype(obj):