From 172f8488cba944a5f5a72cbfb0a657df921befd7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 26 May 2021 14:03:48 -0600 Subject: [PATCH 1/3] Use `has_nulls()` instead of `nullable()` during flattening of structs column --- cpp/src/structs/utilities.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index 4f7795bad7a..2a1ab8a5748 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -93,11 +93,16 @@ struct flattened_table { order col_order, null_order col_null_order) { - if (nullability == column_nullability::FORCE || col.nullable()) { - // nullable columns could be required for comparisions such as joins + // Even if it is not required to extract the bitmask to a separate column, + // we should always do that if the structs column has any null element. + // In addition, we should check for null by calling to `has_nulls()`, not `nullable()`. + // This is because when comparing structs columns, if one column has bitmask while the other + // does not (and both columns do not have any null element) then flattening them will result + // in tables with different number of columns. + if (nullability == column_nullability::FORCE || col.has_nulls()) { validity_as_column.push_back(cudf::is_valid(col)); - if (col.nullable()) { - // copy bitmask only works if the column is nullable + if (col.has_nulls()) { + // copy bitmask is needed only if the column has null validity_as_column.back()->set_null_mask(copy_bitmask(col)); } flat_columns.push_back(validity_as_column.back()->view()); From 5596785cd49762c53425309c3a4005b253ae4f24 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 26 May 2021 14:18:43 -0600 Subject: [PATCH 2/3] Fix doc --- cpp/src/structs/utilities.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index 2a1ab8a5748..79895651c7c 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -97,8 +97,8 @@ struct flattened_table { // we should always do that if the structs column has any null element. // In addition, we should check for null by calling to `has_nulls()`, not `nullable()`. // This is because when comparing structs columns, if one column has bitmask while the other - // does not (and both columns do not have any null element) then flattening them will result - // in tables with different number of columns. + // does not (and both columns do not have any null element) then flattening them using + // `nullable()` will result in tables with different number of columns. if (nullability == column_nullability::FORCE || col.has_nulls()) { validity_as_column.push_back(cudf::is_valid(col)); if (col.has_nulls()) { From 224eb13530c7412b3467b613f682b1758aea2c53 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 26 May 2021 14:41:24 -0600 Subject: [PATCH 3/3] Add more explanation to the comments --- cpp/src/structs/utilities.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index 79895651c7c..80bea2ab55e 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -95,10 +95,16 @@ struct flattened_table { { // Even if it is not required to extract the bitmask to a separate column, // we should always do that if the structs column has any null element. + // // In addition, we should check for null by calling to `has_nulls()`, not `nullable()`. // This is because when comparing structs columns, if one column has bitmask while the other // does not (and both columns do not have any null element) then flattening them using // `nullable()` will result in tables with different number of columns. + // + // Notice that, for comparing structs columns when one column has null while the other + // doesn't, `nullability` must be passed in with value `column_nullability::FORCE` to make + // sure the flattening results are tables having the same number of columns. + if (nullability == column_nullability::FORCE || col.has_nulls()) { validity_as_column.push_back(cudf::is_valid(col)); if (col.has_nulls()) {