Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Java] Added method to remove null_masks if the column has no nulls #9061

Merged
merged 10 commits into from
Aug 20, 2021
38 changes: 36 additions & 2 deletions java/src/main/native/src/TableJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -935,6 +935,39 @@ jlongArray combine_join_results(JNIEnv *env, cudf::table &left_results,
return combine_join_results(env, std::move(left_cols), std::move(right_cols));
}

cudf::column_view remove_validity_from_col(cudf::column_view column_view) {
if (column_view.nullable() && column_view.null_count() == 0) {
// null_mask is allocated but no nulls present therefore we create a new column_view without the
// null_mask to avoid things blowing up in reading the parquet file
if (!cudf::is_nested(column_view.type())) {
razajafri marked this conversation as resolved.
Show resolved Hide resolved
cudf::column_view cv(column_view.type(), column_view.size(), column_view.data<int32_t>(),
nullptr, 0);
return cv;
razajafri marked this conversation as resolved.
Show resolved Hide resolved
} else {
std::vector<cudf::column_view> children;
children.reserve(column_view.num_children());
for (auto it = column_view.child_begin(); it != column_view.child_end(); it++) {
children.push_back(remove_validity_from_col(*it));
}
cudf::column_view cv(column_view.type(), column_view.size(), column_view.data<int32_t>(),
nullptr, 0, 0, children);
razajafri marked this conversation as resolved.
Show resolved Hide resolved
return cv;
}
} else {
return column_view;
}
}

cudf::table_view remove_validity_if_needed(cudf::table_view *input_table_view) {
razajafri marked this conversation as resolved.
Show resolved Hide resolved
std::vector<cudf::column_view> views;
views.reserve(input_table_view->num_columns());
for (auto it = input_table_view->begin(); it != input_table_view->end(); it++) {
views.push_back(remove_validity_from_col(*it));
}

return cudf::table_view(views);
}

} // namespace

} // namespace jni
Expand Down Expand Up @@ -1346,7 +1379,8 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Table_writeParquetChunk(JNIEnv *env,
JNI_NULL_CHECK(env, j_state, "null state", );

using namespace cudf::io;
cudf::table_view *tview = reinterpret_cast<cudf::table_view *>(j_table);
cudf::table_view *tview_with_empty_nullmask = reinterpret_cast<cudf::table_view *>(j_table);
cudf::table_view tview = cudf::jni::remove_validity_if_needed(tview_with_empty_nullmask);
cudf::jni::native_parquet_writer_handle *state =
reinterpret_cast<cudf::jni::native_parquet_writer_handle *>(j_state);

Expand All @@ -1356,7 +1390,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Table_writeParquetChunk(JNIEnv *env,
}
try {
cudf::jni::auto_set_device(env);
state->writer->write(*tview);
state->writer->write(tview);
}
CATCH_STD(env, )
}
Expand Down