Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Java] Added method to remove null_masks if the column has no nulls #9061

Merged
merged 10 commits into from
Aug 20, 2021
56 changes: 55 additions & 1 deletion java/src/main/java/ai/rapids/cudf/ColumnView.java
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,39 @@ public ColumnView(DType type, long rows, Optional<Long> nullCount,
|| !nullCount.isPresent();
}

/**
* Create a new column view based off of data already on the device. Ref count on the buffers
* is not incremented and none of the underlying buffers are owned by this view. The returned
* ColumnView is only valid as long as the underlying buffers remain valid. If the buffers are
* closed before this ColumnView is closed, it will result in undefined behavior.
*
* If ownership is needed, call {@link ColumnView#copyToColumnVector}
*
* This overload passes no child views and asserts that the type is not a nested type.
*
* @param type the type of the vector. Must not be a nested type.
* @param rows the number of rows in this vector. The value is cast to an int.
* @param nullCount the number of nulls in the dataset. When empty, UNKNOWN_NULL_COUNT is used.
*                  When present, the value is asserted to be at most Integer.MAX_VALUE.
* @param dataBuffer the device buffer backing the column's data.
*                   The ownership doesn't change on this buffer
* @param validityBuffer an optional validity buffer. Must be provided if nullCount != 0.
*                       The ownership doesn't change on this buffer
* @param offsetBuffer The offset buffer for columns that need an offset buffer
*/
public ColumnView(DType type, long rows, Optional<Long> nullCount,
revans2 marked this conversation as resolved.
Show resolved Hide resolved
BaseDeviceMemoryBuffer dataBuffer,
BaseDeviceMemoryBuffer validityBuffer, BaseDeviceMemoryBuffer offsetBuffer) {
// Delegate to the private constructor with no children (null).
this(type, (int) rows, nullCount.orElse(UNKNOWN_NULL_COUNT).intValue(),
dataBuffer, validityBuffer, offsetBuffer, null);
// Java requires the this(...) call to come first, so these checks run after delegation.
assert (!type.isNestedType());
// A supplied null count must fit in an int; an absent one is always accepted.
assert (nullCount.isPresent() && nullCount.get() <= Integer.MAX_VALUE)
|| !nullCount.isPresent();
}

// Builds a native view handle through ColumnVector.initViewHandle and delegates to the
// handle-based constructor. A null children array is passed on as an empty handle array;
// otherwise each child's native view handle is collected in order.
// NOTE(review): the two consecutive "offsetBuffer, ..." argument lines below look like the
// before/after pair of a rendered diff - confirm that only the null-safe form is kept.
private ColumnView(DType type, long rows, int nullCount,
BaseDeviceMemoryBuffer dataBuffer, BaseDeviceMemoryBuffer validityBuffer,
BaseDeviceMemoryBuffer offsetBuffer, ColumnView[] children) {
this(ColumnVector.initViewHandle(type, (int) rows, nullCount, dataBuffer, validityBuffer,
offsetBuffer, Arrays.stream(children).mapToLong(c -> c.getNativeView()).toArray()));
offsetBuffer, children == null ? new long[]{} :
Arrays.stream(children).mapToLong(c -> c.getNativeView()).toArray()));
}

/** Creates a ColumnVector from a column view handle
Expand Down Expand Up @@ -140,6 +168,32 @@ public final DType getType() {
return type;
}

/**
* Returns the child column views for this view
* Please note that it is the responsibility of the caller to close these views.
* @return an array of child column views
*/
public final ColumnView[] getChildColumnViews() {
int numChildren = getNumChildren();
if (!getType().isNestedType()) {
return null;
}
ColumnView[] views = new ColumnView[numChildren];
razajafri marked this conversation as resolved.
Show resolved Hide resolved
try {
for (int i = 0; i < numChildren; i++) {
views[i] = getChildColumnView(i);
}
return views;
} catch(Throwable t) {
for (ColumnView v: views) {
if (v != null) {
v.close();
}
}
throw t;
}
}

/**
* Returns the child column view at a given index.
* Please note that it is the responsibility of the caller to close this view.
Expand Down
11 changes: 10 additions & 1 deletion java/src/main/java/ai/rapids/cudf/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,19 @@ public long getDeviceMemorySize() {
return total;
}

/**
 * Internal helper that is visible only so tests can exercise it.
 * Passes the given table's native handle to the native removeNullMasksIfNeeded call and
 * wraps the returned handles in a new Table.
 */
static Table removeNullMasksIfNeeded(Table table) {
  long[] columnHandles = removeNullMasksIfNeeded(table.nativeHandle);
  return new Table(columnHandles);
}

/////////////////////////////////////////////////////////////////////////////
// NATIVE APIs
/////////////////////////////////////////////////////////////////////////////


/**
 * Native entry point behind {@link #removeNullMasksIfNeeded(Table)}.
 * @param tableView native handle of the table view to process
 * @return native handles of the columns of the resulting table
 * @throws CudfException if the native call fails
 */
private static native long[] removeNullMasksIfNeeded(long tableView) throws CudfException;

private static native ContiguousTable[] contiguousSplit(long inputTable, int[] indices);

private static native long[] partition(long inputTable, long partitionView,
Expand Down
63 changes: 61 additions & 2 deletions java/src/main/native/src/TableJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -935,13 +935,71 @@ jlongArray combine_join_results(JNIEnv *env, cudf::table &left_results,
return combine_join_results(env, std::move(left_cols), std::move(right_cols));
}

/**
 * @brief Returns a view of the column without null masks that mark no row as null.
 *
 * A null mask is dropped only when the column is nullable and its null count is zero;
 * otherwise the mask and null count are carried over unchanged. Compound columns are
 * rebuilt with a null data pointer and with every child processed recursively.
 *
 * The result is returned by value. It is a non-owning view, so it is only usable while the
 * buffers referenced by the input view stay alive.
 *
 * @param column_view the view to process
 * @return a view over the same buffers, minus any null mask that carries no nulls
 */
cudf::column_view remove_validity_from_col(cudf::column_view column_view) {
  // null_mask is allocated but no nulls are present, therefore the view is rebuilt without
  // the null_mask to avoid things blowing up in reading the parquet file
  bool const drop_mask = column_view.nullable() && column_view.null_count() == 0;

  if (!cudf::is_compound(column_view.type())) {
    if (!drop_mask) {
      return column_view;
    }
    return cudf::column_view(column_view.type(), column_view.size(), column_view.head(), nullptr,
                             0, column_view.offset());
  }

  std::vector<cudf::column_view> children;
  children.reserve(column_view.num_children());
  for (auto it = column_view.child_begin(); it != column_view.child_end(); ++it) {
    children.push_back(remove_validity_from_col(*it));
  }

  // Built directly as the return value: no heap allocation, so nothing can leak.
  return cudf::column_view(column_view.type(), column_view.size(), nullptr,
                           drop_mask ? nullptr : column_view.null_mask(),
                           drop_mask ? 0 : column_view.null_count(), column_view.offset(),
                           children);
}

// Builds a table_view whose columns are the input table's columns, each passed through
// remove_validity_from_col, in the same order.
cudf::table_view remove_validity_if_needed(cudf::table_view *input_table_view) {
  std::vector<cudf::column_view> stripped_columns;
  stripped_columns.reserve(input_table_view->num_columns());
  for (auto const &column : *input_table_view) {
    stripped_columns.push_back(remove_validity_from_col(column));
  }
  return cudf::table_view(stripped_columns);
}

} // namespace

} // namespace jni
} // namespace cudf

extern "C" {

// This is a method purely added for testing the remove_validity_if_needed method.
// It runs remove_validity_if_needed on the table view behind j_table_view, constructs a
// cudf::table from the resulting view, and returns the released column pointers as a jlong
// array. Returns 0 when the handle is null or a C++ exception was translated by CATCH_STD.
JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_removeNullMasksIfNeeded(JNIEnv *env, jclass,
                                                                               jlong j_table_view) {
  JNI_NULL_CHECK(env, j_table_view, "table view handle is null", 0);
  try {
    // Select the device before building the table below, as writeParquetChunk does before
    // its own device work.
    cudf::jni::auto_set_device(env);
    cudf::table_view *tview = reinterpret_cast<cudf::table_view *>(j_table_view);
    cudf::table_view result = cudf::jni::remove_validity_if_needed(tview);
    cudf::table m_tbl(result);
    std::vector<std::unique_ptr<cudf::column>> cols = m_tbl.release();
    auto results = cudf::jni::native_jlongArray(env, cols.size());
    int i = 0;
    for (auto &col : cols) {
      // Ownership of each column passes to the returned handle.
      results[i++] = reinterpret_cast<jlong>(col.release());
    }
    return results.get_jArray();
  }
  CATCH_STD(env, 0);
}

JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_createCudfTableView(JNIEnv *env, jclass,
jlongArray j_cudf_columns) {
JNI_NULL_CHECK(env, j_cudf_columns, "columns are null", 0);
Expand Down Expand Up @@ -1363,7 +1421,8 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Table_writeParquetChunk(JNIEnv *env,
JNI_NULL_CHECK(env, j_state, "null state", );

using namespace cudf::io;
cudf::table_view *tview = reinterpret_cast<cudf::table_view *>(j_table);
cudf::table_view *tview_with_empty_nullmask = reinterpret_cast<cudf::table_view *>(j_table);
cudf::table_view tview = cudf::jni::remove_validity_if_needed(tview_with_empty_nullmask);
cudf::jni::native_parquet_writer_handle *state =
reinterpret_cast<cudf::jni::native_parquet_writer_handle *>(j_state);

Expand All @@ -1373,7 +1432,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Table_writeParquetChunk(JNIEnv *env,
}
try {
cudf::jni::auto_set_device(env);
state->writer->write(*tview);
state->writer->write(tview);
}
CATCH_STD(env, )
}
Expand Down
Loading