From 912cbaa2d9d6c15a628d4da9abb04d2152cdcb1d Mon Sep 17 00:00:00 2001 From: Alessandro Bellina Date: Fri, 8 Oct 2021 11:11:36 -0500 Subject: [PATCH 1/5] Expose an enum to be used instead of the checkBounds boolean in Table.gather --- java/ci/build-in-docker.sh | 2 +- .../ai/rapids/cudf/OutOfBoundsPolicy.java | 30 +++++++++++++++++++ java/src/main/java/ai/rapids/cudf/Table.java | 15 ++++++---- 3 files changed, 41 insertions(+), 6 deletions(-) create mode 100644 java/src/main/java/ai/rapids/cudf/OutOfBoundsPolicy.java diff --git a/java/ci/build-in-docker.sh b/java/ci/build-in-docker.sh index e596cdae5b3..505b7f85392 100755 --- a/java/ci/build-in-docker.sh +++ b/java/ci/build-in-docker.sh @@ -19,7 +19,7 @@ set -e gcc --version -PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} +PARALLEL_LEVEL=${PARALLEL_LEVEL:-12} SKIP_JAVA_TESTS=${SKIP_JAVA_TESTS:-true} BUILD_CPP_TESTS=${BUILD_CPP_TESTS:-OFF} ENABLE_PTDS=${ENABLE_PTDS:-ON} diff --git a/java/src/main/java/ai/rapids/cudf/OutOfBoundsPolicy.java b/java/src/main/java/ai/rapids/cudf/OutOfBoundsPolicy.java new file mode 100644 index 00000000000..d3436d5e455 --- /dev/null +++ b/java/src/main/java/ai/rapids/cudf/OutOfBoundsPolicy.java @@ -0,0 +1,30 @@ +/* + * + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package ai.rapids.cudf; + +// This enum doesn't have a nativeId because the out_of_bounds_policy is a +// a boolean enum. It is just added for clarity in the Java API + +public enum OutOfBoundsPolicy { + /* Output values corresponding to out-of-bounds indices are null */ + NULLIFY, + + /* No bounds checking is performed, better performance */ + DONT_CHECK +} diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 0af02d1c926..4f28c4989ad 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -2016,7 +2016,7 @@ public ColumnVector rowBitCount() { * @return the resulting Table. */ public Table gather(ColumnView gatherMap) { - return gather(gatherMap, true); + return gather(gatherMap, OutOfBoundsPolicy.NULLIFY); } /** @@ -2027,13 +2027,18 @@ public Table gather(ColumnView gatherMap) { * * A negative value `i` in the `gatherMap` is interpreted as `i+n`, where * `n` is the number of rows in this table. - + * * @param gatherMap the map of indexes. Must be non-nullable and integral type. - * @param checkBounds if true bounds checking is performed on the value. Be very careful - * when setting this to false. + * @param outOfBoundsPolicy NULLIFY: indices that are out of bounds (not in this table) + * will yield null values in the result table. + * DONT_CHECK: all indices from the gather map must be valid and + * therefore there is no need to nullify. This should yield + * better performance. DONT_CHECK can result in a CUDA exception + * if not used carefully. * @return the resulting Table. */ - public Table gather(ColumnView gatherMap, boolean checkBounds) { + public Table gather(ColumnView gatherMap, OutOfBoundsPolicy outOfBoundsPolicy) { + boolean checkBounds = outOfBoundsPolicy == OutOfBoundsPolicy.NULLIFY; return new Table(gather(nativeHandle, gatherMap.getNativeView(), checkBounds)); } From df88f90e0f879bea68aa730786505b7e66e0a2ba Mon Sep 17 00:00:00 2001 From: Alessandro Bellina Date: Fri, 8 Oct 2021 18:48:00 +0000 Subject: [PATCH 2/5] Remove change committed by mistake --- java/ci/build-in-docker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/ci/build-in-docker.sh b/java/ci/build-in-docker.sh index 505b7f85392..e596cdae5b3 100755 --- a/java/ci/build-in-docker.sh +++ b/java/ci/build-in-docker.sh @@ -19,7 +19,7 @@ set -e gcc --version -PARALLEL_LEVEL=${PARALLEL_LEVEL:-12} +PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} SKIP_JAVA_TESTS=${SKIP_JAVA_TESTS:-true} BUILD_CPP_TESTS=${BUILD_CPP_TESTS:-OFF} ENABLE_PTDS=${ENABLE_PTDS:-ON} From 66953a5689aa0a08b8e15ad7cd34eb331561ecb6 Mon Sep 17 00:00:00 2001 From: Alessandro Bellina Date: Fri, 8 Oct 2021 15:10:39 -0500 Subject: [PATCH 3/5] Code review comments --- .../ai/rapids/cudf/OutOfBoundsPolicy.java | 15 ++++++++--- java/src/main/java/ai/rapids/cudf/Table.java | 27 ++++++++++++++----- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/OutOfBoundsPolicy.java b/java/src/main/java/ai/rapids/cudf/OutOfBoundsPolicy.java index d3436d5e455..36f39aa8ad3 100644 --- a/java/src/main/java/ai/rapids/cudf/OutOfBoundsPolicy.java +++ b/java/src/main/java/ai/rapids/cudf/OutOfBoundsPolicy.java @@ -18,9 +18,18 @@ package ai.rapids.cudf; -// This enum doesn't have a nativeId because the out_of_bounds_policy is a -// a boolean enum. It is just added for clarity in the Java API - +/** + * Policy to account for possible out-of-bounds indices + * + * `NULLIFY` means to nullify output values corresponding to out-of-bounds gather map values. + * + * `DONT_CHECK` means do not check whether the indices are out-of-bounds, for better + * performance. Use `DONT_CHECK` carefully, as it can result in a CUDA exception if + * the gather map values are actually out of range. + * + * @note This enum doesn't have a nativeId because the C++ out_of_bounds_policy is a + * a boolean enum. It is just added for clarity in the Java API. + */ public enum OutOfBoundsPolicy { /* Output values corresponding to out-of-bounds indices are null */ NULLIFY, diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 4f28c4989ad..161083b5462 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -2028,13 +2028,28 @@ public Table gather(ColumnView gatherMap) { * A negative value `i` in the `gatherMap` is interpreted as `i+n`, where * `n` is the number of rows in this table. * + * @deprecated Use {@link #gather(ColumnView, OutOfBoundsPolicy)} * @param gatherMap the map of indexes. Must be non-nullable and integral type. - * @param outOfBoundsPolicy NULLIFY: indices that are out of bounds (not in this table) - * will yield null values in the result table. - * DONT_CHECK: all indices from the gather map must be valid and - * therefore there is no need to nullify. This should yield - * better performance. DONT_CHECK can result in a CUDA exception - * if not used carefully. + * @param checkBounds if true bounds checking is performed on the value. Be very careful + * when setting this to false. + * @return the resulting Table. + */ + @Deprecated + public Table gather(ColumnView gatherMap, boolean checkBounds) { + return new Table(gather(nativeHandle, gatherMap.getNativeView(), checkBounds)); + } + + /** + * Gathers the rows of this table according to `gatherMap` such that row "i" + * in the resulting table's columns will contain row "gatherMap[i]" from this table. + * The number of rows in the result table will be equal to the number of elements in + * `gatherMap`. + * + * A negative value `i` in the `gatherMap` is interpreted as `i+n`, where + * `n` is the number of rows in this table. + * + * @param gatherMap the map of indexes. Must be non-nullable and integral type. + * @param outOfBoundsPolicy policy to use when an out-of-range value is in `gatherMap` * @return the resulting Table. */ public Table gather(ColumnView gatherMap, OutOfBoundsPolicy outOfBoundsPolicy) { From 4fcd45236b3e65d225f32417d343a8aca98c6496 Mon Sep 17 00:00:00 2001 From: Firestarman Date: Mon, 11 Oct 2021 18:29:00 +0800 Subject: [PATCH 4/5] JNI data writer sink supports device_write_async. Now call the sync version directly. Signed-off-by: Firestarman --- java/src/main/native/src/TableJni.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index ee75112a2ed..faff20c90a6 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -143,6 +143,13 @@ class jni_writer_data_sink final : public cudf::io::data_sink { stream.synchronize(); } + std::future device_write_async(void const *gpu_data, size_t size, + rmm::cuda_stream_view stream) override { + // Call the sync version until figuring out how to write asynchronously. + device_write(gpu_data, size, stream); + return std::async(std::launch::deferred, []{}); + } + void flush() override { if (current_buffer_written > 0) { JNIEnv *env = cudf::jni::get_jni_env(jvm); From 8c25802eda645eb8632bff9530e172eeca0e960c Mon Sep 17 00:00:00 2001 From: Alessandro Bellina Date: Mon, 11 Oct 2021 21:30:22 +0000 Subject: [PATCH 5/5] Fix codestyle violation --- java/src/main/native/src/TableJni.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index faff20c90a6..9e07f4433fa 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -147,7 +147,7 @@ class jni_writer_data_sink final : public cudf::io::data_sink { rmm::cuda_stream_view stream) override { // Call the sync version until figuring out how to write asynchronously. device_write(gpu_data, size, stream); - return std::async(std::launch::deferred, []{}); + return std::async(std::launch::deferred, [] {}); } void flush() override {