From fc5ae49683fc7e071e25f90e6a86c8ce11baeb30 Mon Sep 17 00:00:00 2001 From: Allen Xu Date: Tue, 11 May 2021 13:51:30 +0800 Subject: [PATCH 01/13] check if map contains a key --- java/src/main/native/src/ColumnViewJni.cpp | 16 ++++++++++++ java/src/main/native/src/map_lookup.cu | 29 ++++++++++++++++++++++ java/src/main/native/src/map_lookup.hpp | 29 ++++++++++++++++++++++ 3 files changed, 74 insertions(+) diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 8d2d67b8fd0..8fcebfdac93 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1169,6 +1169,22 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_mapLookup(JNIEnv *env, jc CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_mapSearchKey(JNIEnv *env, jclass, + jlong map_column_view, + jlong lookup_key) { + JNI_NULL_CHECK(env, map_column_view, "column is null", 0); + JNI_NULL_CHECK(env, lookup_key, "target string scalar is null", 0); + try { + cudf::jni::auto_set_device(env); + cudf::column_view *cv = reinterpret_cast(map_column_view); + cudf::string_scalar *ss_key = reinterpret_cast(lookup_key); + + std::unique_ptr result = cudf::jni::map_search_key(*cv, *ss_key); + return reinterpret_cast(result.release()); + } + CATCH_STD(env, 0); +} + JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringReplaceWithBackrefs(JNIEnv *env, jclass, jlong column_view, diff --git a/java/src/main/native/src/map_lookup.cu b/java/src/main/native/src/map_lookup.cu index 0ba683b45f1..7a73b279e1e 100644 --- a/java/src/main/native/src/map_lookup.cu +++ b/java/src/main/native/src/map_lookup.cu @@ -127,6 +127,35 @@ get_gather_map_for_map_values(column_view const &input, string_scalar &lookup_ke } // namespace namespace jni { + +std::unique_ptr map_search_key(column_view const &map_column, string_scalar lookup_key, + bool has_nulls, rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { + // Defensive checks. + CUDF_EXPECTS(map_column.type().id() == type_id::LIST, "Expected LIST>."); + + lists_column_view lcv{map_column}; + auto structs_column = lcv.get_sliced_child(stream); + + CUDF_EXPECTS(structs_column.type().id() == type_id::STRUCT, "Expected LIST>."); + + structs_column_view scv{structs_column}; + CUDF_EXPECTS(structs_column.num_children() == 2, "Expected LIST>."); + CUDF_EXPECTS(structs_column.child(0).type().id() == type_id::STRING, + "Expected LIST>."); + CUDF_EXPECTS(structs_column.child(1).type().id() == type_id::STRING, + "Expected LIST>."); + + // Two-pass plan: construct gather map, and then gather() on structs_column.child(1). Plan A. + // (Can do in one pass perhaps, but that's Plan B.) + + auto gather_map = has_nulls ? + get_gather_map_for_map_values(map_column, lookup_key, stream, mr) : + get_gather_map_for_map_values(map_column, lookup_key, stream, mr); + return gather_map; +} + + std::unique_ptr map_lookup(column_view const &map_column, string_scalar lookup_key, bool has_nulls, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { diff --git a/java/src/main/native/src/map_lookup.hpp b/java/src/main/native/src/map_lookup.hpp index 6d54bfa371d..82ed6e19539 100644 --- a/java/src/main/native/src/map_lookup.hpp +++ b/java/src/main/native/src/map_lookup.hpp @@ -51,6 +51,35 @@ map_lookup(column_view const &map_column, string_scalar lookup_key, bool has_nul rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Looks up a "map" column by specified key to see if the key exists or not, + * and returns a column of int values. + * + * The map-column is represented as follows: + * + * list_view >. + * <---KEY---> <--VALUE--> + * + * The string_view struct members are the key and value, respectively. + * For each row in the input list column. If the key is not found, -1 is returned. + * + * @param map_column The input "map" column to be searched. Must be of + * type list_view>. + * @param lookup_key The search key, whose value is to be returned for each list row + * @param has_nulls Whether the input column might contain null list-rows, or null keys. + * @param stream The CUDA stream + * @param mr The device memory resource to be used for allocations + * @return A string_view column with the value from the first match in each list. + * A null row is returned for any row where the lookup_key is not found. + * @throw cudf::logic_error If the input column is not of type + * list_view> + */ +std::unique_ptr +map_search_key(column_view const &map_column, string_scalar lookup_key, bool has_nulls = true, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + } // namespace jni } // namespace cudf From cead0de670238680f1deddb24ae2c7e0d66c971f Mon Sep 17 00:00:00 2001 From: Allen Xu Date: Tue, 11 May 2021 14:21:40 +0800 Subject: [PATCH 02/13] Update --- java/src/main/java/ai/rapids/cudf/ColumnView.java | 9 +++++++++ java/src/main/native/src/ColumnViewJni.cpp | 4 ++-- java/src/main/native/src/map_lookup.cu | 2 +- java/src/main/native/src/map_lookup.hpp | 2 +- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 5a5c3d3f687..ea386c5fafd 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2526,6 +2526,13 @@ public final ColumnVector getMapValue(Scalar key) { return new ColumnVector(mapLookup(getNativeView(), key.getScalarHandle())); } + public final ColumnVector getMapKeyExistence(Scalar key) { + assert type.equals(DType.LIST) : "column type must be a LIST"; + assert key != null : "target string may not be null"; + assert key.getType().equals(DType.STRING) : "target string must be a string scalar"; + + return new ColumnVector(mapContains(getNativeView(), key.getScalarHandle())); + } /** * Create a new struct column view of existing column views. Note that this will NOT copy @@ -2844,6 +2851,8 @@ private static native long stringReplaceWithBackrefs(long columnView, String pat * @throws CudfException */ private static native long mapLookup(long columnView, long key) throws CudfException; + + private static native long mapContains(long columnView, long key) throws CudfException; /** * Native method to add zeros as padding to the left of each string. */ diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 8fcebfdac93..1ee1d7a00f6 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1169,7 +1169,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_mapLookup(JNIEnv *env, jc CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_mapSearchKey(JNIEnv *env, jclass, +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_mapContains(JNIEnv *env, jclass, jlong map_column_view, jlong lookup_key) { JNI_NULL_CHECK(env, map_column_view, "column is null", 0); @@ -1179,7 +1179,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_mapSearchKey(JNIEnv *env, cudf::column_view *cv = reinterpret_cast(map_column_view); cudf::string_scalar *ss_key = reinterpret_cast(lookup_key); - std::unique_ptr result = cudf::jni::map_search_key(*cv, *ss_key); + std::unique_ptr result = cudf::jni::map_contains(*cv, *ss_key); return reinterpret_cast(result.release()); } CATCH_STD(env, 0); diff --git a/java/src/main/native/src/map_lookup.cu b/java/src/main/native/src/map_lookup.cu index 7a73b279e1e..be34fbc5db3 100644 --- a/java/src/main/native/src/map_lookup.cu +++ b/java/src/main/native/src/map_lookup.cu @@ -128,7 +128,7 @@ get_gather_map_for_map_values(column_view const &input, string_scalar &lookup_ke namespace jni { -std::unique_ptr map_search_key(column_view const &map_column, string_scalar lookup_key, +std::unique_ptr map_contains(column_view const &map_column, string_scalar lookup_key, bool has_nulls, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { // Defensive checks. diff --git a/java/src/main/native/src/map_lookup.hpp b/java/src/main/native/src/map_lookup.hpp index 82ed6e19539..28d7918b5e4 100644 --- a/java/src/main/native/src/map_lookup.hpp +++ b/java/src/main/native/src/map_lookup.hpp @@ -76,7 +76,7 @@ map_lookup(column_view const &map_column, string_scalar lookup_key, bool has_nul * list_view> */ std::unique_ptr -map_search_key(column_view const &map_column, string_scalar lookup_key, bool has_nulls = true, +map_contains(column_view const &map_column, string_scalar lookup_key, bool has_nulls = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); From 7d512f33051ad82470b78928a3a4293bb45c9fe3 Mon Sep 17 00:00:00 2001 From: Allen Xu Date: Tue, 11 May 2021 16:24:43 +0800 Subject: [PATCH 03/13] Add test --- .../java/ai/rapids/cudf/ColumnVectorTest.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 4c5ee7295d9..270c89ccbae 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4410,6 +4410,23 @@ void testGetMapValue() { assertColumnsAreEqual(expected, res); } } + @Test + void testGetMapKeyExistence() { + List list1 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList("a", "b"))); + List list2 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList("a", "c"))); + List list3 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList("e", "d"))); + List list4 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList("a", "g"))); + List list5 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList("f", "h"))); + List list6 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList("a", null))); + List list7 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList(null, null))); + HostColumnVector.StructType structType = new HostColumnVector.StructType(true, Arrays.asList(new HostColumnVector.BasicType(true, DType.STRING), + new HostColumnVector.BasicType(true, DType.STRING))); + try (ColumnVector cv = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list3, list4, list5, list6, list7); + ColumnVector res = cv.getMapKeyExistence(Scalar.fromString("a")); + ColumnVector expected = ColumnVector.fromInts(0, 1, -1, 3, -1, 5, -1)) { + assertColumnsAreEqual(expected, res); + } + } @Test void testListOfStructsOfStructs() { From acadd4e9b21be9a43e984e103afeca96ee711265 Mon Sep 17 00:00:00 2001 From: Allen Xu Date: Tue, 11 May 2021 19:23:49 +0800 Subject: [PATCH 04/13] refactor --- .../main/java/ai/rapids/cudf/ColumnView.java | 14 +++++- java/src/main/native/src/map_lookup.cu | 43 ++++++++++--------- java/src/main/native/src/map_lookup.hpp | 7 +-- .../java/ai/rapids/cudf/ColumnVectorTest.java | 2 +- 4 files changed, 41 insertions(+), 25 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index ea386c5fafd..317b8c2e892 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2526,10 +2526,15 @@ public final ColumnVector getMapValue(Scalar key) { return new ColumnVector(mapLookup(getNativeView(), key.getScalarHandle())); } + /** For a column of type List> and a passed in String key, return an int column + * for all the offsets(index) in the struct for key, -1 if the key doesn't exist. + * @param key the String scalar to lookup in the column + * @return a boolean column based on the lookup result + */ public final ColumnVector getMapKeyExistence(Scalar key) { assert type.equals(DType.LIST) : "column type must be a LIST"; assert key != null : "target string may not be null"; - assert key.getType().equals(DType.STRING) : "target string must be a string scalar"; + assert key.getType().equals(DType.STRING) : "target must be a string scalar"; return new ColumnVector(mapContains(getNativeView(), key.getScalarHandle())); } @@ -2852,6 +2857,13 @@ private static native long stringReplaceWithBackrefs(long columnView, String pat */ private static native long mapLookup(long columnView, long key) throws CudfException; + /** + * Native method for check the existence of a key over a column of List> + * @param columnView the column view handle of the map + * @param key the string scalar that is the key for lookup + * @return an boolean column handle of the resultant + * @throws CudfException + */ private static native long mapContains(long columnView, long key) throws CudfException; /** * Native method to add zeros as padding to the left of each string. diff --git a/java/src/main/native/src/map_lookup.cu b/java/src/main/native/src/map_lookup.cu index be34fbc5db3..95a4297b7de 100644 --- a/java/src/main/native/src/map_lookup.cu +++ b/java/src/main/native/src/map_lookup.cu @@ -27,6 +27,7 @@ #include #include #include +#include namespace cudf { namespace { @@ -128,23 +129,28 @@ get_gather_map_for_map_values(column_view const &input, string_scalar &lookup_ke namespace jni { -std::unique_ptr map_contains(column_view const &map_column, string_scalar lookup_key, - bool has_nulls, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) { - // Defensive checks. + +void map_check(column_view const &map_column, rmm::cuda_stream_view stream) { CUDF_EXPECTS(map_column.type().id() == type_id::LIST, "Expected LIST>."); lists_column_view lcv{map_column}; - auto structs_column = lcv.get_sliced_child(stream); + column_view structs_column = lcv.get_sliced_child(stream); CUDF_EXPECTS(structs_column.type().id() == type_id::STRUCT, "Expected LIST>."); - structs_column_view scv{structs_column}; CUDF_EXPECTS(structs_column.num_children() == 2, "Expected LIST>."); CUDF_EXPECTS(structs_column.child(0).type().id() == type_id::STRING, "Expected LIST>."); CUDF_EXPECTS(structs_column.child(1).type().id() == type_id::STRING, "Expected LIST>."); + return; +} + +std::unique_ptr map_contains(column_view const &map_column, string_scalar lookup_key, + bool has_nulls, rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { + // Defensive checks. + map_check(map_column, stream); // Two-pass plan: construct gather map, and then gather() on structs_column.child(1). Plan A. // (Can do in one pass perhaps, but that's Plan B.) @@ -152,28 +158,25 @@ std::unique_ptr map_contains(column_view const &map_column, string_scala auto gather_map = has_nulls ? get_gather_map_for_map_values(map_column, lookup_key, stream, mr) : get_gather_map_for_map_values(map_column, lookup_key, stream, mr); - return gather_map; -} + auto found = make_numeric_column(data_type{type_id::BOOL8}, gather_map->size(), + mask_state::UNALLOCATED, stream, mr); + thrust::transform(rmm::exec_policy(stream), thrust::make_counting_iterator(0), + thrust::make_counting_iterator(gather_map->size()), + found->mutable_view().template begin(), + [d_gather_map = gather_map->view().template begin()] __device__( + auto i) { return d_gather_map[i] >= 0; }); + return found; +} std::unique_ptr map_lookup(column_view const &map_column, string_scalar lookup_key, bool has_nulls, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { // Defensive checks. - CUDF_EXPECTS(map_column.type().id() == type_id::LIST, "Expected LIST>."); + map_check(map_column, stream); lists_column_view lcv{map_column}; - auto structs_column = lcv.get_sliced_child(stream); - - CUDF_EXPECTS(structs_column.type().id() == type_id::STRUCT, "Expected LIST>."); - - structs_column_view scv{structs_column}; - CUDF_EXPECTS(structs_column.num_children() == 2, "Expected LIST>."); - CUDF_EXPECTS(structs_column.child(0).type().id() == type_id::STRING, - "Expected LIST>."); - CUDF_EXPECTS(structs_column.child(1).type().id() == type_id::STRING, - "Expected LIST>."); - + column_view structs_column = lcv.get_sliced_child(stream); // Two-pass plan: construct gather map, and then gather() on structs_column.child(1). Plan A. // (Can do in one pass perhaps, but that's Plan B.) diff --git a/java/src/main/native/src/map_lookup.hpp b/java/src/main/native/src/map_lookup.hpp index 28d7918b5e4..638a1cca57b 100644 --- a/java/src/main/native/src/map_lookup.hpp +++ b/java/src/main/native/src/map_lookup.hpp @@ -18,6 +18,7 @@ #include #include +#include namespace cudf { @@ -66,12 +67,12 @@ map_lookup(column_view const &map_column, string_scalar lookup_key, bool has_nul * * @param map_column The input "map" column to be searched. Must be of * type list_view>. - * @param lookup_key The search key, whose value is to be returned for each list row + * @param lookup_key The search key, whose index(offset) is to be returned for each list row * @param has_nulls Whether the input column might contain null list-rows, or null keys. * @param stream The CUDA stream * @param mr The device memory resource to be used for allocations - * @return A string_view column with the value from the first match in each list. - * A null row is returned for any row where the lookup_key is not found. + * @return An boolean_view column reflecting the existence for the key in each list. + * false means the lookup_key is not found. * @throw cudf::logic_error If the input column is not of type * list_view> */ diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 270c89ccbae..cdeaae164b2 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4423,7 +4423,7 @@ void testGetMapKeyExistence() { new HostColumnVector.BasicType(true, DType.STRING))); try (ColumnVector cv = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list3, list4, list5, list6, list7); ColumnVector res = cv.getMapKeyExistence(Scalar.fromString("a")); - ColumnVector expected = ColumnVector.fromInts(0, 1, -1, 3, -1, 5, -1)) { + ColumnVector expected = ColumnVector.fromBoxedBooleans(true, true, false, true, false, true, false)) { assertColumnsAreEqual(expected, res); } } From 577ab5f800d2de55738aab5c7f19ef8b024cd8ac Mon Sep 17 00:00:00 2001 From: Allen Xu Date: Tue, 11 May 2021 19:46:44 +0800 Subject: [PATCH 05/13] clean --- java/src/main/java/ai/rapids/cudf/ColumnView.java | 4 ++-- java/src/main/native/src/map_lookup.hpp | 1 - .../test/java/ai/rapids/cudf/ColumnVectorTest.java | 14 +++++++------- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 317b8c2e892..19706e5cee5 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2526,8 +2526,8 @@ public final ColumnVector getMapValue(Scalar key) { return new ColumnVector(mapLookup(getNativeView(), key.getScalarHandle())); } - /** For a column of type List> and a passed in String key, return an int column - * for all the offsets(index) in the struct for key, -1 if the key doesn't exist. + /** For a column of type List> and a passed in String key, return a boolean + * column for all keys in the structs, false if the key doesn't exist. * @param key the String scalar to lookup in the column * @return a boolean column based on the lookup result */ diff --git a/java/src/main/native/src/map_lookup.hpp b/java/src/main/native/src/map_lookup.hpp index 638a1cca57b..e0eeccbdde8 100644 --- a/java/src/main/native/src/map_lookup.hpp +++ b/java/src/main/native/src/map_lookup.hpp @@ -18,7 +18,6 @@ #include #include -#include namespace cudf { diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index cdeaae164b2..8fa98642dfd 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4412,13 +4412,13 @@ void testGetMapValue() { } @Test void testGetMapKeyExistence() { - List list1 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList("a", "b"))); - List list2 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList("a", "c"))); - List list3 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList("e", "d"))); - List list4 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList("a", "g"))); - List list5 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList("f", "h"))); - List list6 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList("a", null))); - List list7 = Arrays.asList(new HostColumnVector.StructData(Arrays.asList(null, null))); + List list1 = Arrays.asList(new HostColumnVector.StructData("a", "b")); + List list2 = Arrays.asList(new HostColumnVector.StructData("a", "c")); + List list3 = Arrays.asList(new HostColumnVector.StructData("e", "d")); + List list4 = Arrays.asList(new HostColumnVector.StructData("a", "g")); + List list5 = Arrays.asList(new HostColumnVector.StructData("f", "h")); + List list6 = Arrays.asList(new HostColumnVector.StructData("a", null)); + List list7 = Arrays.asList(new HostColumnVector.StructData(null, null)); HostColumnVector.StructType structType = new HostColumnVector.StructType(true, Arrays.asList(new HostColumnVector.BasicType(true, DType.STRING), new HostColumnVector.BasicType(true, DType.STRING))); try (ColumnVector cv = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list3, list4, list5, list6, list7); From 636c41d0e2047c6f00adcce7cf99380b6e560437 Mon Sep 17 00:00:00 2001 From: Allen Xu Date: Wed, 12 May 2021 13:56:28 +0800 Subject: [PATCH 06/13] use contains --- .../main/java/ai/rapids/cudf/ColumnView.java | 10 ++-- java/src/main/native/src/ColumnViewJni.cpp | 6 +-- java/src/main/native/src/map_lookup.cu | 48 ++++++++++--------- java/src/main/native/src/map_lookup.hpp | 8 ++-- .../java/ai/rapids/cudf/ColumnVectorTest.java | 21 +++++--- 5 files changed, 52 insertions(+), 41 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 19706e5cee5..2b3172ad2d8 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2527,16 +2527,16 @@ public final ColumnVector getMapValue(Scalar key) { } /** For a column of type List> and a passed in String key, return a boolean - * column for all keys in the structs, false if the key doesn't exist. + * scalar for all keys in the structs, true if the key exists in all maps in the column. * @param key the String scalar to lookup in the column - * @return a boolean column based on the lookup result + * @return a boolean scalar based on the lookup result */ - public final ColumnVector getMapKeyExistence(Scalar key) { + public final Scalar getMapKeyExistence(Scalar key) { assert type.equals(DType.LIST) : "column type must be a LIST"; assert key != null : "target string may not be null"; assert key.getType().equals(DType.STRING) : "target must be a string scalar"; - return new ColumnVector(mapContains(getNativeView(), key.getScalarHandle())); + return new Scalar(DType.BOOL8, mapContains(getNativeView(), key.getScalarHandle())); } /** @@ -2861,7 +2861,7 @@ private static native long stringReplaceWithBackrefs(long columnView, String pat * Native method for check the existence of a key over a column of List> * @param columnView the column view handle of the map * @param key the string scalar that is the key for lookup - * @return an boolean column handle of the resultant + * @return an boolean scalar handle of the resultant * @throws CudfException */ private static native long mapContains(long columnView, long key) throws CudfException; diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 1ee1d7a00f6..9879a4c7bde 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1170,8 +1170,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_mapLookup(JNIEnv *env, jc } JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_mapContains(JNIEnv *env, jclass, - jlong map_column_view, - jlong lookup_key) { + jlong map_column_view, + jlong lookup_key) { JNI_NULL_CHECK(env, map_column_view, "column is null", 0); JNI_NULL_CHECK(env, lookup_key, "target string scalar is null", 0); try { @@ -1179,7 +1179,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_mapContains(JNIEnv *env, cudf::column_view *cv = reinterpret_cast(map_column_view); cudf::string_scalar *ss_key = reinterpret_cast(lookup_key); - std::unique_ptr result = cudf::jni::map_contains(*cv, *ss_key); + std::unique_ptr result = cudf::jni::map_contains(*cv, *ss_key); return reinterpret_cast(result.release()); } CATCH_STD(env, 0); diff --git a/java/src/main/native/src/map_lookup.cu b/java/src/main/native/src/map_lookup.cu index 95a4297b7de..3f762442517 100644 --- a/java/src/main/native/src/map_lookup.cu +++ b/java/src/main/native/src/map_lookup.cu @@ -18,7 +18,10 @@ #include #include #include +#include +#include #include +#include #include #include #include @@ -125,11 +128,9 @@ get_gather_map_for_map_values(column_view const &input, string_scalar &lookup_ke return gather_map; } -} // namespace - -namespace jni { - - +/** + * @brief a defensive check for the map column that is going to be processed + */ void map_check(column_view const &map_column, rmm::cuda_stream_view stream) { CUDF_EXPECTS(map_column.type().id() == type_id::LIST, "Expected LIST>."); @@ -143,30 +144,33 @@ void map_check(column_view const &map_column, rmm::cuda_stream_view stream) { "Expected LIST>."); CUDF_EXPECTS(structs_column.child(1).type().id() == type_id::STRING, "Expected LIST>."); - return; } -std::unique_ptr map_contains(column_view const &map_column, string_scalar lookup_key, +} // namespace + +namespace jni { + +std::unique_ptr map_contains(column_view const &map_column, string_scalar lookup_key, bool has_nulls, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { // Defensive checks. map_check(map_column, stream); - // Two-pass plan: construct gather map, and then gather() on structs_column.child(1). Plan A. - // (Can do in one pass perhaps, but that's Plan B.) - - auto gather_map = has_nulls ? - get_gather_map_for_map_values(map_column, lookup_key, stream, mr) : - get_gather_map_for_map_values(map_column, lookup_key, stream, mr); - - auto found = make_numeric_column(data_type{type_id::BOOL8}, gather_map->size(), - mask_state::UNALLOCATED, stream, mr); - thrust::transform(rmm::exec_policy(stream), thrust::make_counting_iterator(0), - thrust::make_counting_iterator(gather_map->size()), - found->mutable_view().template begin(), - [d_gather_map = gather_map->view().template begin()] __device__( - auto i) { return d_gather_map[i] >= 0; }); - return found; + lists_column_view lcv(map_column); + structs_column_view scv(lcv.child()); + + std::vector children; + children.push_back(lcv.offsets()); + children.push_back(scv.child(0)); + + column_view list_of_keys(map_column.type(), map_column.size(), + nullptr, map_column.null_mask(), map_column.null_count(), 0, children); + auto contains_column = lists::contains(list_of_keys, lookup_key); + // null will skipped in all-aggregation, so mask all nulls with 0. + contains_column->set_null_mask(rmm::device_buffer{0}, 0); + auto result = cudf::reduce(contains_column->view(), make_all_aggregation(), + cudf::data_type{type_id::BOOL8}); + return result; } std::unique_ptr map_lookup(column_view const &map_column, string_scalar lookup_key, diff --git a/java/src/main/native/src/map_lookup.hpp b/java/src/main/native/src/map_lookup.hpp index e0eeccbdde8..411767cd701 100644 --- a/java/src/main/native/src/map_lookup.hpp +++ b/java/src/main/native/src/map_lookup.hpp @@ -62,7 +62,7 @@ map_lookup(column_view const &map_column, string_scalar lookup_key, bool has_nul * <---KEY---> <--VALUE--> * * The string_view struct members are the key and value, respectively. - * For each row in the input list column. If the key is not found, -1 is returned. + * For any row in the input list column, if the key is not found, false will be returned. * * @param map_column The input "map" column to be searched. Must be of * type list_view>. @@ -70,12 +70,12 @@ map_lookup(column_view const &map_column, string_scalar lookup_key, bool has_nul * @param has_nulls Whether the input column might contain null list-rows, or null keys. * @param stream The CUDA stream * @param mr The device memory resource to be used for allocations - * @return An boolean_view column reflecting the existence for the key in each list. - * false means the lookup_key is not found. + * @return An boolean scalar reflecting the existence of the key in the map column. + * True means the lookup_key is found in all of the rows in the column. * @throw cudf::logic_error If the input column is not of type * list_view> */ -std::unique_ptr +std::unique_ptr map_contains(column_view const &map_column, string_scalar lookup_key, bool has_nulls = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 8fa98642dfd..272ad029067 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4416,15 +4416,22 @@ void testGetMapKeyExistence() { List list2 = Arrays.asList(new HostColumnVector.StructData("a", "c")); List list3 = Arrays.asList(new HostColumnVector.StructData("e", "d")); List list4 = Arrays.asList(new HostColumnVector.StructData("a", "g")); - List list5 = Arrays.asList(new HostColumnVector.StructData("f", "h")); - List list6 = Arrays.asList(new HostColumnVector.StructData("a", null)); - List list7 = Arrays.asList(new HostColumnVector.StructData(null, null)); + List list5 = Arrays.asList(new HostColumnVector.StructData("a", null)); + List list6 = Arrays.asList(new HostColumnVector.StructData(null, null)); HostColumnVector.StructType structType = new HostColumnVector.StructType(true, Arrays.asList(new HostColumnVector.BasicType(true, DType.STRING), new HostColumnVector.BasicType(true, DType.STRING))); - try (ColumnVector cv = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list3, list4, list5, list6, list7); - ColumnVector res = cv.getMapKeyExistence(Scalar.fromString("a")); - ColumnVector expected = ColumnVector.fromBoxedBooleans(true, true, false, true, false, true, false)) { - assertColumnsAreEqual(expected, res); + try (ColumnVector cv_1 = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list4, list5); + ColumnVector cv_2 = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list3); + ColumnVector cv_3 = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list4, list5, list6); + Scalar res_1 = cv_1.getMapKeyExistence(Scalar.fromString("a")); + Scalar res_2 = cv_2.getMapKeyExistence(Scalar.fromString("a")); + Scalar res_3 = cv_3.getMapKeyExistence(Scalar.fromString("a")); + Scalar expectedTrue = Scalar.fromBool(true); + Scalar expectedFalse = Scalar.fromBool(false) + ) { + assertEquals(expectedTrue, res_1); + assertEquals(expectedFalse, res_2); + assertEquals(expectedFalse, res_3); } } From 5793bb941ea6d1e8a5e7991aa63e3a19d6f4e1e8 Mon Sep 17 00:00:00 2001 From: Allen Xu Date: Wed, 12 May 2021 14:07:15 +0800 Subject: [PATCH 07/13] update --- java/src/main/native/src/map_lookup.cu | 1 - java/src/main/native/src/map_lookup.hpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/java/src/main/native/src/map_lookup.cu b/java/src/main/native/src/map_lookup.cu index 3f762442517..25a72553fea 100644 --- a/java/src/main/native/src/map_lookup.cu +++ b/java/src/main/native/src/map_lookup.cu @@ -30,7 +30,6 @@ #include #include #include -#include namespace cudf { namespace { diff --git a/java/src/main/native/src/map_lookup.hpp b/java/src/main/native/src/map_lookup.hpp index 411767cd701..cf3613c139d 100644 --- a/java/src/main/native/src/map_lookup.hpp +++ b/java/src/main/native/src/map_lookup.hpp @@ -54,7 +54,7 @@ map_lookup(column_view const &map_column, string_scalar lookup_key, bool has_nul /** * @brief Looks up a "map" column by specified key to see if the key exists or not, - * and returns a column of int values. + * and returns a cudf scalar of bool value. * * The map-column is represented as follows: * From de80502a04cdecc467026749d9b120666901ae74 Mon Sep 17 00:00:00 2001 From: Allen Xu Date: Thu, 13 May 2021 21:51:23 +0800 Subject: [PATCH 08/13] return column instead of scalar --- .../main/java/ai/rapids/cudf/ColumnView.java | 8 +++---- java/src/main/native/src/ColumnViewJni.cpp | 2 +- java/src/main/native/src/map_lookup.cu | 24 ++++++++++++------- java/src/main/native/src/map_lookup.hpp | 9 +++---- .../java/ai/rapids/cudf/ColumnVectorTest.java | 15 ++++-------- 5 files changed, 29 insertions(+), 29 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 2b3172ad2d8..50f42972bfe 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2529,14 +2529,14 @@ public final ColumnVector getMapValue(Scalar key) { /** For a column of type List> and a passed in String key, return a boolean * scalar for all keys in the structs, true if the key exists in all maps in the column. * @param key the String scalar to lookup in the column - * @return a boolean scalar based on the lookup result + * @return a boolean column based on the lookup result */ - public final Scalar getMapKeyExistence(Scalar key) { + public final ColumnVector getMapKeyExistence(Scalar key) { assert type.equals(DType.LIST) : "column type must be a LIST"; assert key != null : "target string may not be null"; assert key.getType().equals(DType.STRING) : "target must be a string scalar"; - return new Scalar(DType.BOOL8, mapContains(getNativeView(), key.getScalarHandle())); + return new ColumnVector(mapContains(getNativeView(), key.getScalarHandle())); } /** @@ -2861,7 +2861,7 @@ private static native long stringReplaceWithBackrefs(long columnView, String pat * Native method for check the existence of a key over a column of List> * @param columnView the column view handle of the map * @param key the string scalar that is the key for lookup - * @return an boolean scalar handle of the resultant + * @return an boolean column handle of the resultant * @throws CudfException */ private static native long mapContains(long columnView, long key) throws CudfException; diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 9879a4c7bde..f09b9029ecb 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1179,7 +1179,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_mapContains(JNIEnv *env, cudf::column_view *cv = reinterpret_cast(map_column_view); cudf::string_scalar *ss_key = reinterpret_cast(lookup_key); - std::unique_ptr result = cudf::jni::map_contains(*cv, *ss_key); + std::unique_ptr result = cudf::jni::map_contains(*cv, *ss_key); return reinterpret_cast(result.release()); } CATCH_STD(env, 0); diff --git a/java/src/main/native/src/map_lookup.cu b/java/src/main/native/src/map_lookup.cu index 25a72553fea..725ecc0b04c 100644 --- a/java/src/main/native/src/map_lookup.cu +++ b/java/src/main/native/src/map_lookup.cu @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include #include #include @@ -21,9 +22,10 @@ #include #include #include -#include +#include #include #include +#include #include #include #include @@ -130,7 +132,7 @@ get_gather_map_for_map_values(column_view const &input, string_scalar &lookup_ke /** * @brief a defensive check for the map column that is going to be processed */ -void map_check(column_view const &map_column, rmm::cuda_stream_view stream) { +void map_input_check(column_view const &map_column, rmm::cuda_stream_view stream) { CUDF_EXPECTS(map_column.type().id() == type_id::LIST, "Expected LIST>."); lists_column_view lcv{map_column}; @@ -149,11 +151,11 @@ void map_check(column_view const &map_column, rmm::cuda_stream_view stream) { namespace jni { -std::unique_ptr map_contains(column_view const &map_column, string_scalar lookup_key, +std::unique_ptr map_contains(column_view const &map_column, string_scalar lookup_key, bool has_nulls, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { // Defensive checks. - map_check(map_column, stream); + map_input_check(map_column, stream); lists_column_view lcv(map_column); structs_column_view scv(lcv.child()); @@ -165,10 +167,14 @@ std::unique_ptr map_contains(column_view const &map_column, string_scala column_view list_of_keys(map_column.type(), map_column.size(), nullptr, map_column.null_mask(), map_column.null_count(), 0, children); auto contains_column = lists::contains(list_of_keys, lookup_key); - // null will skipped in all-aggregation, so mask all nulls with 0. - contains_column->set_null_mask(rmm::device_buffer{0}, 0); - auto result = cudf::reduce(contains_column->view(), make_all_aggregation(), - cudf::data_type{type_id::BOOL8}); + // null will be skipped in all-aggregation when checking if all rows contain the key, + // so replace all nulls with 0. + std::unique_ptr replacement = + cudf::make_numeric_scalar(cudf::data_type(cudf::type_id::BOOL8)); + replacement->set_valid(true); + using ScalarType = cudf::scalar_type_t; + static_cast(replacement.get())->set_value(0); + auto result = cudf::replace_nulls(contains_column->view(), *replacement); return result; } @@ -176,7 +182,7 @@ std::unique_ptr map_lookup(column_view const &map_column, string_scalar bool has_nulls, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { // Defensive checks. - map_check(map_column, stream); + map_input_check(map_column, stream); lists_column_view lcv{map_column}; column_view structs_column = lcv.get_sliced_child(stream); diff --git a/java/src/main/native/src/map_lookup.hpp b/java/src/main/native/src/map_lookup.hpp index cf3613c139d..e1b943237c7 100644 --- a/java/src/main/native/src/map_lookup.hpp +++ b/java/src/main/native/src/map_lookup.hpp @@ -62,7 +62,8 @@ map_lookup(column_view const &map_column, string_scalar lookup_key, bool has_nul * <---KEY---> <--VALUE--> * * The string_view struct members are the key and value, respectively. - * For any row in the input list column, if the key is not found, false will be returned. + * For each row in the input list column, if the key is not found, false will be returned for that + * row. * * @param map_column The input "map" column to be searched. Must be of * type list_view>. @@ -70,12 +71,12 @@ map_lookup(column_view const &map_column, string_scalar lookup_key, bool has_nul * @param has_nulls Whether the input column might contain null list-rows, or null keys. * @param stream The CUDA stream * @param mr The device memory resource to be used for allocations - * @return An boolean scalar reflecting the existence of the key in the map column. - * True means the lookup_key is found in all of the rows in the column. + * @return An boolean column reflecting the existence of the key in each row in the map + * column. True means the lookup_key is found in that row. * @throw cudf::logic_error If the input column is not of type * list_view> */ -std::unique_ptr +std::unique_ptr map_contains(column_view const &map_column, string_scalar lookup_key, bool has_nulls = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 272ad029067..579423dc39a 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4420,18 +4420,11 @@ void testGetMapKeyExistence() { List list6 = Arrays.asList(new HostColumnVector.StructData(null, null)); HostColumnVector.StructType structType = new HostColumnVector.StructType(true, Arrays.asList(new HostColumnVector.BasicType(true, DType.STRING), new HostColumnVector.BasicType(true, DType.STRING))); - try (ColumnVector cv_1 = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list4, list5); - ColumnVector cv_2 = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list3); - ColumnVector cv_3 = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list4, list5, list6); - Scalar res_1 = cv_1.getMapKeyExistence(Scalar.fromString("a")); - Scalar res_2 = cv_2.getMapKeyExistence(Scalar.fromString("a")); - Scalar res_3 = cv_3.getMapKeyExistence(Scalar.fromString("a")); - Scalar expectedTrue = Scalar.fromBool(true); - Scalar expectedFalse = Scalar.fromBool(false) + try (ColumnVector cv = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list3, list4, list5, list6); + ColumnVector res = cv.getMapKeyExistence(Scalar.fromString("a")); + ColumnVector expected = ColumnVector.fromBoxedBooleans(true, true, false, true, true, false); ) { - assertEquals(expectedTrue, res_1); - assertEquals(expectedFalse, res_2); - assertEquals(expectedFalse, res_3); + assertColumnsAreEqual(expected, res); } } From 323d85e76ce62fe77ba29802eeea3ea286f6aadd Mon Sep 17 00:00:00 2001 From: Allen Xu Date: Thu, 13 May 2021 22:00:03 +0800 Subject: [PATCH 09/13] clean --- java/rmm_log.txt | 6 ++++++ java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java | 3 +-- 2 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 java/rmm_log.txt diff --git a/java/rmm_log.txt b/java/rmm_log.txt new file mode 100644 index 00000000000..23eb5842a25 --- /dev/null +++ b/java/rmm_log.txt @@ -0,0 +1,6 @@ +[ 36761][16:10:56:695952][info ] ----- RMM LOG BEGIN [PTDS DISABLED] ----- +[ 36761][16:10:56:695976][error ] [A][Stream 0x1][Upstream 3458764513820540928B][FAILURE maximum pool size exceeded] +[ 36761][16:10:56:696008][error ] [A][Stream 0x1][Upstream 3458764513820540928B][FAILURE maximum pool size exceeded] +[ 36761][16:10:56:696028][error ] [A][Stream 0x1][Upstream 3458764513820540928B][FAILURE maximum pool size exceeded] +[ 36761][16:10:56:732564][error ] [A][Stream 0x1][Upstream 1024B][FAILURE maximum pool size exceeded] +[ 36761][16:10:56:875676][error ] [A][Stream 0x1][Upstream 3458764513820540928B][FAILURE maximum pool size exceeded] diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 579423dc39a..61a15e77021 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4422,8 +4422,7 @@ void testGetMapKeyExistence() { new HostColumnVector.BasicType(true, DType.STRING))); try (ColumnVector cv = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list3, list4, list5, list6); ColumnVector res = cv.getMapKeyExistence(Scalar.fromString("a")); - ColumnVector expected = ColumnVector.fromBoxedBooleans(true, true, false, true, true, false); - ) { + ColumnVector expected = ColumnVector.fromBoxedBooleans(true, true, false, true, true, false)) { assertColumnsAreEqual(expected, res); } } From 719aaa9156f78433b584bca0512b19e6563785a8 Mon Sep 17 00:00:00 2001 From: Allen Xu Date: Fri, 14 May 2021 00:59:12 +0800 Subject: [PATCH 10/13] resolve comments --- java/rmm_log.txt | 6 -- .../main/java/ai/rapids/cudf/ColumnView.java | 4 +- java/src/main/native/src/map_lookup.cu | 1 - java/src/main/native/src/map_lookup.hpp | 4 +- .../java/ai/rapids/cudf/ColumnVectorTest.java | 63 ++++++++++++++----- 5 files changed, 53 insertions(+), 25 deletions(-) delete mode 100644 java/rmm_log.txt diff --git a/java/rmm_log.txt b/java/rmm_log.txt deleted file mode 100644 index 23eb5842a25..00000000000 --- a/java/rmm_log.txt +++ /dev/null @@ -1,6 +0,0 @@ -[ 36761][16:10:56:695952][info ] ----- RMM LOG BEGIN [PTDS DISABLED] ----- -[ 36761][16:10:56:695976][error ] [A][Stream 0x1][Upstream 3458764513820540928B][FAILURE maximum pool size exceeded] -[ 36761][16:10:56:696008][error ] [A][Stream 0x1][Upstream 3458764513820540928B][FAILURE maximum pool size exceeded] -[ 36761][16:10:56:696028][error ] [A][Stream 0x1][Upstream 3458764513820540928B][FAILURE maximum pool size exceeded] -[ 36761][16:10:56:732564][error ] [A][Stream 0x1][Upstream 1024B][FAILURE maximum pool size exceeded] -[ 36761][16:10:56:875676][error ] [A][Stream 0x1][Upstream 3458764513820540928B][FAILURE maximum pool size exceeded] diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 50f42972bfe..54f68a2fb3c 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2527,7 +2527,7 @@ public final ColumnVector getMapValue(Scalar key) { } /** For a column of type List> and a passed in String key, return a boolean - * scalar for all keys in the structs, true if the key exists in all maps in the column. + * column for all keys in the structs, true if the key exists in all maps in the column. * @param key the String scalar to lookup in the column * @return a boolean column based on the lookup result */ @@ -2861,7 +2861,7 @@ private static native long stringReplaceWithBackrefs(long columnView, String pat * Native method for check the existence of a key over a column of List> * @param columnView the column view handle of the map * @param key the string scalar that is the key for lookup - * @return an boolean column handle of the resultant + * @return boolean column handle of the result * @throws CudfException */ private static native long mapContains(long columnView, long key) throws CudfException; diff --git a/java/src/main/native/src/map_lookup.cu b/java/src/main/native/src/map_lookup.cu index 725ecc0b04c..b20daf84fc1 100644 --- a/java/src/main/native/src/map_lookup.cu +++ b/java/src/main/native/src/map_lookup.cu @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/java/src/main/native/src/map_lookup.hpp b/java/src/main/native/src/map_lookup.hpp index e1b943237c7..9f52e3d990c 100644 --- a/java/src/main/native/src/map_lookup.hpp +++ b/java/src/main/native/src/map_lookup.hpp @@ -54,7 +54,7 @@ map_lookup(column_view const &map_column, string_scalar lookup_key, bool has_nul /** * @brief Looks up a "map" column by specified key to see if the key exists or not, - * and returns a cudf scalar of bool value. + * and returns a cudf column of bool value. * * The map-column is represented as follows: * @@ -64,6 +64,8 @@ map_lookup(column_view const &map_column, string_scalar lookup_key, bool has_nul * The string_view struct members are the key and value, respectively. * For each row in the input list column, if the key is not found, false will be returned for that * row. + * Note: when search for the scalar key of "null", a column full of "false" will be returned because + * of cudf::list:contains * * @param map_column The input "map" column to be searched. Must be of * type list_view>. diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 61a15e77021..c188309ea50 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4410,22 +4410,55 @@ void testGetMapValue() { assertColumnsAreEqual(expected, res); } } - @Test - void testGetMapKeyExistence() { - List list1 = Arrays.asList(new HostColumnVector.StructData("a", "b")); - List list2 = Arrays.asList(new HostColumnVector.StructData("a", "c")); - List list3 = Arrays.asList(new HostColumnVector.StructData("e", "d")); - List list4 = Arrays.asList(new HostColumnVector.StructData("a", "g")); - List list5 = Arrays.asList(new HostColumnVector.StructData("a", null)); - List list6 = Arrays.asList(new HostColumnVector.StructData(null, null)); - HostColumnVector.StructType structType = new HostColumnVector.StructType(true, Arrays.asList(new HostColumnVector.BasicType(true, DType.STRING), - new HostColumnVector.BasicType(true, DType.STRING))); - try (ColumnVector cv = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list3, list4, list5, list6); - ColumnVector res = cv.getMapKeyExistence(Scalar.fromString("a")); - ColumnVector expected = ColumnVector.fromBoxedBooleans(true, true, false, true, true, false)) { - assertColumnsAreEqual(expected, res); - } + + @Test + void testGetMapKeyExistence() { + List list1 = Arrays.asList(new HostColumnVector.StructData("a", "b")); + List list2 = Arrays.asList(new HostColumnVector.StructData("a", "c")); + List list3 = Arrays.asList(new HostColumnVector.StructData("e", "d")); + List list4 = Arrays.asList(new HostColumnVector.StructData("a", "g")); + List list5 = Arrays.asList(new HostColumnVector.StructData("a", null)); + List list6 = Arrays.asList(new HostColumnVector.StructData(null, null)); + List list7 = Arrays.asList(new HostColumnVector.StructData()); + HostColumnVector.StructType structType = new HostColumnVector.StructType(true, Arrays.asList(new HostColumnVector.BasicType(true, DType.STRING), + new HostColumnVector.BasicType(true, DType.STRING))); + try (ColumnVector cv = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list3, list4, list5, list6, list7); + ColumnVector resValidKey = cv.getMapKeyExistence(Scalar.fromString("a")); + ColumnVector expectedValid = ColumnVector.fromBoxedBooleans(true, true, false, true, true, false, false); + ColumnVector expectedNull = ColumnVector.fromBoxedBooleans(false, false, false, false, false, false, false); + ColumnVector resNullKey = cv.getMapKeyExistence(Scalar.fromNull(DType.STRING))) { + assertColumnsAreEqual(expectedValid, resValidKey); + assertColumnsAreEqual(expectedNull, resNullKey); + } + +// Exception e = assertThrows(IllegalArgumentException.class, () -> { +// try (ColumnVector cv = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list3, list4, list5, list6, list7); +// ColumnVector resNullKey = cv.getMapKeyExistence(null)) { +// } +// }); +// assertTrue(e.getMessage().contains("target string scalar is null")); + + } + + @Test + void testGetMapKeyExistenceForNullKey() { + List list1 = Arrays.asList(new HostColumnVector.StructData("a", "b")); + List list2 = Arrays.asList(new HostColumnVector.StructData("a", "c")); + List list3 = Arrays.asList(new HostColumnVector.StructData("e", "d")); + List list4 = Arrays.asList(new HostColumnVector.StructData("a", "g")); + List list5 = Arrays.asList(new HostColumnVector.StructData("a", null)); + List list6 = Arrays.asList(new HostColumnVector.StructData(null, null)); + List list7 = Arrays.asList(new HostColumnVector.StructData()); + HostColumnVector.StructType structType = new HostColumnVector.StructType(true, Arrays.asList(new HostColumnVector.BasicType(true, DType.STRING), + new HostColumnVector.BasicType(true, DType.STRING))); + try (ColumnVector cv = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list3, list4, list5, list6, list7); + ColumnVector resValidKey = cv.getMapKeyExistence(Scalar.fromString("a")); + ColumnVector expected = ColumnVector.fromBoxedBooleans(true, true, false, true, true, false, false)) { + assertColumnsAreEqual(expected, resValidKey); } + } + + @Test void testListOfStructsOfStructs() { From df7f753bc394a3e9fe4b4ba13c17a8a35c920738 Mon Sep 17 00:00:00 2001 From: Allen Xu Date: Fri, 14 May 2021 01:22:49 +0800 Subject: [PATCH 11/13] update --- java/src/main/native/src/map_lookup.hpp | 6 ++-- .../java/ai/rapids/cudf/ColumnVectorTest.java | 33 ++++--------------- 2 files changed, 10 insertions(+), 29 deletions(-) diff --git a/java/src/main/native/src/map_lookup.hpp b/java/src/main/native/src/map_lookup.hpp index 9f52e3d990c..301293dc188 100644 --- a/java/src/main/native/src/map_lookup.hpp +++ b/java/src/main/native/src/map_lookup.hpp @@ -62,10 +62,10 @@ map_lookup(column_view const &map_column, string_scalar lookup_key, bool has_nul * <---KEY---> <--VALUE--> * * The string_view struct members are the key and value, respectively. - * For each row in the input list column, if the key is not found, false will be returned for that + * For each row in the input list column, if the key is not found, false will be returned for that * row. - * Note: when search for the scalar key of "null", a column full of "false" will be returned because - * of cudf::list:contains + * Note: when search for the scalar key of "null", a column full of "false" will be returned because + * map_contains is leveraging cudf::list:contains. * * @param map_column The input "map" column to be searched. Must be of * type list_view>. diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index c188309ea50..c0cd23592aa 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4431,35 +4431,16 @@ void testGetMapKeyExistence() { assertColumnsAreEqual(expectedNull, resNullKey); } -// Exception e = assertThrows(IllegalArgumentException.class, () -> { -// try (ColumnVector cv = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list3, list4, list5, list6, list7); -// ColumnVector resNullKey = cv.getMapKeyExistence(null)) { -// } -// }); -// assertTrue(e.getMessage().contains("target string scalar is null")); - - } - - @Test - void testGetMapKeyExistenceForNullKey() { - List list1 = Arrays.asList(new HostColumnVector.StructData("a", "b")); - List list2 = Arrays.asList(new HostColumnVector.StructData("a", "c")); - List list3 = Arrays.asList(new HostColumnVector.StructData("e", "d")); - List list4 = Arrays.asList(new HostColumnVector.StructData("a", "g")); - List list5 = Arrays.asList(new HostColumnVector.StructData("a", null)); - List list6 = Arrays.asList(new HostColumnVector.StructData(null, null)); - List list7 = Arrays.asList(new HostColumnVector.StructData()); - HostColumnVector.StructType structType = new HostColumnVector.StructType(true, Arrays.asList(new HostColumnVector.BasicType(true, DType.STRING), - new HostColumnVector.BasicType(true, DType.STRING))); - try (ColumnVector cv = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list3, list4, list5, list6, list7); - ColumnVector resValidKey = cv.getMapKeyExistence(Scalar.fromString("a")); - ColumnVector expected = ColumnVector.fromBoxedBooleans(true, true, false, true, true, false, false)) { - assertColumnsAreEqual(expected, resValidKey); - } + AssertionError e = assertThrows(AssertionError.class, () -> { + try (ColumnVector cv = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType), list1, list2, list3, list4, list5, list6, list7); + ColumnVector resNullKey = cv.getMapKeyExistence(null)) { + } + }); + System.out.println(e.getMessage()); + assertTrue(e.getMessage().contains("target string may not be null")); } - @Test void testListOfStructsOfStructs() { List list1 = Arrays.asList( From 7a6115f084b5bbdb4f85bf183adb5bd632539209 Mon Sep 17 00:00:00 2001 From: Allen Xu Date: Fri, 14 May 2021 08:31:36 +0800 Subject: [PATCH 12/13] Make doc correct --- java/src/main/java/ai/rapids/cudf/ColumnView.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 54f68a2fb3c..d3d65bede04 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -2527,7 +2527,8 @@ public final ColumnVector getMapValue(Scalar key) { } /** For a column of type List> and a passed in String key, return a boolean - * column for all keys in the structs, true if the key exists in all maps in the column. + * column for all keys in the structs, It is true if the key exists in the corresponding map for + * that row, false otherwise. It will never return null for a row. * @param key the String scalar to lookup in the column * @return a boolean column based on the lookup result */ From 141698ee2627829bc3dd1e6f20c1e73b670677f7 Mon Sep 17 00:00:00 2001 From: Allen Xu Date: Fri, 14 May 2021 08:33:56 +0800 Subject: [PATCH 13/13] cleaan code --- java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index c0cd23592aa..82b8e2b99be 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4436,7 +4436,6 @@ void testGetMapKeyExistence() { ColumnVector resNullKey = cv.getMapKeyExistence(null)) { } }); - System.out.println(e.getMessage()); assertTrue(e.getMessage().contains("target string may not be null")); }