rapidsai · rapids-bot · May 14, 2021 · May 11, 2021 · May 11, 2021 · May 11, 2021
@@ -2526,6 +2526,18 @@ public final ColumnVector getMapValue(Scalar key) {
     return new ColumnVector(mapLookup(getNativeView(), key.getScalarHandle()));
   }
 
+  /** For a column of type List<Struct<String, String>> (a map) and a String key, return a boolean
+   * column with one entry per row: true if that row's keys contain the key, false if they do not.
+   * @param key the String scalar to look up among the keys of each row
+   * @return a boolean column holding the per-row lookup result
+   */
+  public final ColumnVector getMapKeyExistence(Scalar key) {
+    assert type.equals(DType.LIST) : "column type must be a LIST";
+    assert key != null : "target string may not be null";
+    assert key.getType().equals(DType.STRING) : "target must be a string scalar";
+    // The native call hands back a column handle, which the new ColumnVector wraps.
+    final long existenceHandle = mapContains(getNativeView(), key.getScalarHandle());
+    return new ColumnVector(existenceHandle);
+  }
 
   /**
    * Create a new struct column view of existing column views. Note that this will NOT copy
@@ -2844,6 +2856,15 @@ private static native long stringReplaceWithBackrefs(long columnView, String pat
    * @throws CudfException
    */
   private static native long mapLookup(long columnView, long key) throws CudfException;
+
+  /**
+   * Native method to check for the existence of a key in a column of List<Struct<String,String>>
+   * @param columnView the column view handle of the map
+   * @param key the handle of the string scalar that is the key for lookup
+   * @return a handle to the boolean column holding the result
+   * @throws CudfException
+   */
+  private static native long mapContains(long columnView, long key) throws CudfException;
   /**
    * Native method to add zeros as padding to the left of each string.
    */

@@ -1169,6 +1169,22 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_mapLookup(JNIEnv *env, jc
   CATCH_STD(env, 0);
 }
 
+// JNI entry point backing ColumnView.mapContains; both jlong arguments are native addresses.
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_mapContains(JNIEnv *env, jclass,
+                                                                 jlong map_column_view,
+                                                                 jlong lookup_key) {
+  JNI_NULL_CHECK(env, map_column_view, "column is null", 0);
+  JNI_NULL_CHECK(env, lookup_key, "target string scalar is null", 0);
+  try {
+    cudf::jni::auto_set_device(env);
+    auto *map_view = reinterpret_cast<cudf::column_view *>(map_column_view);
+    auto *key_scalar = reinterpret_cast<cudf::string_scalar *>(lookup_key);
+    // release() gives up unique_ptr ownership; the raw column pointer is returned as the handle.
+    return reinterpret_cast<jlong>(cudf::jni::map_contains(*map_view, *key_scalar).release());
+  }
+  CATCH_STD(env, 0);
+}
+
 JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringReplaceWithBackrefs(JNIEnv *env,
                                                                                  jclass,
                                                                                  jlong column_view,

@@ -27,6 +27,7 @@
 #include <cudf/utilities/type_dispatcher.hpp>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/mr/device/device_memory_resource.hpp>
+#include <rmm/exec_policy.hpp>
 
 namespace cudf {
 namespace {
@@ -127,24 +128,55 @@ get_gather_map_for_map_values(column_view const &input, string_scalar &lookup_ke
 } // namespace
 
 namespace jni {
-std::unique_ptr<column> map_lookup(column_view const &map_column, string_scalar lookup_key,
-                                   bool has_nulls, rmm::cuda_stream_view stream,
-                                   rmm::mr::device_memory_resource *mr) {
-  // Defensive checks.
+
+
+/**
+ * @brief Checks that `map_column` is laid out as LIST<STRUCT<STRING,STRING>>.
+ *
+ * @param map_column The column to validate.
+ * @param stream     CUDA stream handed to get_sliced_child() when fetching the struct child.
+ * @throw cudf::logic_error (raised through CUDF_EXPECTS) if the column is not a LIST, if its
+ *        child is not a STRUCT, or if the STRUCT does not have exactly two STRING children.
+ */
+void map_check(column_view const &map_column, rmm::cuda_stream_view stream) {
   CUDF_EXPECTS(map_column.type().id() == type_id::LIST, "Expected LIST<STRUCT<key,value>>.");
 
   lists_column_view lcv{map_column};
-  auto structs_column = lcv.get_sliced_child(stream);
+  column_view structs_column = lcv.get_sliced_child(stream);
 
   CUDF_EXPECTS(structs_column.type().id() == type_id::STRUCT, "Expected LIST<STRUCT<key,value>>.");
 
-  structs_column_view scv{structs_column};
   CUDF_EXPECTS(structs_column.num_children() == 2, "Expected LIST<STRUCT<key,value>>.");
   CUDF_EXPECTS(structs_column.child(0).type().id() == type_id::STRING,
                "Expected LIST<STRUCT<key,value>>.");
   CUDF_EXPECTS(structs_column.child(1).type().id() == type_id::STRING,
                "Expected LIST<STRUCT<key,value>>.");
+}
+
+std::unique_ptr<column> map_contains(column_view const &map_column, string_scalar lookup_key,
+                                     bool has_nulls, rmm::cuda_stream_view stream,
+                                     rmm::mr::device_memory_resource *mr) {
+  // Defensive checks.
+  map_check(map_column, stream);
+
+  // Reuse the gather map built for value lookup. Only the sign of each entry matters here:
+  // an entry >= 0 is reported as "key found", anything else as "key not found".
+  auto gather_map = has_nulls ?
+                        get_gather_map_for_map_values<true>(map_column, lookup_key, stream, mr) :
+                        get_gather_map_for_map_values<false>(map_column, lookup_key, stream, mr);
+  auto const gather_indices = gather_map->view();
+
+  // The result carries no null mask (mask_state::UNALLOCATED): every row is true or false.
+  auto found = make_numeric_column(data_type{type_id::BOOL8}, gather_indices.size(),
+                                   mask_state::UNALLOCATED, stream, mr);
+  // No gather() is needed for an existence check; map each index straight to a boolean.
+  thrust::transform(rmm::exec_policy(stream), gather_indices.begin<size_type>(),
+                    gather_indices.end<size_type>(), found->mutable_view().begin<bool>(),
+                    [] __device__(size_type index) { return index >= 0; });
+  return found;
+}
+
+std::unique_ptr<column> map_lookup(column_view const &map_column, string_scalar lookup_key,
+                                   bool has_nulls, rmm::cuda_stream_view stream,
+                                   rmm::mr::device_memory_resource *mr) {
+  // Defensive checks.
+  map_check(map_column, stream);
+
+  lists_column_view lcv{map_column};
+  column_view structs_column = lcv.get_sliced_child(stream);
   // Two-pass plan: construct gather map, and then gather() on structs_column.child(1). Plan A.
   // (Can do in one pass perhaps, but that's Plan B.)
 

@@ -51,6 +51,35 @@ map_lookup(column_view const &map_column, string_scalar lookup_key, bool has_nul
            rmm::cuda_stream_view stream = rmm::cuda_stream_default,
            rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());
 
+
+/**
+ * @brief Looks up a "map" column by specified key to see if the key exists or not,
+ *        and returns a column of boolean values.
+ *
+ * The map-column is represented as follows:
+ *
+ *  list_view<struct_view< string_view, string_view > >.
+ *                         <---KEY--->  <--VALUE-->
+ *
+ * The string_view struct members are the key and value, respectively.
+ * For each row in the input list column, true is returned if the key is found, false if not.
+ *
+ * @param map_column The input "map" column to be searched. Must be of
+ *                   type list_view<struct_view<string_view, string_view>>.
+ * @param lookup_key The search key, whose existence is to be checked in each list row
+ * @param has_nulls  Whether the input column might contain null list-rows, or null keys.
+ * @param stream     The CUDA stream
+ * @param mr         The device memory resource to be used for allocations
+ * @return           A BOOL8 column reflecting the existence of the key in each list.
+ *                   false means the lookup_key is not found.
+ * @throw cudf::logic_error If the input column is not of type
+ *                          list_view<struct_view<string_view, string_view>>
+ */
+std::unique_ptr<column>
+map_contains(column_view const &map_column, string_scalar lookup_key, bool has_nulls = true,
+           rmm::cuda_stream_view stream = rmm::cuda_stream_default,
+           rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());
+
 } // namespace jni
 
 } // namespace cudf
@@ -4410,6 +4410,23 @@ void testGetMapValue() {
       assertColumnsAreEqual(expected, res);
     }
   }
+
+  @Test
+  void testGetMapKeyExistence() {
+    // Seven single-entry maps; only rows whose key equals "a" are expected to report true.
+    // A null value (list6) does not affect the result, and a null key (list7) never matches.
+    List<HostColumnVector.StructData> list1 = Arrays.asList(new HostColumnVector.StructData("a", "b"));
+    List<HostColumnVector.StructData> list2 = Arrays.asList(new HostColumnVector.StructData("a", "c"));
+    List<HostColumnVector.StructData> list3 = Arrays.asList(new HostColumnVector.StructData("e", "d"));
+    List<HostColumnVector.StructData> list4 = Arrays.asList(new HostColumnVector.StructData("a", "g"));
+    List<HostColumnVector.StructData> list5 = Arrays.asList(new HostColumnVector.StructData("f", "h"));
+    List<HostColumnVector.StructData> list6 = Arrays.asList(new HostColumnVector.StructData("a", null));
+    List<HostColumnVector.StructData> list7 = Arrays.asList(new HostColumnVector.StructData(null, null));
+    HostColumnVector.StructType structType = new HostColumnVector.StructType(true,
+        Arrays.asList(new HostColumnVector.BasicType(true, DType.STRING),
+            new HostColumnVector.BasicType(true, DType.STRING)));
+    // The lookup scalar is a try-with-resources entry so that it is closed after the test.
+    try (ColumnVector cv = ColumnVector.fromLists(new HostColumnVector.ListType(true, structType),
+             list1, list2, list3, list4, list5, list6, list7);
+         Scalar lookupKey = Scalar.fromString("a");
+         ColumnVector res = cv.getMapKeyExistence(lookupKey);
+         ColumnVector expected = ColumnVector.fromBoxedBooleans(true, true, false, true, false, true, false)) {
+      assertColumnsAreEqual(expected, res);
+    }
+  }
 
   @Test
   void testListOfStructsOfStructs() {