From 560946b98ad1f97204ce1f7c1c3eaa272456864d Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Wed, 7 Feb 2024 10:55:22 -0600 Subject: [PATCH 1/4] JNI bindings for unique_hash_join Signed-off-by: Jason Lowe --- java/src/main/java/ai/rapids/cudf/Table.java | 25 ++++ java/src/main/native/src/TableJni.cpp | 26 +++- .../test/java/ai/rapids/cudf/TableTest.java | 111 +++++++++++++++++- 3 files changed, 159 insertions(+), 3 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 9a790c8518b..939e8e6c393 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -618,6 +618,9 @@ private static native long[] leftHashJoinGatherMapsWithCount(long leftTable, lon private static native long[] innerJoinGatherMaps(long leftKeys, long rightKeys, boolean compareNullsEqual) throws CudfException; + private static native long[] innerUniqueJoinGatherMaps(long leftKeys, long rightKeys, + boolean compareNullsEqual) throws CudfException; + private static native long innerJoinRowCount(long table, long hashJoin) throws CudfException; private static native long[] innerHashJoinGatherMaps(long table, long hashJoin) throws CudfException; @@ -3150,6 +3153,28 @@ public GatherMap[] innerJoinGatherMaps(Table rightKeys, boolean compareNullsEqua return buildJoinGatherMaps(gatherMapData); } + /** + * Computes the gather maps that can be used to manifest the result of an inner equi-join between + * two tables where the right table is guaranteed to not contain any duplicated join keys. It is + * assumed this table instance holds the key columns from the left table, and the table argument + * represents the key columns from the right table. Two {@link GatherMap} instances will be + * returned that can be used to gather the left and right tables, respectively, to produce the + * result of the inner join. 
+ * It is the responsibility of the caller to close the resulting gather map instances. + * @param rightKeys join key columns from the right table + * @param compareNullsEqual true if null key values should match otherwise false + * @return left and right table gather maps + */ + public GatherMap[] innerUniqueJoinGatherMaps(Table rightKeys, boolean compareNullsEqual) { + if (getNumberOfColumns() != rightKeys.getNumberOfColumns()) { + throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + + "rightKeys: " + rightKeys.getNumberOfColumns()); + } + long[] gatherMapData = + innerUniqueJoinGatherMaps(getNativeView(), rightKeys.getNativeView(), compareNullsEqual); + return buildJoinGatherMaps(gatherMapData); + } + /** * Computes the number of rows resulting from an inner equi-join between two tables. * @param otherHash hash table built from join key columns from the other table diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 1d6f1332b06..ca9ae555d5c 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -702,9 +702,9 @@ jlongArray gather_maps_to_java(JNIEnv *env, jlongArray gather_map_to_java(JNIEnv *env, std::unique_ptr<rmm::device_uvector<cudf::size_type>> map) { // release the underlying device buffer to Java - auto gather_map_buffer = std::make_unique<rmm::device_buffer>(map->release()); cudf::jni::native_jlongArray result(env, 3); - result[0] = static_cast<jlong>(gather_map_buffer->size()); + result[0] = static_cast<jlong>(map->size() * sizeof(cudf::size_type)); + auto gather_map_buffer = std::make_unique<rmm::device_buffer>(map->release()); result[1] = ptr_as_jlong(gather_map_buffer->data()); result[2] = release_as_jlong(gather_map_buffer); return result.get_jArray(); @@ -2550,6 +2550,28 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_innerJoinGatherMaps( }); } +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_innerUniqueJoinGatherMaps( + JNIEnv *env, jclass, jlong j_left_keys, jlong j_right_keys, jboolean 
compare_nulls_equal) { + return cudf::jni::join_gather_maps( + env, j_left_keys, j_right_keys, compare_nulls_equal, + [](cudf::table_view const &left, cudf::table_view const &right, cudf::null_equality nulleq) { + auto has_nulls = cudf::has_nested_nulls(left) || cudf::has_nested_nulls(right) + ? cudf::nullable_join::YES : cudf::nullable_join::NO; + std::pair<std::unique_ptr<rmm::device_uvector<cudf::size_type>>, + std::unique_ptr<rmm::device_uvector<cudf::size_type>>> maps; + if (cudf::detail::has_nested_columns(right)) { + cudf::unique_hash_join<cudf::has_nested::YES> unique_hash(right, left, has_nulls, nulleq); + maps = unique_hash.inner_join(); + } else { + cudf::unique_hash_join<cudf::has_nested::NO> unique_hash(right, left, has_nulls, nulleq); + maps = unique_hash.inner_join(); + } + // Unique join returns {right map, left map} but all the other joins + // return {left map, right map}. Swap here to make it consistent. + return std::make_pair(std::move(maps.second), std::move(maps.first)); + }); +} + JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_innerJoinRowCount(JNIEnv *env, jclass, jlong j_left_table, jlong j_right_hash_join) { diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 76f127eae77..04d39c76eab 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -33,7 +33,6 @@ import com.google.common.base.Charsets; import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import org.apache.avro.SchemaBuilder; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.parquet.hadoop.ParquetFileReader; @@ -2085,6 +2084,116 @@ void testInnerJoinGatherMapsNulls() { } } + private void checkInnerUniqueJoin(Table leftKeys, Table rightKeys, Table expected, + boolean compareNullsEqual) { + GatherMap[] maps = leftKeys.innerUniqueJoinGatherMaps(rightKeys, compareNullsEqual); + try { + verifyJoinGatherMaps(maps, expected); + } finally { + for (GatherMap map : maps) { + map.close(); + } + } + } + + @Test + void 
testInnerUniqueJoinGatherMaps() { + try (Table leftKeys = new Table.TestBuilder().column(2, 3, 9, 0, 1, 7, 4, 6, 5, 8, 6).build(); + Table rightKeys = new Table.TestBuilder().column(6, 5, 9, 8, 10, 32).build(); + Table expected = new Table.TestBuilder() + .column(2, 7, 8, 9, 10) // left + .column(2, 0, 1, 3, 0) // right + .build()) { + checkInnerUniqueJoin(leftKeys, rightKeys, expected, false); + } + } + + @Test + void testInnerUniqueJoinGatherMapsWithNested() { + StructType structType = new StructType(false, + new BasicType(false, DType.STRING), + new BasicType(false, DType.INT32)); + StructData[] leftData = new StructData[]{ + new StructData("abc", 1), + new StructData("xyz", 1), + new StructData("abc", 2), + new StructData("xyz", 2), + new StructData("abc", 1), + new StructData("abc", 3), + new StructData("xyz", 3) + }; + StructData[] rightData = new StructData[]{ + new StructData("abc", 1), + new StructData("xyz", 4), + new StructData("xyz", 2), + new StructData("abc", -1), + }; + try (Table leftKeys = new Table.TestBuilder().column(structType, leftData).build(); + Table rightKeys = new Table.TestBuilder().column(structType, rightData).build(); + Table expected = new Table.TestBuilder() + .column(0, 3, 4) + .column(0, 2, 0) + .build()) { + checkInnerUniqueJoin(leftKeys, rightKeys, expected, false); + } + } + + @Test + void testInnerUniqueJoinGatherMapsNullsEqual() { + try (Table leftKeys = new Table.TestBuilder() + .column(2, 3, 9, 0, 1, 7, 4, null, null, 8) + .build(); + Table rightKeys = new Table.TestBuilder() + .column(null, 9, 8, 10, 32) + .build(); + Table expected = new Table.TestBuilder() + .column(2, 7, 8, 9) // left + .column(1, 0, 0, 2) // right + .build()) { + checkInnerUniqueJoin(leftKeys, rightKeys, expected, true); + } + } + + @Test + void testInnerUniqueJoinGatherMapsWithNestedNullsEqual() { + StructType structType = new StructType(true, + new BasicType(true, DType.STRING), + new BasicType(true, DType.INT32)); + StructData[] leftData = new 
StructData[]{ + new StructData("abc", 1), + null, + new StructData("xyz", 1), + new StructData("abc", 2), + new StructData("xyz", null), + null, + new StructData("abc", 1), + new StructData("abc", 3), + new StructData("xyz", 3), + new StructData(null, null), + new StructData(null, 1) + }; + StructData[] rightData = new StructData[]{ + null, + new StructData("abc", 1), + new StructData("xyz", 4), + new StructData("xyz", 2), + new StructData(null, null), + new StructData(null, 2), + new StructData(null, 1), + new StructData("xyz", null), + new StructData("abc", null), + new StructData("abc", -1) + }; + try (Table leftKeys = new Table.TestBuilder().column(structType, leftData).build(); + Table rightKeys = new Table.TestBuilder().column(structType, rightData).build(); + Table expected = new Table.TestBuilder() + .column(0, 1, 4, 5, 6, 9, 10) + .column(1, 0, 7, 0, 1, 4, 6) + .build()) { + checkInnerUniqueJoin(leftKeys, rightKeys, expected, true); + } + } + @Test void testInnerHashJoinGatherMaps() { try (Table leftKeys = new Table.TestBuilder().column(2, 3, 9, 0, 1, 7, 4, 6, 5, 8).build(); From 3ee5480e78cc57f87a99a5e490014f284e2b5c9a Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Wed, 14 Feb 2024 10:53:39 -0600 Subject: [PATCH 2/4] Update to unique_join->distinct_join naming change --- java/src/main/java/ai/rapids/cudf/Table.java | 8 +++---- java/src/main/native/src/TableJni.cpp | 10 ++++----- .../test/java/ai/rapids/cudf/TableTest.java | 22 +++++++++---------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 939e8e6c393..8d4488061a8 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -618,8 +618,8 @@ private static native long[] leftHashJoinGatherMapsWithCount(long leftTable, lon private static native long[] innerJoinGatherMaps(long leftKeys, long rightKeys, boolean compareNullsEqual) throws 
CudfException; - private static native long[] innerUniqueJoinGatherMaps(long leftKeys, long rightKeys, - boolean compareNullsEqual) throws CudfException; + private static native long[] innerDistinctJoinGatherMaps(long leftKeys, long rightKeys, + boolean compareNullsEqual) throws CudfException; private static native long innerJoinRowCount(long table, long hashJoin) throws CudfException; @@ -3165,13 +3165,13 @@ public GatherMap[] innerJoinGatherMaps(Table rightKeys, boolean compareNullsEqua * @param compareNullsEqual true if null key values should match otherwise false * @return left and right table gather maps */ - public GatherMap[] innerUniqueJoinGatherMaps(Table rightKeys, boolean compareNullsEqual) { + public GatherMap[] innerDistinctJoinGatherMaps(Table rightKeys, boolean compareNullsEqual) { if (getNumberOfColumns() != rightKeys.getNumberOfColumns()) { throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + "rightKeys: " + rightKeys.getNumberOfColumns()); } long[] gatherMapData = - innerUniqueJoinGatherMaps(getNativeView(), rightKeys.getNativeView(), compareNullsEqual); + innerDistinctJoinGatherMaps(getNativeView(), rightKeys.getNativeView(), compareNullsEqual); return buildJoinGatherMaps(gatherMapData); } diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index ca9ae555d5c..1ef17121a16 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -2550,7 +2550,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_innerJoinGatherMaps( }); } -JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_innerUniqueJoinGatherMaps( +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_innerDistinctJoinGatherMaps( JNIEnv *env, jclass, jlong j_left_keys, jlong j_right_keys, jboolean compare_nulls_equal) { return cudf::jni::join_gather_maps( env, j_left_keys, j_right_keys, compare_nulls_equal, @@ -2560,11 +2560,11 @@ JNIEXPORT jlongArray JNICALL 
Java_ai_rapids_cudf_Table_innerUniqueJoinGatherMaps std::pair<std::unique_ptr<rmm::device_uvector<cudf::size_type>>, std::unique_ptr<rmm::device_uvector<cudf::size_type>>> maps; if (cudf::detail::has_nested_columns(right)) { - cudf::unique_hash_join<cudf::has_nested::YES> unique_hash(right, left, has_nulls, nulleq); - maps = unique_hash.inner_join(); + cudf::distinct_hash_join<cudf::has_nested::YES> hash(right, left, has_nulls, nulleq); + maps = hash.inner_join(); } else { - cudf::unique_hash_join<cudf::has_nested::NO> unique_hash(right, left, has_nulls, nulleq); - maps = unique_hash.inner_join(); + cudf::distinct_hash_join<cudf::has_nested::NO> hash(right, left, has_nulls, nulleq); + maps = hash.inner_join(); } // Unique join returns {right map, left map} but all the other joins // return {left map, right map}. Swap here to make it consistent. diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 04d39c76eab..299019d74a2 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -2084,9 +2084,9 @@ void testInnerJoinGatherMapsNulls() { } } - private void checkInnerUniqueJoin(Table leftKeys, Table rightKeys, Table expected, - boolean compareNullsEqual) { - GatherMap[] maps = leftKeys.innerUniqueJoinGatherMaps(rightKeys, compareNullsEqual); + private void checkInnerDistinctJoin(Table leftKeys, Table rightKeys, Table expected, + boolean compareNullsEqual) { + GatherMap[] maps = leftKeys.innerDistinctJoinGatherMaps(rightKeys, compareNullsEqual); try { verifyJoinGatherMaps(maps, expected); } finally { @@ -2097,19 +2097,19 @@ private void checkInnerUniqueJoin(Table leftKeys, Table rightKeys, Table expecte } @Test - void testInnerUniqueJoinGatherMaps() { + void testInnerDistinctJoinGatherMaps() { try (Table leftKeys = new Table.TestBuilder().column(2, 3, 9, 0, 1, 7, 4, 6, 5, 8, 6).build(); Table rightKeys = new Table.TestBuilder().column(6, 5, 9, 8, 10, 32).build(); Table expected = new Table.TestBuilder() .column(2, 7, 8, 9, 10) // left .column(2, 0, 1, 3, 0) // right .build()) { - checkInnerUniqueJoin(leftKeys, rightKeys, 
expected, false); + checkInnerDistinctJoin(leftKeys, rightKeys, expected, false); } } @Test - void testInnerUniqueJoinGatherMapsWithNested() { + void testInnerDistinctJoinGatherMapsWithNested() { StructType structType = new StructType(false, new BasicType(false, DType.STRING), new BasicType(false, DType.INT32)); @@ -2134,12 +2134,12 @@ void testInnerUniqueJoinGatherMapsWithNested() { .column(0, 3, 4) .column(0, 2, 0) .build()) { - checkInnerUniqueJoin(leftKeys, rightKeys, expected, false); + checkInnerDistinctJoin(leftKeys, rightKeys, expected, false); } } @Test - void testInnerUniqueJoinGatherMapsNullsEqual() { + void testInnerDistinctJoinGatherMapsNullsEqual() { try (Table leftKeys = new Table.TestBuilder() .column(2, 3, 9, 0, 1, 7, 4, null, null, 8) .build(); @@ -2150,12 +2150,12 @@ void testInnerUniqueJoinGatherMapsNullsEqual() { .column(2, 7, 8, 9) // left .column(1, 0, 0, 2) // right .build()) { - checkInnerUniqueJoin(leftKeys, rightKeys, expected, true); + checkInnerDistinctJoin(leftKeys, rightKeys, expected, true); } } @Test - void testInnerUniqueJoinGatherMapsWithNestedNullsEqual() { + void testInnerDistinctJoinGatherMapsWithNestedNullsEqual() { StructType structType = new StructType(true, new BasicType(true, DType.STRING), new BasicType(true, DType.INT32)); @@ -2190,7 +2190,7 @@ void testInnerUniqueJoinGatherMapsWithNestedNullsEqual() { .column(0, 1, 4, 5, 6, 9, 10) .column(1, 0, 7, 0, 1, 4, 6) .build()) { - checkInnerUniqueJoin(leftKeys, rightKeys, expected, true); + checkInnerDistinctJoin(leftKeys, rightKeys, expected, true); } } From ff14dad34ba2261c5aa5c286b94f16890a1189e9 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Fri, 23 Feb 2024 16:42:00 -0600 Subject: [PATCH 3/4] Style fixes --- java/src/main/native/src/TableJni.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 1ef17121a16..05d8c6bf021 100644 --- 
a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -2555,10 +2555,12 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_innerDistinctJoinGatherMa return cudf::jni::join_gather_maps( env, j_left_keys, j_right_keys, compare_nulls_equal, [](cudf::table_view const &left, cudf::table_view const &right, cudf::null_equality nulleq) { - auto has_nulls = cudf::has_nested_nulls(left) || cudf::has_nested_nulls(right) - ? cudf::nullable_join::YES : cudf::nullable_join::NO; + auto has_nulls = cudf::has_nested_nulls(left) || cudf::has_nested_nulls(right) ? + cudf::nullable_join::YES : + cudf::nullable_join::NO; std::pair<std::unique_ptr<rmm::device_uvector<cudf::size_type>>, - std::unique_ptr<rmm::device_uvector<cudf::size_type>>> maps; + std::unique_ptr<rmm::device_uvector<cudf::size_type>>> + maps; if (cudf::detail::has_nested_columns(right)) { cudf::distinct_hash_join<cudf::has_nested::YES> hash(right, left, has_nulls, nulleq); maps = hash.inner_join(); From 81f9e8753384c026b2e43d57e438a9d833f85c85 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Mon, 26 Feb 2024 15:13:16 -0600 Subject: [PATCH 4/4] Address review comments --- java/src/main/java/ai/rapids/cudf/Table.java | 82 +++++++++++++++++--- 1 file changed, 71 insertions(+), 11 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 8d4488061a8..ba372bc0c73 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -2913,7 +2913,9 @@ private static GatherMap[] buildJoinGatherMaps(long[] gatherMapData) { * the table argument represents the key columns from the right table. Two {@link GatherMap} * instances will be returned that can be used to gather the left and right tables, * respectively, to produce the result of the left join. + * * It is the responsibility of the caller to close the resulting gather map instances. 
+ * * @param rightKeys join key columns from the right table * @param compareNullsEqual true if null key values should match otherwise false * @return left and right table gather maps @@ -2949,7 +2951,9 @@ public long leftJoinRowCount(HashJoin rightHash) { * the {@link HashJoin} argument has been constructed from the key columns from the right table. * Two {@link GatherMap} instances will be returned that can be used to gather the left and right * tables, respectively, to produce the result of the left join. + * * It is the responsibility of the caller to close the resulting gather map instances. + * * @param rightHash hash table built from join key columns from the right table * @return left and right table gather maps */ @@ -2968,11 +2972,15 @@ public GatherMap[] leftJoinGatherMaps(HashJoin rightHash) { * the {@link HashJoin} argument has been constructed from the key columns from the right table. * Two {@link GatherMap} instances will be returned that can be used to gather the left and right * tables, respectively, to produce the result of the left join. + * * It is the responsibility of the caller to close the resulting gather map instances. + * * This interface allows passing an output row count that was previously computed from * {@link #leftJoinRowCount(HashJoin)}. + * * WARNING: Passing a row count that is smaller than the actual row count will result * in undefined behavior. + * * @param rightHash hash table built from join key columns from the right table * @param outputRowCount number of output rows in the join result * @return left and right table gather maps @@ -3006,7 +3014,9 @@ public long conditionalLeftJoinRowCount(Table rightTable, CompiledExpression con * the columns from the left table, and the table argument represents the columns from the * right table. Two {@link GatherMap} instances will be returned that can be used to gather * the left and right tables, respectively, to produce the result of the left join. 
+ * * It is the responsibility of the caller to close the resulting gather map instances. + * * @param rightTable the right side table of the join in the join * @param condition conditional expression to evaluate during the join * @return left and right table gather maps @@ -3025,11 +3035,15 @@ public GatherMap[] conditionalLeftJoinGatherMaps(Table rightTable, * the columns from the left table, and the table argument represents the columns from the * right table. Two {@link GatherMap} instances will be returned that can be used to gather * the left and right tables, respectively, to produce the result of the left join. + * * It is the responsibility of the caller to close the resulting gather map instances. + * * This interface allows passing an output row count that was previously computed from * {@link #conditionalLeftJoinRowCount(Table, CompiledExpression)}. + * * WARNING: Passing a row count that is smaller than the actual row count will result * in undefined behavior. + * * @param rightTable the right side table of the join in the join * @param condition conditional expression to evaluate during the join * @param outputRowCount number of output rows in the join result @@ -3078,7 +3092,9 @@ public static MixedJoinSize mixedLeftJoinSize(Table leftKeys, Table rightKeys, * assumed to be a logical AND of the equality condition and inequality condition. * Two {@link GatherMap} instances will be returned that can be used to gather * the left and right tables, respectively, to produce the result of the left join. + * * It is the responsibility of the caller to close the resulting gather map instances. 
+ * * @param leftKeys the left table's key columns for the equality condition * @param rightKeys the right table's key columns for the equality condition * @param leftConditional the left table's columns needed to evaluate the inequality condition @@ -3105,10 +3121,13 @@ public static GatherMap[] mixedLeftJoinGatherMaps(Table leftKeys, Table rightKey * assumed to be a logical AND of the equality condition and inequality condition. * Two {@link GatherMap} instances will be returned that can be used to gather * the left and right tables, respectively, to produce the result of the left join. + * * It is the responsibility of the caller to close the resulting gather map instances. + * * This interface allows passing the size result from * {@link #mixedLeftJoinSize(Table, Table, Table, Table, CompiledExpression, NullEquality)} * when the output size was computed previously. + * * @param leftKeys the left table's key columns for the equality condition * @param rightKeys the right table's key columns for the equality condition * @param leftConditional the left table's columns needed to evaluate the inequality condition @@ -3138,14 +3157,16 @@ public static GatherMap[] mixedLeftJoinGatherMaps(Table leftKeys, Table rightKey * the table argument represents the key columns from the right table. Two {@link GatherMap} * instances will be returned that can be used to gather the left and right tables, * respectively, to produce the result of the inner join. + * * It is the responsibility of the caller to close the resulting gather map instances. 
+ * * @param rightKeys join key columns from the right table * @param compareNullsEqual true if null key values should match otherwise false * @return left and right table gather maps */ public GatherMap[] innerJoinGatherMaps(Table rightKeys, boolean compareNullsEqual) { if (getNumberOfColumns() != rightKeys.getNumberOfColumns()) { - throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + + throw new IllegalArgumentException("Column count mismatch, this: " + getNumberOfColumns() + "rightKeys: " + rightKeys.getNumberOfColumns()); } long[] gatherMapData = @@ -3160,14 +3181,16 @@ public GatherMap[] innerJoinGatherMaps(Table rightKeys, boolean compareNullsEqua * represents the key columns from the right table. Two {@link GatherMap} instances will be * returned that can be used to gather the left and right tables, respectively, to produce the * result of the inner join. + * * It is the responsibility of the caller to close the resulting gather map instances. 
+ * * @param rightKeys join key columns from the right table * @param compareNullsEqual true if null key values should match otherwise false * @return left and right table gather maps */ public GatherMap[] innerDistinctJoinGatherMaps(Table rightKeys, boolean compareNullsEqual) { if (getNumberOfColumns() != rightKeys.getNumberOfColumns()) { - throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + + throw new IllegalArgumentException("Column count mismatch, this: " + getNumberOfColumns() + "rightKeys: " + rightKeys.getNumberOfColumns()); } long[] gatherMapData = @@ -3182,7 +3205,7 @@ public GatherMap[] innerDistinctJoinGatherMaps(Table rightKeys, boolean compareN */ public long innerJoinRowCount(HashJoin otherHash) { if (getNumberOfColumns() != otherHash.getNumberOfColumns()) { - throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + + throw new IllegalArgumentException("Column count mismatch, this: " + getNumberOfColumns() + "otherKeys: " + otherHash.getNumberOfColumns()); } return innerJoinRowCount(getNativeView(), otherHash.getNativeView()); @@ -3194,13 +3217,15 @@ public long innerJoinRowCount(HashJoin otherHash) { * the {@link HashJoin} argument has been constructed from the key columns from the right table. * Two {@link GatherMap} instances will be returned that can be used to gather the left and right * tables, respectively, to produce the result of the inner join. + * * It is the responsibility of the caller to close the resulting gather map instances. 
+ * * @param rightHash hash table built from join key columns from the right table * @return left and right table gather maps */ public GatherMap[] innerJoinGatherMaps(HashJoin rightHash) { if (getNumberOfColumns() != rightHash.getNumberOfColumns()) { - throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + + throw new IllegalArgumentException("Column count mismatch, this: " + getNumberOfColumns() + "rightKeys: " + rightHash.getNumberOfColumns()); } long[] gatherMapData = innerHashJoinGatherMaps(getNativeView(), rightHash.getNativeView()); @@ -3213,18 +3238,22 @@ public GatherMap[] innerJoinGatherMaps(HashJoin rightHash) { * the {@link HashJoin} argument has been constructed from the key columns from the right table. * Two {@link GatherMap} instances will be returned that can be used to gather the left and right * tables, respectively, to produce the result of the inner join. + * * It is the responsibility of the caller to close the resulting gather map instances. + * * This interface allows passing an output row count that was previously computed from * {@link #innerJoinRowCount(HashJoin)}. + * * WARNING: Passing a row count that is smaller than the actual row count will result * in undefined behavior. 
+ * * @param rightHash hash table built from join key columns from the right table * @param outputRowCount number of output rows in the join result * @return left and right table gather maps */ public GatherMap[] innerJoinGatherMaps(HashJoin rightHash, long outputRowCount) { if (getNumberOfColumns() != rightHash.getNumberOfColumns()) { - throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + + throw new IllegalArgumentException("Column count mismatch, this: " + getNumberOfColumns() + "rightKeys: " + rightHash.getNumberOfColumns()); } long[] gatherMapData = innerHashJoinGatherMapsWithCount(getNativeView(), @@ -3252,7 +3281,9 @@ public long conditionalInnerJoinRowCount(Table rightTable, * the columns from the left table, and the table argument represents the columns from the * right table. Two {@link GatherMap} instances will be returned that can be used to gather * the left and right tables, respectively, to produce the result of the inner join. + * * It is the responsibility of the caller to close the resulting gather map instances. + * * @param rightTable the right side table of the join * @param condition conditional expression to evaluate during the join * @return left and right table gather maps @@ -3271,11 +3302,15 @@ public GatherMap[] conditionalInnerJoinGatherMaps(Table rightTable, * the columns from the left table, and the table argument represents the columns from the * right table. Two {@link GatherMap} instances will be returned that can be used to gather * the left and right tables, respectively, to produce the result of the inner join. + * * It is the responsibility of the caller to close the resulting gather map instances. + * * This interface allows passing an output row count that was previously computed from * {@link #conditionalInnerJoinRowCount(Table, CompiledExpression)}. + * * WARNING: Passing a row count that is smaller than the actual row count will result * in undefined behavior. 
+ * * @param rightTable the right side table of the join in the join * @param condition conditional expression to evaluate during the join * @param outputRowCount number of output rows in the join result @@ -3324,7 +3359,9 @@ public static MixedJoinSize mixedInnerJoinSize(Table leftKeys, Table rightKeys, * assumed to be a logical AND of the equality condition and inequality condition. * Two {@link GatherMap} instances will be returned that can be used to gather * the left and right tables, respectively, to produce the result of the inner join. + * * It is the responsibility of the caller to close the resulting gather map instances. + * * @param leftKeys the left table's key columns for the equality condition * @param rightKeys the right table's key columns for the equality condition * @param leftConditional the left table's columns needed to evaluate the inequality condition @@ -3351,10 +3388,13 @@ public static GatherMap[] mixedInnerJoinGatherMaps(Table leftKeys, Table rightKe * assumed to be a logical AND of the equality condition and inequality condition. * Two {@link GatherMap} instances will be returned that can be used to gather * the left and right tables, respectively, to produce the result of the inner join. + * * It is the responsibility of the caller to close the resulting gather map instances. + * * This interface allows passing the size result from * {@link #mixedInnerJoinSize(Table, Table, Table, Table, CompiledExpression, NullEquality)} * when the output size was computed previously. + * * @param leftKeys the left table's key columns for the equality condition * @param rightKeys the right table's key columns for the equality condition * @param leftConditional the left table's columns needed to evaluate the inequality condition @@ -3384,14 +3424,16 @@ public static GatherMap[] mixedInnerJoinGatherMaps(Table leftKeys, Table rightKe * the table argument represents the key columns from the right table. 
Two {@link GatherMap} * instances will be returned that can be used to gather the left and right tables, * respectively, to produce the result of the full join. + * * It is the responsibility of the caller to close the resulting gather map instances. + * * @param rightKeys join key columns from the right table * @param compareNullsEqual true if null key values should match otherwise false * @return left and right table gather maps */ public GatherMap[] fullJoinGatherMaps(Table rightKeys, boolean compareNullsEqual) { if (getNumberOfColumns() != rightKeys.getNumberOfColumns()) { - throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + + throw new IllegalArgumentException("Column count mismatch, this: " + getNumberOfColumns() + "rightKeys: " + rightKeys.getNumberOfColumns()); } long[] gatherMapData = @@ -3411,7 +3453,7 @@ public GatherMap[] fullJoinGatherMaps(Table rightKeys, boolean compareNullsEqual */ public long fullJoinRowCount(HashJoin rightHash) { if (getNumberOfColumns() != rightHash.getNumberOfColumns()) { - throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + + throw new IllegalArgumentException("Column count mismatch, this: " + getNumberOfColumns() + "rightKeys: " + rightHash.getNumberOfColumns()); } return fullJoinRowCount(getNativeView(), rightHash.getNativeView()); @@ -3423,13 +3465,15 @@ public long fullJoinRowCount(HashJoin rightHash) { * the {@link HashJoin} argument has been constructed from the key columns from the right table. * Two {@link GatherMap} instances will be returned that can be used to gather the left and right * tables, respectively, to produce the result of the full join. + * * It is the responsibility of the caller to close the resulting gather map instances. 
+ * * @param rightHash hash table built from join key columns from the right table * @return left and right table gather maps */ public GatherMap[] fullJoinGatherMaps(HashJoin rightHash) { if (getNumberOfColumns() != rightHash.getNumberOfColumns()) { - throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + + throw new IllegalArgumentException("Column count mismatch, this: " + getNumberOfColumns() + "rightKeys: " + rightHash.getNumberOfColumns()); } long[] gatherMapData = fullHashJoinGatherMaps(getNativeView(), rightHash.getNativeView()); @@ -3442,7 +3486,9 @@ public GatherMap[] fullJoinGatherMaps(HashJoin rightHash) { * the {@link HashJoin} argument has been constructed from the key columns from the right table. * Two {@link GatherMap} instances will be returned that can be used to gather the left and right * tables, respectively, to produce the result of the full join. + * * It is the responsibility of the caller to close the resulting gather map instances. + * * This interface allows passing an output row count that was previously computed from * {@link #fullJoinRowCount(HashJoin)}. * WARNING: Passing a row count that is smaller than the actual row count will result @@ -3453,7 +3499,7 @@ public GatherMap[] fullJoinGatherMaps(HashJoin rightHash) { */ public GatherMap[] fullJoinGatherMaps(HashJoin rightHash, long outputRowCount) { if (getNumberOfColumns() != rightHash.getNumberOfColumns()) { - throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + + throw new IllegalArgumentException("Column count mismatch, this: " + getNumberOfColumns() + "rightKeys: " + rightHash.getNumberOfColumns()); } long[] gatherMapData = fullHashJoinGatherMapsWithCount(getNativeView(), @@ -3467,7 +3513,9 @@ public GatherMap[] fullJoinGatherMaps(HashJoin rightHash, long outputRowCount) { * the columns from the left table, and the table argument represents the columns from the * right table. 
Two {@link GatherMap} instances will be returned that can be used to gather * the left and right tables, respectively, to produce the result of the full join. + * * It is the responsibility of the caller to close the resulting gather map instances. + * * @param rightTable the right side table of the join * @param condition conditional expression to evaluate during the join * @return left and right table gather maps @@ -3486,7 +3534,9 @@ public GatherMap[] conditionalFullJoinGatherMaps(Table rightTable, * assumed to be a logical AND of the equality condition and inequality condition. * Two {@link GatherMap} instances will be returned that can be used to gather * the left and right tables, respectively, to produce the result of the full join. + * * It is the responsibility of the caller to close the resulting gather map instances. + * * @param leftKeys the left table's key columns for the equality condition * @param rightKeys the right table's key columns for the equality condition * @param leftConditional the left table's columns needed to evaluate the inequality condition @@ -3527,7 +3577,7 @@ private static GatherMap buildSemiJoinGatherMap(long[] gatherMapData) { */ public GatherMap leftSemiJoinGatherMap(Table rightKeys, boolean compareNullsEqual) { if (getNumberOfColumns() != rightKeys.getNumberOfColumns()) { - throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + + throw new IllegalArgumentException("Column count mismatch, this: " + getNumberOfColumns() + "rightKeys: " + rightKeys.getNumberOfColumns()); } long[] gatherMapData = @@ -3627,7 +3677,9 @@ public static MixedJoinSize mixedLeftSemiJoinSize(Table leftKeys, Table rightKey * assumed to be a logical AND of the equality condition and inequality condition. * A {@link GatherMap} instance will be returned that can be used to gather * the left table to produce the result of the left semi join. 
+ * * It is the responsibility of the caller to close the resulting gather map instances. + * * @param leftKeys the left table's key columns for the equality condition * @param rightKeys the right table's key columns for the equality condition * @param leftConditional the left table's columns needed to evaluate the inequality condition @@ -3654,10 +3706,13 @@ public static GatherMap mixedLeftSemiJoinGatherMap(Table leftKeys, Table rightKe * assumed to be a logical AND of the equality condition and inequality condition. * A {@link GatherMap} instance will be returned that can be used to gather * the left table to produce the result of the left semi join. + * * It is the responsibility of the caller to close the resulting gather map instances. + * * This interface allows passing the size result from * {@link #mixedLeftSemiJoinSize(Table, Table, Table, Table, CompiledExpression, NullEquality)} * when the output size was computed previously. + * * @param leftKeys the left table's key columns for the equality condition * @param rightKeys the right table's key columns for the equality condition * @param leftConditional the left table's columns needed to evaluate the inequality condition @@ -3694,7 +3749,7 @@ public static GatherMap mixedLeftSemiJoinGatherMap(Table leftKeys, Table rightKe */ public GatherMap leftAntiJoinGatherMap(Table rightKeys, boolean compareNullsEqual) { if (getNumberOfColumns() != rightKeys.getNumberOfColumns()) { - throw new IllegalArgumentException("column count mismatch, this: " + getNumberOfColumns() + + throw new IllegalArgumentException("Column count mismatch, this: " + getNumberOfColumns() + "rightKeys: " + rightKeys.getNumberOfColumns()); } long[] gatherMapData = @@ -3794,7 +3849,9 @@ public static MixedJoinSize mixedLeftAntiJoinSize(Table leftKeys, Table rightKey * assumed to be a logical AND of the equality condition and inequality condition. 
* A {@link GatherMap} instance will be returned that can be used to gather * the left table to produce the result of the left anti join. + * * It is the responsibility of the caller to close the resulting gather map instances. + * * @param leftKeys the left table's key columns for the equality condition * @param rightKeys the right table's key columns for the equality condition * @param leftConditional the left table's columns needed to evaluate the inequality condition @@ -3821,10 +3878,13 @@ public static GatherMap mixedLeftAntiJoinGatherMap(Table leftKeys, Table rightKe * assumed to be a logical AND of the equality condition and inequality condition. * A {@link GatherMap} instance will be returned that can be used to gather * the left table to produce the result of the left anti join. + * * It is the responsibility of the caller to close the resulting gather map instances. + * * This interface allows passing the size result from * {@link #mixedLeftAntiJoinSize(Table, Table, Table, Table, CompiledExpression, NullEquality)} * when the output size was computed previously. + * * @param leftKeys the left table's key columns for the equality condition * @param rightKeys the right table's key columns for the equality condition * @param leftConditional the left table's columns needed to evaluate the inequality condition