From 7e42959b012325b591d7b3369434c9d568068b0d Mon Sep 17 00:00:00 2001 From: Cece Mei Date: Wed, 25 Sep 2024 23:11:26 -0700 Subject: [PATCH] Incorporate `estimatedComputeCost` into all `BitmapColumnIndex` classes. (#17125) changes: * filter index processing is now automatically ordered based on estimated 'cost', which is approximated based on how many expected bitmap operations are required to construct the bitmap used for the 'offset' * cursorAutoArrangeFilters context flag now defaults to true, but can be set to false to disable cost based filter index sorting --- .../query/SqlExpressionBenchmark.java | 28 ++- .../query/SqlNestedDataBenchmark.java | 2 +- .../segment/QueryableIndexCursorHolder.java | 2 +- .../druid/segment/filter/BoundFilter.java | 8 +- .../druid/segment/filter/IsBooleanFilter.java | 6 + .../druid/segment/filter/NotFilter.java | 6 + .../druid/segment/filter/SpatialFilter.java | 6 + .../index/AllFalseBitmapColumnIndex.java | 6 + .../index/AllTrueBitmapColumnIndex.java | 6 + .../index/AllUnknownBitmapColumnIndex.java | 6 + .../segment/index/BitmapColumnIndex.java | 10 +- .../DictionaryRangeScanningBitmapIndex.java | 6 + .../index/DictionaryScanningBitmapIndex.java | 6 + .../index/IndexedUtf8ValueIndexes.java | 15 +- .../index/SimpleImmutableBitmapIndex.java | 6 + .../index/semantic/ValueSetIndexes.java | 35 ++- .../NestedFieldColumnIndexSupplier.java | 88 ++++++- .../ScalarDoubleColumnAndIndexSupplier.java | 21 ++ .../ScalarLongColumnAndIndexSupplier.java | 20 ++ .../nested/VariantColumnAndIndexSupplier.java | 10 + .../virtual/ListFilteredVirtualColumn.java | 133 +++++++---- .../segment/filter/FilterBundleTest.java | 214 +++++++++++++++--- 22 files changed, 532 insertions(+), 108 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java index d351b40f2dbf..cc6dd636df08 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java @@ -118,6 +118,7 @@ public int getNumThreads() { return 1; } + @Override public String getFormatString() { @@ -220,8 +221,11 @@ public String getFormatString() "SELECT ARRAY_CONTAINS(\"multi-string3\", 100) FROM foo", "SELECT ARRAY_CONTAINS(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM foo", "SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 100]) FROM foo", - "SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM foo" - ); + "SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM foo", + // 46: filters with random orders + "SELECT string2, SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND string1 = '1000' GROUP BY 1 ORDER BY 2", + "SELECT string2, SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND (string3 in ('1', '10', '20', '22', '32') AND long2 IN (1, 19, 21, 23, 25, 26, 46) AND double3 < 1010.0 AND double3 > 1000.0 AND (string4 = '1' OR REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL)) AND string1 = '1000' GROUP BY 1 ORDER BY 2" + ); @Param({"5000000"}) private int rowsPerSegment; @@ -294,7 +298,9 @@ public String getFormatString() "42", "43", "44", - "45" + "45", + "46", + "47" }) private String query; @@ -319,7 +325,12 @@ public void setup() final PlannerConfig plannerConfig = new PlannerConfig(); final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator()); - log.info("Starting benchmark setup using cacheDir[%s], rows[%,d], schema[%s].", segmentGenerator.getCacheDir(), rowsPerSegment, schema); + log.info( + "Starting benchmark setup using cacheDir[%s], rows[%,d], schema[%s].", + segmentGenerator.getCacheDir(), + rowsPerSegment, + schema + ); final QueryableIndex index; if ("auto".equals(schema)) { List columnSchemas = schemaInfo.getDimensionsSpec() @@ -383,7 +394,14 @@ public void setup() final String sql = QUERIES.get(Integer.parseInt(query)); - try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, "EXPLAIN PLAN FOR " + sql, ImmutableMap.of("useNativeQueryExplain", true))) { + try (final DruidPlanner planner = plannerFactory.createPlannerForTesting( + engine, + "EXPLAIN PLAN FOR " + sql, + ImmutableMap.of( + "useNativeQueryExplain", + true + ) + )) { final PlannerResult plannerResult = planner.plan(); final Sequence resultSequence = plannerResult.run().getResults(); final Object[] planResult = resultSequence.toList().get(0); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java index b896f8df52fc..fa5942dc2c98 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java @@ -199,7 +199,7 @@ public String getFormatString() // 42, 43 big cardinality like predicate filter "SELECT SUM(long1) FROM foo WHERE string5 LIKE '%1%'", "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%'", - // 44, 45 big cardinality like filter + selector filter + // 44, 45 big cardinality like filter + selector filter with different ordering "SELECT SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND string1 = '1000'", "SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%' AND JSON_VALUE(nested, '$.nesteder.string1') = '1000'", "SELECT SUM(long1) FROM foo WHERE string1 = '1000' AND string5 LIKE '%1%'", diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorHolder.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorHolder.java index 5aa4dbed8cc0..4b295931f0a8 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorHolder.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorHolder.java @@ -113,7 +113,7 @@ public QueryableIndexCursorHolder( Cursors.getTimeOrdering(ordering), interval, filter, - cursorBuildSpec.getQueryContext().getBoolean(QueryContexts.CURSOR_AUTO_ARRANGE_FILTERS, false), + cursorBuildSpec.getQueryContext().getBoolean(QueryContexts.CURSOR_AUTO_ARRANGE_FILTERS, true), metrics ) ); diff --git a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java index 4fa1a4802489..ca6f66b8fdaf 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java @@ -149,8 +149,6 @@ private BitmapColumnIndex wrapRangeIndexWithNullValueIndex( BitmapColumnIndex rangeIndex ) { - - final BitmapColumnIndex nullBitmap; final NullValueIndex nulls = indexSupplier.as(NullValueIndex.class); if (nulls == null) { @@ -166,6 +164,12 @@ public ColumnIndexCapabilities getIndexCapabilities() return rangeIndex.getIndexCapabilities().merge(nullBitmap.getIndexCapabilities()); } + @Override + public int estimatedComputeCost() + { + return rangeIndex.estimatedComputeCost() + 1; + } + @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java index 75eaebb27c81..d39afdecb7f0 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java @@ -78,6 +78,12 @@ public ColumnIndexCapabilities getIndexCapabilities() return baseIndex.getIndexCapabilities(); } + @Override + public int estimatedComputeCost() + { + return baseIndex.estimatedComputeCost(); + } + @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java index 58dd90c8af18..3e89148e2ccc 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/NotFilter.java @@ -80,6 +80,12 @@ public ColumnIndexCapabilities getIndexCapabilities() return baseIndex.getIndexCapabilities(); } + @Override + public int estimatedComputeCost() + { + return baseIndex.estimatedComputeCost(); + } + @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/SpatialFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/SpatialFilter.java index a79d4d2ac222..311f32667dd3 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/SpatialFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/SpatialFilter.java @@ -94,6 +94,12 @@ public ColumnIndexCapabilities getIndexCapabilities() return new SimpleColumnIndexCapabilities(true, true); } + @Override + public int estimatedComputeCost() + { + return Integer.MAX_VALUE; + } + @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { diff --git a/processing/src/main/java/org/apache/druid/segment/index/AllFalseBitmapColumnIndex.java b/processing/src/main/java/org/apache/druid/segment/index/AllFalseBitmapColumnIndex.java index d0469dffe560..907ce55cb2a9 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/AllFalseBitmapColumnIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/AllFalseBitmapColumnIndex.java @@ -50,6 +50,12 @@ public ColumnIndexCapabilities getIndexCapabilities() return SimpleColumnIndexCapabilities.getConstant(); } + @Override + public int estimatedComputeCost() + { + return 0; + } + @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { diff --git a/processing/src/main/java/org/apache/druid/segment/index/AllTrueBitmapColumnIndex.java b/processing/src/main/java/org/apache/druid/segment/index/AllTrueBitmapColumnIndex.java index 02ff2c5bccaa..5bd48d31802b 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/AllTrueBitmapColumnIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/AllTrueBitmapColumnIndex.java @@ -39,6 +39,12 @@ public ColumnIndexCapabilities getIndexCapabilities() return SimpleColumnIndexCapabilities.getConstant(); } + @Override + public int estimatedComputeCost() + { + return 0; + } + @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { diff --git a/processing/src/main/java/org/apache/druid/segment/index/AllUnknownBitmapColumnIndex.java b/processing/src/main/java/org/apache/druid/segment/index/AllUnknownBitmapColumnIndex.java index a33ae663d53d..793773ede11e 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/AllUnknownBitmapColumnIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/AllUnknownBitmapColumnIndex.java @@ -44,6 +44,12 @@ public ColumnIndexCapabilities getIndexCapabilities() return SimpleColumnIndexCapabilities.getConstant(); } + @Override + public int estimatedComputeCost() + { + return 0; + } + @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { diff --git a/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java b/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java index f28429969d33..97d24ff0babf 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java @@ -36,12 +36,12 @@ public interface BitmapColumnIndex ColumnIndexCapabilities getIndexCapabilities(); /** - * Returns an estimated cost for computing the bitmap result. + * Returns an estimated cost for computing the bitmap result. Generally this is equivalent to number of bitmap union + * or intersection operations need to be performed. E.x. null value index bitmap has a cost of 0, non-null value index + * bitmap union with null bitmap has a cost of 1, range (size of 10) scanning index bitmap union with null bitmap has + * a cost of 10. */ - default int estimatedComputeCost() - { - return Integer.MAX_VALUE; - } + int estimatedComputeCost(); /** * Compute a bitmap result wrapped with the {@link BitmapResultFactory} representing the rows matched by this index. diff --git a/processing/src/main/java/org/apache/druid/segment/index/DictionaryRangeScanningBitmapIndex.java b/processing/src/main/java/org/apache/druid/segment/index/DictionaryRangeScanningBitmapIndex.java index 37e84bb8b5d0..ee0a2dc67adf 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/DictionaryRangeScanningBitmapIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/DictionaryRangeScanningBitmapIndex.java @@ -46,6 +46,12 @@ public DictionaryRangeScanningBitmapIndex(double sizeScale, int rangeSize) this.rangeSize = rangeSize; } + @Override + public int estimatedComputeCost() + { + return this.rangeSize; + } + @Nullable @Override public final T computeBitmapResult( diff --git a/processing/src/main/java/org/apache/druid/segment/index/DictionaryScanningBitmapIndex.java b/processing/src/main/java/org/apache/druid/segment/index/DictionaryScanningBitmapIndex.java index 1ae00a846baa..118d310d6566 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/DictionaryScanningBitmapIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/DictionaryScanningBitmapIndex.java @@ -49,6 +49,12 @@ public DictionaryScanningBitmapIndex(int dictionarySize, double scaleThreshold) this.scaleThreshold = scaleThreshold; } + @Override + public int estimatedComputeCost() + { + return this.dictionarySize; + } + @Nullable @Override public final T computeBitmapResult( diff --git a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java index 6015088d558d..a9819fd75cb3 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java @@ -83,6 +83,11 @@ public BitmapColumnIndex forValue(@Nullable String value) final ByteBuffer utf8 = StringUtils.toUtf8ByteBuffer(value); return new SimpleBitmapColumnIndex() { + @Override + public int estimatedComputeCost() + { + return 1; + } @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) @@ -122,10 +127,7 @@ public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature values) { return getBitmapColumnIndexForSortedIterableUtf8( - Iterables.transform( - values, - StringUtils::toUtf8ByteBuffer - ), + Iterables.transform(values, StringUtils::toUtf8ByteBuffer), values.size(), values.contains(null) ); @@ -181,6 +183,7 @@ private BitmapColumnIndex getBitmapColumnIndexForSortedIterableUtf8( bitmapFactory, COMPARATOR, valuesUtf8, + size, dictionary, bitmaps, () -> { @@ -197,6 +200,7 @@ private BitmapColumnIndex getBitmapColumnIndexForSortedIterableUtf8( return ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorBinarySearch( bitmapFactory, valuesUtf8, + size, dictionary, bitmaps, () -> { @@ -242,6 +246,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign bitmapFactory, ByteBufferUtils.utf8Comparator(), Iterables.transform(tailSet, StringUtils::toUtf8ByteBuffer), + tailSet.size(), dictionary, bitmaps, unknownsIndex @@ -251,6 +256,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign return ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorBinarySearch( bitmapFactory, Iterables.transform(tailSet, StringUtils::toUtf8ByteBuffer), + tailSet.size(), dictionary, bitmaps, unknownsIndex @@ -262,6 +268,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign sortedValues, x -> StringUtils.toUtf8ByteBuffer(DimensionHandlerUtils.convertObjectToString(x)) ), + sortedValues.size(), dictionary, bitmaps, unknownsIndex diff --git a/processing/src/main/java/org/apache/druid/segment/index/SimpleImmutableBitmapIndex.java b/processing/src/main/java/org/apache/druid/segment/index/SimpleImmutableBitmapIndex.java index 7c6f6023ed0a..6c4177dc53a4 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/SimpleImmutableBitmapIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/SimpleImmutableBitmapIndex.java @@ -35,6 +35,12 @@ public SimpleImmutableBitmapIndex(ImmutableBitmap bitmap) this.bitmap = bitmap; } + @Override + public int estimatedComputeCost() + { + return 0; + } + @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java index 56d6da9a19af..f31f0a7bfedf 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/ValueSetIndexes.java @@ -63,8 +63,8 @@ public interface ValueSetIndexes * @param sortedValues values to match, sorted in matchValueType order * @param matchValueType type of the value to match, used to assist conversion from the match value type to the column * value type - * @return {@link ImmutableBitmap} corresponding to the rows which match the values, or null if an index - * connot be computed for the supplied value type + * @return {@link ImmutableBitmap} corresponding to the rows which match the values, or null if an index + * connot be computed for the supplied value type */ @Nullable BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSignature matchValueType); @@ -78,10 +78,10 @@ public interface ValueSetIndexes * {@link Indexed} value dictionary. Uses a strategy that does zipping similar to the merge step of a sort-merge, * where we step forward on both the iterator and the dictionary to find matches to build a * {@link Iterable}. - *

+ *

* If sorted match value iterator size is greater than (dictionary size * {@link #SORTED_SCAN_RATIO_THRESHOLD}), * consider using this method instead of {@link #buildBitmapColumnIndexFromSortedIteratorBinarySearch}. - *

+ *

* If the values in the iterator are NOT sorted the same as the dictionary, do NOT use this method, use * {@link #buildBitmapColumnIndexFromIteratorBinarySearch} instead. */ @@ -89,6 +89,7 @@ static BitmapColumnIndex buildBitmapColumnIndexFromSortedIteratorScan( BitmapFactory bitmapFactory, Comparator comparator, Iterable values, + int size, Indexed dictionary, Indexed bitmaps, Supplier unknownsBitmap @@ -96,6 +97,13 @@ static BitmapColumnIndex buildBitmapColumnIndexFromSortedIteratorScan( { return new BaseValueSetIndexesFromIterable(bitmapFactory, bitmaps, unknownsBitmap) { + + @Override + public int estimatedComputeCost() + { + return Integer.max(size, dictionary.size()); + } + @Override public Iterable getBitmapIterable() { @@ -163,13 +171,14 @@ private void findNext() *

* If sorted match value iterator size is less than (dictionary size * {@link #SORTED_SCAN_RATIO_THRESHOLD}), * consider using this method instead of {@link #buildBitmapColumnIndexFromSortedIteratorScan}. - *

+ *

* If the values in the iterator are not sorted the same as the dictionary, do not use this method, use * {@link #buildBitmapColumnIndexFromIteratorBinarySearch} instead. */ static BitmapColumnIndex buildBitmapColumnIndexFromSortedIteratorBinarySearch( BitmapFactory bitmapFactory, Iterable values, + int size, Indexed dictionary, Indexed bitmaps, Supplier getUnknownsIndex @@ -177,6 +186,13 @@ static BitmapColumnIndex buildBitmapColumnIndexFromSortedIteratorBinarySearc { return new BaseValueSetIndexesFromIterable(bitmapFactory, bitmaps, getUnknownsIndex) { + + @Override + public int estimatedComputeCost() + { + return size; + } + @Override public Iterable getBitmapIterable() { @@ -236,7 +252,7 @@ private void findNext() * {@link Indexed} value dictionary. This algorithm iterates the values to match and does a binary search for * matching values using {@link Indexed#indexOf(Object)} to build a {@link Iterable} until the match * values iterator is exhausted. - *

+ *

* If values of the iterator are sorted the same as the dictionary, use * {@link #buildBitmapColumnIndexFromSortedIteratorScan} or * {@link #buildBitmapColumnIndexFromSortedIteratorBinarySearch} instead. @@ -244,6 +260,7 @@ private void findNext() static BitmapColumnIndex buildBitmapColumnIndexFromIteratorBinarySearch( BitmapFactory bitmapFactory, Iterable values, + int size, Indexed dictionary, Indexed bitmaps, Supplier getUnknownsIndex @@ -251,6 +268,12 @@ static BitmapColumnIndex buildBitmapColumnIndexFromIteratorBinarySearch( { return new BaseValueSetIndexesFromIterable(bitmapFactory, bitmaps, getUnknownsIndex) { + @Override + public int estimatedComputeCost() + { + return size; + } + @Override public Iterable getBitmapIterable() { diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java index c784eaa8d13f..d53596964f8b 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java @@ -158,7 +158,8 @@ public T as(Class clazz) nullIndex = new SimpleImmutableBitmapIndex(bitmapFactory.makeEmptyImmutableBitmap()); } return (T) (NullValueIndex) () -> nullIndex; - } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) || clazz.equals(DictionaryEncodedValueIndex.class)) { + } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) + || clazz.equals(DictionaryEncodedValueIndex.class)) { return (T) new NestedFieldDictionaryEncodedStringValueIndex(); } @@ -402,6 +403,12 @@ public BitmapColumnIndex forValue(@Nullable String value) final FixedIndexed localDictionary = localDictionarySupplier.get(); final Indexed stringDictionary = globalStringDictionarySupplier.get(); + @Override + public int estimatedComputeCost() + { + return 1; + } + @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { @@ -430,6 +437,12 @@ public BitmapColumnIndex forSortedValues(SortedSet values) { return new SimpleImmutableBitmapDelegatingIterableIndex() { + @Override + public int estimatedComputeCost() + { + return values.size(); + } + @Override public Iterable getBitmapIterable() { @@ -548,7 +561,9 @@ public Iterable getBitmapIterable() private int findNext() { - while (currIndex < end && !matcher.apply(StringUtils.fromUtf8Nullable(stringDictionary.get(localDictionary.get(currIndex)))).matches(false)) { + while (currIndex < end + && !matcher.apply(StringUtils.fromUtf8Nullable(stringDictionary.get(localDictionary.get(currIndex)))) + .matches(false)) { currIndex++; } @@ -674,6 +689,12 @@ public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature localDictionary = localDictionarySupplier.get(); final FixedIndexed globalDictionary = globalLongDictionarySupplier.get(); + @Override + public int estimatedComputeCost() + { + return 1; + } + @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { @@ -715,6 +736,12 @@ public BitmapColumnIndex forValue(@Nullable String value) final FixedIndexed localDictionary = localDictionarySupplier.get(); final FixedIndexed longDictionary = globalLongDictionarySupplier.get(); + @Override + public int estimatedComputeCost() + { + return 1; + } + @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { @@ -750,6 +777,12 @@ public BitmapColumnIndex forSortedValues(SortedSet values) { return new SimpleImmutableBitmapDelegatingIterableIndex() { + @Override + public int estimatedComputeCost() + { + return values.size(); + } + @Override public Iterable getBitmapIterable() { @@ -922,7 +955,8 @@ private void findNext() if (nextValue == 0) { nextSet = longPredicate.applyNull().matches(includeUnknown); } else { - nextSet = longPredicate.applyLong(longDictionary.get(nextValue - adjustLongId)).matches(includeUnknown); + nextSet = longPredicate.applyLong(longDictionary.get(nextValue - adjustLongId)) + .matches(includeUnknown); } } } @@ -953,6 +987,12 @@ public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature localDictionary = localDictionarySupplier.get(); final FixedIndexed globalDictionary = globalDoubleDictionarySupplier.get(); + @Override + public int estimatedComputeCost() + { + return 1; + } + @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { @@ -993,6 +1033,12 @@ public BitmapColumnIndex forValue(@Nullable String value) final FixedIndexed localDictionary = localDictionarySupplier.get(); final FixedIndexed doubleDictionary = globalDoubleDictionarySupplier.get(); + @Override + public int estimatedComputeCost() + { + return 1; + } + @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { @@ -1028,6 +1074,12 @@ public BitmapColumnIndex forSortedValues(SortedSet values) { return new SimpleImmutableBitmapDelegatingIterableIndex() { + @Override + public int estimatedComputeCost() + { + return values.size(); + } + @Override public Iterable getBitmapIterable() { @@ -1204,6 +1256,9 @@ private abstract class NestedVariantIndexes final FrontCodedIntArrayIndexed arrayDictionary = globalArrayDictionarySupplier == null ? null : globalArrayDictionarySupplier.get(); + // For every single String value, we need to look up indexes from stringDictionary, longDictionary and + // doubleDictionary. Hence, the compute cost for one value is 3. + static final int INDEX_COMPUTE_SCALE = 3; IntList getIndexes(@Nullable String value) { @@ -1256,6 +1311,12 @@ public BitmapColumnIndex forValue(@Nullable String value) { return new SimpleImmutableBitmapDelegatingIterableIndex() { + @Override + public int estimatedComputeCost() + { + return NestedVariantIndexes.INDEX_COMPUTE_SCALE; + } + @Override protected Iterable getBitmapIterable() { @@ -1293,6 +1354,15 @@ public BitmapColumnIndex forSortedValues(SortedSet values) { return new SimpleImmutableBitmapDelegatingIterableIndex() { + @Override + public int estimatedComputeCost() + { + if (values.size() >= Integer.MAX_VALUE / NestedVariantIndexes.INDEX_COMPUTE_SCALE) { + return Integer.MAX_VALUE; + } + return values.size() * NestedVariantIndexes.INDEX_COMPUTE_SCALE; + } + @Override public Iterable getBitmapIterable() { @@ -1504,6 +1574,12 @@ public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { @@ -1574,6 +1650,12 @@ public BitmapColumnIndex containsValue(@Nullable Object value, TypeSignature T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java index e535d9cb54b6..5199c98d93f0 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java @@ -241,10 +241,17 @@ public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature dictionary = doubleDictionarySupplier.get(); + @Override + public int estimatedComputeCost() + { + return 1; + } + @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { @@ -307,6 +314,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign bitmapFactory, ColumnType.DOUBLE.getNullableStrategy(), tailSet, + tailSet.size(), dictionary, valueIndexes, unknownsIndex @@ -316,6 +324,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign return ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorBinarySearch( bitmapFactory, tailSet, + tailSet.size(), dictionary, valueIndexes, unknownsIndex @@ -325,6 +334,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign return ValueSetIndexes.buildBitmapColumnIndexFromIteratorBinarySearch( bitmapFactory, Iterables.transform(sortedValues, DimensionHandlerUtils::convertObjectToDouble), + sortedValues.size(), dictionary, valueIndexes, unknownsIndex @@ -345,6 +355,11 @@ public BitmapColumnIndex forValue(@Nullable String value) return new SimpleBitmapColumnIndex() { + @Override + public int estimatedComputeCost() + { + return 1; + } @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) @@ -390,6 +405,12 @@ public BitmapColumnIndex forSortedValues(SortedSet values) { return new SimpleImmutableBitmapDelegatingIterableIndex() { + @Override + public int estimatedComputeCost() + { + return values.size(); + } + @Override public Iterable getBitmapIterable() { diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java index a9932f7ae3cd..66527530336c 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java @@ -246,6 +246,12 @@ public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature dictionary = longDictionarySupplier.get(); + @Override + public int estimatedComputeCost() + { + return 1; + } + @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { @@ -305,6 +311,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign bitmapFactory, ColumnType.LONG.getNullableStrategy(), tailSet, + tailSet.size(), dictionary, valueIndexes, unknownsIndex @@ -314,6 +321,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign return ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorBinarySearch( bitmapFactory, tailSet, + tailSet.size(), dictionary, valueIndexes, unknownsIndex @@ -326,6 +334,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List sortedValues, TypeSign sortedValues, DimensionHandlerUtils::convertObjectToLong ), + sortedValues.size(), dictionary, valueIndexes, unknownsIndex @@ -346,6 +355,11 @@ public BitmapColumnIndex forValue(@Nullable String value) final Long longValue = GuavaUtils.tryParseLong(value); return new SimpleBitmapColumnIndex() { + @Override + public int estimatedComputeCost() + { + return 1; + } @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) @@ -390,6 +404,12 @@ public BitmapColumnIndex forSortedValues(SortedSet values) { return new SimpleImmutableBitmapDelegatingIterableIndex() { + @Override + public int estimatedComputeCost() + { + return values.size(); + } + @Override public Iterable getBitmapIterable() { diff --git a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java index aa1dd143e4e7..3cd9e4f7308a 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java @@ -363,6 +363,11 @@ public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) @@ -428,6 +433,11 @@ public BitmapColumnIndex containsValue(@Nullable Object value, TypeSignature T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java index 9262e8a17303..f82949512345 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java @@ -26,6 +26,8 @@ import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; +import it.unimi.dsi.fastutil.ints.IntIntImmutablePair; +import it.unimi.dsi.fastutil.ints.IntIntPair; import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.common.config.NullHandling; @@ -49,6 +51,8 @@ import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.index.DictionaryRangeScanningBitmapIndex; +import org.apache.druid.segment.index.DictionaryScanningBitmapIndex; import org.apache.druid.segment.index.SimpleBitmapColumnIndex; import org.apache.druid.segment.index.SimpleImmutableBitmapDelegatingIterableIndex; import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; @@ -139,9 +143,17 @@ public DimensionSelector makeDimensionSelector( ) { if (allowList) { - return ListFilteredDimensionSpec.filterAllowList(values, factory.makeDimensionSelector(delegate), delegate.getExtractionFn() != null); + return ListFilteredDimensionSpec.filterAllowList( + values, + factory.makeDimensionSelector(delegate), + delegate.getExtractionFn() != null + ); } else { - return ListFilteredDimensionSpec.filterDenyList(values, factory.makeDimensionSelector(delegate), delegate.getExtractionFn() != null); + return ListFilteredDimensionSpec.filterDenyList( + values, + factory.makeDimensionSelector(delegate), + delegate.getExtractionFn() != null + ); } } @@ -243,7 +255,8 @@ public T as(Class clazz) return (T) new ListFilteredDruidPredicateIndexes(underlyingIndex, idMapping, nullValueBitmapSupplier); } else if (clazz.equals(LexicographicalRangeIndexes.class)) { return (T) new ListFilteredLexicographicalRangeIndexes(underlyingIndex, idMapping, nullValueBitmapSupplier); - } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) || clazz.equals(DictionaryEncodedValueIndex.class)) { + } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) + || clazz.equals(DictionaryEncodedValueIndex.class)) { return (T) new ListFilteredDictionaryEncodedStringValueIndex(underlyingIndex, idMapping); } return null; @@ -320,7 +333,8 @@ private static Iterable getBitmapsInRange( private int findNext() { - while (currIndex < end && !matcher.apply(delegate.getValue(idMapping.getReverseId(currIndex))).matches(includeUnknown)) { + while (currIndex < end && !matcher.apply(delegate.getValue(idMapping.getReverseId(currIndex))) + .matches(includeUnknown)) { currIndex++; } @@ -400,7 +414,12 @@ int getReverseIndex(@Nullable String value) return -(minIndex + 1); } - Iterable getBitmapsInRange(DruidObjectPredicate matcher, int start, int end, boolean includeUnknown) + Iterable getBitmapsInRange( + DruidObjectPredicate matcher, + int start, + int end, + boolean includeUnknown + ) { return ListFilteredVirtualColumn.getBitmapsInRange(delegate, idMapping, matcher, start, end, includeUnknown); } @@ -420,6 +439,11 @@ public BitmapColumnIndex get() { return new SimpleBitmapColumnIndex() { + @Override + public int estimatedComputeCost() + { + return 0; + } @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknowns) @@ -450,6 +474,11 @@ public BitmapColumnIndex forValue(@Nullable String value) { return new SimpleBitmapColumnIndex() { + @Override + public int estimatedComputeCost() + { + return 1; + } @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) @@ -478,6 +507,12 @@ public BitmapColumnIndex forSortedValues(SortedSet values) { return new SimpleImmutableBitmapDelegatingIterableIndex() { + @Override + public int estimatedComputeCost() + { + return values.size(); + } + @Override public Iterable getBitmapIterable() { @@ -548,22 +583,17 @@ private ListFilteredDruidPredicateIndexes( @Nullable public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory) { - return new SimpleBitmapColumnIndex() + return new DictionaryScanningBitmapIndex(getCardinality()) { @Override - public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) + protected Iterable getBitmapIterable(boolean includeUnknown) { - final int start = 0, end = getCardinality(); - Iterable bitmaps; - if (includeUnknown) { - bitmaps = Iterables.concat( - getBitmapsInRange(matcherFactory.makeStringPredicate(), start, end, includeUnknown), - Collections.singletonList(nullValueBitmapSupplier.get()) - ); - } else { - bitmaps = getBitmapsInRange(matcherFactory.makeStringPredicate(), start, end, includeUnknown); - } - return bitmapResultFactory.unionDimensionValueBitmaps(bitmaps); + return Iterables.concat(getBitmapsInRange( + matcherFactory.makeStringPredicate(), + 0, + getCardinality(), + includeUnknown + ), includeUnknown ? Collections.singletonList(nullValueBitmapSupplier.get()) : Collections.emptyList()); } }; } @@ -606,37 +636,13 @@ public BitmapColumnIndex forRange( DruidObjectPredicate matcher ) { - return new SimpleImmutableBitmapDelegatingIterableIndex() + final IntIntPair range = getRange(startValue, startStrict, endValue, endStrict); + final int start = range.leftInt(), end = range.rightInt(); + return new DictionaryRangeScanningBitmapIndex(1.0, end - start) { @Override - public Iterable getBitmapIterable() + protected Iterable getBitmapIterable() { - int startIndex, endIndex; - final int firstValue = NullHandling.isNullOrEquivalent(delegate.getValue(idMapping.getReverseId(0))) ? 1 : 0; - if (startValue == null) { - startIndex = firstValue; - } else { - final int found = getReverseIndex(NullHandling.emptyToNullIfNeeded(startValue)); - if (found >= 0) { - startIndex = startStrict ? found + 1 : found; - } else { - startIndex = -(found + 1); - } - } - - if (endValue == null) { - endIndex = idMapping.getValueCardinality(); - } else { - final int found = getReverseIndex(NullHandling.emptyToNullIfNeeded(endValue)); - if (found >= 0) { - endIndex = endStrict ? found : found + 1; - } else { - endIndex = -(found + 1); - } - } - - endIndex = Math.max(startIndex, endIndex); - final int start = startIndex, end = endIndex; return getBitmapsInRange(matcher, start, end, false); } @@ -648,6 +654,41 @@ protected ImmutableBitmap getUnknownsBitmap() } }; } + + private IntIntPair getRange( + @Nullable String startValue, + boolean startStrict, + @Nullable String endValue, + boolean endStrict + ) + { + int startIndex, endIndex; + final int firstValue = NullHandling.isNullOrEquivalent(delegate.getValue(idMapping.getReverseId(0))) ? 1 : 0; + if (startValue == null) { + startIndex = firstValue; + } else { + final int found = getReverseIndex(NullHandling.emptyToNullIfNeeded(startValue)); + if (found >= 0) { + startIndex = startStrict ? found + 1 : found; + } else { + startIndex = -(found + 1); + } + } + + if (endValue == null) { + endIndex = idMapping.getValueCardinality(); + } else { + final int found = getReverseIndex(NullHandling.emptyToNullIfNeeded(endValue)); + if (found >= 0) { + endIndex = endStrict ? found : found + 1; + } else { + endIndex = -(found + 1); + } + } + + endIndex = Math.max(startIndex, endIndex); + return new IntIntImmutablePair(startIndex, endIndex); + } } private static class ListFilteredDictionaryEncodedStringValueIndex extends BaseListFilteredColumnIndex diff --git a/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java b/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java index 9bc86ae3900e..a7afd9755c53 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java @@ -20,6 +20,8 @@ package org.apache.druid.segment.filter; import com.google.common.collect.ImmutableList; +import junitparams.JUnitParamsRunner; +import junitparams.Parameters; import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.query.DefaultBitmapResultFactory; @@ -44,11 +46,8 @@ import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; -@RunWith(Parameterized.class) +@RunWith(JUnitParamsRunner.class) public class FilterBundleTest extends InitializedNullHandlingTest { private Closer closer; @@ -58,15 +57,6 @@ public class FilterBundleTest extends InitializedNullHandlingTest @Rule public TemporaryFolder tmpDir = new TemporaryFolder(); - @Parameters - public static Object[] flags() - { - return new Object[]{false, true}; - } - - @Parameter - public boolean cursorAutoArrangeFilters; - @Before public void setUp() { @@ -88,7 +78,8 @@ public void tearDown() throws Exception } @Test - public void test_or_country_isRobot() + @Parameters({"true", "false"}) + public void test_or_country_isRobot(boolean cursorAutoArrangeFilters) { final FilterBundle filterBundle = makeFilterBundle( new OrFilter( @@ -96,7 +87,8 @@ public void test_or_country_isRobot() new EqualityFilter("countryName", ColumnType.STRING, "United States", null), new EqualityFilter("isRobot", ColumnType.STRING, "true", null) ) - ) + ), + cursorAutoArrangeFilters ); Assert.assertEquals( @@ -108,7 +100,8 @@ public void test_or_country_isRobot() } @Test - public void test_and_country_isRobot() + @Parameters({"true", "false"}) + public void test_and_country_isRobot(boolean cursorAutoArrangeFilters) { final FilterBundle filterBundle = makeFilterBundle( new AndFilter( @@ -116,7 +109,8 @@ public void test_and_country_isRobot() new EqualityFilter("countryName", ColumnType.STRING, "United States", null), new EqualityFilter("isRobot", ColumnType.STRING, "true", null) ) - ) + ), + cursorAutoArrangeFilters ); Assert.assertEquals( @@ -128,7 +122,8 @@ public void test_and_country_isRobot() } @Test - public void test_or_countryIsNull_pageLike() + @Parameters({"true", "false"}) + public void test_or_countryIsNull_pageLike(boolean cursorAutoArrangeFilters) { final FilterBundle filterBundle = makeFilterBundle( new OrFilter( @@ -136,7 +131,8 @@ public void test_or_countryIsNull_pageLike() new NullFilter("countryName", null), new LikeDimFilter("page", "%u%", null, null).toFilter() ) - ) + ), + cursorAutoArrangeFilters ); Assert.assertEquals( @@ -149,7 +145,8 @@ public void test_or_countryIsNull_pageLike() } @Test - public void test_and_countryIsNull_pageLike() + @Parameters({"true", "false"}) + public void test_and_countryIsNull_pageLike(boolean cursorAutoArrangeFilters) { final FilterBundle filterBundle = makeFilterBundle( new AndFilter( @@ -157,7 +154,8 @@ public void test_and_countryIsNull_pageLike() new NullFilter("countryName", null), new LikeDimFilter("page", "%u%", null, null).toFilter() ) - ) + ), + cursorAutoArrangeFilters ); Assert.assertEquals( @@ -169,7 +167,8 @@ public void test_and_countryIsNull_pageLike() } @Test - public void test_and_country_pageLike() + @Parameters({"true", "false"}) + public void test_and_country_pageLike(boolean cursorAutoArrangeFilters) { final FilterBundle filterBundle = makeFilterBundle( new AndFilter( @@ -177,9 +176,32 @@ public void test_and_country_pageLike() new EqualityFilter("countryName", ColumnType.STRING, "United States", null), new LikeDimFilter("page", "%u%", null, null).toFilter() ) - ) + ), + cursorAutoArrangeFilters + ); + + Assert.assertEquals( + "index: countryName = United States (selectionSize = 528)\n" + + "matcher: page LIKE '%u%'\n", + filterBundle.getInfo().describe() + ); + } + + @Test + @Parameters({"true"}) + public void test_pageLike_and_country_pageLike_with_cursorAutoArrangeFilters(boolean cursorAutoArrangeFilters) + { + final FilterBundle filterBundle = makeFilterBundle( + new AndFilter( + ImmutableList.of( + new LikeDimFilter("page", "%u%", null, null).toFilter(), + new EqualityFilter("countryName", ColumnType.STRING, "United States", null) + ) + ), + cursorAutoArrangeFilters ); + // With cursorAutoArrangeFilters flag on, the indexes are sorted by cost ASC, hence country name index is used first. Assert.assertEquals( "index: countryName = United States (selectionSize = 528)\n" + "matcher: page LIKE '%u%'\n", @@ -188,7 +210,8 @@ public void test_and_country_pageLike() } @Test - public void test_or_countryNotNull_pageLike() + @Parameters({"true", "false"}) + public void test_or_countryNotNull_pageLike(boolean cursorAutoArrangeFilters) { final FilterBundle filterBundle = makeFilterBundle( new OrFilter( @@ -196,7 +219,8 @@ public void test_or_countryNotNull_pageLike() new NotFilter(new NullFilter("countryName", null)), new LikeDimFilter("page", "%u%", null, null).toFilter() ) - ) + ), + cursorAutoArrangeFilters ); Assert.assertEquals( @@ -208,7 +232,31 @@ public void test_or_countryNotNull_pageLike() } @Test - public void test_and_countryNotNull_pageLike() + @Parameters({"true"}) + public void test_or_pageLike_countryNotNull_pageLike_with_cursorAutoArrangeFilters(boolean cursorAutoArrangeFilters) + { + final FilterBundle filterBundle = makeFilterBundle( + new OrFilter( + ImmutableList.of( + new LikeDimFilter("page", "%u%", null, null).toFilter(), + new NotFilter(new NullFilter("countryName", null)) + ) + ), + cursorAutoArrangeFilters + ); + + // With cursorAutoArrangeFilters flag on, the indexes are sorted by cost ASC, hence country name index is used first. + Assert.assertEquals( + "index: OR (selectionSize = 39244)\n" + + " index: ~(countryName IS NULL) (selectionSize = 3799)\n" + + " index: page LIKE '%u%' (selectionSize = 15328)\n", + filterBundle.getInfo().describe() + ); + } + + @Test + @Parameters({"true", "false"}) + public void test_and_countryNotNull_pageLike(boolean cursorAutoArrangeFilters) { final FilterBundle filterBundle = makeFilterBundle( new AndFilter( @@ -216,7 +264,8 @@ public void test_and_countryNotNull_pageLike() new NotFilter(new NullFilter("countryName", null)), new LikeDimFilter("page", "%u%", null, null).toFilter() ) - ) + ), + cursorAutoArrangeFilters ); Assert.assertEquals( @@ -226,8 +275,96 @@ public void test_and_countryNotNull_pageLike() ); } + + @Test + @Parameters({"true"}) + public void test_and_cursorAutoArrangeFilters(boolean cursorAutoArrangeFilters) + { + final FilterBundle filterBundle = makeFilterBundle( + new AndFilter( + ImmutableList.of( + new OrFilter( + ImmutableList.of( + new LikeDimFilter("page", "O%", null, null).toFilter(), + new EqualityFilter("countryName", ColumnType.STRING, "United States", null) + ) + ), + new EqualityFilter("isRobot", ColumnType.STRING, "false", null), + new NotFilter(new NullFilter("countryName", null)) + ) + ), + cursorAutoArrangeFilters + ); + + // The estimate cost for child Filters: + // 1. countryName NullFilter: 0 + // 2. isRobot EqualityFilter: 1 + // 3. page LikeFilter with prefix and countryName EqualityFIlter: 1 + size of page dictionary with keys matching "O" prefix + Assert.assertEquals( + "index: AND (selectionSize = 562)\n" + + " index: ~(countryName IS NULL) (selectionSize = 3799)\n" + + " index: isRobot = false (selectionSize = 23824)\n" + + " index: OR (selectionSize = 3799)\n" + + " index: countryName = United States (selectionSize = 528)\n" + + " index: page LIKE 'O%' (selectionSize = 351)\n", + filterBundle.getInfo().describe() + ); + } + + @Test + @Parameters({"true"}) + public void test_or_cursorAutoArrangeFilters(boolean cursorAutoArrangeFilters) + { + final FilterBundle filterBundle = makeFilterBundle( + new OrFilter( + ImmutableList.of( + new AndFilter( + ImmutableList.of( + new EqualityFilter("countryName", ColumnType.STRING, "United Kingdom", null), + new LikeDimFilter("page", "%b%", null, null).toFilter() + ) + ), + new AndFilter( + ImmutableList.of( + new EqualityFilter("countryName", ColumnType.STRING, "United States", null), + new LikeDimFilter("page", "O%", null, null).toFilter() + ) + ), + new TypedInFilter("isRobot", ColumnType.STRING, ImmutableList.of("false", "true"), null, null), + new EqualityFilter("channel", ColumnType.STRING, "#en.wikipedia", null), + new NullFilter("countryName", null) + ) + ), + cursorAutoArrangeFilters + ); + + // The estimate cost for child Filters: + // 1. countryName NullFilter: 0 + // 2. channel EqualityFilter: 1 + // 3. isRobot TypedInFilter: 2 + // 4. page LikeFilter with prefix and countryName EqualityFilter: 1 + size of page dictionary with keys matching "O" prefix + // 5. page LikeFilter and countryName EqualityFilter: 1 + size of page dictionary + Assert.assertEquals( + "matcher: OR\n" + + " matcher: OR\n" + + " with partial index: OR (selectionSize = 39244)\n" + + " index: countryName IS NULL (selectionSize = 35445)\n" + + " index: channel = #en.wikipedia (selectionSize = 11549)\n" + + " index: isRobot IN (false, true) (STRING) (selectionSize = 39244)\n" + + " matcher: AND\n" + + " with partial index: countryName = United States (selectionSize = 528)\n" + + " matcher: page LIKE 'O%'\n" + + " matcher: AND\n" + + " with partial index: countryName = United Kingdom (selectionSize = 234)\n" + + " matcher: page LIKE '%b%'\n", + filterBundle.getInfo().describe() + ); + + } + @Test - public void test_or_countryIsAndPageLike() + @Parameters({"true", "false"}) + public void test_or_countryIsAndPageLike(boolean cursorAutoArrangeFilters) { final FilterBundle filterBundle = makeFilterBundle( new OrFilter( @@ -251,7 +388,8 @@ public void test_or_countryIsAndPageLike() ) ) ) - ) + ), + cursorAutoArrangeFilters ); Assert.assertEquals( @@ -271,7 +409,8 @@ public void test_or_countryIsAndPageLike() } @Test - public void test_or_countryIsNull_and_country_pageLike() + @Parameters({"true", "false"}) + public void test_or_countryIsNull_and_country_pageLike(boolean cursorAutoArrangeFilters) { final FilterBundle filterBundle = makeFilterBundle( new OrFilter( @@ -284,7 +423,8 @@ public void test_or_countryIsNull_and_country_pageLike() ) ) ) - ) + ), + cursorAutoArrangeFilters ); Assert.assertEquals( @@ -299,7 +439,8 @@ public void test_or_countryIsNull_and_country_pageLike() } @Test - public void test_or_countryIsNull_and_isRobotInFalseTrue_pageLike() + @Parameters({"true", "false"}) + public void test_or_countryIsNull_and_isRobotInFalseTrue_pageLike(boolean cursorAutoArrangeFilters) { final FilterBundle filterBundle = makeFilterBundle( new OrFilter( @@ -315,7 +456,8 @@ public void test_or_countryIsNull_and_isRobotInFalseTrue_pageLike() ) ) ) - ) + ), + cursorAutoArrangeFilters ); Assert.assertEquals( @@ -329,9 +471,11 @@ public void test_or_countryIsNull_and_isRobotInFalseTrue_pageLike() ); } - protected FilterBundle makeFilterBundle(final Filter filter) + protected FilterBundle makeFilterBundle(final Filter filter, boolean cursorAutoArrangeFilters) { - return new FilterBundle.Builder(filter, indexSelector, cursorAutoArrangeFilters).build( + return new FilterBundle.Builder(filter, indexSelector, + cursorAutoArrangeFilters + ).build( new DefaultBitmapResultFactory(bitmapFactory), indexSelector.getNumRows(), indexSelector.getNumRows(),