Skip to content

Commit

Permalink
Incorporate estimatedComputeCost into all BitmapColumnIndex class…
Browse files Browse the repository at this point in the history
…es. (apache#17125)

changes:
* filter index processing is now automatically ordered by estimated 'cost', which is approximated by the number of bitmap operations expected to be required to construct the bitmap used for the 'offset'
* cursorAutoArrangeFilters context flag now defaults to true, but can be set to false to disable cost based filter index sorting
  • Loading branch information
cecemei committed Sep 26, 2024
1 parent bcc3da6 commit 7e42959
Show file tree
Hide file tree
Showing 22 changed files with 532 additions and 108 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ public int getNumThreads()
{
return 1;
}

@Override
public String getFormatString()
{
Expand Down Expand Up @@ -220,8 +221,11 @@ public String getFormatString()
"SELECT ARRAY_CONTAINS(\"multi-string3\", 100) FROM foo",
"SELECT ARRAY_CONTAINS(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM foo",
"SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 100]) FROM foo",
"SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM foo"
);
"SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM foo",
// 46, 47: filters with random orders
"SELECT string2, SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND string1 = '1000' GROUP BY 1 ORDER BY 2",
"SELECT string2, SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND (string3 in ('1', '10', '20', '22', '32') AND long2 IN (1, 19, 21, 23, 25, 26, 46) AND double3 < 1010.0 AND double3 > 1000.0 AND (string4 = '1' OR REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL)) AND string1 = '1000' GROUP BY 1 ORDER BY 2"
);

@Param({"5000000"})
private int rowsPerSegment;
Expand Down Expand Up @@ -294,7 +298,9 @@ public String getFormatString()
"42",
"43",
"44",
"45"
"45",
"46",
"47"
})
private String query;

Expand All @@ -319,7 +325,12 @@ public void setup()
final PlannerConfig plannerConfig = new PlannerConfig();

final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator());
log.info("Starting benchmark setup using cacheDir[%s], rows[%,d], schema[%s].", segmentGenerator.getCacheDir(), rowsPerSegment, schema);
log.info(
"Starting benchmark setup using cacheDir[%s], rows[%,d], schema[%s].",
segmentGenerator.getCacheDir(),
rowsPerSegment,
schema
);
final QueryableIndex index;
if ("auto".equals(schema)) {
List<DimensionSchema> columnSchemas = schemaInfo.getDimensionsSpec()
Expand Down Expand Up @@ -383,7 +394,14 @@ public void setup()

final String sql = QUERIES.get(Integer.parseInt(query));

try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, "EXPLAIN PLAN FOR " + sql, ImmutableMap.of("useNativeQueryExplain", true))) {
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(
engine,
"EXPLAIN PLAN FOR " + sql,
ImmutableMap.of(
"useNativeQueryExplain",
true
)
)) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
final Object[] planResult = resultSequence.toList().get(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ public String getFormatString()
// 42, 43 big cardinality like predicate filter
"SELECT SUM(long1) FROM foo WHERE string5 LIKE '%1%'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%'",
// 44, 45 big cardinality like filter + selector filter
// 44, 45 big cardinality like filter + selector filter with different ordering
"SELECT SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND string1 = '1000'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%' AND JSON_VALUE(nested, '$.nesteder.string1') = '1000'",
"SELECT SUM(long1) FROM foo WHERE string1 = '1000' AND string5 LIKE '%1%'",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ public QueryableIndexCursorHolder(
Cursors.getTimeOrdering(ordering),
interval,
filter,
cursorBuildSpec.getQueryContext().getBoolean(QueryContexts.CURSOR_AUTO_ARRANGE_FILTERS, false),
cursorBuildSpec.getQueryContext().getBoolean(QueryContexts.CURSOR_AUTO_ARRANGE_FILTERS, true),
metrics
)
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,6 @@ private BitmapColumnIndex wrapRangeIndexWithNullValueIndex(
BitmapColumnIndex rangeIndex
)
{


final BitmapColumnIndex nullBitmap;
final NullValueIndex nulls = indexSupplier.as(NullValueIndex.class);
if (nulls == null) {
Expand All @@ -166,6 +164,12 @@ public ColumnIndexCapabilities getIndexCapabilities()
return rangeIndex.getIndexCapabilities().merge(nullBitmap.getIndexCapabilities());
}

/**
 * Estimates the cost of this wrapped index as the cost of the underlying range index plus one.
 * NOTE(review): the extra 1 presumably accounts for combining with the null-value bitmap; confirm.
 *
 * The addition saturates: when the range index already reports {@link Integer#MAX_VALUE}, that value
 * is returned unchanged instead of overflowing to a negative cost.
 *
 * @return the range index's estimated cost plus one, capped at {@link Integer#MAX_VALUE}
 */
@Override
public int estimatedComputeCost()
{
  final int rangeCost = rangeIndex.estimatedComputeCost();
  // A plain "+ 1" would wrap Integer.MAX_VALUE around to Integer.MIN_VALUE.
  return rangeCost == Integer.MAX_VALUE ? Integer.MAX_VALUE : rangeCost + 1;
}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ public ColumnIndexCapabilities getIndexCapabilities()
return baseIndex.getIndexCapabilities();
}

/**
 * Reports the estimated cost of the wrapped {@code baseIndex} unchanged; this wrapper adds nothing
 * to the estimate.
 *
 * @return the value of {@code baseIndex.estimatedComputeCost()}
 */
@Override
public int estimatedComputeCost()
{
return baseIndex.estimatedComputeCost();
}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ public ColumnIndexCapabilities getIndexCapabilities()
return baseIndex.getIndexCapabilities();
}

/**
 * Reports the estimated cost of the wrapped {@code baseIndex} unchanged; this wrapper adds nothing
 * to the estimate.
 *
 * @return the value of {@code baseIndex.estimatedComputeCost()}
 */
@Override
public int estimatedComputeCost()
{
return baseIndex.estimatedComputeCost();
}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,12 @@ public ColumnIndexCapabilities getIndexCapabilities()
return new SimpleColumnIndexCapabilities(true, true);
}

/**
 * Always reports the largest representable cost.
 * NOTE(review): presumably this makes the index sort as the most expensive when filters are
 * arranged by cost; confirm against the code that consumes this value.
 *
 * @return {@link Integer#MAX_VALUE}
 */
@Override
public int estimatedComputeCost()
{
return Integer.MAX_VALUE;
}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ public ColumnIndexCapabilities getIndexCapabilities()
return SimpleColumnIndexCapabilities.getConstant();
}

/**
 * Always reports a cost of zero for this index.
 *
 * @return 0
 */
@Override
public int estimatedComputeCost()
{
return 0;
}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ public ColumnIndexCapabilities getIndexCapabilities()
return SimpleColumnIndexCapabilities.getConstant();
}

/**
 * Always reports a cost of zero for this index.
 *
 * @return 0
 */
@Override
public int estimatedComputeCost()
{
return 0;
}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ public ColumnIndexCapabilities getIndexCapabilities()
return SimpleColumnIndexCapabilities.getConstant();
}

/**
 * Always reports a cost of zero for this index.
 *
 * @return 0
 */
@Override
public int estimatedComputeCost()
{
return 0;
}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@ public interface BitmapColumnIndex
ColumnIndexCapabilities getIndexCapabilities();

/**
* Returns an estimated cost for computing the bitmap result.
* Returns an estimated cost for computing the bitmap result. Generally this is equivalent to the number of bitmap union
* or intersection operations that need to be performed. E.g. a null value index bitmap has a cost of 0, a non-null value
* index bitmap unioned with the null bitmap has a cost of 1, and a range (size of 10) scanning index bitmap unioned with
* the null bitmap has a cost of 10.
*/
default int estimatedComputeCost()
{
return Integer.MAX_VALUE;
}
int estimatedComputeCost();

/**
* Compute a bitmap result wrapped with the {@link BitmapResultFactory} representing the rows matched by this index.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ public DictionaryRangeScanningBitmapIndex(double sizeScale, int rangeSize)
this.rangeSize = rangeSize;
}

/**
 * Uses the {@code rangeSize} given to the constructor as the cost estimate.
 *
 * @return the value of the {@code rangeSize} field
 */
@Override
public int estimatedComputeCost()
{
return this.rangeSize;
}

@Nullable
@Override
public final <T> T computeBitmapResult(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ public DictionaryScanningBitmapIndex(int dictionarySize, double scaleThreshold)
this.scaleThreshold = scaleThreshold;
}

/**
 * Uses this index's {@code dictionarySize} as the cost estimate.
 *
 * @return the value of the {@code dictionarySize} field
 */
@Override
public int estimatedComputeCost()
{
return this.dictionarySize;
}

@Nullable
@Override
public final <T> T computeBitmapResult(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ public BitmapColumnIndex forValue(@Nullable String value)
final ByteBuffer utf8 = StringUtils.toUtf8ByteBuffer(value);
return new SimpleBitmapColumnIndex()
{
/**
 * Reports a fixed cost of 1 for the index built for a single value.
 *
 * @return 1
 */
@Override
public int estimatedComputeCost()
{
return 1;
}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
Expand Down Expand Up @@ -122,10 +127,7 @@ public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature<ValueType
public BitmapColumnIndex forSortedValues(SortedSet<String> values)
{
return getBitmapColumnIndexForSortedIterableUtf8(
Iterables.transform(
values,
StringUtils::toUtf8ByteBuffer
),
Iterables.transform(values, StringUtils::toUtf8ByteBuffer),
values.size(),
values.contains(null)
);
Expand Down Expand Up @@ -181,6 +183,7 @@ private BitmapColumnIndex getBitmapColumnIndexForSortedIterableUtf8(
bitmapFactory,
COMPARATOR,
valuesUtf8,
size,
dictionary,
bitmaps,
() -> {
Expand All @@ -197,6 +200,7 @@ private BitmapColumnIndex getBitmapColumnIndexForSortedIterableUtf8(
return ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorBinarySearch(
bitmapFactory,
valuesUtf8,
size,
dictionary,
bitmaps,
() -> {
Expand Down Expand Up @@ -242,6 +246,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List<?> sortedValues, TypeSign
bitmapFactory,
ByteBufferUtils.utf8Comparator(),
Iterables.transform(tailSet, StringUtils::toUtf8ByteBuffer),
tailSet.size(),
dictionary,
bitmaps,
unknownsIndex
Expand All @@ -251,6 +256,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List<?> sortedValues, TypeSign
return ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorBinarySearch(
bitmapFactory,
Iterables.transform(tailSet, StringUtils::toUtf8ByteBuffer),
tailSet.size(),
dictionary,
bitmaps,
unknownsIndex
Expand All @@ -262,6 +268,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List<?> sortedValues, TypeSign
sortedValues,
x -> StringUtils.toUtf8ByteBuffer(DimensionHandlerUtils.convertObjectToString(x))
),
sortedValues.size(),
dictionary,
bitmaps,
unknownsIndex
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ public SimpleImmutableBitmapIndex(ImmutableBitmap bitmap)
this.bitmap = bitmap;
}

/**
 * Always reports a cost of zero.
 * NOTE(review): presumably because the bitmap is already supplied to the constructor; confirm.
 *
 * @return 0
 */
@Override
public int estimatedComputeCost()
{
return 0;
}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
{
Expand Down
Loading

0 comments on commit 7e42959

Please sign in to comment.