Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Incorporate estimatedComputeCost into all BitmapColumnIndex classes. #17125

Merged
merged 7 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ public int getNumThreads()
{
return 1;
}

@Override
public String getFormatString()
{
Expand Down Expand Up @@ -220,8 +221,11 @@ public String getFormatString()
"SELECT ARRAY_CONTAINS(\"multi-string3\", 100) FROM foo",
"SELECT ARRAY_CONTAINS(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM foo",
"SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 100]) FROM foo",
"SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM foo"
);
"SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM foo",
// 46: filters with random orders
"SELECT string2, SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND string1 = '1000' GROUP BY 1 ORDER BY 2",
"SELECT string2, SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND (string3 in ('1', '10', '20', '22', '32') AND long2 IN (1, 19, 21, 23, 25, 26, 46) AND double3 < 1010.0 AND double3 > 1000.0 AND (string4 = '1' OR REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL)) AND string1 = '1000' GROUP BY 1 ORDER BY 2"
);

@Param({"5000000"})
private int rowsPerSegment;
Expand Down Expand Up @@ -319,7 +323,12 @@ public void setup()
final PlannerConfig plannerConfig = new PlannerConfig();

final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator());
log.info("Starting benchmark setup using cacheDir[%s], rows[%,d], schema[%s].", segmentGenerator.getCacheDir(), rowsPerSegment, schema);
log.info(
"Starting benchmark setup using cacheDir[%s], rows[%,d], schema[%s].",
segmentGenerator.getCacheDir(),
rowsPerSegment,
schema
);
final QueryableIndex index;
if ("auto".equals(schema)) {
List<DimensionSchema> columnSchemas = schemaInfo.getDimensionsSpec()
Expand Down Expand Up @@ -383,7 +392,14 @@ public void setup()

final String sql = QUERIES.get(Integer.parseInt(query));

try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, "EXPLAIN PLAN FOR " + sql, ImmutableMap.of("useNativeQueryExplain", true))) {
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(
engine,
"EXPLAIN PLAN FOR " + sql,
ImmutableMap.of(
"useNativeQueryExplain",
true
)
)) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
final Object[] planResult = resultSequence.toList().get(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,9 @@ public String getFormatString()
"SELECT long2 FROM foo WHERE double3 IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0)",
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0)",
"SELECT long2 FROM foo WHERE double3 IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0) GROUP BY 1",
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0) GROUP BY 1"
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0) GROUP BY 1",
// 56 filters with random orders
"SELECT SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND string1 = '1000'"
cecemei marked this conversation as resolved.
Show resolved Hide resolved
);

@Param({"5000000"})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ public QueryableIndexCursorHolder(
Cursors.getTimeOrdering(ordering),
interval,
filter,
cursorBuildSpec.getQueryContext().getBoolean(QueryContexts.CURSOR_AUTO_ARRANGE_FILTERS, false),
cursorBuildSpec.getQueryContext().getBoolean(QueryContexts.CURSOR_AUTO_ARRANGE_FILTERS, true),
metrics
)
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,6 @@ private BitmapColumnIndex wrapRangeIndexWithNullValueIndex(
BitmapColumnIndex rangeIndex
)
{


final BitmapColumnIndex nullBitmap;
final NullValueIndex nulls = indexSupplier.as(NullValueIndex.class);
if (nulls == null) {
Expand All @@ -166,6 +164,12 @@ public ColumnIndexCapabilities getIndexCapabilities()
return rangeIndex.getIndexCapabilities().merge(nullBitmap.getIndexCapabilities());
}

@Override
public int estimatedComputeCost()
{
return rangeIndex.estimatedComputeCost();
cecemei marked this conversation as resolved.
Show resolved Hide resolved
}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ public ColumnIndexCapabilities getIndexCapabilities()
return baseIndex.getIndexCapabilities();
}

/**
 * Returns the estimated cost of computing the bitmap result by delegating to {@code baseIndex}, the same index
 * this wrapper consults for its index capabilities.
 */
@Override
public int estimatedComputeCost()
{
return baseIndex.estimatedComputeCost();
}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ public ColumnIndexCapabilities getIndexCapabilities()
return baseIndex.getIndexCapabilities();
}

/**
 * Returns the estimated cost of computing the bitmap result. No cost is added here: the value reported by
 * {@code baseIndex} is passed through unchanged.
 */
@Override
public int estimatedComputeCost()
{
return baseIndex.estimatedComputeCost();
}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,12 @@ public ColumnIndexCapabilities getIndexCapabilities()
return new SimpleColumnIndexCapabilities(true, true);
}

/**
 * Returns the estimated cost of computing the bitmap result, which for this index is always the largest
 * representable value, {@link Integer#MAX_VALUE}.
 */
@Override
public int estimatedComputeCost()
{
// NOTE(review): presumably this marks the index as the most expensive one to evaluate; confirm against callers.
return Integer.MAX_VALUE;
}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ public ColumnIndexCapabilities getIndexCapabilities()
return SimpleColumnIndexCapabilities.getConstant();
}

/**
 * Returns the estimated cost of computing the bitmap result; this index always reports a cost of zero.
 */
@Override
public int estimatedComputeCost()
{
// NOTE(review): presumably zero because the result is constant (capabilities are the "constant" ones) - confirm.
return 0;
}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ public ColumnIndexCapabilities getIndexCapabilities()
return SimpleColumnIndexCapabilities.getConstant();
}

/**
 * Returns the estimated cost of computing the bitmap result; fixed at zero for this index.
 */
@Override
public int estimatedComputeCost()
{
// NOTE(review): presumably zero because the result is constant (capabilities are the "constant" ones) - confirm.
return 0;
}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ public ColumnIndexCapabilities getIndexCapabilities()
return SimpleColumnIndexCapabilities.getConstant();
}

/**
 * Returns the estimated cost of computing the bitmap result; this implementation always answers zero.
 */
@Override
public int estimatedComputeCost()
{
// NOTE(review): presumably zero because the result is constant (capabilities are the "constant" ones) - confirm.
return 0;
}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,7 @@ public interface BitmapColumnIndex
/**
* Returns an estimated cost for computing the bitmap result.
*/
default int estimatedComputeCost()
{
return Integer.MAX_VALUE;
}
int estimatedComputeCost();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know this isn't new in this PR, but I feel like the javadoc should mention that the estimated cost should be related to the number of bitmap operations that need to be performed to compute the filter bitmap.

Copy link
Contributor Author

@cecemei cecemei Sep 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added more explanation on this.


/**
* Compute a bitmap result wrapped with the {@link BitmapResultFactory} representing the rows matched by this index.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ public DictionaryRangeScanningBitmapIndex(double sizeScale, int rangeSize)
this.rangeSize = rangeSize;
}

/**
 * Returns the estimated cost of computing the bitmap result, taken to be the {@code rangeSize} that was passed
 * to the constructor.
 */
@Override
public int estimatedComputeCost()
{
return this.rangeSize;
}

@Nullable
@Override
public final <T> T computeBitmapResult(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ public DictionaryScanningBitmapIndex(int dictionarySize, double scaleThreshold)
this.scaleThreshold = scaleThreshold;
}

/**
 * Returns the estimated cost of computing the bitmap result, taken to be the value of the
 * {@code dictionarySize} field.
 */
@Override
public int estimatedComputeCost()
{
return this.dictionarySize;
}

@Nullable
@Override
public final <T> T computeBitmapResult(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import org.apache.druid.annotations.SuppressFBWarnings;
import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
Expand All @@ -48,6 +48,7 @@
import java.util.Comparator;
import java.util.List;
import java.util.SortedSet;
import java.util.stream.Collectors;

public final class IndexedUtf8ValueIndexes<TDictionary extends Indexed<ByteBuffer>>
implements StringValueSetIndexes, Utf8ValueSetIndexes, ValueIndexes, ValueSetIndexes
Expand Down Expand Up @@ -83,6 +84,11 @@ public BitmapColumnIndex forValue(@Nullable String value)
final ByteBuffer utf8 = StringUtils.toUtf8ByteBuffer(value);
return new SimpleBitmapColumnIndex()
{
/**
 * Returns the estimated cost of computing the bitmap result for this single-value index; fixed at 1.
 */
@Override
public int estimatedComputeCost()
{
return 1;
}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
Expand Down Expand Up @@ -122,10 +128,7 @@ public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature<ValueType
public BitmapColumnIndex forSortedValues(SortedSet<String> values)
{
return getBitmapColumnIndexForSortedIterableUtf8(
Iterables.transform(
values,
StringUtils::toUtf8ByteBuffer
),
values.stream().map(StringUtils::toUtf8ByteBuffer).collect(Collectors.toList()),
cecemei marked this conversation as resolved.
Show resolved Hide resolved
values.size(),
values.contains(null)
);
Expand Down Expand Up @@ -170,7 +173,7 @@ private ImmutableBitmap getBitmap(int idx)
* Helper used by {@link #forSortedValues} and {@link #forSortedValuesUtf8}.
*/
private BitmapColumnIndex getBitmapColumnIndexForSortedIterableUtf8(
Iterable<ByteBuffer> valuesUtf8,
List<ByteBuffer> valuesUtf8,
int size,
boolean valuesContainsNull
)
Expand Down Expand Up @@ -241,7 +244,7 @@ public BitmapColumnIndex forSortedValues(@Nonnull List<?> sortedValues, TypeSign
return ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorScan(
bitmapFactory,
ByteBufferUtils.utf8Comparator(),
Iterables.transform(tailSet, StringUtils::toUtf8ByteBuffer),
Lists.transform(tailSet, StringUtils::toUtf8ByteBuffer),
dictionary,
bitmaps,
unknownsIndex
Expand All @@ -250,15 +253,15 @@ public BitmapColumnIndex forSortedValues(@Nonnull List<?> sortedValues, TypeSign
// fall through to value iteration
return ValueSetIndexes.buildBitmapColumnIndexFromSortedIteratorBinarySearch(
bitmapFactory,
Iterables.transform(tailSet, StringUtils::toUtf8ByteBuffer),
Lists.transform(tailSet, StringUtils::toUtf8ByteBuffer),
dictionary,
bitmaps,
unknownsIndex
);
} else {
return ValueSetIndexes.buildBitmapColumnIndexFromIteratorBinarySearch(
bitmapFactory,
Iterables.transform(
Lists.transform(
sortedValues,
x -> StringUtils.toUtf8ByteBuffer(DimensionHandlerUtils.convertObjectToString(x))
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ public SimpleImmutableBitmapIndex(ImmutableBitmap bitmap)
this.bitmap = bitmap;
}

@Override
public int estimatedComputeCost()
{
return 0;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this is mainly used for the null value index. Should this be 1 to be consistent with the equality indexes, like ValueIndexes.forValue, since the null indexes still have a bitmap?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, this index seems to be mainly for the null index, so it's just one bitmap with no union. When I looked at forValue, it seems possible that there are two bitmaps (one for the value and one for null) combined with one union. That's why I decided on 0 for this and 1 for the other SimpleBitmapColumnIndex instances. I feel SimpleImmutableBitmapIndex is slightly cheaper since it needs no binary search of the dictionary and no bitmap union.

}

@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory, boolean includeUnknown)
{
Expand Down
Loading
Loading