Skip to content

Commit

Permalink
ESQL: Fixed length vector builder (elastic#99970)
Browse files Browse the repository at this point in the history
This adds things like `IntVector.FixedBuilder`, which is slightly simpler
to use than constructing the arrays by hand. It also accounts for the bytes
used up front in the circuit breaker, and it will be easier to integrate
into the framework being built in elastic#99931 to handle errors in topn.

This also uses it in `mv_` functions.
  • Loading branch information
nik9000 authored and piergm committed Oct 2, 2023
1 parent e61da09 commit 76cc43e
Show file tree
Hide file tree
Showing 59 changed files with 879 additions and 228 deletions.
21 changes: 21 additions & 0 deletions x-pack/plugin/esql/compute/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,27 @@ tasks.named('stringTemplates').configure {
it.inputFile = vectorBuildersInputFile
it.outputFile = "org/elasticsearch/compute/data/BooleanVectorBuilder.java"
}
// Fixed-size vector builders: one shared string template is expanded once per
// element type below. Each `template` block pairs a property set with the
// shared input file and names the generated Java source to write.
// Only int, long, double and boolean variants are registered here.
File vectorFixedBuildersInputFile = new File("${projectDir}/src/main/java/org/elasticsearch/compute/data/X-VectorFixedBuilder.java.st")
// int -> IntVectorFixedBuilder
template {
it.properties = intProperties
it.inputFile = vectorFixedBuildersInputFile
it.outputFile = "org/elasticsearch/compute/data/IntVectorFixedBuilder.java"
}
// long -> LongVectorFixedBuilder
template {
it.properties = longProperties
it.inputFile = vectorFixedBuildersInputFile
it.outputFile = "org/elasticsearch/compute/data/LongVectorFixedBuilder.java"
}
// double -> DoubleVectorFixedBuilder
template {
it.properties = doubleProperties
it.inputFile = vectorFixedBuildersInputFile
it.outputFile = "org/elasticsearch/compute/data/DoubleVectorFixedBuilder.java"
}
// boolean -> BooleanVectorFixedBuilder
template {
it.properties = booleanProperties
it.inputFile = vectorFixedBuildersInputFile
it.outputFile = "org/elasticsearch/compute/data/BooleanVectorFixedBuilder.java"
}
File stateInputFile = new File("${projectDir}/src/main/java/org/elasticsearch/compute/aggregation/X-State.java.st")
template {
it.properties = intProperties
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,15 @@
import static org.elasticsearch.compute.gen.Methods.getMethod;
import static org.elasticsearch.compute.gen.Types.ABSTRACT_MULTIVALUE_FUNCTION_EVALUATOR;
import static org.elasticsearch.compute.gen.Types.ABSTRACT_NULLABLE_MULTIVALUE_FUNCTION_EVALUATOR;
import static org.elasticsearch.compute.gen.Types.BIG_ARRAYS;
import static org.elasticsearch.compute.gen.Types.BLOCK;
import static org.elasticsearch.compute.gen.Types.BYTES_REF;
import static org.elasticsearch.compute.gen.Types.BYTES_REF_ARRAY;
import static org.elasticsearch.compute.gen.Types.DRIVER_CONTEXT;
import static org.elasticsearch.compute.gen.Types.EXPRESSION_EVALUATOR;
import static org.elasticsearch.compute.gen.Types.SOURCE;
import static org.elasticsearch.compute.gen.Types.VECTOR;
import static org.elasticsearch.compute.gen.Types.WARNINGS;
import static org.elasticsearch.compute.gen.Types.arrayVectorType;
import static org.elasticsearch.compute.gen.Types.blockType;
import static org.elasticsearch.compute.gen.Types.vectorType;

public class MvEvaluatorImplementer {
private final TypeElement declarationType;
Expand Down Expand Up @@ -197,16 +195,25 @@ private MethodSpec evalShell(
builder.addStatement("int positionCount = v.getPositionCount()");
if (nullable) {
TypeName resultBlockType = blockType(resultType);
builder.addStatement("$T.Builder builder = $T.newBlockBuilder(positionCount)", resultBlockType, resultBlockType);
builder.addStatement(
"$T.Builder builder = $T.newBlockBuilder(positionCount, driverContext.blockFactory())",
resultBlockType,
resultBlockType
);
} else if (resultType.equals(BYTES_REF)) {
TypeName resultVectorType = vectorType(resultType);
builder.addStatement(
"$T values = new $T(positionCount, $T.NON_RECYCLING_INSTANCE)", // TODO blocks should use recycling array
BYTES_REF_ARRAY,
BYTES_REF_ARRAY,
BIG_ARRAYS
"$T.Builder builder = $T.newVectorBuilder(positionCount, driverContext.blockFactory())",
resultVectorType,
resultVectorType
);
} else {
builder.addStatement("$T[] values = new $T[positionCount]", resultType, resultType);
TypeName resultVectorType = vectorType(resultType);
builder.addStatement(
"$T.FixedBuilder builder = $T.newVectorFixedBuilder(positionCount, driverContext.blockFactory())",
resultVectorType,
resultVectorType
);
}

if (false == workType.equals(fieldType) && workType.isPrimitive() == false) {
Expand Down Expand Up @@ -244,11 +251,7 @@ private MethodSpec evalShell(
}
builder.endControlFlow();

if (nullable) {
builder.addStatement("return builder.build()");
} else {
builder.addStatement("return new $T(values, positionCount)", arrayVectorType(resultType));
}
builder.addStatement("return builder.build()");
return builder.build();
}

Expand All @@ -268,7 +271,7 @@ private MethodSpec eval(String name, boolean nullable) {
builder.beginControlFlow("if (valueCount == 1)");
fetch(builder, "value", fieldType, "first", workType.equals(fieldType) ? "firstScratch" : "valueScratch");
singleValueFunction.call(builder);
writeResult(builder, nullable);
writeResult(builder);
builder.addStatement("continue");
builder.endControlFlow();
}
Expand Down Expand Up @@ -302,7 +305,7 @@ private MethodSpec eval(String name, boolean nullable) {
builder.endControlFlow();
finishFunction.call(builder, "work");
}
writeResult(builder, nullable);
writeResult(builder);
});
}

Expand All @@ -313,7 +316,7 @@ private MethodSpec evalSingleValued(String name, boolean nullable) {
builder.addStatement("int first = v.getFirstValueIndex(p)");
fetch(builder, "value", fieldType, "first", workType.equals(fieldType) ? "firstScratch" : "valueScratch");
singleValueFunction.call(builder);
writeResult(builder, nullable);
writeResult(builder);
});
}

Expand All @@ -332,17 +335,15 @@ private MethodSpec evalAscending(String name, boolean nullable) {
return evalShell(name, false, nullable, javadoc, builder -> {}, builder -> {
builder.addStatement("int first = v.getFirstValueIndex(p)");
ascendingFunction.call(builder);
writeResult(builder, nullable);
writeResult(builder);
});
}

private void writeResult(MethodSpec.Builder builder, boolean nullable) {
if (nullable) {
builder.addStatement("builder.$L(result)", appendMethod(resultType));
} else if (fieldType.equals(BYTES_REF)) {
builder.addStatement("values.append(result)");
private void writeResult(MethodSpec.Builder builder) {
if (fieldType.equals(BYTES_REF)) {
builder.addStatement("builder.appendBytesRef(result)");
} else {
builder.addStatement("values[p] = result");
builder.addStatement("builder.$L(result)", appendMethod(resultType));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
*/
public final class BooleanArrayVector extends AbstractVector implements BooleanVector {

private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(BooleanArrayVector.class);
static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(BooleanArrayVector.class);

private final boolean[] values;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,25 @@ static Builder newVectorBuilder(int estimatedSize) {
return newVectorBuilder(estimatedSize, BlockFactory.getNonBreakingInstance());
}

/**
 * Obtains a growable {@link Builder} from the supplied factory. If the final
 * size is known ahead of time, {@link #newVectorFixedBuilder} is the better
 * choice because it's faster.
 *
 * @param estimatedSize size hint handed to the factory
 * @param blockFactory factory that creates the builder
 * @return the builder created by {@code blockFactory}
 */
static Builder newVectorBuilder(int estimatedSize, BlockFactory blockFactory) {
    Builder growing = blockFactory.newBooleanVectorBuilder(estimatedSize);
    return growing;
}

/**
 * Obtains a {@link FixedBuilder}, which never grows, from the supplied
 * factory. Choose this over {@link #newVectorBuilder} whenever the size is
 * known up front because it's faster.
 *
 * @param size size handed to the factory
 * @param blockFactory factory that creates the builder
 * @return the builder created by {@code blockFactory}
 */
static FixedBuilder newVectorFixedBuilder(int size, BlockFactory blockFactory) {
    FixedBuilder fixed = blockFactory.newBooleanVectorFixedBuilder(size);
    return fixed;
}

/**
* A builder that grows as needed.
*/
sealed interface Builder extends Vector.Builder permits BooleanVectorBuilder {
/**
* Appends a boolean to the current entry.
Expand All @@ -119,4 +134,17 @@ sealed interface Builder extends Vector.Builder permits BooleanVectorBuilder {
@Override
BooleanVector build();
}

/**
* A builder that never grows. The only permitted implementation is
* {@link BooleanVectorFixedBuilder}.
*/
sealed interface FixedBuilder extends Vector.Builder permits BooleanVectorFixedBuilder {
/**
* Appends a boolean to the current entry.
*
* @param value the value to append
* @return a builder to continue appending to; the permitted implementation
*         returns itself so calls can be chained
*/
FixedBuilder appendBoolean(boolean value);

/**
* Builds the vector from the appended values.
* <p>
* The permitted implementation throws {@link IllegalStateException} when
* called a second time, or when the number of appended values differs from
* the size the builder was created with.
* </p>
*
* @return the built {@link BooleanVector}
*/
@Override
BooleanVector build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import java.util.Arrays;

/**
* Block build of BooleanBlocks.
* Builder for {@link BooleanVector}s that grows as needed.
* This class is generated. Do not edit it.
*/
final class BooleanVectorBuilder extends AbstractVectorBuilder implements BooleanVector.Builder {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.compute.data;

import org.apache.lucene.util.RamUsageEstimator;

/**
 * Builder for {@link BooleanVector}s that never grows. Prefer this to
 * {@link BooleanVectorBuilder} if you know the precise size up front because
 * it's faster.
 * <p>
 * The estimated size of the finished vector is reported to the
 * {@link BlockFactory} in the constructor, before the backing array is
 * allocated.
 * </p>
 * This class is generated. Do not edit it.
 */
// NOTE(review): this file is generated (see the class comment); the guard added
// to the constructor presumably needs mirroring in the generating template too.
final class BooleanVectorFixedBuilder implements BooleanVector.FixedBuilder {
    private final BlockFactory blockFactory;
    private final boolean[] values;
    /**
     * The next value to write into. {@code -1} means the vector has already
     * been built.
     */
    private int nextIndex;

    /**
     * Creates a builder for exactly {@code size} values.
     *
     * @param size the exact number of values that will be appended; must not be negative
     * @param blockFactory factory whose breaker is adjusted for the vector and
     *                     that is handed to the built vector
     * @throws IllegalArgumentException if {@code size} is negative
     */
    BooleanVectorFixedBuilder(int size, BlockFactory blockFactory) {
        if (size < 0) {
            // Fail before the breaker is adjusted. Otherwise a meaningless estimate
            // would be reported and never undone, because the array allocation
            // below throws for a negative size.
            throw new IllegalArgumentException("size must not be negative but was [" + size + "]");
        }
        blockFactory.adjustBreaker(ramBytesUsed(size), false);
        this.blockFactory = blockFactory;
        this.values = new boolean[size];
    }

    /**
     * Appends a boolean at the next free position.
     *
     * @param value the value to append
     * @return this builder
     */
    @Override
    public BooleanVectorFixedBuilder appendBoolean(boolean value) {
        values[nextIndex++] = value;
        return this;
    }

    /**
     * Estimates the bytes used by the vector that {@link #build} will return
     * for the given size: a constant vector for a single value, otherwise an
     * array vector plus its aligned {@code boolean[]}.
     */
    private static long ramBytesUsed(int size) {
        if (size == 1) {
            return ConstantBooleanVector.RAM_BYTES_USED;
        }
        // Widen before multiplying so the arithmetic is done in long.
        long arrayBytes = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) size * Byte.BYTES;
        return BooleanArrayVector.BASE_RAM_BYTES_USED + RamUsageEstimator.alignObjectSize(arrayBytes);
    }

    /**
     * Builds the vector. A single value yields a {@link ConstantBooleanVector},
     * anything else a {@link BooleanArrayVector} over the backing array.
     *
     * @return the built vector
     * @throws IllegalStateException if the vector was already built or if the
     *         number of appended values differs from the declared size
     */
    @Override
    public BooleanVector build() {
        if (nextIndex < 0) {
            throw new IllegalStateException("already closed");
        }
        if (nextIndex != values.length) {
            throw new IllegalStateException("expected to write [" + values.length + "] entries but wrote [" + nextIndex + "]");
        }
        // Mark the builder as spent so a second build() is rejected.
        nextIndex = -1;
        if (values.length == 1) {
            return new ConstantBooleanVector(values[0], 1, blockFactory);
        }
        return new BooleanArrayVector(values, values.length, blockFactory);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
*/
public final class BytesRefArrayVector extends AbstractVector implements BytesRefVector {

private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(BytesRefArrayVector.class);
static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(BytesRefArrayVector.class);

private final BytesRefArray values;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,16 @@ static Builder newVectorBuilder(int estimatedSize) {
return newVectorBuilder(estimatedSize, BlockFactory.getNonBreakingInstance());
}

/**
 * Obtains a growable {@link Builder} from the supplied factory.
 *
 * @param estimatedSize size hint handed to the factory
 * @param blockFactory factory that creates the builder
 * @return the builder created by {@code blockFactory}
 */
static Builder newVectorBuilder(int estimatedSize, BlockFactory blockFactory) {
    Builder growing = blockFactory.newBytesRefVectorBuilder(estimatedSize);
    return growing;
}

/**
* A builder that grows as needed.
*/
sealed interface Builder extends Vector.Builder permits BytesRefVectorBuilder {
/**
* Appends a BytesRef to the current entry.
Expand All @@ -119,4 +125,5 @@ sealed interface Builder extends Vector.Builder permits BytesRefVectorBuilder {
@Override
BytesRefVector build();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import org.elasticsearch.core.Releasables;

/**
* Block build of BytesRefBlocks.
* Builder for {@link BytesRefVector}s that grows as needed.
* This class is generated. Do not edit it.
*/
final class BytesRefVectorBuilder extends AbstractVectorBuilder implements BytesRefVector.Builder {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/
public final class ConstantBooleanVector extends AbstractVector implements BooleanVector {

private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ConstantBooleanVector.class);
static final long RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ConstantBooleanVector.class);

private final boolean value;

Expand Down Expand Up @@ -55,7 +55,7 @@ public boolean isConstant() {

@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOfInstance(boolean.class);
return RAM_BYTES_USED;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
*/
public final class ConstantBytesRefVector extends AbstractVector implements BytesRefVector {

private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ConstantBytesRefVector.class);

static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ConstantBytesRefVector.class) + RamUsageEstimator
.shallowSizeOfInstance(BytesRef.class);
private final BytesRef value;

public ConstantBytesRefVector(BytesRef value, int positionCount) {
Expand Down Expand Up @@ -56,7 +56,7 @@ public boolean isConstant() {

@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(value.bytes);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/
public final class ConstantDoubleVector extends AbstractVector implements DoubleVector {

private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ConstantDoubleVector.class);
static final long RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ConstantDoubleVector.class);

private final double value;

Expand Down Expand Up @@ -55,7 +55,7 @@ public boolean isConstant() {

@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOfInstance(double.class);
return RAM_BYTES_USED;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/
public final class ConstantIntVector extends AbstractVector implements IntVector {

private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ConstantIntVector.class);
static final long RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ConstantIntVector.class);

private final int value;

Expand Down Expand Up @@ -55,7 +55,7 @@ public boolean isConstant() {

@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOfInstance(int.class);
return RAM_BYTES_USED;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/
public final class ConstantLongVector extends AbstractVector implements LongVector {

private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ConstantLongVector.class);
static final long RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ConstantLongVector.class);

private final long value;

Expand Down Expand Up @@ -55,7 +55,7 @@ public boolean isConstant() {

@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOfInstance(long.class);
return RAM_BYTES_USED;
}

@Override
Expand Down
Loading

0 comments on commit 76cc43e

Please sign in to comment.