Skip to content

Commit

Permalink
ESQL: Compute support for filtering ungrouped aggs (#112717)
Browse files Browse the repository at this point in the history
Adds support to the compute engine for filtering which positions are
processed by ungrouped aggs. This should allow syntax like:

```
| STATS
       success = COUNT(*) WHERE 200 <= response_code AND response_code < 300,
      redirect = COUNT(*) WHERE 300 <= response_code AND response_code < 400,
    client_err = COUNT(*) WHERE 400 <= response_code AND response_code < 500,
    server_err = COUNT(*) WHERE 500 <= response_code AND response_code < 600,
   total_count = COUNT(*)
```

We could translate the WHERE expression into an `ExpressionEvaluator`
and run it, then plug it into the filtering support added in this PR.

The actual filtering is done by creating a `FilteredAggregatorFunction`
which wraps a regular `AggregatorFunction`, first executing the filter
against the incoming `Page` and then passing the resulting mask to the
`AggregatorFunction`. We've then added a `mask` to
`AggregatorFunction#process` which each aggregation function must use
for filtering.

We keep the unfiltered behavior by sending a constant block with `true`
in it. Each agg detects this and takes an "unfiltered" path, preserving
the original performance.

Importantly, when you don't turn this on it doesn't affect performance:

```
 (blockType)  (grouping)   (op)  Score    Error -> Score    Error  Units
vector_longs        none  count  0.007 ±  0.001 -> 0.007 ±  0.001  ns/op
vector_longs        none    min  0.123 ±  0.004 -> 0.128 ±  0.005  ns/op
vector_longs       longs  count  4.311 ±  0.192 -> 4.218 ±  0.053  ns/op
vector_longs       longs    min  5.476 ±  0.077 -> 5.451 ±  0.074  ns/op
```
  • Loading branch information
nik9000 authored and davidkyle committed Sep 12, 2024
1 parent 1a05488 commit ca2b144
Show file tree
Hide file tree
Showing 63 changed files with 2,500 additions and 179 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,10 @@ private TypeSpec type() {
builder.addMethod(intermediateStateDesc());
builder.addMethod(intermediateBlockCount());
builder.addMethod(addRawInput());
builder.addMethod(addRawVector());
builder.addMethod(addRawBlock());
builder.addMethod(addRawVector(false));
builder.addMethod(addRawVector(true));
builder.addMethod(addRawBlock(false));
builder.addMethod(addRawBlock(true));
builder.addMethod(addIntermediateInput());
builder.addMethod(evaluateIntermediate());
builder.addMethod(evaluateFinal());
Expand Down Expand Up @@ -345,22 +347,48 @@ private MethodSpec intermediateBlockCount() {

/**
 * Builds the {@code MethodSpec} for the generated {@code addRawInput} method.
 *
 * <p>The generated method is public, annotated with {@code @Override}, and its body is laid out as:
 * <ol>
 *   <li>an optional early return when {@code state.failed()} (only if {@code stateTypeHasFailed});</li>
 *   <li>a constant-mask branch: return immediately when {@code mask.getBoolean(0)} is false, otherwise
 *       dispatch to the one-argument {@code addRawVector(vector)} / {@code addRawBlock(block)};</li>
 *   <li>a fall-through path that dispatches to {@code addRawVector(vector, mask)} /
 *       {@code addRawBlock(block, mask)}.</li>
 * </ol>
 *
 * <p>NOTE(review): this listing looks like a rendered diff with the +/- markers stripped, so some
 * statements below appear to be the pre-change version of the statement that follows them.
 * Confirm against the real file before relying on the exact statement sequence.
 *
 * @return the method spec for {@code addRawInput}
 */
private MethodSpec addRawInput() {
MethodSpec.Builder builder = MethodSpec.methodBuilder("addRawInput");
// NOTE(review): the next line (page-only signature) appears to be the pre-change form of the
// line after it, which additionally declares the BOOLEAN_VECTOR "mask" parameter — confirm.
builder.addAnnotation(Override.class).addModifiers(Modifier.PUBLIC).addParameter(PAGE, "page");
builder.addAnnotation(Override.class).addModifiers(Modifier.PUBLIC).addParameter(PAGE, "page").addParameter(BOOLEAN_VECTOR, "mask");
if (stateTypeHasFailed) {
// Generated code bails out before touching the page when the state reports failure.
builder.beginControlFlow("if (state.failed())");
builder.addStatement("return");
builder.endControlFlow();
}
// Constant mask: the generated code inspects only position 0 to decide for the whole page.
builder.beginControlFlow("if (mask.isConstant())");
{
builder.beginControlFlow("if (mask.getBoolean(0) == false)");
{
builder.addComment("Entire page masked away");
builder.addStatement("return");
}
builder.endControlFlow();
// Constant true: use the mask-free overloads so no per-position mask check is generated.
builder.addComment("No masking");
builder.addStatement("$T block = page.getBlock(channels.get(0))", valueBlockType(init, combine));
builder.addStatement("$T vector = block.asVector()", valueVectorType(init, combine));
// asVector() yielding null selects the block path; otherwise the vector path is used.
builder.beginControlFlow("if (vector != null)");
builder.addStatement("addRawVector(vector)");
builder.nextControlFlow("else");
builder.addStatement("addRawBlock(block)");
builder.endControlFlow();
builder.addStatement("return");
}
builder.endControlFlow();
// Non-constant mask: same vector/block dispatch, but the mask is forwarded to the overloads.
builder.addComment("Some positions masked away, others kept");
builder.addStatement("$T block = page.getBlock(channels.get(0))", valueBlockType(init, combine));
builder.addStatement("$T vector = block.asVector()", valueVectorType(init, combine));
// NOTE(review): the next two lines (unmasked dispatch) appear to be the pre-change form of the
// five lines after them, which pass "mask" through — confirm against the real file.
builder.beginControlFlow("if (vector != null)").addStatement("addRawVector(vector)");
builder.nextControlFlow("else").addStatement("addRawBlock(block)").endControlFlow();
builder.beginControlFlow("if (vector != null)");
builder.addStatement("addRawVector(vector, mask)");
builder.nextControlFlow("else");
builder.addStatement("addRawBlock(block, mask)");
builder.endControlFlow();
return builder.build();
}

private MethodSpec addRawVector() {
private MethodSpec addRawVector(boolean masked) {
MethodSpec.Builder builder = MethodSpec.methodBuilder("addRawVector");
builder.addModifiers(Modifier.PRIVATE).addParameter(valueVectorType(init, combine), "vector");
if (masked) {
builder.addParameter(BOOLEAN_VECTOR, "mask");
}

if (stateTypeHasSeen) {
builder.addStatement("state.seen(true)");
Expand All @@ -372,6 +400,9 @@ private MethodSpec addRawVector() {

builder.beginControlFlow("for (int i = 0; i < vector.getPositionCount(); i++)");
{
if (masked) {
builder.beginControlFlow("if (mask.getBoolean(i) == false)").addStatement("continue").endControlFlow();
}
combineRawInput(builder, "vector");
}
builder.endControlFlow();
Expand All @@ -381,16 +412,22 @@ private MethodSpec addRawVector() {
return builder.build();
}

private MethodSpec addRawBlock() {
private MethodSpec addRawBlock(boolean masked) {
MethodSpec.Builder builder = MethodSpec.methodBuilder("addRawBlock");
builder.addModifiers(Modifier.PRIVATE).addParameter(valueBlockType(init, combine), "block");
if (masked) {
builder.addParameter(BOOLEAN_VECTOR, "mask");
}

if (valuesIsBytesRef) {
// Add bytes_ref scratch var that will only be used for bytes_ref blocks/vectors
builder.addStatement("$T scratch = new $T()", BYTES_REF, BYTES_REF);
}
builder.beginControlFlow("for (int p = 0; p < block.getPositionCount(); p++)");
{
if (masked) {
builder.beginControlFlow("if (mask.getBoolean(p) == false)").addStatement("continue").endControlFlow();
}
builder.beginControlFlow("if (block.isNull(p))");
builder.addStatement("continue");
builder.endControlFlow();
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit ca2b144

Please sign in to comment.