Skip to content

Commit

Permalink
ESQL: Method to convert BooleanBlock to a "mask" (elastic#112253)
Browse files Browse the repository at this point in the history
This adds a method, `BooleanBlock#toMask` to convert `BooleanBlock`s
into a "mask" for use with `keepMask`.
  • Loading branch information
nik9000 authored and cbuescher committed Sep 4, 2024
1 parent 43dd0bb commit 459f047
Show file tree
Hide file tree
Showing 15 changed files with 306 additions and 4 deletions.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ public OrdinalBytesRefBlock asOrdinals() {
return null;
}

@Override
public ToMask toMask() {
return new ToMask(blockFactory.newConstantBooleanVector(false, positionCount), false);
}

@Override
public boolean isNull(int position) {
return true;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.compute.data;

import org.elasticsearch.core.Releasable;

/**
* Result from calling {@link BooleanBlock#toMask}. {@link #close closing} this will
* close the contained {@link #mask()}. If you want to keep a reference to it then you'll
* have to {@link Block#incRef()} it.
*/
public record ToMask(BooleanVector mask, boolean hadMultivaluedFields) implements Releasable {
@Override
public void close() {
mask.close();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,28 @@ $if(BytesRef)$
public OrdinalBytesRefBlock asOrdinals() {
return null;
}

$elseif(boolean)$
@Override
public ToMask toMask() {
if (getPositionCount() == 0) {
return new ToMask(blockFactory().newConstantBooleanVector(false, 0), false);
}
try (BooleanVector.FixedBuilder builder = blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) {
boolean hasMv = false;
for (int p = 0; p < getPositionCount(); p++) {
builder.appendBoolean(switch (getValueCount(p)) {
case 0 -> false;
case 1 -> getBoolean(getFirstValueIndex(p));
default -> {
hasMv = true;
yield false;
}
});
}
return new ToMask(builder.build(), hasMv);
}
}
$endif$

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,29 @@ public final class $Type$BigArrayBlock extends AbstractArrayBlock implements $Ty
return null;
}

$if(boolean)$
@Override
public ToMask toMask() {
if (getPositionCount() == 0) {
return new ToMask(blockFactory().newConstantBooleanVector(false, 0), false);
}
try (BooleanVector.FixedBuilder builder = blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) {
boolean hasMv = false;
for (int p = 0; p < getPositionCount(); p++) {
builder.appendBoolean(switch (getValueCount(p)) {
case 0 -> false;
case 1 -> getBoolean(getFirstValueIndex(p));
default -> {
hasMv = true;
yield false;
}
});
}
return new ToMask(builder.build(), hasMv);
}
}
$endif$

@Override
public $type$ get$Type$(int valueIndex) {
return vector.get$Type$(valueIndex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,16 @@ $if(BytesRef)$
* returns null. Callers must not release the returned block as no extra reference is retained by this method.
*/
OrdinalBytesRefBlock asOrdinals();
$endif$

$elseif(boolean)$
/**
* Convert this to a {@link BooleanVector "mask"} that's appropriate for
* passing to {@link #keepMask}. Null and multivalued positions will be
* converted to {@code false}.
*/
ToMask toMask();

$endif$
@Override
$Type$Block filter(int... positions);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ $if(BytesRef)$
* returns null. Callers must not release the returned vector as no extra reference is retained by this method.
*/
OrdinalBytesRefVector asOrdinals();
$endif$

$endif$
@Override
$Type$Vector filter(int... positions);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,15 @@ $if(BytesRef)$
return null;
}
}
$endif$

$elseif(boolean)$
@Override
public ToMask toMask() {
vector.incRef();
return new ToMask(vector, false);
}

$endif$
@Override
$if(BytesRef)$
public BytesRef getBytesRef(int valueIndex, BytesRef dest) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,12 @@ public void testBooleanBlock() {
}
assertLookup(block, positions(blockFactory, positionCount + 1000), singletonList(null));
assertEmptyLookup(blockFactory, block);
try (ToMask mask = block.toMask()) {
assertThat(mask.hadMultivaluedFields(), equalTo(false));
for (int p = 0; p < positionCount; p++) {
assertThat(mask.mask().getBoolean(p), equalTo(p % 10 == 0));
}
}

try (BooleanBlock.Builder blockBuilder = blockFactory.newBooleanBlockBuilder(1)) {
BooleanBlock copy = blockBuilder.copyFrom(block, 0, block.getPositionCount()).build();
Expand All @@ -826,6 +832,7 @@ public void testBooleanBlock() {
IntStream.range(0, positionCount).mapToObj(ii -> randomBoolean()).forEach(vectorBuilder::appendBoolean);
BooleanVector vector = vectorBuilder.build();
assertSingleValueDenseBlock(vector.asBlock());
assertToMask(vector);
releaseAndAssertBreaker(vector.asBlock());
}
}
Expand Down Expand Up @@ -1358,6 +1365,19 @@ <B extends Block, BB extends Block.Builder, T> void assertNullValues(
assertTrue(block.isNull(randomNullPosition));
assertFalse(block.isNull(randomNonNullPosition));
releaseAndAssertBreaker(block);
if (block instanceof BooleanBlock bb) {
try (ToMask mask = bb.toMask()) {
assertThat(mask.hadMultivaluedFields(), equalTo(false));
for (int p = 0; p < positionCount; p++) {
assertThat(mask.mask().getBoolean(p), equalTo(nullsMask.get(p) == false && p % 10 == 0));
}
}
}
}

void assertZeroPositionsAndRelease(BooleanBlock block) {
assertToMaskZeroPositions(block);
assertZeroPositionsAndRelease((Block) block);
}

void assertZeroPositionsAndRelease(Block block) {
Expand All @@ -1366,6 +1386,11 @@ void assertZeroPositionsAndRelease(Block block) {
releaseAndAssertBreaker(block);
}

void assertZeroPositionsAndRelease(BooleanVector vector) {
assertToMask(vector);
assertZeroPositionsAndRelease((Vector) vector);
}

void assertZeroPositionsAndRelease(Vector vector) {
assertThat(vector.getPositionCount(), is(0));
assertKeepMaskEmpty(vector);
Expand All @@ -1386,6 +1411,20 @@ static void assertKeepMaskEmpty(Vector vector) {
}
}

static void assertToMaskZeroPositions(BooleanBlock block) {
try (ToMask mask = block.toMask()) {
assertThat(mask.mask().getPositionCount(), equalTo(0));
assertThat(mask.hadMultivaluedFields(), equalTo(false));
}
}

static void assertToMask(BooleanVector vector) {
try (ToMask mask = vector.asBlock().toMask()) {
assertThat(mask.mask(), sameInstance(vector));
assertThat(mask.hadMultivaluedFields(), equalTo(false));
}
}

void releaseAndAssertBreaker(Block... blocks) {
assertThat(breaker.getUsed(), greaterThan(0L));
Page[] pages = Arrays.stream(blocks).map(Page::new).toArray(Page[]::new);
Expand Down Expand Up @@ -1836,7 +1875,7 @@ static void assertKeepMask(Block block) {
/**
* Build a random valid "mask" of single valued boolean fields that.
*/
private static BooleanVector randomMask(int positions) {
static BooleanVector randomMask(int positions) {
try (BooleanVector.Builder builder = TestBlockFactory.getNonBreakingInstance().newBooleanVectorFixedBuilder(positions)) {
for (int i = 0; i < positions; i++) {
builder.appendBoolean(randomBoolean());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,12 @@ public void testBooleanVector() throws IOException {
assertThat(block.getBoolean(i), equalTo(elements[i]));
}
assertKeepMask(block);
try (ToMask mask = block.toMask()) {
assertThat(mask.hadMultivaluedFields(), equalTo(false));
for (int p = 0; p < elements.length; p++) {
assertThat(mask.mask().getBoolean(p), equalTo(elements[p]));
}
}
try (var copy = serializeDeserializeBlock(block)) {
assertThat(copy, instanceOf(BooleanVectorBlock.class));
assertThat(block.asVector(), instanceOf(BooleanArrayVector.class));
Expand Down Expand Up @@ -224,6 +230,12 @@ public void testBooleanBlock() throws IOException {
assertThat(block.getBoolean(i), equalTo(elements[i]));
}
assertKeepMask(block);
try (ToMask mask = block.toMask()) {
assertThat(mask.hadMultivaluedFields(), equalTo(true));
for (int p = 0; p < elements.length; p++) {
assertThat(mask.mask().getBoolean(p), equalTo(false));
}
}
try (var copy = serializeDeserializeBlock(block)) {
assertThat(copy, instanceOf(BooleanArrayBlock.class));
assertNull(copy.asVector());
Expand Down Expand Up @@ -253,6 +265,12 @@ public void testBooleanBlock() throws IOException {
assertThat(block.getBoolean(i), equalTo(elements[i]));
}
assertKeepMask(block);
try (ToMask mask = block.toMask()) {
assertThat(mask.hadMultivaluedFields(), equalTo(true));
for (int p = 0; p < elements.length; p++) {
assertThat(mask.mask().getBoolean(p), equalTo(false));
}
}
try (var copy = serializeDeserializeBlock(block)) {
assertThat(copy, instanceOf(BooleanBigArrayBlock.class));
assertNull(block.asVector());
Expand All @@ -266,4 +284,52 @@ public void testBooleanBlock() throws IOException {
}
assertThat(blockFactory.breaker().getUsed(), equalTo(0L));
}

/**
* Tests a block with one value being multivalued and the rest are single valued.
*/
public void testBooleanBlockOneMv() {
int mvCount = between(2, 10);
int positionCount = randomIntBetween(1000, 5000);
blockFactory = new BlockFactory(blockFactory.breaker(), blockFactory.bigArrays(), ByteSizeValue.ofBytes(1));
try (var builder = blockFactory.newBooleanBlockBuilder(between(1, mvCount + positionCount))) {
boolean[] elements = new boolean[positionCount + mvCount];
builder.beginPositionEntry();
for (int i = 0; i < mvCount; i++) {
elements[i] = randomBoolean();
builder.appendBoolean(elements[i]);
}
builder.endPositionEntry();
for (int p = 1; p < positionCount; p++) {
elements[mvCount + p] = randomBoolean();
builder.appendBoolean(elements[mvCount + p]);
}
try (var block = builder.build()) {
assertThat(block, instanceOf(BooleanBigArrayBlock.class));
assertNull(block.asVector());
assertThat(block.getPositionCount(), equalTo(positionCount));
assertThat(block.getValueCount(0), equalTo(mvCount));
for (int i = 0; i < mvCount; i++) {
assertThat(block.getBoolean(block.getFirstValueIndex(0) + i), equalTo(elements[i]));
}
for (int p = 1; p < positionCount; p++) {
assertThat(block.getValueCount(p), equalTo(1));
assertThat(block.getBoolean(block.getFirstValueIndex(p)), equalTo(elements[mvCount + p]));
}
assertKeepMask(block);
try (ToMask mask = block.toMask()) {
/*
* NOTE: this test is customized to the layout above where we don't make
* any fields with 0 values.
*/
assertThat(mask.hadMultivaluedFields(), equalTo(true));
assertThat(mask.mask().getBoolean(0), equalTo(false));
for (int p = 1; p < positionCount; p++) {
assertThat(mask.mask().getBoolean(p), equalTo(elements[mvCount + p]));
}
}
}
}
assertThat(blockFactory.breaker().getUsed(), equalTo(0L));
}
}
Loading

0 comments on commit 459f047

Please sign in to comment.