Skip to content

Commit

Permalink
ARROW-5881: [Java] Provide functionalities to efficiently determine i…
Browse files Browse the repository at this point in the history
…f a validity buffer has completely 1 bits/0 bits

These utilities can be used to efficiently determine, for example:

- whether all values in a vector are null
- whether a vector contains no nulls
- whether a vector contains any valid element
- whether a vector contains any invalid element

Author: liyafan82 <[email protected]>

Closes #4829 from liyafan82/fly_0709_nullbit and squashes the following commits:

1762951 <liyafan82>  Merge methods and change method name
0dc0045 <liyafan82>  Do boundary check once at the beginning
c57cb6d <liyafan82>  Provide benchmark for allBitsNull
3deedaf <liyafan82>  Provide functionalities to efficiently determine if a validity buffer has completely 1 bits/0 bits
  • Loading branch information
liyafan82 authored and Pindikura Ravindra committed Jul 12, 2019
1 parent 4221db9 commit 5c61263
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ public class BitVectorHelperBenchmarks {

private ArrowBuf validityBuffer;

private ArrowBuf oneBitValidityBuffer;

/**
* Setup benchmarks.
*/
Expand All @@ -65,6 +67,11 @@ public void prepare() {
BitVectorHelper.setValidityBit(validityBuffer, i, (byte) 0);
}
}

// only one 1 bit in the middle of the buffer
oneBitValidityBuffer = allocator.buffer(VALIDITY_BUFFER_CAPACITY / 8);
oneBitValidityBuffer.setZero(0, VALIDITY_BUFFER_CAPACITY / 8);
BitVectorHelper.setValidityBit(oneBitValidityBuffer, VALIDITY_BUFFER_CAPACITY / 2, (byte) 1);
}

/**
Expand All @@ -73,6 +80,7 @@ public void prepare() {
@TearDown
public void tearDown() {
// Release the two validity buffers first, then the allocator.
// oneBitValidityBuffer is obtained from allocator.buffer(...) in prepare();
// NOTE(review): validityBuffer is presumably allocated the same way — its allocation is not
// visible in this hunk — and the allocator presumably must be closed last; confirm.
validityBuffer.close();
oneBitValidityBuffer.close();
allocator.close();
}

Expand All @@ -83,6 +91,13 @@ public int getNullCountBenchmark() {
return BitVectorHelper.getNullCount(validityBuffer, VALIDITY_BUFFER_CAPACITY);
}

/**
 * Benchmarks the "all bits are null (0)" check on a buffer that is zero everywhere except for a
 * single 1 bit in the middle.
 *
 * <p>The check must be for 0 bits ({@code checkOneBits = false}) so that the scan has to walk
 * half of the buffer before it hits the 1 bit. Checking for 1 bits instead would fail on the
 * very first word of the all-zero prefix and the benchmark would measure no scanning at all.
 *
 * @return the result of the check (false for this buffer); returned so the call has an
 *     observable result.
 */
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public boolean allBitsNullBenchmark() {
  return BitVectorHelper.checkAllBitsEqualTo(oneBitValidityBuffer, VALIDITY_BUFFER_CAPACITY, false);
}

//@Test
public static void main(String [] args) throws RunnerException {
Options opt = new OptionsBuilder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@

package org.apache.arrow.vector;

import static io.netty.util.internal.PlatformDependent.getByte;
import static io.netty.util.internal.PlatformDependent.getInt;
import static io.netty.util.internal.PlatformDependent.getLong;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.util.DataSizeRoundingUtil;
import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
Expand Down Expand Up @@ -179,6 +183,74 @@ public static int getNullCount(final ArrowBuf validityBuffer, final int valueCou
return 8 * sizeInBytes - count;
}

/**
 * Tests whether the first {@code valueCount} bits of a validity buffer are all equal to 1 or all
 * equal to 0, as selected by {@code checkOneBits}.
 *
 * <p>The fully used bytes are compared 8 bytes at a time, then 4 bytes at a time, then byte by
 * byte. If {@code valueCount} is not a multiple of 8, only the low {@code valueCount % 8} bits
 * of the last byte are compared; the remaining bits of that byte are ignored.
 *
 * @param validityBuffer the validity buffer.
 * @param valueCount the number of bits to check.
 * @param checkOneBits if set to true, the method checks if all bits are equal to 1;
 *     otherwise, it checks if all bits are equal to 0.
 * @return true if all checked bits have the requested value (always true when
 *     {@code valueCount} is 0), and false otherwise.
 */
public static boolean checkAllBitsEqualTo(
    final ArrowBuf validityBuffer, final int valueCount, final boolean checkOneBits) {
  if (valueCount == 0) {
    return true;
  }
  final int sizeInBytes = getValidityBufferSize(valueCount);

  // Boundary check, done once up front: the reads below go through raw memory addresses
  // and are not range-checked individually by this method.
  validityBuffer.checkBytes(0, sizeInBytes);

  // If value count is not a multiple of 8, the last byte is only partially used.
  final int remainder = valueCount % 8;
  final int fullBytesCount = remainder == 0 ? sizeInBytes : sizeInBytes - 1;

  // Expected pattern at each width: all ones (-1) or all zeros.
  final int intToCompare = checkOneBits ? -1 : 0;
  final long longToCompare = intToCompare; // sign extension keeps the all-ones pattern
  final byte byteToCompare = (byte) intToCompare;

  // The base address does not change during the scan, so read it once.
  final long baseAddress = validityBuffer.memoryAddress();

  int index = 0;
  while (index + 8 <= fullBytesCount) {
    if (getLong(baseAddress + index) != longToCompare) {
      return false;
    }
    index += 8;
  }

  while (index + 4 <= fullBytesCount) {
    if (getInt(baseAddress + index) != intToCompare) {
      return false;
    }
    index += 4;
  }

  while (index < fullBytesCount) {
    if (getByte(baseAddress + index) != byteToCompare) {
      return false;
    }
    index += 1;
  }

  // Handle the used bits of the trailing partial byte.
  if (remainder != 0) {
    // remainder is in 1..7 here, so the mask is in 1..127 and selects only the used bits.
    final int mask = (1 << remainder) - 1;
    final int lastBits = getByte(baseAddress + sizeInBytes - 1) & mask;
    final int expectedBits = checkOneBits ? mask : 0;
    if (lastBits != expectedBits) {
      return false;
    }
  }
  return true;
}

/** Returns the byte at index from data right-shifted by offset. */
public static byte getBitsFromCurrentByte(final ArrowBuf data, final int index, final int offset) {
return (byte) ((data.getByte(index) & 0xFF) >>> offset);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.BitVectorHelper;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.types.pojo.FieldType;
Expand Down Expand Up @@ -139,12 +140,7 @@ public <T extends FieldVector> T addOrGet(String childName, FieldType fieldType,
}

// Reports whether every value of the given vector is null.
// NOTE(review): this hunk is rendered without +/- markers. Going by the hunk header
// (-139,12 +140,7), the loop and "return true;" below are the six lines removed by the commit,
// and the single BitVectorHelper call is the line that replaces them.
private boolean nullFilled(ValueVector vector) {
// Removed implementation: per-element scan that stops at the first non-null value.
for (int r = 0; r < vector.getValueCount(); r++) {
if (!vector.isNull(r)) {
return false;
}
}
return true;
// Added implementation: checks that the first getValueCount() bits of the vector's validity
// buffer are all 0 (checkOneBits = false).
return BitVectorHelper.checkAllBitsEqualTo(vector.getValidityBuffer(), vector.getValueCount(), false);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,16 @@
package org.apache.arrow.vector;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.jupiter.api.Assertions.assertFalse;

import org.apache.arrow.memory.ReferenceManager;
import org.apache.arrow.memory.RootAllocator;
import org.junit.Test;

import io.netty.buffer.ArrowBuf;
import io.netty.buffer.PooledByteBufAllocatorL;
import io.netty.util.internal.PlatformDependent;

public class TestBitVectorHelper {
@Test
Expand Down Expand Up @@ -63,4 +67,86 @@ public void testGetNullCount() throws Exception {
count = BitVectorHelper.getNullCount(validityBuffer, 11);
assertEquals(count, 5);
}

@Test
public void testAllBitsNull() {
  final int bufferLength = 32 * 1024;
  try (RootAllocator allocator = new RootAllocator(bufferLength);
      ArrowBuf validityBuffer = allocator.buffer(bufferLength)) {

    // An all-zero buffer passes for a byte-aligned and for a non-aligned bit length.
    validityBuffer.setZero(0, bufferLength);
    assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, 1024, false));
    assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, 1027, false));

    // Each row: the bit length to check, followed by the bit positions to set to 1 beforehand.
    final int[][] failingCases = {
        {1025, 12},
        {1025, 1024},
        {1026, 1024},
        {1027, 1025},
        {1031, 1029, 1030},
    };

    for (final int[] failingCase : failingCases) {
      // Start every case from a fully cleared buffer.
      validityBuffer.setZero(0, bufferLength);
      final int bitLength = failingCase[0];
      for (int i = 1; i < failingCase.length; i++) {
        BitVectorHelper.setValidityBit(validityBuffer, failingCase[i], 1);
      }
      assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false));
    }
  }
}

@Test
public void testAllBitsSet() {
  final int bufferLength = 32 * 1024;
  try (RootAllocator allocator = new RootAllocator(bufferLength);
      ArrowBuf validityBuffer = allocator.buffer(bufferLength)) {

    // A buffer filled with 1 bits passes for a byte-aligned and for a non-aligned bit length.
    PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1);
    assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, 1024, true));
    assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, 1028, true));

    // Each row: the bit length to check, followed by the bit positions to clear to 0 beforehand.
    final int[][] failingCases = {
        {1025, 12},
        {1025, 1024},
        {1026, 1024},
        {1027, 1025},
        {1031, 1029, 1030},
    };

    for (final int[] failingCase : failingCases) {
      // Start every case from a buffer with all bits set.
      PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1);
      final int bitLength = failingCase[0];
      for (int i = 1; i < failingCase.length; i++) {
        BitVectorHelper.setValidityBit(validityBuffer, failingCase[i], 0);
      }
      assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true));
    }
  }
}
}

0 comments on commit 5c61263

Please sign in to comment.