Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix writing of parquet bloom filters #23604

Merged
merged 3 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import java.util.Optional;
import java.util.OptionalInt;

import static com.google.common.base.Preconditions.checkArgument;
import static java.util.Objects.requireNonNull;

public interface ColumnWriter
Expand Down Expand Up @@ -51,7 +50,6 @@ public BufferData(List<ParquetDataOutput> data, OptionalInt dictionaryPageSize,
this.dictionaryPageSize = requireNonNull(dictionaryPageSize, "dictionaryPageSize is null");
this.bloomFilter = requireNonNull(bloomFilter, "bloomFilter is null");
this.metaData = requireNonNull(metaData, "metaData is null");
checkArgument(dictionaryPageSize.isEmpty() || bloomFilter.isEmpty(), "dictionaryPagesSize and bloomFilter cannot both be set");
}

public ColumnMetaData getMetaData()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.parquet.bytes.BytesInput;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.EncodingStats;
import org.apache.parquet.column.page.DictionaryPage;
import org.apache.parquet.column.statistics.Statistics;
import org.apache.parquet.column.values.bloomfilter.BloomFilter;
Expand All @@ -51,6 +52,7 @@
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.airlift.slice.SizeOf.instanceSize;
import static io.trino.parquet.ParquetMetadataConverter.convertEncodingStats;
import static io.trino.parquet.ParquetMetadataConverter.getEncoding;
import static io.trino.parquet.writer.ParquetCompressor.getCompressor;
import static io.trino.parquet.writer.ParquetDataOutput.createDataOutput;
Expand Down Expand Up @@ -183,7 +185,16 @@ public List<BufferData> getBuffer()
{
checkState(closed);
DataStreams dataStreams = getDataStreams();
return ImmutableList.of(new BufferData(dataStreams.data(), dataStreams.dictionaryPageSize(), dataStreams.bloomFilter(), getColumnMetaData()));
ColumnMetaData columnMetaData = getColumnMetaData();

EncodingStats stats = convertEncodingStats(columnMetaData.getEncoding_stats());
boolean isOnlyDictionaryEncodingPages = stats.hasDictionaryPages() && !stats.hasNonDictionaryEncodedPages();

return ImmutableList.of(new BufferData(
dataStreams.data(),
dataStreams.dictionaryPageSize(),
isOnlyDictionaryEncodingPages ? Optional.empty() : dataStreams.bloomFilter(),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can't we always have both? It seems like we should be able to perform bloom filtering even in the presence of dictionaries.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For fully dictionary encoded columns, the reader is already able to perform row-group pruning based on dictionary entries. Having a bloom filter doesn't give us anything extra.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For fully dictionary encoded columns, the reader is already able to perform row-group pruning based on dictionary entries

I thought that bloom filters could be more efficient at filtering for tactical (e.g. single value search) queries.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a lookup in a set vs. a lookup in a bloom filter. The CPU difference will be barely noticeable compared to everything else that goes on in the parquet reader. Reading a bloom filter takes extra reads from the file and potentially has false positives.

columnMetaData));
}

// Returns ColumnMetaData whose offset is invalid
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,38 +66,38 @@ private ValuesWriter getFixedLenByteArrayValuesWriter(ColumnDescriptor path, Opt

private ValuesWriter getBinaryValuesWriter(ColumnDescriptor path, Optional<BloomFilter> bloomFilter)
{
ValuesWriter fallbackWriter = createBloomFilterValuesWriter(new PlainValuesWriter(INITIAL_SLAB_SIZE, maxPageSize, new HeapByteBufferAllocator()), bloomFilter);
return dictWriterWithFallBack(path, getEncodingForDictionaryPage(), getEncodingForDataPage(), fallbackWriter);
ValuesWriter fallbackWriter = new PlainValuesWriter(INITIAL_SLAB_SIZE, maxPageSize, new HeapByteBufferAllocator());
return createBloomFilterValuesWriter(dictWriterWithFallBack(path, getEncodingForDictionaryPage(), getEncodingForDataPage(), fallbackWriter), bloomFilter);
}

private ValuesWriter getInt32ValuesWriter(ColumnDescriptor path, Optional<BloomFilter> bloomFilter)
{
ValuesWriter fallbackWriter = createBloomFilterValuesWriter(new PlainValuesWriter(INITIAL_SLAB_SIZE, maxPageSize, new HeapByteBufferAllocator()), bloomFilter);
return dictWriterWithFallBack(path, getEncodingForDictionaryPage(), getEncodingForDataPage(), fallbackWriter);
ValuesWriter fallbackWriter = new PlainValuesWriter(INITIAL_SLAB_SIZE, maxPageSize, new HeapByteBufferAllocator());
return createBloomFilterValuesWriter(dictWriterWithFallBack(path, getEncodingForDictionaryPage(), getEncodingForDataPage(), fallbackWriter), bloomFilter);
}

private ValuesWriter getInt64ValuesWriter(ColumnDescriptor path, Optional<BloomFilter> bloomFilter)
{
ValuesWriter fallbackWriter = createBloomFilterValuesWriter(new PlainValuesWriter(INITIAL_SLAB_SIZE, maxPageSize, new HeapByteBufferAllocator()), bloomFilter);
return dictWriterWithFallBack(path, getEncodingForDictionaryPage(), getEncodingForDataPage(), fallbackWriter);
ValuesWriter fallbackWriter = new PlainValuesWriter(INITIAL_SLAB_SIZE, maxPageSize, new HeapByteBufferAllocator());
return createBloomFilterValuesWriter(dictWriterWithFallBack(path, getEncodingForDictionaryPage(), getEncodingForDataPage(), fallbackWriter), bloomFilter);
}

private ValuesWriter getInt96ValuesWriter(ColumnDescriptor path, Optional<BloomFilter> bloomFilter)
{
ValuesWriter fallbackWriter = createBloomFilterValuesWriter(new FixedLenByteArrayPlainValuesWriter(12, INITIAL_SLAB_SIZE, maxPageSize, new HeapByteBufferAllocator()), bloomFilter);
return dictWriterWithFallBack(path, getEncodingForDictionaryPage(), getEncodingForDataPage(), fallbackWriter);
ValuesWriter fallbackWriter = new FixedLenByteArrayPlainValuesWriter(12, INITIAL_SLAB_SIZE, maxPageSize, new HeapByteBufferAllocator());
return createBloomFilterValuesWriter(dictWriterWithFallBack(path, getEncodingForDictionaryPage(), getEncodingForDataPage(), fallbackWriter), bloomFilter);
}

private ValuesWriter getDoubleValuesWriter(ColumnDescriptor path, Optional<BloomFilter> bloomFilter)
{
ValuesWriter fallbackWriter = createBloomFilterValuesWriter(new PlainValuesWriter(INITIAL_SLAB_SIZE, maxPageSize, new HeapByteBufferAllocator()), bloomFilter);
return dictWriterWithFallBack(path, getEncodingForDictionaryPage(), getEncodingForDataPage(), fallbackWriter);
ValuesWriter fallbackWriter = new PlainValuesWriter(INITIAL_SLAB_SIZE, maxPageSize, new HeapByteBufferAllocator());
return createBloomFilterValuesWriter(dictWriterWithFallBack(path, getEncodingForDictionaryPage(), getEncodingForDataPage(), fallbackWriter), bloomFilter);
}

private ValuesWriter getFloatValuesWriter(ColumnDescriptor path, Optional<BloomFilter> bloomFilter)
{
ValuesWriter fallbackWriter = createBloomFilterValuesWriter(new PlainValuesWriter(INITIAL_SLAB_SIZE, maxPageSize, new HeapByteBufferAllocator()), bloomFilter);
return dictWriterWithFallBack(path, getEncodingForDictionaryPage(), getEncodingForDataPage(), fallbackWriter);
ValuesWriter fallbackWriter = new PlainValuesWriter(INITIAL_SLAB_SIZE, maxPageSize, new HeapByteBufferAllocator());
return createBloomFilterValuesWriter(dictWriterWithFallBack(path, getEncodingForDictionaryPage(), getEncodingForDataPage(), fallbackWriter), bloomFilter);
}

@SuppressWarnings("deprecation")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public abstract class AbstractColumnWriterBenchmark
@Param({
"1", "1048576" // 1MB is default page size
})
public int dictionaryPageSize;
public int maxDictionaryPageSize;

public enum BloomFilterType
{
Expand Down Expand Up @@ -94,7 +94,7 @@ Optional<BloomFilter> getBloomFilter()

private PrimitiveValueWriter createValuesWriter()
{
TrinoValuesWriterFactory valuesWriterFactory = new TrinoValuesWriterFactory(1024 * 1024, dictionaryPageSize);
TrinoValuesWriterFactory valuesWriterFactory = new TrinoValuesWriterFactory(1024 * 1024, maxDictionaryPageSize);
ColumnDescriptor columnDescriptor = new ColumnDescriptor(new String[] {"test"}, getParquetType(), 0, 0);
return getValueWriter(valuesWriterFactory.newValuesWriter(columnDescriptor, bloomFilterType.getBloomFilter()), getTrinoType(), columnDescriptor.getPrimitiveType(), Optional.empty());
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.parquet.writer;

import com.google.common.collect.ImmutableList;
import org.junit.jupiter.api.Test;

import java.io.IOException;

/**
 * Runs the column writer benchmarks once for every combination of their
 * parameters, so that each configuration is executed at least once as a
 * regular test.
 */
final class TestColumnWriterBenchmark
{
    // Dictionary page size limits applied to every benchmark configuration
    private static final ImmutableList<Integer> MAX_DICTIONARY_PAGE_SIZES = ImmutableList.of(1, 1048576);

    /**
     * Executes the long column writer benchmark for bit widths 1, 5, 9, ... 61,
     * every bloom filter type and every dictionary page size limit.
     */
    @Test
    void testLongColumnWriterBenchmark()
            throws IOException
    {
        for (int width = 1; width <= 64; width += 4) {
            for (AbstractColumnWriterBenchmark.BloomFilterType filterType : AbstractColumnWriterBenchmark.BloomFilterType.values()) {
                for (int pageSizeLimit : MAX_DICTIONARY_PAGE_SIZES) {
                    runLongColumnWriter(width, filterType, pageSizeLimit);
                }
            }
        }
    }

    /**
     * Executes the binary column writer benchmark for every field type,
     * position length, bloom filter type and dictionary page size limit.
     */
    @Test
    void testBinaryColumnWriterBenchmark()
            throws IOException
    {
        for (BenchmarkBinaryColumnWriter.FieldType field : BenchmarkBinaryColumnWriter.FieldType.values()) {
            for (BenchmarkBinaryColumnWriter.PositionLength length : BenchmarkBinaryColumnWriter.PositionLength.values()) {
                for (AbstractColumnWriterBenchmark.BloomFilterType filterType : AbstractColumnWriterBenchmark.BloomFilterType.values()) {
                    for (int pageSizeLimit : MAX_DICTIONARY_PAGE_SIZES) {
                        runBinaryColumnWriter(field, length, filterType, pageSizeLimit);
                    }
                }
            }
        }
    }

    // Configures a fresh long column writer benchmark, then sets it up and performs a single write
    private static void runLongColumnWriter(int width, AbstractColumnWriterBenchmark.BloomFilterType filterType, int pageSizeLimit)
            throws IOException
    {
        BenchmarkLongColumnWriter longWriter = new BenchmarkLongColumnWriter();
        longWriter.bitWidth = width;
        longWriter.bloomFilterType = filterType;
        longWriter.maxDictionaryPageSize = pageSizeLimit;
        longWriter.setup();
        longWriter.write();
    }

    // Configures a fresh binary column writer benchmark, then sets it up and performs a single write
    private static void runBinaryColumnWriter(
            BenchmarkBinaryColumnWriter.FieldType field,
            BenchmarkBinaryColumnWriter.PositionLength length,
            AbstractColumnWriterBenchmark.BloomFilterType filterType,
            int pageSizeLimit)
            throws IOException
    {
        BenchmarkBinaryColumnWriter binaryWriter = new BenchmarkBinaryColumnWriter();
        binaryWriter.type = field;
        binaryWriter.positionLength = length;
        binaryWriter.bloomFilterType = filterType;
        binaryWriter.maxDictionaryPageSize = pageSizeLimit;
        binaryWriter.setup();
        binaryWriter.write();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
import static com.google.common.collect.Iterables.getOnlyElement;
import static io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext;
import static io.trino.operator.scalar.CharacterStringCasts.varcharToVarcharSaturatedFloorCast;
import static io.trino.parquet.BloomFilterStore.hasBloomFilter;
import static io.trino.parquet.ParquetCompressionUtils.decompress;
import static io.trino.parquet.ParquetTestUtils.createParquetReader;
import static io.trino.parquet.ParquetTestUtils.createParquetWriter;
Expand Down Expand Up @@ -354,6 +355,10 @@ public void testDictionaryPageOffset()
assertThat(chunkMetaData.getDictionaryPageOffset()).isGreaterThan(0);
int dictionaryPageSize = toIntExact(chunkMetaData.getFirstDataPageOffset() - chunkMetaData.getDictionaryPageOffset());
assertThat(dictionaryPageSize).isGreaterThan(0);
assertThat(chunkMetaData.getEncodingStats().hasDictionaryPages()).isTrue();
assertThat(chunkMetaData.getEncodingStats().hasDictionaryEncodedPages()).isTrue();
assertThat(chunkMetaData.getEncodingStats().hasNonDictionaryEncodedPages()).isFalse();
assertThat(hasBloomFilter(chunkMetaData)).isFalse();

// verify reading dictionary page
SliceInput inputStream = dataSource.readFully(chunkMetaData.getStartingPos(), dictionaryPageSize).getInput();
Expand All @@ -379,9 +384,8 @@ public void testDictionaryPageOffset()
public void testWriteBloomFilters(Type type, List<?> data)
throws IOException
{
String columnName = "column";
List<String> columnNames = ImmutableList.of(columnName);
List<Type> types = ImmutableList.of(type);
List<String> columnNames = ImmutableList.of("columnA", "columnB");
List<Type> types = ImmutableList.of(type, type);
ParquetDataSource dataSource = new TestingParquetDataSource(
writeParquetFile(
ParquetWriterOptions.builder()
Expand All @@ -397,17 +401,25 @@ public void testWriteBloomFilters(Type type, List<?> data)
// Check that bloom filters are right after each other
int bloomFilterSize = Integer.highestOneBit(BlockSplitBloomFilter.optimalNumOfBits(BLOOM_FILTER_EXPECTED_ENTRIES, DEFAULT_BLOOM_FILTER_FPP) / 8) << 1;
for (BlockMetadata block : parquetMetadata.getBlocks()) {
for (int i = 1; i < block.columns().size(); i++) {
assertThat(block.columns().get(i - 1).getBloomFilterOffset() + bloomFilterSize + 17) // + 17 bytes for Bloom filter metadata
.isEqualTo(block.columns().get(i).getBloomFilterOffset());
for (int i = 0; i < block.columns().size(); i++) {
ColumnChunkMetadata chunkMetaData = block.columns().get(i);
assertThat(hasBloomFilter(chunkMetaData)).isTrue();
assertThat(chunkMetaData.getEncodingStats().hasDictionaryPages()).isFalse();
assertThat(chunkMetaData.getEncodingStats().hasDictionaryEncodedPages()).isFalse();
assertThat(chunkMetaData.getEncodingStats().hasNonDictionaryEncodedPages()).isTrue();

if (i < block.columns().size() - 1) {
assertThat(chunkMetaData.getBloomFilterOffset() + bloomFilterSize + 17) // + 17 bytes for Bloom filter metadata
.isEqualTo(block.columns().get(i + 1).getBloomFilterOffset());
}
}
}
int rowGroupCount = parquetMetadata.getBlocks().size();
assertThat(rowGroupCount).isGreaterThanOrEqualTo(2);

TupleDomain<String> predicate = TupleDomain.withColumnDomains(
ImmutableMap.of(
columnName, Domain.singleValue(type, data.get(data.size() / 2))));
"columnA", Domain.singleValue(type, data.get(data.size() / 2))));
try (ParquetReader reader = createParquetReader(dataSource, parquetMetadata, new ParquetReaderOptions().withBloomFilter(true), newSimpleAggregatedMemoryContext(), types, columnNames, predicate)) {
Page page = reader.nextPage();
int rowsRead = 0;
Expand All @@ -429,6 +441,36 @@ public void testWriteBloomFilters(Type type, List<?> data)
}
}

/**
 * Writes a single BIGINT column with a bloom filter enabled and asserts that
 * the resulting column chunk reports a dictionary page, both dictionary
 * encoded and non-dictionary encoded data pages, and a bloom filter.
 */
@Test
void testBloomFilterWithDictionaryFallback()
        throws IOException
{
    List<String> columnNames = ImmutableList.of("column");
    List<Type> types = ImmutableList.of(BIGINT);

    ParquetWriterOptions writerOptions = ParquetWriterOptions.builder()
            .setMaxPageValueCount(200)
            .setBloomFilterColumns(ImmutableSet.copyOf(columnNames))
            .build();

    // A few small pages first, followed by enough values to go past the
    // maximum dictionary page size, which is 1024 * 1024
    ImmutableList<io.trino.spi.Page> inputPages = ImmutableList.<io.trino.spi.Page>builder()
            .addAll(generateInputPages(types, 10, 10))
            .addAll(generateInputPages(types, 200, shuffle(new Random(42), (1024 * 1025) / Long.BYTES)))
            .build();

    ParquetDataSource dataSource = new TestingParquetDataSource(
            writeParquetFile(writerOptions, types, columnNames, inputPages),
            new ParquetReaderOptions());

    // The file is expected to hold exactly one row group with exactly one column chunk
    ParquetMetadata footer = MetadataReader.readFooter(dataSource, Optional.empty());
    BlockMetadata rowGroup = getOnlyElement(footer.getBlocks());
    ColumnChunkMetadata columnChunk = getOnlyElement(rowGroup.columns());

    assertThat(columnChunk.getEncodingStats().hasDictionaryPages()).isTrue();
    assertThat(columnChunk.getEncodingStats().hasDictionaryEncodedPages()).isTrue();
    assertThat(columnChunk.getEncodingStats().hasNonDictionaryEncodedPages()).isTrue();
    assertThat(hasBloomFilter(columnChunk)).isTrue();
}

public static Stream<Arguments> testWriteBloomFiltersParams()
{
int size = 2000;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,11 +176,11 @@ private void validateFallbackWriter(ValuesWriter writer, Class<? extends ValuesW

private void validateFallbackWriterBloomFilter(ValuesWriter writer, Class<? extends ValuesWriter> initialWriterClass, Class<? extends ValuesWriter> fallbackWriterClass)
{
validateWriterType(writer, DictionaryFallbackValuesWriter.class);
validateWriterType(writer, BloomFilterValuesWriter.class);

DictionaryFallbackValuesWriter fallbackValuesWriter = (DictionaryFallbackValuesWriter) writer;
BloomFilterValuesWriter bloomFilterValuesWriter = (BloomFilterValuesWriter) writer;
DictionaryFallbackValuesWriter fallbackValuesWriter = (DictionaryFallbackValuesWriter) bloomFilterValuesWriter.getWriter();
validateWriterType(fallbackValuesWriter.getInitialWriter(), initialWriterClass);
BloomFilterValuesWriter bloomFilterValuesWriter = (BloomFilterValuesWriter) fallbackValuesWriter.getFallBackWriter();
validateWriterType(bloomFilterValuesWriter.getWriter(), fallbackWriterClass);
validateWriterType(fallbackValuesWriter.getFallBackWriter(), fallbackWriterClass);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
*/
package io.trino.testing;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import io.trino.Session;
import io.trino.spi.connector.CatalogSchemaTableName;
Expand Down Expand Up @@ -51,6 +52,12 @@ public void testBloomFilterRowGroupPruning()
testBloomFilterRowGroupPruning(tableName, COLUMN_NAME);
}

// Calls createParquetTableWithBloomFilter for COLUMN_NAME with the values (1, 1).
// The test has no assertions of its own: it passes when the helper completes
// without throwing.
// NOTE(review): presumably the repeated value is what makes the column chunk
// dictionary encoded, as the test name suggests - confirm against the helper.
@Test
void testBloomFilterColumnWithDictionaryPage()
{
// The helper's return value (if any) is deliberately not used here
createParquetTableWithBloomFilter(COLUMN_NAME, ImmutableList.of(1, 1));
}

protected void testBloomFilterRowGroupPruning(CatalogSchemaTableName tableName, String columnName)
{
// assert table is populated with data
Expand Down
Loading