From a64e89eb57454f89c834d59dc8a8aa24dff4d781 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Mon, 26 Apr 2021 22:15:52 +0200 Subject: [PATCH 1/4] Add a test for BenchmarkSortedRangeSet --- .../predicate/BenchmarkSortedRangeSet.java | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/core/trino-spi/src/test/java/io/trino/spi/predicate/BenchmarkSortedRangeSet.java b/core/trino-spi/src/test/java/io/trino/spi/predicate/BenchmarkSortedRangeSet.java index 518149f9b4b7b..cc582962a6f24 100644 --- a/core/trino-spi/src/test/java/io/trino/spi/predicate/BenchmarkSortedRangeSet.java +++ b/core/trino-spi/src/test/java/io/trino/spi/predicate/BenchmarkSortedRangeSet.java @@ -29,6 +29,7 @@ import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; import org.openjdk.jmh.runner.options.VerboseMode; +import org.testng.annotations.Test; import java.util.ArrayList; import java.util.List; @@ -244,6 +245,33 @@ private SortedRangeSet generateRangeSet(int size) } } + @Test + public void test() + { + Data data = new Data(); + data.init(); + + benchmarkBuilder(data); + + equalsSmall(data); + equalsLarge(data); + + unionSmall(data); + unionLarge(data); + + overlapsSmall(data); + overlapsLarge(data); + + containsValueSmall(data); + containsValueLarge(data); + + complementSmall(data); + complementLarge(data); + + getOrderedRangesSmall(data); + getOrderedRangesLarge(data); + } + public static void main(String[] args) throws RunnerException { From 0cd7be48d10ef1742862c7eadb2b93789081f49e Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Mon, 26 Apr 2021 22:30:21 +0200 Subject: [PATCH 2/4] Benchmark Parquet dictionary to Domain conversion --- .../BenchmarkTupleDomainParquetPredicate.java | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 lib/trino-parquet/src/test/java/io/trino/parquet/predicate/BenchmarkTupleDomainParquetPredicate.java diff --git 
a/lib/trino-parquet/src/test/java/io/trino/parquet/predicate/BenchmarkTupleDomainParquetPredicate.java b/lib/trino-parquet/src/test/java/io/trino/parquet/predicate/BenchmarkTupleDomainParquetPredicate.java new file mode 100644 index 0000000000000..e91ee9b5a9855 --- /dev/null +++ b/lib/trino-parquet/src/test/java/io/trino/parquet/predicate/BenchmarkTupleDomainParquetPredicate.java @@ -0,0 +1,126 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.parquet.predicate; + +import io.airlift.slice.DynamicSliceOutput; +import io.airlift.slice.Slice; +import io.trino.parquet.DictionaryPage; +import io.trino.parquet.ParquetEncoding; +import io.trino.spi.predicate.Domain; +import org.apache.parquet.column.ColumnDescriptor; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; +import 
org.openjdk.jmh.runner.options.VerboseMode; +import org.testng.annotations.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; + +import static io.trino.spi.type.BigintType.BIGINT; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64; + +@Fork(1) +@Warmup(iterations = 5) +@Measurement(iterations = 10) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@BenchmarkMode(Mode.AverageTime) +public class BenchmarkTupleDomainParquetPredicate +{ + @Benchmark + public List domainFromDictionary(Data data) + { + List result = new ArrayList<>(data.bigintDictionaries.size()); + for (DictionaryDescriptor dictionary : data.bigintDictionaries) { + result.add(TupleDomainParquetPredicate.getDomain(BIGINT, dictionary)); + } + return result; + } + + @State(Scope.Thread) + public static class Data + { + public List bigintDictionaries; + + @Setup(Level.Iteration) + public void init() + { + bigintDictionaries = new ArrayList<>(); + + for (int i = 0; i < 1_000; i++) { + bigintDictionaries.add(createBigintDictionary()); + } + } + + private DictionaryDescriptor createBigintDictionary() + { + int size = 1_000; + Slice slice; + try (DynamicSliceOutput sliceOutput = new DynamicSliceOutput(0)) { + for (int i = 0; i < size; i++) { + sliceOutput.appendLong(ThreadLocalRandom.current().nextLong()); + } + slice = sliceOutput.slice(); + } + catch (IOException e) { + throw new RuntimeException(e); + } + + return new DictionaryDescriptor( + new ColumnDescriptor(new String[] {"path"}, INT64, 0, 0), + Optional.of( + new DictionaryPage( + slice, + slice.length(), + size, + ParquetEncoding.PLAIN))); + } + } + + @Test + public void test() + { + Data data = new Data(); + data.init(); + + domainFromDictionary(data); + } + + public static void main(String[] args) + throws RunnerException + { + Options options = new OptionsBuilder() + 
.verbosity(VerboseMode.NORMAL) + .include(".*" + BenchmarkTupleDomainParquetPredicate.class.getSimpleName() + ".*") + .build(); + + new Runner(options).run(); + } +} From a2bcebc576735791e511e851e0027687185d2ebd Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Mon, 26 Apr 2021 22:36:31 +0200 Subject: [PATCH 3/4] Fix generics in declaration of ValueSet.copyOf For example when providing values to a `BIGINT`-based `ValueSet`, one should be able to pass a `List<Long>` as values. --- .../src/main/java/io/trino/spi/predicate/EquatableValueSet.java | 2 +- .../src/main/java/io/trino/spi/predicate/ValueSet.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/trino-spi/src/main/java/io/trino/spi/predicate/EquatableValueSet.java b/core/trino-spi/src/main/java/io/trino/spi/predicate/EquatableValueSet.java index 73152d74b93a5..992956a1535d2 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/predicate/EquatableValueSet.java +++ b/core/trino-spi/src/main/java/io/trino/spi/predicate/EquatableValueSet.java @@ -99,7 +99,7 @@ static EquatableValueSet of(Type type, Object first, Object... rest) return new EquatableValueSet(type, true, set); } - static EquatableValueSet copyOf(Type type, Collection values) + static EquatableValueSet copyOf(Type type, Collection values) { return new EquatableValueSet(type, true, values.stream() .map(value -> ValueEntry.create(type, value)) diff --git a/core/trino-spi/src/main/java/io/trino/spi/predicate/ValueSet.java b/core/trino-spi/src/main/java/io/trino/spi/predicate/ValueSet.java index 734be317ae29c..51aa496830ed3 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/predicate/ValueSet.java +++ b/core/trino-spi/src/main/java/io/trino/spi/predicate/ValueSet.java @@ -63,7 +63,7 @@ static ValueSet of(Type type, Object first, Object... 
rest) throw new IllegalArgumentException("Cannot create discrete ValueSet with non-comparable type: " + type); } - static ValueSet copyOf(Type type, Collection values) + static ValueSet copyOf(Type type, Collection values) { if (type.isOrderable()) { return SortedRangeSet.of(type, values); From baeef04f96172a5cc8b8cfa83f98ba802808f863 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Mon, 26 Apr 2021 22:42:07 +0200 Subject: [PATCH 4/4] Decode Parquet dictionary faster This fixes a performance regression introduced by 82e8f93ab3149e56b2daed4b5102423a953e9a9f. --- .../TupleDomainParquetPredicate.java | 35 ++++++++----------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/TupleDomainParquetPredicate.java b/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/TupleDomainParquetPredicate.java index 787bf39c83569..5a005daa5f447 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/TupleDomainParquetPredicate.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/TupleDomainParquetPredicate.java @@ -308,56 +308,51 @@ public static Domain getDomain(Type type, DictionaryDescriptor dictionaryDescrip int dictionarySize = dictionaryPage.get().getDictionarySize(); if (type.equals(BIGINT) && columnDescriptor.getPrimitiveType().getPrimitiveTypeName() == PrimitiveTypeName.INT64) { - List domains = new ArrayList<>(); + List values = new ArrayList<>(dictionarySize); for (int i = 0; i < dictionarySize; i++) { - domains.add(Domain.singleValue(type, dictionary.decodeToLong(i))); + values.add(dictionary.decodeToLong(i)); } - domains.add(Domain.onlyNull(type)); - return Domain.union(domains); + return Domain.create(ValueSet.copyOf(type, values), true); } if ((type.equals(BIGINT) || type.equals(DATE)) && columnDescriptor.getPrimitiveType().getPrimitiveTypeName() == PrimitiveTypeName.INT32) { - List domains = new ArrayList<>(); + List values = new 
ArrayList<>(dictionarySize); for (int i = 0; i < dictionarySize; i++) { - domains.add(Domain.singleValue(type, (long) dictionary.decodeToInt(i))); + values.add((long) dictionary.decodeToInt(i)); } - domains.add(Domain.onlyNull(type)); - return Domain.union(domains); + return Domain.create(ValueSet.copyOf(type, values), true); } if (type.equals(DOUBLE) && columnDescriptor.getPrimitiveType().getPrimitiveTypeName() == PrimitiveTypeName.DOUBLE) { - List domains = new ArrayList<>(); + List values = new ArrayList<>(dictionarySize); for (int i = 0; i < dictionarySize; i++) { double value = dictionary.decodeToDouble(i); if (Double.isNaN(value)) { return Domain.all(type); } - domains.add(Domain.singleValue(type, value)); + values.add(value); } - domains.add(Domain.onlyNull(type)); - return Domain.union(domains); + return Domain.create(ValueSet.copyOf(type, values), true); } if (type.equals(DOUBLE) && columnDescriptor.getPrimitiveType().getPrimitiveTypeName() == PrimitiveTypeName.FLOAT) { - List domains = new ArrayList<>(); + List values = new ArrayList<>(dictionarySize); for (int i = 0; i < dictionarySize; i++) { float value = dictionary.decodeToFloat(i); if (Float.isNaN(value)) { return Domain.all(type); } - domains.add(Domain.singleValue(type, (double) value)); + values.add((double) value); } - domains.add(Domain.onlyNull(type)); - return Domain.union(domains); + return Domain.create(ValueSet.copyOf(type, values), true); } if (type instanceof VarcharType && columnDescriptor.getPrimitiveType().getPrimitiveTypeName() == PrimitiveTypeName.BINARY) { - List domains = new ArrayList<>(); + List values = new ArrayList<>(dictionarySize); for (int i = 0; i < dictionarySize; i++) { - domains.add(Domain.singleValue(type, Slices.wrappedBuffer(dictionary.decodeToBinary(i).getBytes()))); + values.add(Slices.wrappedBuffer(dictionary.decodeToBinary(i).getBytes())); } - domains.add(Domain.onlyNull(type)); - return Domain.union(domains); + return Domain.create(ValueSet.copyOf(type, values), 
true); } return Domain.all(type);