diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/writer/SegmentedSliceBlockBuilder.java b/presto-orc/src/main/java/com/facebook/presto/orc/writer/SegmentedSliceBlockBuilder.java deleted file mode 100644 index d1b15d7fbc60..000000000000 --- a/presto-orc/src/main/java/com/facebook/presto/orc/writer/SegmentedSliceBlockBuilder.java +++ /dev/null @@ -1,415 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.orc.writer; - -import com.facebook.presto.common.block.AbstractVariableWidthBlock; -import com.facebook.presto.common.block.Block; -import com.facebook.presto.common.block.BlockBuilder; -import com.facebook.presto.common.block.BlockBuilderStatus; -import com.google.common.annotations.VisibleForTesting; -import io.airlift.slice.DynamicSliceOutput; -import io.airlift.slice.Slice; -import io.airlift.slice.SliceInput; -import io.airlift.slice.XxHash64; -import org.openjdk.jol.info.ClassLayout; - -import java.util.Arrays; -import java.util.function.BiConsumer; - -import static io.airlift.slice.SizeOf.sizeOf; -import static java.lang.String.format; - -/** - * Custom Block Builder implementation for use with SliceDictionaryBuilder. - * Instead of using one large contiguous Slice for storing the unique Strings - * in String dictionary, this class uses Segmented Slices. The main advantage - * of this class over VariableWidthBlockBuilder is memory. Non contiguous - * memory is more likely to be available and hence reduce the chance of OOMs. - *

- * Why implement a block builder ? - * SliceDictionaryBuilder takes in a Block and Position to write. - * 1. It can create a slice for the position and write it. This does not - * require a block builder. But temporary slice, produces lot of - * short lived garbage. - * 2. A block and position can be copied to BlockBuilder using the method - * Block.writeBytesTo . But this requires implementing the BlockBuilder interface. - * Most of the methods are going to be unused and left as Unsupported. - *

- * What's the difference between this class and VariableWidthBlockBuilder? - * This class is different from VariableWidthBlockBuilder in the following ways - * 1. It does not support nulls. (So null byte array and management is avoided). - * 2. Instead of using one contiguous chunk for storing all the entries, - * they are segmented. - *

- * How is it implemented ? - * The Strings from 0 to SEGMENT_SIZE-1 are stored in the first segment. - * The string from SEGMENT_SIZE to 2 * SEGMENT_SIZE -1 goes to the second. - * Each segment has Slice(data is concatenated and stored in one slice) - * and offsets to capture the start offset and length. New slices are appended - * to the open segment. Once the segment is full the segment is - * finalized and appended to the closed segments. A new open segment is - * created for further appends. - */ -public class SegmentedSliceBlockBuilder - extends AbstractVariableWidthBlock - implements BlockBuilder -{ - private static final int INSTANCE_SIZE = ClassLayout.parseClass(SegmentedSliceBlockBuilder.class).instanceSize(); - - private final DynamicSliceOutput openSliceOutput; - - private int openSegmentIndex; - private int openSegmentOffset; - private int[][] offsets; - private Slice[] closedSlices; - private long closedSlicesRetainedSize; - private long closedSlicesSizeInBytes; - - public SegmentedSliceBlockBuilder(int expectedEntries, int expectedBytes) - { - int initialSize = Math.max(Segments.INITIAL_SEGMENTS, Segments.segment(expectedEntries) + 1); - offsets = new int[initialSize][]; - closedSlices = new Slice[initialSize]; - offsets[0] = new int[Segments.SEGMENT_SIZE + 1]; - openSliceOutput = new DynamicSliceOutput(expectedBytes); - } - - public void reset() - { - openSliceOutput.reset(); - - Arrays.fill(closedSlices, null); - closedSlicesRetainedSize = 0; - closedSlicesSizeInBytes = 0; - - // Fill the first offset array with 0, and free up the rest of the offsets array. - Arrays.fill(offsets[0], 0); - Arrays.fill(offsets, 1, offsets.length, null); - openSegmentIndex = 0; - openSegmentOffset = 0; - } - - @Override - public int getPositionOffset(int position) - { - return getOffset(position); - } - - @Override - public int getSliceLength(int position) - { - int offset = Segments.offset(position); - int segment = Segments.segment(position); - return offsets[segment][offset + 1] - offsets[segment][offset]; - } - - private Slice getSegmentRawSlice(int segment) - { - return segment == openSegmentIndex ? openSliceOutput.getUnderlyingSlice() : closedSlices[segment]; - } - - @Override - public Slice getRawSlice(int position) - { - return getSegmentRawSlice(Segments.segment(position)); - } - - @Override - public int getPositionCount() - { - return Segments.getPositions(openSegmentIndex, openSegmentOffset); - } - - @Override - public long getSizeInBytes() - { - long offsetsSizeInBytes = Integer.BYTES * (long) getPositionCount(); - return openSliceOutput.size() + offsetsSizeInBytes + closedSlicesSizeInBytes; - } - - @Override - public long getRegionSizeInBytes(int position, int length) - { - throw new UnsupportedOperationException("getRegionSizeInBytes is not supported by SegmentedSliceBlockBuilder"); - } - - @Override - public long getPositionsSizeInBytes(boolean[] positions) - { - throw new UnsupportedOperationException("getPositionsSizeInBytes is not supported by SegmentedSliceBlockBuilder"); - } - - @Override - public long getRetainedSizeInBytes() - { - long offsetsSize = sizeOf(offsets) + (openSegmentIndex + 1) * sizeOf(offsets[0]); - long closedSlicesSize = sizeOf(closedSlices) + closedSlicesRetainedSize; - return INSTANCE_SIZE + openSliceOutput.getRetainedSize() + offsetsSize + closedSlicesSize; - } - - @Override - public void retainedBytesForEachPart(BiConsumer consumer) - { - throw new UnsupportedOperationException("retainedBytesForEachPart is not supported by SegmentedSliceBlockBuilder"); - } - - @Override - public Block copyPositions(int[] positions, int offset, int length) - { - throw new UnsupportedOperationException("copyPositions is not supported by SegmentedSliceBlockBuilder"); - } - - @Override - public BlockBuilder writeByte(int value) - { - throw new UnsupportedOperationException("writeByte is not supported by SegmentedSliceBlockBuilder"); - } - - @Override - public BlockBuilder writeShort(int value) - { - throw new UnsupportedOperationException("writeShort is not supported by SegmentedSliceBlockBuilder"); - } - - @Override - public BlockBuilder writeInt(int value) - { - throw new UnsupportedOperationException("writeInt is not supported by SegmentedSliceBlockBuilder"); - } - - @Override - public BlockBuilder writeLong(long value) - { - throw new UnsupportedOperationException("writeLong is not supported by SegmentedSliceBlockBuilder"); - } - - @Override - public BlockBuilder writeBytes(Slice source, int sourceIndex, int length) - { - if (openSegmentOffset == 0) { - // Expand Segments if necessary. - if (openSegmentIndex >= offsets.length) { - int newCapacity = Math.max(openSegmentIndex + 1, (int) (offsets.length * 1.5)); - closedSlices = Arrays.copyOf(closedSlices, newCapacity); - offsets = Arrays.copyOf(offsets, newCapacity); - } - - if (offsets[openSegmentIndex] == null) { - offsets[openSegmentIndex] = new int[Segments.SEGMENT_SIZE + 1]; - } - } - openSliceOutput.writeBytes(source, sourceIndex, length); - return this; - } - - @Override - public BlockBuilder closeEntry() - { - openSegmentOffset++; - offsets[openSegmentIndex][openSegmentOffset] = openSliceOutput.size(); - if (openSegmentOffset == Segments.SEGMENT_SIZE) { - // Copy the content from the openSlice and append it to the closedSlices. - // Note: openSlice will be reused for next segment, so a copy is required. - Slice slice = openSliceOutput.copySlice(); - closedSlices[openSegmentIndex] = slice; - closedSlicesSizeInBytes += slice.length(); - closedSlicesRetainedSize += slice.getRetainedSize(); - - // Prepare the open slice for next write. - openSliceOutput.reset(); - openSegmentOffset = 0; - openSegmentIndex++; - } - return this; - } - - @Override - public BlockBuilder appendNull() - { - throw new UnsupportedOperationException("appendNull is not supported by SegmentedSliceBlockBuilder"); - } - - @Override - public BlockBuilder readPositionFrom(SliceInput input) - { - throw new UnsupportedOperationException("readPositionFrom is not supported by SegmentedSliceBlockBuilder"); - } - - @Override - public boolean mayHaveNull() - { - return false; - } - - @Override - protected boolean isEntryNull(int position) - { - return false; - } - - @Override - public Block getRegion(int positionOffset, int length) - { - throw new UnsupportedOperationException("getRegion is not supported by SegmentedSliceBlockBuilder"); - } - - @Override - public Block copyRegion(int position, int length) - { - throw new UnsupportedOperationException("copyRegion is not supported by SegmentedSliceBlockBuilder"); - } - - @Override - public Block build() - { - // No implementation of Segmented Slice based block exists. There is also no use for this yet. - throw new UnsupportedOperationException("build is not supported by SegmentedSliceBlockBuilder"); - } - - @Override - public BlockBuilder newBlockBuilderLike(BlockBuilderStatus blockBuilderStatus) - { - return newBlockBuilderLike(blockBuilderStatus, getPositionCount()); - } - - @Override - public BlockBuilder newBlockBuilderLike(BlockBuilderStatus blockBuilderStatus, int expectedEntries) - { - if (blockBuilderStatus != null) { - throw new UnsupportedOperationException("blockBuilderStatus is not supported by SegmentedSliceBlockBuilder"); - } - return new SegmentedSliceBlockBuilder(expectedEntries, openSliceOutput.getUnderlyingSlice().length()); - } - - private int getOffset(int position) - { - int offset = Segments.offset(position); - int segment = Segments.segment(position); - return offsets[segment][offset]; - } - - @Override - public String toString() - { - return format("SegmentedSliceBlockBuilder(%d){positionCount=%d,size=%d}", hashCode(), getPositionCount(), openSliceOutput.size()); - } - - @Override - public boolean isNullUnchecked(int internalPosition) - { - return false; - } - - @Override - public int getOffsetBase() - { - return 0; - } - - public boolean equals(int position, Block block, int blockPosition, int blockLength) - { - int segment = Segments.segment(position); - int segmentOffset = Segments.offset(position); - - int offset = offsets[segment][segmentOffset]; - int length = offsets[segment][segmentOffset + 1] - offset; - return blockLength == length && block.bytesEqual(blockPosition, 0, getSegmentRawSlice(segment), offset, length); - } - - public int compareTo(int left, int right) - { - int leftSegment = Segments.segment(left); - int leftSegmentOffset = Segments.offset(left); - - int rightSegment = Segments.segment(right); - int rightSegmentOffset = Segments.offset(right); - - Slice leftRawSlice = getSegmentRawSlice(leftSegment); - int leftOffset = offsets[leftSegment][leftSegmentOffset]; - int leftLen = offsets[leftSegment][leftSegmentOffset + 1] - leftOffset; - - Slice rightRawSlice = getSegmentRawSlice(rightSegment); - int rightOffset = offsets[rightSegment][rightSegmentOffset]; - int rightLen = offsets[rightSegment][rightSegmentOffset + 1] - rightOffset; - - return leftRawSlice.compareTo(leftOffset, leftLen, rightRawSlice, rightOffset, rightLen); - } - - public long hash(int position) - { - int segment = Segments.segment(position); - int segmentOffset = Segments.offset(position); - - int offset = offsets[segment][segmentOffset]; - int length = offsets[segment][segmentOffset + 1] - offset; - // There are several methods which computes hash, Block.hash, BlockBuilder.hash and - // Slice.hash. There is an expectations that all these methods return the same - // hash value. For insertion, block.hash is called, but rehash relies on BlockBuilder.hash - // So changing the hashing algorithm is hard. If a block implements different hashing - // algorithm, it is going to produce incorrect results after rehashing. - return XxHash64.hash(getSegmentRawSlice(segment), offset, length); - } - - @VisibleForTesting - int getOpenSegmentIndex() - { - return openSegmentIndex; - } - - // This class is copied from com.facebook.presto.orc.array.BigArrays and the - // Sizes and Initial Segments are tuned for the SliceDictionaryBuilder use case. - static class Segments - { - public static final int INITIAL_SEGMENTS = 64; - - public static final int SEGMENT_SHIFT = 14; - - /** - * Size of a single segment of a BigArray - */ - public static final int SEGMENT_SIZE = 1 << SEGMENT_SHIFT; - - /** - * The mask used to compute the offset associated to an index. - */ - public static final int SEGMENT_MASK = SEGMENT_SIZE - 1; - - /** - * Computes the segment associated with a given index. - * - * @param index an index into a big array. - * @return the associated segment. - */ - public static int segment(int index) - { - return index >>> SEGMENT_SHIFT; - } - - /** - * Computes the offset associated with a given index. - * - * @param index an index into a big array. - * @return the associated offset (in the associated {@linkplain #segment(int) segment}). - */ - public static int offset(int index) - { - return index & SEGMENT_MASK; - } - - public static int getPositions(int segment, int offset) - { - return (segment << SEGMENT_SHIFT) + offset; - } - } -} diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/writer/SliceDictionaryBuilder.java b/presto-orc/src/main/java/com/facebook/presto/orc/writer/SliceDictionaryBuilder.java index c37f30dcdc85..aa4358c7b2dc 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/writer/SliceDictionaryBuilder.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/writer/SliceDictionaryBuilder.java @@ -14,6 +14,7 @@ package com.facebook.presto.orc.writer; import com.facebook.presto.common.block.Block; +import com.facebook.presto.common.block.VariableWidthBlockBuilder; import com.facebook.presto.orc.array.IntBigArray; import io.airlift.slice.Slice; import org.openjdk.jol.info.ClassLayout; @@ -33,8 +34,8 @@ public class SliceDictionaryBuilder private static final int EMPTY_SLOT = -1; private static final int EXPECTED_BYTES_PER_ENTRY = 32; - private final IntBigArray slicePositionByHash = new IntBigArray(); - private final SegmentedSliceBlockBuilder segmentedSliceBuilder; + private final IntBigArray blockPositionByHash = new IntBigArray(); + private VariableWidthBlockBuilder elementBlock; private int maxFill; private int hashMask; @@ -46,7 +47,8 @@ public SliceDictionaryBuilder(int expectedSize) // todo we can do better int expectedEntries = min(expectedSize, DEFAULT_MAX_PAGE_SIZE_IN_BYTES / EXPECTED_BYTES_PER_ENTRY); // it is guaranteed expectedEntries * EXPECTED_BYTES_PER_ENTRY will not overflow - this.segmentedSliceBuilder = new SegmentedSliceBlockBuilder( + this.elementBlock = new VariableWidthBlockBuilder( + null, expectedEntries, expectedEntries * EXPECTED_BYTES_PER_ENTRY); @@ -54,68 +56,75 @@ public SliceDictionaryBuilder(int expectedSize) this.maxFill = calculateMaxFill(hashSize); this.hashMask = hashSize - 1; - slicePositionByHash.ensureCapacity(hashSize); - slicePositionByHash.fill(EMPTY_SLOT); + blockPositionByHash.ensureCapacity(hashSize); + blockPositionByHash.fill(EMPTY_SLOT); } public long getSizeInBytes() { - return segmentedSliceBuilder.getSizeInBytes(); + return elementBlock.getSizeInBytes(); } public long getRetainedSizeInBytes() { - return INSTANCE_SIZE + segmentedSliceBuilder.getRetainedSizeInBytes() + slicePositionByHash.sizeOf(); + return INSTANCE_SIZE + elementBlock.getRetainedSizeInBytes() + blockPositionByHash.sizeOf(); } public int compareIndex(int left, int right) { - return segmentedSliceBuilder.compareTo(left, right); + return elementBlock.compareTo( + left, + 0, + elementBlock.getSliceLength(left), + elementBlock, + right, + 0, + elementBlock.getSliceLength(right)); } public int getSliceLength(int position) { - return segmentedSliceBuilder.getSliceLength(position); + return elementBlock.getSliceLength(position); } public Slice getSlice(int position, int length) { - return segmentedSliceBuilder.getSlice(position, 0, length); + return elementBlock.getSlice(position, 0, length); } public Slice getRawSlice(int position) { - return segmentedSliceBuilder.getRawSlice(position); + return elementBlock.getRawSlice(position); } public int getRawSliceOffset(int position) { - return segmentedSliceBuilder.getPositionOffset(position); + return elementBlock.getPositionOffset(position); } public void clear() { - slicePositionByHash.fill(EMPTY_SLOT); - segmentedSliceBuilder.reset(); + blockPositionByHash.fill(EMPTY_SLOT); + elementBlock = (VariableWidthBlockBuilder) elementBlock.newBlockBuilderLike(null); } public int putIfAbsent(Block block, int position) { requireNonNull(block, "block must not be null"); - int slicePosition; + int blockPosition; long hashPosition = getHashPositionOfElement(block, position); - if (slicePositionByHash.get(hashPosition) != EMPTY_SLOT) { - slicePosition = slicePositionByHash.get(hashPosition); + if (blockPositionByHash.get(hashPosition) != EMPTY_SLOT) { + blockPosition = blockPositionByHash.get(hashPosition); } else { - slicePosition = addNewElement(hashPosition, block, position); + blockPosition = addNewElement(hashPosition, block, position); } - return slicePosition; + return blockPosition; } public int getEntryCount() { - return segmentedSliceBuilder.getPositionCount(); + return elementBlock.getPositionCount(); } /** @@ -127,12 +136,12 @@ private long getHashPositionOfElement(Block block, int position) int length = block.getSliceLength(position); long hashPosition = getMaskedHash(block.hash(position, 0, length)); while (true) { - int slicePosition = this.slicePositionByHash.get(hashPosition); - if (slicePosition == EMPTY_SLOT) { + int blockPosition = blockPositionByHash.get(hashPosition); + if (blockPosition == EMPTY_SLOT) { // Doesn't have this element return hashPosition; } - else if (segmentedSliceBuilder.equals(slicePosition, block, position, length)) { + else if (elementBlock.getSliceLength(blockPosition) == length && block.equals(position, 0, elementBlock, blockPosition, 0, length)) { // Already has this element return hashPosition; } @@ -141,10 +150,12 @@ else if (segmentedSliceBuilder.equals(slicePosition, block, position, length)) { } } - private long getRehashPositionOfElement(int position) + private long getRehashPositionOfElement(Block block, int position) { - long hashPosition = getMaskedHash(segmentedSliceBuilder.hash(position)); - while (slicePositionByHash.get(hashPosition) != EMPTY_SLOT) { + checkArgument(!block.isNull(position), "position is null"); + int length = block.getSliceLength(position); + long hashPosition = getMaskedHash(block.hash(position, 0, length)); + while (blockPositionByHash.get(hashPosition) != EMPTY_SLOT) { // in Re-hash there is no collision and continue to search until an empty spot is found. hashPosition = getMaskedHash(hashPosition + 1); } @@ -154,14 +165,14 @@ private long getRehashPositionOfElement(int position) private int addNewElement(long hashPosition, Block block, int position) { checkArgument(!block.isNull(position), "position is null"); - block.writeBytesTo(position, 0, block.getSliceLength(position), segmentedSliceBuilder); - segmentedSliceBuilder.closeEntry(); + block.writeBytesTo(position, 0, block.getSliceLength(position), elementBlock); + elementBlock.closeEntry(); - int newElementPositionInBlock = segmentedSliceBuilder.getPositionCount() - 1; - slicePositionByHash.set(hashPosition, newElementPositionInBlock); + int newElementPositionInBlock = elementBlock.getPositionCount() - 1; + blockPositionByHash.set(hashPosition, newElementPositionInBlock); // increase capacity, if necessary - if (segmentedSliceBuilder.getPositionCount() >= maxFill) { + if (elementBlock.getPositionCount() >= maxFill) { rehash(maxFill * 2); } @@ -173,11 +184,11 @@ private void rehash(int size) int newHashSize = arraySize(size + 1, FILL_RATIO); hashMask = newHashSize - 1; maxFill = calculateMaxFill(newHashSize); - slicePositionByHash.ensureCapacity(newHashSize); - slicePositionByHash.fill(EMPTY_SLOT); + blockPositionByHash.ensureCapacity(newHashSize); + blockPositionByHash.fill(EMPTY_SLOT); - for (int slicePosition = 0; slicePosition < segmentedSliceBuilder.getPositionCount(); slicePosition++) { - slicePositionByHash.set(getRehashPositionOfElement(slicePosition), slicePosition); + for (int blockPosition = 0; blockPosition < elementBlock.getPositionCount(); blockPosition++) { + blockPositionByHash.set(getRehashPositionOfElement(elementBlock, blockPosition), blockPosition); } } diff --git a/presto-orc/src/test/java/com/facebook/presto/orc/BenchmarkDictionaryWriter.java b/presto-orc/src/test/java/com/facebook/presto/orc/BenchmarkDictionaryWriter.java index 894da37c6105..70490bc1580a 100644 --- a/presto-orc/src/test/java/com/facebook/presto/orc/BenchmarkDictionaryWriter.java +++ b/presto-orc/src/test/java/com/facebook/presto/orc/BenchmarkDictionaryWriter.java @@ -47,6 +47,7 @@ import java.util.Optional; import java.util.OptionalInt; import java.util.Random; +import java.util.UUID; import static com.facebook.presto.common.type.BigintType.BIGINT; import static com.facebook.presto.common.type.IntegerType.INTEGER; @@ -156,14 +157,11 @@ private DictionaryColumnWriter getDictionaryColumnWriter(BenchmarkData data) @State(Scope.Thread) public static class BenchmarkData { - private static final int NUM_BLOCKS = 1_000; - private static final int ROWS_PER_BLOCK = 10_000; + private static final int NUM_BLOCKS = 10_000; + private static final int ROWS_PER_BLOCK = 1_000; private static final String INTEGER_TYPE = "integer"; private static final String BIGINT_TYPE = "bigint"; private static final String VARCHAR_TYPE = "varchar"; - private static final String POSSIBLE_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 +-_*#"; - private static final int MAX_STRING_LENGTH = 30; - private static final int MIN_STRING_LENGTH = 10; private final Random random = new Random(0); private final List blocks; @@ -174,7 +172,6 @@ public static class BenchmarkData VARCHAR_TYPE }) private String typeSignature = INTEGER_TYPE; - @Param({ "1", "5", @@ -230,22 +227,12 @@ private int getUniqueValues(int numRows) return max(uniqueValues, 1); } - private String getNextString(char[] chars, int length) - { - for (int i = 0; i < length; i++) { - chars[i] = POSSIBLE_CHARS.charAt(random.nextInt(POSSIBLE_CHARS.length())); - } - return String.valueOf(chars); - } - private List generateStrings(int numRows) { int valuesToGenerate = getUniqueValues(numRows); List strings = new ArrayList<>(numRows); - char[] chars = new char[MAX_STRING_LENGTH]; for (int i = 0; i < valuesToGenerate; i++) { - int length = MIN_STRING_LENGTH + random.nextInt(MAX_STRING_LENGTH - MIN_STRING_LENGTH); - strings.add(getNextString(chars, length)); + strings.add(UUID.randomUUID().toString()); } for (int i = valuesToGenerate; i < numRows; i++) { diff --git a/presto-orc/src/test/java/com/facebook/presto/orc/writer/TestSegmentedSliceBlockBuilder.java b/presto-orc/src/test/java/com/facebook/presto/orc/writer/TestSegmentedSliceBlockBuilder.java deleted file mode 100644 index 27afa9835e6a..000000000000 --- a/presto-orc/src/test/java/com/facebook/presto/orc/writer/TestSegmentedSliceBlockBuilder.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.orc.writer; - -import com.facebook.presto.common.block.Block; -import com.facebook.presto.common.block.VariableWidthBlockBuilder; -import io.airlift.slice.Slice; -import org.testng.annotations.Test; - -import static com.facebook.airlift.testing.Assertions.assertGreaterThan; -import static com.facebook.airlift.testing.Assertions.assertGreaterThanOrEqual; -import static com.facebook.airlift.testing.Assertions.assertLessThan; -import static io.airlift.slice.Slices.utf8Slice; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; - -public class TestSegmentedSliceBlockBuilder -{ - private static final Slice SLICE = utf8Slice("abcdefghijklmnopqrstuvwxyz"); - - @Test - public void testBasicOperations() - { - SegmentedSliceBlockBuilder blockBuilder = new SegmentedSliceBlockBuilder(10, 10); - long retainedSize = addElementsToBlockBuilder(blockBuilder); - blockBuilder.reset(); - assertEquals(blockBuilder.getSizeInBytes(), 0); - assertLessThan(blockBuilder.getRetainedSizeInBytes(), retainedSize); - addElementsToBlockBuilder(blockBuilder); - - int index = 0; - for (int j = 0; j < 100_000; j++) { - for (int i = 0; i < SLICE.length(); i++) { - Slice rawSlice = blockBuilder.getRawSlice(index); - int offset = blockBuilder.getPositionOffset(index); - int length = blockBuilder.getSliceLength(index); - index++; - assertEquals(length, 1); - assertTrue(SLICE.equals(i, 1, rawSlice, offset, length)); - } - } - } - - @Test - public void testEqualsAndHashCode() - { - SegmentedSliceBlockBuilder blockBuilder = new SegmentedSliceBlockBuilder(10, 10); - VariableWidthBlockBuilder variableBlockBuilder = new VariableWidthBlockBuilder(null, 10, 10); - for (int i = 0; i < SLICE.length(); i++) { - blockBuilder.writeBytes(SLICE, i, 1); - blockBuilder.closeEntry(); - - variableBlockBuilder.writeBytes(SLICE, i, 1); - variableBlockBuilder.closeEntry(); - } - - Block block = variableBlockBuilder.build(); - for (int i = 0; i < SLICE.length(); i++) { - assertTrue(blockBuilder.equals(i, block, i, 1)); - assertTrue(blockBuilder.equals(i, 0, block, i, 0, 1)); - assertEquals(blockBuilder.hash(i), variableBlockBuilder.hash(i, 0, 1)); - } - } - - @Test - public void testCompareTo() - { - SegmentedSliceBlockBuilder blockBuilder = new SegmentedSliceBlockBuilder(10, 10); - for (int i = 0; i < SLICE.length(); i++) { - blockBuilder.writeBytes(SLICE, i, 1); - blockBuilder.closeEntry(); - } - - for (int i = 0; i < SLICE.length() - 1; i++) { - assertLessThan(blockBuilder.compareTo(i, i + 1), 0); - assertEquals(blockBuilder.compareTo(i, i), 0); - assertGreaterThan(blockBuilder.compareTo(i + 1, i), 0); - } - } - - private long addElementsToBlockBuilder(SegmentedSliceBlockBuilder blockBuilder) - { - int size = 1; - long retainedSize = blockBuilder.getRetainedSizeInBytes(); - int lastOpenSegmentIndex = blockBuilder.getOpenSegmentIndex(); - for (int j = 0; j < 100_000; j++) { - for (int i = 0; i < SLICE.length(); i++) { - blockBuilder.writeBytes(SLICE, i, 1); - blockBuilder.closeEntry(); - assertEquals(blockBuilder.getPositionCount(), size++); - Slice rawSlice = blockBuilder.getRawSlice(blockBuilder.getPositionCount() - 1); - int offset = blockBuilder.getPositionOffset(blockBuilder.getPositionCount() - 1); - int length = blockBuilder.getSliceLength(blockBuilder.getPositionCount() - 1); - assertEquals(length, 1); - assertTrue(SLICE.equals(i, 1, rawSlice, offset, length)); - } - // Each element has 1 character and 1 offset so (1 + Integer.BYTES) - assertEquals(blockBuilder.getSizeInBytes(), blockBuilder.getPositionCount() * (1L + Integer.BYTES)); - - if (blockBuilder.getOpenSegmentIndex() > lastOpenSegmentIndex) { - // When new segment is created, retained should should increase due to - // copied slices and new offsets array allocation. - assertGreaterThan(blockBuilder.getRetainedSizeInBytes(), retainedSize); - } - assertGreaterThanOrEqual(blockBuilder.getRetainedSizeInBytes(), retainedSize); - retainedSize = blockBuilder.getRetainedSizeInBytes(); - lastOpenSegmentIndex = blockBuilder.getOpenSegmentIndex(); - } - return retainedSize; - } -}