diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/writer/SegmentedSliceBlockBuilder.java b/presto-orc/src/main/java/com/facebook/presto/orc/writer/SegmentedSliceBlockBuilder.java
deleted file mode 100644
index d1b15d7fbc60..000000000000
--- a/presto-orc/src/main/java/com/facebook/presto/orc/writer/SegmentedSliceBlockBuilder.java
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.facebook.presto.orc.writer;
-
-import com.facebook.presto.common.block.AbstractVariableWidthBlock;
-import com.facebook.presto.common.block.Block;
-import com.facebook.presto.common.block.BlockBuilder;
-import com.facebook.presto.common.block.BlockBuilderStatus;
-import com.google.common.annotations.VisibleForTesting;
-import io.airlift.slice.DynamicSliceOutput;
-import io.airlift.slice.Slice;
-import io.airlift.slice.SliceInput;
-import io.airlift.slice.XxHash64;
-import org.openjdk.jol.info.ClassLayout;
-
-import java.util.Arrays;
-import java.util.function.BiConsumer;
-
-import static io.airlift.slice.SizeOf.sizeOf;
-import static java.lang.String.format;
-
-/**
- * Custom Block Builder implementation for use with SliceDictionaryBuilder.
- * Instead of using one large contiguous Slice for storing the unique Strings
- * in String dictionary, this class uses Segmented Slices. The main advantage
- * of this class over VariableWidthBlockBuilder is memory. Non contiguous
- * memory is more likely to be available and hence reduce the chance of OOMs.
- *
- * Why implement a block builder ?
- * SliceDictionaryBuilder takes in a Block and Position to write.
- * 1. It can create a slice for the position and write it. This does not
- * require a block builder. But temporary slice, produces lot of
- * short lived garbage.
- * 2. A block and position can be copied to BlockBuilder using the method
- * Block.writeBytesTo . But this requires implementing the BlockBuilder interface.
- * Most of the methods are going to be unused and left as Unsupported.
- *
- * What's the difference between this class and VariableWidthBlockBuilder?
- * This class is different from VariableWidthBlockBuilder in the following ways
- * 1. It does not support nulls. (So null byte array and management is avoided).
- * 2. Instead of using one contiguous chunk for storing all the entries,
- * they are segmented.
- *
- * How is it implemented ?
- * The Strings from 0 to SEGMENT_SIZE-1 are stored in the first segment.
- * The string from SEGMENT_SIZE to 2 * SEGMENT_SIZE -1 goes to the second.
- * Each segment has Slice(data is concatenated and stored in one slice)
- * and offsets to capture the start offset and length. New slices are appended
- * to the open segment. Once the segment is full the segment is
- * finalized and appended to the closed segments. A new open segment is
- * created for further appends.
- */
-public class SegmentedSliceBlockBuilder
- extends AbstractVariableWidthBlock
- implements BlockBuilder
-{
- private static final int INSTANCE_SIZE = ClassLayout.parseClass(SegmentedSliceBlockBuilder.class).instanceSize();
-
- private final DynamicSliceOutput openSliceOutput;
-
- private int openSegmentIndex;
- private int openSegmentOffset;
- private int[][] offsets;
- private Slice[] closedSlices;
- private long closedSlicesRetainedSize;
- private long closedSlicesSizeInBytes;
-
- public SegmentedSliceBlockBuilder(int expectedEntries, int expectedBytes)
- {
- int initialSize = Math.max(Segments.INITIAL_SEGMENTS, Segments.segment(expectedEntries) + 1);
- offsets = new int[initialSize][];
- closedSlices = new Slice[initialSize];
- offsets[0] = new int[Segments.SEGMENT_SIZE + 1];
- openSliceOutput = new DynamicSliceOutput(expectedBytes);
- }
-
- public void reset()
- {
- openSliceOutput.reset();
-
- Arrays.fill(closedSlices, null);
- closedSlicesRetainedSize = 0;
- closedSlicesSizeInBytes = 0;
-
- // Fill the first offset array with 0, and free up the rest of the offsets array.
- Arrays.fill(offsets[0], 0);
- Arrays.fill(offsets, 1, offsets.length, null);
- openSegmentIndex = 0;
- openSegmentOffset = 0;
- }
-
- @Override
- public int getPositionOffset(int position)
- {
- return getOffset(position);
- }
-
- @Override
- public int getSliceLength(int position)
- {
- int offset = Segments.offset(position);
- int segment = Segments.segment(position);
- return offsets[segment][offset + 1] - offsets[segment][offset];
- }
-
- private Slice getSegmentRawSlice(int segment)
- {
- return segment == openSegmentIndex ? openSliceOutput.getUnderlyingSlice() : closedSlices[segment];
- }
-
- @Override
- public Slice getRawSlice(int position)
- {
- return getSegmentRawSlice(Segments.segment(position));
- }
-
- @Override
- public int getPositionCount()
- {
- return Segments.getPositions(openSegmentIndex, openSegmentOffset);
- }
-
- @Override
- public long getSizeInBytes()
- {
- long offsetsSizeInBytes = Integer.BYTES * (long) getPositionCount();
- return openSliceOutput.size() + offsetsSizeInBytes + closedSlicesSizeInBytes;
- }
-
- @Override
- public long getRegionSizeInBytes(int position, int length)
- {
- throw new UnsupportedOperationException("getRegionSizeInBytes is not supported by SegmentedSliceBlockBuilder");
- }
-
- @Override
- public long getPositionsSizeInBytes(boolean[] positions)
- {
- throw new UnsupportedOperationException("getPositionsSizeInBytes is not supported by SegmentedSliceBlockBuilder");
- }
-
- @Override
- public long getRetainedSizeInBytes()
- {
- long offsetsSize = sizeOf(offsets) + (openSegmentIndex + 1) * sizeOf(offsets[0]);
- long closedSlicesSize = sizeOf(closedSlices) + closedSlicesRetainedSize;
- return INSTANCE_SIZE + openSliceOutput.getRetainedSize() + offsetsSize + closedSlicesSize;
- }
-
- @Override
- public void retainedBytesForEachPart(BiConsumer consumer)
- {
- throw new UnsupportedOperationException("retainedBytesForEachPart is not supported by SegmentedSliceBlockBuilder");
- }
-
- @Override
- public Block copyPositions(int[] positions, int offset, int length)
- {
- throw new UnsupportedOperationException("copyPositions is not supported by SegmentedSliceBlockBuilder");
- }
-
- @Override
- public BlockBuilder writeByte(int value)
- {
- throw new UnsupportedOperationException("writeByte is not supported by SegmentedSliceBlockBuilder");
- }
-
- @Override
- public BlockBuilder writeShort(int value)
- {
- throw new UnsupportedOperationException("writeShort is not supported by SegmentedSliceBlockBuilder");
- }
-
- @Override
- public BlockBuilder writeInt(int value)
- {
- throw new UnsupportedOperationException("writeInt is not supported by SegmentedSliceBlockBuilder");
- }
-
- @Override
- public BlockBuilder writeLong(long value)
- {
- throw new UnsupportedOperationException("writeLong is not supported by SegmentedSliceBlockBuilder");
- }
-
- @Override
- public BlockBuilder writeBytes(Slice source, int sourceIndex, int length)
- {
- if (openSegmentOffset == 0) {
- // Expand Segments if necessary.
- if (openSegmentIndex >= offsets.length) {
- int newCapacity = Math.max(openSegmentIndex + 1, (int) (offsets.length * 1.5));
- closedSlices = Arrays.copyOf(closedSlices, newCapacity);
- offsets = Arrays.copyOf(offsets, newCapacity);
- }
-
- if (offsets[openSegmentIndex] == null) {
- offsets[openSegmentIndex] = new int[Segments.SEGMENT_SIZE + 1];
- }
- }
- openSliceOutput.writeBytes(source, sourceIndex, length);
- return this;
- }
-
- @Override
- public BlockBuilder closeEntry()
- {
- openSegmentOffset++;
- offsets[openSegmentIndex][openSegmentOffset] = openSliceOutput.size();
- if (openSegmentOffset == Segments.SEGMENT_SIZE) {
- // Copy the content from the openSlice and append it to the closedSlices.
- // Note: openSlice will be reused for next segment, so a copy is required.
- Slice slice = openSliceOutput.copySlice();
- closedSlices[openSegmentIndex] = slice;
- closedSlicesSizeInBytes += slice.length();
- closedSlicesRetainedSize += slice.getRetainedSize();
-
- // Prepare the open slice for next write.
- openSliceOutput.reset();
- openSegmentOffset = 0;
- openSegmentIndex++;
- }
- return this;
- }
-
- @Override
- public BlockBuilder appendNull()
- {
- throw new UnsupportedOperationException("appendNull is not supported by SegmentedSliceBlockBuilder");
- }
-
- @Override
- public BlockBuilder readPositionFrom(SliceInput input)
- {
- throw new UnsupportedOperationException("readPositionFrom is not supported by SegmentedSliceBlockBuilder");
- }
-
- @Override
- public boolean mayHaveNull()
- {
- return false;
- }
-
- @Override
- protected boolean isEntryNull(int position)
- {
- return false;
- }
-
- @Override
- public Block getRegion(int positionOffset, int length)
- {
- throw new UnsupportedOperationException("getRegion is not supported by SegmentedSliceBlockBuilder");
- }
-
- @Override
- public Block copyRegion(int position, int length)
- {
- throw new UnsupportedOperationException("copyRegion is not supported by SegmentedSliceBlockBuilder");
- }
-
- @Override
- public Block build()
- {
- // No implementation of Segmented Slice based block exists. There is also no use for this yet.
- throw new UnsupportedOperationException("build is not supported by SegmentedSliceBlockBuilder");
- }
-
- @Override
- public BlockBuilder newBlockBuilderLike(BlockBuilderStatus blockBuilderStatus)
- {
- return newBlockBuilderLike(blockBuilderStatus, getPositionCount());
- }
-
- @Override
- public BlockBuilder newBlockBuilderLike(BlockBuilderStatus blockBuilderStatus, int expectedEntries)
- {
- if (blockBuilderStatus != null) {
- throw new UnsupportedOperationException("blockBuilderStatus is not supported by SegmentedSliceBlockBuilder");
- }
- return new SegmentedSliceBlockBuilder(expectedEntries, openSliceOutput.getUnderlyingSlice().length());
- }
-
- private int getOffset(int position)
- {
- int offset = Segments.offset(position);
- int segment = Segments.segment(position);
- return offsets[segment][offset];
- }
-
- @Override
- public String toString()
- {
- return format("SegmentedSliceBlockBuilder(%d){positionCount=%d,size=%d}", hashCode(), getPositionCount(), openSliceOutput.size());
- }
-
- @Override
- public boolean isNullUnchecked(int internalPosition)
- {
- return false;
- }
-
- @Override
- public int getOffsetBase()
- {
- return 0;
- }
-
- public boolean equals(int position, Block block, int blockPosition, int blockLength)
- {
- int segment = Segments.segment(position);
- int segmentOffset = Segments.offset(position);
-
- int offset = offsets[segment][segmentOffset];
- int length = offsets[segment][segmentOffset + 1] - offset;
- return blockLength == length && block.bytesEqual(blockPosition, 0, getSegmentRawSlice(segment), offset, length);
- }
-
- public int compareTo(int left, int right)
- {
- int leftSegment = Segments.segment(left);
- int leftSegmentOffset = Segments.offset(left);
-
- int rightSegment = Segments.segment(right);
- int rightSegmentOffset = Segments.offset(right);
-
- Slice leftRawSlice = getSegmentRawSlice(leftSegment);
- int leftOffset = offsets[leftSegment][leftSegmentOffset];
- int leftLen = offsets[leftSegment][leftSegmentOffset + 1] - leftOffset;
-
- Slice rightRawSlice = getSegmentRawSlice(rightSegment);
- int rightOffset = offsets[rightSegment][rightSegmentOffset];
- int rightLen = offsets[rightSegment][rightSegmentOffset + 1] - rightOffset;
-
- return leftRawSlice.compareTo(leftOffset, leftLen, rightRawSlice, rightOffset, rightLen);
- }
-
- public long hash(int position)
- {
- int segment = Segments.segment(position);
- int segmentOffset = Segments.offset(position);
-
- int offset = offsets[segment][segmentOffset];
- int length = offsets[segment][segmentOffset + 1] - offset;
- // There are several methods which computes hash, Block.hash, BlockBuilder.hash and
- // Slice.hash. There is an expectations that all these methods return the same
- // hash value. For insertion, block.hash is called, but rehash relies on BlockBuilder.hash
- // So changing the hashing algorithm is hard. If a block implements different hashing
- // algorithm, it is going to produce incorrect results after rehashing.
- return XxHash64.hash(getSegmentRawSlice(segment), offset, length);
- }
-
- @VisibleForTesting
- int getOpenSegmentIndex()
- {
- return openSegmentIndex;
- }
-
- // This class is copied from com.facebook.presto.orc.array.BigArrays and the
- // Sizes and Initial Segments are tuned for the SliceDictionaryBuilder use case.
- static class Segments
- {
- public static final int INITIAL_SEGMENTS = 64;
-
- public static final int SEGMENT_SHIFT = 14;
-
- /**
- * Size of a single segment of a BigArray
- */
- public static final int SEGMENT_SIZE = 1 << SEGMENT_SHIFT;
-
- /**
- * The mask used to compute the offset associated to an index.
- */
- public static final int SEGMENT_MASK = SEGMENT_SIZE - 1;
-
- /**
- * Computes the segment associated with a given index.
- *
- * @param index an index into a big array.
- * @return the associated segment.
- */
- public static int segment(int index)
- {
- return index >>> SEGMENT_SHIFT;
- }
-
- /**
- * Computes the offset associated with a given index.
- *
- * @param index an index into a big array.
- * @return the associated offset (in the associated {@linkplain #segment(int) segment}).
- */
- public static int offset(int index)
- {
- return index & SEGMENT_MASK;
- }
-
- public static int getPositions(int segment, int offset)
- {
- return (segment << SEGMENT_SHIFT) + offset;
- }
- }
-}
diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/writer/SliceDictionaryBuilder.java b/presto-orc/src/main/java/com/facebook/presto/orc/writer/SliceDictionaryBuilder.java
index c37f30dcdc85..aa4358c7b2dc 100644
--- a/presto-orc/src/main/java/com/facebook/presto/orc/writer/SliceDictionaryBuilder.java
+++ b/presto-orc/src/main/java/com/facebook/presto/orc/writer/SliceDictionaryBuilder.java
@@ -14,6 +14,7 @@
package com.facebook.presto.orc.writer;
import com.facebook.presto.common.block.Block;
+import com.facebook.presto.common.block.VariableWidthBlockBuilder;
import com.facebook.presto.orc.array.IntBigArray;
import io.airlift.slice.Slice;
import org.openjdk.jol.info.ClassLayout;
@@ -33,8 +34,8 @@ public class SliceDictionaryBuilder
private static final int EMPTY_SLOT = -1;
private static final int EXPECTED_BYTES_PER_ENTRY = 32;
- private final IntBigArray slicePositionByHash = new IntBigArray();
- private final SegmentedSliceBlockBuilder segmentedSliceBuilder;
+ private final IntBigArray blockPositionByHash = new IntBigArray();
+ private VariableWidthBlockBuilder elementBlock;
private int maxFill;
private int hashMask;
@@ -46,7 +47,8 @@ public SliceDictionaryBuilder(int expectedSize)
// todo we can do better
int expectedEntries = min(expectedSize, DEFAULT_MAX_PAGE_SIZE_IN_BYTES / EXPECTED_BYTES_PER_ENTRY);
// it is guaranteed expectedEntries * EXPECTED_BYTES_PER_ENTRY will not overflow
- this.segmentedSliceBuilder = new SegmentedSliceBlockBuilder(
+ this.elementBlock = new VariableWidthBlockBuilder(
+ null,
expectedEntries,
expectedEntries * EXPECTED_BYTES_PER_ENTRY);
@@ -54,68 +56,75 @@ public SliceDictionaryBuilder(int expectedSize)
this.maxFill = calculateMaxFill(hashSize);
this.hashMask = hashSize - 1;
- slicePositionByHash.ensureCapacity(hashSize);
- slicePositionByHash.fill(EMPTY_SLOT);
+ blockPositionByHash.ensureCapacity(hashSize);
+ blockPositionByHash.fill(EMPTY_SLOT);
}
public long getSizeInBytes()
{
- return segmentedSliceBuilder.getSizeInBytes();
+ return elementBlock.getSizeInBytes();
}
public long getRetainedSizeInBytes()
{
- return INSTANCE_SIZE + segmentedSliceBuilder.getRetainedSizeInBytes() + slicePositionByHash.sizeOf();
+ return INSTANCE_SIZE + elementBlock.getRetainedSizeInBytes() + blockPositionByHash.sizeOf();
}
public int compareIndex(int left, int right)
{
- return segmentedSliceBuilder.compareTo(left, right);
+ return elementBlock.compareTo(
+ left,
+ 0,
+ elementBlock.getSliceLength(left),
+ elementBlock,
+ right,
+ 0,
+ elementBlock.getSliceLength(right));
}
public int getSliceLength(int position)
{
- return segmentedSliceBuilder.getSliceLength(position);
+ return elementBlock.getSliceLength(position);
}
public Slice getSlice(int position, int length)
{
- return segmentedSliceBuilder.getSlice(position, 0, length);
+ return elementBlock.getSlice(position, 0, length);
}
public Slice getRawSlice(int position)
{
- return segmentedSliceBuilder.getRawSlice(position);
+ return elementBlock.getRawSlice(position);
}
public int getRawSliceOffset(int position)
{
- return segmentedSliceBuilder.getPositionOffset(position);
+ return elementBlock.getPositionOffset(position);
}
public void clear()
{
- slicePositionByHash.fill(EMPTY_SLOT);
- segmentedSliceBuilder.reset();
+ blockPositionByHash.fill(EMPTY_SLOT);
+ elementBlock = (VariableWidthBlockBuilder) elementBlock.newBlockBuilderLike(null);
}
public int putIfAbsent(Block block, int position)
{
requireNonNull(block, "block must not be null");
- int slicePosition;
+ int blockPosition;
long hashPosition = getHashPositionOfElement(block, position);
- if (slicePositionByHash.get(hashPosition) != EMPTY_SLOT) {
- slicePosition = slicePositionByHash.get(hashPosition);
+ if (blockPositionByHash.get(hashPosition) != EMPTY_SLOT) {
+ blockPosition = blockPositionByHash.get(hashPosition);
}
else {
- slicePosition = addNewElement(hashPosition, block, position);
+ blockPosition = addNewElement(hashPosition, block, position);
}
- return slicePosition;
+ return blockPosition;
}
public int getEntryCount()
{
- return segmentedSliceBuilder.getPositionCount();
+ return elementBlock.getPositionCount();
}
/**
@@ -127,12 +136,12 @@ private long getHashPositionOfElement(Block block, int position)
int length = block.getSliceLength(position);
long hashPosition = getMaskedHash(block.hash(position, 0, length));
while (true) {
- int slicePosition = this.slicePositionByHash.get(hashPosition);
- if (slicePosition == EMPTY_SLOT) {
+ int blockPosition = blockPositionByHash.get(hashPosition);
+ if (blockPosition == EMPTY_SLOT) {
// Doesn't have this element
return hashPosition;
}
- else if (segmentedSliceBuilder.equals(slicePosition, block, position, length)) {
+ else if (elementBlock.getSliceLength(blockPosition) == length && block.equals(position, 0, elementBlock, blockPosition, 0, length)) {
// Already has this element
return hashPosition;
}
@@ -141,10 +150,12 @@ else if (segmentedSliceBuilder.equals(slicePosition, block, position, length)) {
}
}
- private long getRehashPositionOfElement(int position)
+ private long getRehashPositionOfElement(Block block, int position)
{
- long hashPosition = getMaskedHash(segmentedSliceBuilder.hash(position));
- while (slicePositionByHash.get(hashPosition) != EMPTY_SLOT) {
+ checkArgument(!block.isNull(position), "position is null");
+ int length = block.getSliceLength(position);
+ long hashPosition = getMaskedHash(block.hash(position, 0, length));
+ while (blockPositionByHash.get(hashPosition) != EMPTY_SLOT) {
// in Re-hash there is no collision and continue to search until an empty spot is found.
hashPosition = getMaskedHash(hashPosition + 1);
}
@@ -154,14 +165,14 @@ private long getRehashPositionOfElement(int position)
private int addNewElement(long hashPosition, Block block, int position)
{
checkArgument(!block.isNull(position), "position is null");
- block.writeBytesTo(position, 0, block.getSliceLength(position), segmentedSliceBuilder);
- segmentedSliceBuilder.closeEntry();
+ block.writeBytesTo(position, 0, block.getSliceLength(position), elementBlock);
+ elementBlock.closeEntry();
- int newElementPositionInBlock = segmentedSliceBuilder.getPositionCount() - 1;
- slicePositionByHash.set(hashPosition, newElementPositionInBlock);
+ int newElementPositionInBlock = elementBlock.getPositionCount() - 1;
+ blockPositionByHash.set(hashPosition, newElementPositionInBlock);
// increase capacity, if necessary
- if (segmentedSliceBuilder.getPositionCount() >= maxFill) {
+ if (elementBlock.getPositionCount() >= maxFill) {
rehash(maxFill * 2);
}
@@ -173,11 +184,11 @@ private void rehash(int size)
int newHashSize = arraySize(size + 1, FILL_RATIO);
hashMask = newHashSize - 1;
maxFill = calculateMaxFill(newHashSize);
- slicePositionByHash.ensureCapacity(newHashSize);
- slicePositionByHash.fill(EMPTY_SLOT);
+ blockPositionByHash.ensureCapacity(newHashSize);
+ blockPositionByHash.fill(EMPTY_SLOT);
- for (int slicePosition = 0; slicePosition < segmentedSliceBuilder.getPositionCount(); slicePosition++) {
- slicePositionByHash.set(getRehashPositionOfElement(slicePosition), slicePosition);
+ for (int blockPosition = 0; blockPosition < elementBlock.getPositionCount(); blockPosition++) {
+ blockPositionByHash.set(getRehashPositionOfElement(elementBlock, blockPosition), blockPosition);
}
}
diff --git a/presto-orc/src/test/java/com/facebook/presto/orc/BenchmarkDictionaryWriter.java b/presto-orc/src/test/java/com/facebook/presto/orc/BenchmarkDictionaryWriter.java
index 894da37c6105..70490bc1580a 100644
--- a/presto-orc/src/test/java/com/facebook/presto/orc/BenchmarkDictionaryWriter.java
+++ b/presto-orc/src/test/java/com/facebook/presto/orc/BenchmarkDictionaryWriter.java
@@ -47,6 +47,7 @@
import java.util.Optional;
import java.util.OptionalInt;
import java.util.Random;
+import java.util.UUID;
import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.IntegerType.INTEGER;
@@ -156,14 +157,11 @@ private DictionaryColumnWriter getDictionaryColumnWriter(BenchmarkData data)
@State(Scope.Thread)
public static class BenchmarkData
{
- private static final int NUM_BLOCKS = 1_000;
- private static final int ROWS_PER_BLOCK = 10_000;
+ private static final int NUM_BLOCKS = 10_000;
+ private static final int ROWS_PER_BLOCK = 1_000;
private static final String INTEGER_TYPE = "integer";
private static final String BIGINT_TYPE = "bigint";
private static final String VARCHAR_TYPE = "varchar";
- private static final String POSSIBLE_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 +-_*#";
- private static final int MAX_STRING_LENGTH = 30;
- private static final int MIN_STRING_LENGTH = 10;
private final Random random = new Random(0);
private final List blocks;
@@ -174,7 +172,6 @@ public static class BenchmarkData
VARCHAR_TYPE
})
private String typeSignature = INTEGER_TYPE;
-
@Param({
"1",
"5",
@@ -230,22 +227,12 @@ private int getUniqueValues(int numRows)
return max(uniqueValues, 1);
}
- private String getNextString(char[] chars, int length)
- {
- for (int i = 0; i < length; i++) {
- chars[i] = POSSIBLE_CHARS.charAt(random.nextInt(POSSIBLE_CHARS.length()));
- }
- return String.valueOf(chars);
- }
-
private List generateStrings(int numRows)
{
int valuesToGenerate = getUniqueValues(numRows);
List strings = new ArrayList<>(numRows);
- char[] chars = new char[MAX_STRING_LENGTH];
for (int i = 0; i < valuesToGenerate; i++) {
- int length = MIN_STRING_LENGTH + random.nextInt(MAX_STRING_LENGTH - MIN_STRING_LENGTH);
- strings.add(getNextString(chars, length));
+ strings.add(new UUID(random.nextLong(), random.nextLong()).toString()); // derive from the seeded Random so benchmark input is reproducible across runs
}
for (int i = valuesToGenerate; i < numRows; i++) {
diff --git a/presto-orc/src/test/java/com/facebook/presto/orc/writer/TestSegmentedSliceBlockBuilder.java b/presto-orc/src/test/java/com/facebook/presto/orc/writer/TestSegmentedSliceBlockBuilder.java
deleted file mode 100644
index 27afa9835e6a..000000000000
--- a/presto-orc/src/test/java/com/facebook/presto/orc/writer/TestSegmentedSliceBlockBuilder.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.facebook.presto.orc.writer;
-
-import com.facebook.presto.common.block.Block;
-import com.facebook.presto.common.block.VariableWidthBlockBuilder;
-import io.airlift.slice.Slice;
-import org.testng.annotations.Test;
-
-import static com.facebook.airlift.testing.Assertions.assertGreaterThan;
-import static com.facebook.airlift.testing.Assertions.assertGreaterThanOrEqual;
-import static com.facebook.airlift.testing.Assertions.assertLessThan;
-import static io.airlift.slice.Slices.utf8Slice;
-import static org.testng.Assert.assertEquals;
-import static org.testng.Assert.assertTrue;
-
-public class TestSegmentedSliceBlockBuilder
-{
- private static final Slice SLICE = utf8Slice("abcdefghijklmnopqrstuvwxyz");
-
- @Test
- public void testBasicOperations()
- {
- SegmentedSliceBlockBuilder blockBuilder = new SegmentedSliceBlockBuilder(10, 10);
- long retainedSize = addElementsToBlockBuilder(blockBuilder);
- blockBuilder.reset();
- assertEquals(blockBuilder.getSizeInBytes(), 0);
- assertLessThan(blockBuilder.getRetainedSizeInBytes(), retainedSize);
- addElementsToBlockBuilder(blockBuilder);
-
- int index = 0;
- for (int j = 0; j < 100_000; j++) {
- for (int i = 0; i < SLICE.length(); i++) {
- Slice rawSlice = blockBuilder.getRawSlice(index);
- int offset = blockBuilder.getPositionOffset(index);
- int length = blockBuilder.getSliceLength(index);
- index++;
- assertEquals(length, 1);
- assertTrue(SLICE.equals(i, 1, rawSlice, offset, length));
- }
- }
- }
-
- @Test
- public void testEqualsAndHashCode()
- {
- SegmentedSliceBlockBuilder blockBuilder = new SegmentedSliceBlockBuilder(10, 10);
- VariableWidthBlockBuilder variableBlockBuilder = new VariableWidthBlockBuilder(null, 10, 10);
- for (int i = 0; i < SLICE.length(); i++) {
- blockBuilder.writeBytes(SLICE, i, 1);
- blockBuilder.closeEntry();
-
- variableBlockBuilder.writeBytes(SLICE, i, 1);
- variableBlockBuilder.closeEntry();
- }
-
- Block block = variableBlockBuilder.build();
- for (int i = 0; i < SLICE.length(); i++) {
- assertTrue(blockBuilder.equals(i, block, i, 1));
- assertTrue(blockBuilder.equals(i, 0, block, i, 0, 1));
- assertEquals(blockBuilder.hash(i), variableBlockBuilder.hash(i, 0, 1));
- }
- }
-
- @Test
- public void testCompareTo()
- {
- SegmentedSliceBlockBuilder blockBuilder = new SegmentedSliceBlockBuilder(10, 10);
- for (int i = 0; i < SLICE.length(); i++) {
- blockBuilder.writeBytes(SLICE, i, 1);
- blockBuilder.closeEntry();
- }
-
- for (int i = 0; i < SLICE.length() - 1; i++) {
- assertLessThan(blockBuilder.compareTo(i, i + 1), 0);
- assertEquals(blockBuilder.compareTo(i, i), 0);
- assertGreaterThan(blockBuilder.compareTo(i + 1, i), 0);
- }
- }
-
- private long addElementsToBlockBuilder(SegmentedSliceBlockBuilder blockBuilder)
- {
- int size = 1;
- long retainedSize = blockBuilder.getRetainedSizeInBytes();
- int lastOpenSegmentIndex = blockBuilder.getOpenSegmentIndex();
- for (int j = 0; j < 100_000; j++) {
- for (int i = 0; i < SLICE.length(); i++) {
- blockBuilder.writeBytes(SLICE, i, 1);
- blockBuilder.closeEntry();
- assertEquals(blockBuilder.getPositionCount(), size++);
- Slice rawSlice = blockBuilder.getRawSlice(blockBuilder.getPositionCount() - 1);
- int offset = blockBuilder.getPositionOffset(blockBuilder.getPositionCount() - 1);
- int length = blockBuilder.getSliceLength(blockBuilder.getPositionCount() - 1);
- assertEquals(length, 1);
- assertTrue(SLICE.equals(i, 1, rawSlice, offset, length));
- }
- // Each element has 1 character and 1 offset so (1 + Integer.BYTES)
- assertEquals(blockBuilder.getSizeInBytes(), blockBuilder.getPositionCount() * (1L + Integer.BYTES));
-
- if (blockBuilder.getOpenSegmentIndex() > lastOpenSegmentIndex) {
- // When new segment is created, retained should should increase due to
- // copied slices and new offsets array allocation.
- assertGreaterThan(blockBuilder.getRetainedSizeInBytes(), retainedSize);
- }
- assertGreaterThanOrEqual(blockBuilder.getRetainedSizeInBytes(), retainedSize);
- retainedSize = blockBuilder.getRetainedSizeInBytes();
- lastOpenSegmentIndex = blockBuilder.getOpenSegmentIndex();
- }
- return retainedSize;
- }
-}