From 746d61423898c5bb801eebc6d8a454d3c008eb8a Mon Sep 17 00:00:00 2001 From: binlijin Date: Wed, 14 Sep 2022 10:27:18 +0800 Subject: [PATCH 1/3] HBASE-27329 Introduce prefix tree index block encoding use less space --- .../hbase/io/encoding/IndexBlockEncoder.java | 85 +++ .../hbase/io/encoding/IndexBlockEncoding.java | 23 +- .../encoding/PrefixTreeIndexBlockEncoder.java | 223 +++++++ .../hbase/io/encoding/PrefixTreeUtil.java | 596 ++++++++++++++++++ .../hadoop/hbase/io/util/UFIntTool.java | 137 ++++ .../hbase/io/encoding/TestPrefixTreeUtil.java | 96 +++ .../hbase/io/hfile/BlockIndexChunk.java | 4 + .../hbase/io/hfile/HFileBlockIndex.java | 10 + .../io/hfile/HFileIndexBlockEncoderImpl.java | 196 +++++- .../hbase/io/hfile/HFileWriterImpl.java | 10 +- 10 files changed, 1375 insertions(+), 5 deletions(-) create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/IndexBlockEncoder.java create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeIndexBlockEncoder.java create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeUtil.java create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/io/util/UFIntTool.java create mode 100644 hbase-common/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeUtil.java diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/IndexBlockEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/IndexBlockEncoder.java new file mode 100644 index 000000000000..65dda5d02409 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/IndexBlockEncoder.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.io.HeapSize; +import org.apache.hadoop.hbase.nio.ByteBuff; +import org.apache.yetus.audience.InterfaceAudience; +import java.io.DataOutput; +import java.io.IOException; +import java.util.List; + +@InterfaceAudience.Private +public interface IndexBlockEncoder { + /** + * Starts encoding for a block of Index data. + */ + void startBlockEncoding(boolean rootIndexBlock, DataOutput out) throws IOException; + + /** + * Encodes index block. + */ + void encode(List blockKeys, List blockOffsets, List onDiskDataSizes, + DataOutput out) throws IOException; + + /** + * Ends encoding for a block of index data. + */ + void endBlockEncoding(DataOutput out) throws IOException; + + /** + * Create a HFileIndexBlock seeker which find data within a block. + * @return A newly created seeker. 
+ */ + IndexEncodedSeeker createSeeker(); + + /** + * An interface which enable to seek while underlying data is encoded. It works on one HFile Index Block. + */ + interface IndexEncodedSeeker extends HeapSize { + /** + * Init with root index block. + */ + void initRootIndex(ByteBuff buffer, int numEntries, CellComparator comparator, int treeLevel) + throws IOException; + + /** + * Get i's entry in root index block. + */ + Cell getRootBlockKey(int i); + + int rootBlockContainingKey(Cell key); + + long rootBlockBlockOffsets(int rootLevelIndex); + + int rootBlockOnDiskDataSizes(int rootLevelIndex); + + /** + * Search non-root index block. + */ + SearchResult locateNonRootIndexEntry(ByteBuff nonRootBlock, Cell key) throws IOException; + } + + class SearchResult { + public int entryIndex; + public long offset; + public int onDiskSize; + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/IndexBlockEncoding.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/IndexBlockEncoding.java index ed97147ac9bb..0f4908228a1e 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/IndexBlockEncoding.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/IndexBlockEncoding.java @@ -33,10 +33,11 @@ public enum IndexBlockEncoding { /** Disable index block encoding. */ NONE(0, null), // id 1 is reserved for the PREFIX_TREE algorithm to be added later - PREFIX_TREE(1, null); + PREFIX_TREE(1, "org.apache.hadoop.hbase.io.encoding.PrefixTreeIndexBlockEncoder"); private final short id; private final byte[] idInBytes; + private IndexBlockEncoder encoder; private final String encoderCls; public static final int ID_SIZE = Bytes.SIZEOF_SHORT; @@ -118,4 +119,24 @@ public static IndexBlockEncoding getEncodingById(short indexBlockEncodingId) { return algorithm; } + /** + * Return new index block encoder for given algorithm type. + * @return index block encoder if algorithm is specified, null if none is selected. 
+ */ + public IndexBlockEncoder getEncoder() { + if (encoder == null && id != 0) { + // lazily create the encoder + encoder = createEncoder(encoderCls); + } + return encoder; + } + + static IndexBlockEncoder createEncoder(String fullyQualifiedClassName) { + try { + return Class.forName(fullyQualifiedClassName).asSubclass(IndexBlockEncoder.class) + .getDeclaredConstructor().newInstance(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeIndexBlockEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeIndexBlockEncoder.java new file mode 100644 index 000000000000..99481a077d86 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeIndexBlockEncoder.java @@ -0,0 +1,223 @@ +package org.apache.hadoop.hbase.io.encoding; + +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.PrivateCellUtil; +import org.apache.hadoop.hbase.io.ByteArrayOutputStream; +import org.apache.hadoop.hbase.io.util.UFIntTool; +import org.apache.hadoop.hbase.nio.ByteBuff; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.ClassSize; +import org.apache.hadoop.hbase.util.ObjectIntPair; +import org.apache.yetus.audience.InterfaceAudience; +import java.io.DataOutput; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +@InterfaceAudience.Private +public class PrefixTreeIndexBlockEncoder implements IndexBlockEncoder { + private static byte VERSION = 0; + + @Override + public void startBlockEncoding(boolean rootIndexBlock, DataOutput out) + throws IOException { + } + + @Override + public void encode(List blockKeys, List blockOffsets, List onDiskDataSizes, + DataOutput out) throws IOException { + List rowKeys = new ArrayList<>(blockKeys.size()); + for (int i = 0; i < blockKeys.size(); i++) { + byte[] key = blockKeys.get(i); + KeyValue.KeyOnlyKeyValue rowKey = new KeyValue.KeyOnlyKeyValue(key, 0, key.length); + rowKeys.add(CellUtil.cloneRow(rowKey)); + } + + PrefixTreeUtil.TokenizerNode node = PrefixTreeUtil.buildPrefixTree(rowKeys); + PrefixTreeUtil.PrefixTreeDataWidth dataWidth = new PrefixTreeUtil.PrefixTreeDataWidth(); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrefixTreeUtil.serializePrefixTree(node, dataWidth, outputStream); + byte[] data = outputStream.toByteArray(); + + out.write(VERSION); + PrefixTreeUtil.serialize(out, dataWidth); + out.writeInt(blockKeys.size()); + out.writeInt(data.length); + out.write(data); + + long minBlockOffset = blockOffsets.get(0); + long maxBlockOffset = blockOffsets.get(blockOffsets.size() - 1); + int minOnDiskDataSize = Integer.MAX_VALUE; + int maxOnDiskDataSize = Integer.MIN_VALUE; + for (int i = 0; i < onDiskDataSizes.size(); ++i) { + if (minOnDiskDataSize > onDiskDataSizes.get(i)) { + minOnDiskDataSize = onDiskDataSizes.get(i); + } + if (maxOnDiskDataSize < onDiskDataSizes.get(i)) { + maxOnDiskDataSize = onDiskDataSizes.get(i); + } + } + + int blockOffsetWidth = UFIntTool.numBytes(maxBlockOffset - minBlockOffset); + int onDiskDataSizeWidth = UFIntTool.numBytes(maxOnDiskDataSize - minOnDiskDataSize); + + out.write(blockOffsetWidth); + out.write(onDiskDataSizeWidth); + out.writeLong(minBlockOffset); + out.writeInt(minOnDiskDataSize); + + 
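+    // Layout of the rest of the block: for each entry we append (blockOffset - minBlockOffset)
+    // in blockOffsetWidth bytes followed by (onDiskDataSize - minOnDiskDataSize) in
+    // onDiskDataSizeWidth bytes, so a seeker can locate entry i at the fixed position
+    // i * (blockOffsetWidth + onDiskDataSizeWidth).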
outputStream.reset(); + for (int i = 0; i < blockOffsets.size(); ++i) { + UFIntTool.writeBytes(blockOffsetWidth, (blockOffsets.get(i) - minBlockOffset), outputStream); + UFIntTool.writeBytes(onDiskDataSizeWidth, (onDiskDataSizes.get(i) - minOnDiskDataSize), + outputStream); + } + data = outputStream.toByteArray(); + out.write(data); + } + + @Override + public void endBlockEncoding(DataOutput out) throws IOException { + } + + @Override + public IndexEncodedSeeker createSeeker() { + return new PrefixTreeIndexBlockEncodedSeeker(); + } + + static class PrefixTreeIndexBlockEncodedSeeker implements IndexEncodedSeeker{ + + private PrefixTreeUtil.PrefixTreeDataWidth dataWidth = new PrefixTreeUtil.PrefixTreeDataWidth(); + private ByteBuffer prefixTreeNodeData = null; + private ByteBuffer blockOffsetAndSizeData = null; + private int blockOffsetWidth; + private int onDiskDataSizeWidth; + private long minBlockOffset; + private int minOnDiskDataSize; + + @Override + public long heapSize() { + long heapSize = ClassSize.align(ClassSize.OBJECT); + + if (prefixTreeNodeData != null) { + heapSize += ClassSize.align(ClassSize.BYTE_BUFFER + prefixTreeNodeData.capacity()); + } + if (blockOffsetAndSizeData != null) { + heapSize += ClassSize.align(ClassSize.BYTE_BUFFER + blockOffsetAndSizeData.capacity()); + } + + // dataWidth + heapSize += ClassSize.REFERENCE; + // blockOffsetWidth onDiskDataSizeWidth minOnDiskDataSize + heapSize += 3 * Bytes.SIZEOF_INT; + // PrefixTreeDataWidth's data. + heapSize += 5 * Bytes.SIZEOF_INT; + // minBlockOffset + heapSize += Bytes.SIZEOF_LONG; + return ClassSize.align(heapSize); + } + + @Override + public void initRootIndex(ByteBuff data, int numEntries, CellComparator comparator, + int treeLevel) throws IOException { + byte version = data.get(); + if (version != VERSION) { + throw new IOException("Corrupted data, version should be 0, but it is " + version); + } + PrefixTreeUtil.deserialize(data, dataWidth); + int numEntry = data.getInt(); + int prefixNodeLength = data.getInt(); + + ObjectIntPair tmpPair = new ObjectIntPair<>(); + data.asSubByteBuffer(data.position(), prefixNodeLength, tmpPair); + ByteBuffer dup = tmpPair.getFirst().duplicate(); + dup.position(tmpPair.getSecond()); + dup.limit(tmpPair.getSecond() + prefixNodeLength); + prefixTreeNodeData = dup.slice(); + + data.skip(prefixNodeLength); + blockOffsetWidth = data.get(); + onDiskDataSizeWidth = data.get(); + minBlockOffset = data.getLong(); + minOnDiskDataSize = data.getInt(); + int blockOffsetsAndonDiskDataSize = numEntry * (blockOffsetWidth + onDiskDataSizeWidth); + + data.asSubByteBuffer(data.position(), blockOffsetsAndonDiskDataSize, tmpPair); + dup = tmpPair.getFirst().duplicate(); + dup.position(tmpPair.getSecond()); + dup.limit(tmpPair.getSecond() + blockOffsetsAndonDiskDataSize); + blockOffsetAndSizeData = dup.slice(); + } + + @Override + public Cell getRootBlockKey(int i) { + byte[] row = PrefixTreeUtil.get(prefixTreeNodeData, 0, dataWidth, i); + return PrivateCellUtil.createFirstOnRow(row); + } + + @Override + public int rootBlockContainingKey(Cell key) { + return PrefixTreeUtil.search(prefixTreeNodeData, 0, CellUtil.cloneRow(key), 0, dataWidth); + } + + @Override + public long rootBlockBlockOffsets(int rootLevelIndex) { + int pos = rootLevelIndex * (blockOffsetWidth + onDiskDataSizeWidth); + return UFIntTool.fromBytes(blockOffsetAndSizeData, pos, blockOffsetWidth) + minBlockOffset; + } + + @Override + public int rootBlockOnDiskDataSizes(int rootLevelIndex) { + int pos = rootLevelIndex * (blockOffsetWidth + 
onDiskDataSizeWidth); + int currentOnDiskSize = + (int) UFIntTool.fromBytes(blockOffsetAndSizeData, pos + blockOffsetWidth, + onDiskDataSizeWidth) + minOnDiskDataSize; + return currentOnDiskSize; + } + + @Override + public SearchResult locateNonRootIndexEntry(ByteBuff nonRootBlock, Cell key) + throws IOException { + PrefixTreeUtil.PrefixTreeDataWidth meta = new PrefixTreeUtil.PrefixTreeDataWidth(); + byte version = nonRootBlock.get(); + if (version != VERSION) { + throw new IOException("Corrupted data, version should be 0, but it is " + version); + } + PrefixTreeUtil.deserialize(nonRootBlock, meta); + int numEntry = nonRootBlock.getInt(); + int prefixNodeLength = nonRootBlock.getInt(); + + ObjectIntPair tmpPair = new ObjectIntPair<>(); + nonRootBlock.asSubByteBuffer(nonRootBlock.position(), prefixNodeLength, tmpPair); + ByteBuffer dup = tmpPair.getFirst().duplicate(); + dup.position(tmpPair.getSecond()); + dup.limit(tmpPair.getSecond() + prefixNodeLength); + ByteBuffer prefixTreeNodeData = dup.slice(); + + nonRootBlock.skip(prefixNodeLength); + + int entryIndex = PrefixTreeUtil.search(prefixTreeNodeData, 0, CellUtil.cloneRow(key), 0, meta); + SearchResult result = new SearchResult(); + result.entryIndex = entryIndex; + + if (entryIndex >= 0 && entryIndex < numEntry) { + int blockOffsetWidth = nonRootBlock.get(); + int onDiskDataSizeWidth = nonRootBlock.get(); + long minBlockOffset = nonRootBlock.getLong(); + int minOnDiskDataSize = nonRootBlock.getInt(); + + int pos = nonRootBlock.position() + entryIndex * (blockOffsetWidth + onDiskDataSizeWidth); + result.offset = UFIntTool.fromBytes(nonRootBlock, pos, blockOffsetWidth) + minBlockOffset; + result.onDiskSize = + (int) UFIntTool.fromBytes(nonRootBlock, pos + blockOffsetWidth, onDiskDataSizeWidth) + + minOnDiskDataSize; + } + + return result; + } + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeUtil.java new file mode 100644 index 000000000000..529608d7b636 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeUtil.java @@ -0,0 +1,596 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.io.encoding; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.io.ByteArrayOutputStream; +import org.apache.hadoop.hbase.io.util.UFIntTool; +import org.apache.hadoop.hbase.nio.ByteBuff; +import org.apache.hadoop.hbase.util.ByteBufferUtils; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.io.DataOutput; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +@InterfaceAudience.Private +public class PrefixTreeUtil { + + private static final Logger LOG = LoggerFactory.getLogger(PrefixTreeUtil.class); + + /** + * Build tree from begin + * + * @return the tree + */ + public static TokenizerNode buildPrefixTree(List rowKeys) { + // root node. + TokenizerNode node = new TokenizerNode(); + int start = 0; + // Get max common prefix + int common = maxCommonPrefix(rowKeys, 0, rowKeys.size() - 1, 0); + if (common > 0) { + byte[] commonB = Bytes.copy(rowKeys.get(0), 0, common); + node.nodeData = commonB; + for (int i = 0; i < rowKeys.size(); i++) { + if (rowKeys.get(i).length == common) { + node.numOccurrences++; + if (node.index == null) { + node.index = new ArrayList<>(1); + } + node.index.add(i); + start = i + 1; + } else { + break; + } + } + } else { + // Only root node data can be empty. + node.nodeData = new byte[0]; + } + constructAndSplitChild(node, rowKeys, start, rowKeys.size() - 1, common); + return node; + } + + /** + * Calculate max common prefix + * + * @return the max common prefix num bytes + */ + static int maxCommonPrefix(List rowKeys, int start, int end, int startPos) { + // only one entry. + if (start == end) { + return rowKeys.get(start).length - startPos; + } + int common = 0; + for (int round = 0; round <= rowKeys.get(start).length - startPos - 1; round++) { + boolean same = true; + for (int i = start + 1; i <= end; i++) { + if (startPos + common > rowKeys.get(i).length - 1) { + same = false; + break; + } + if (rowKeys.get(start)[startPos + common] != rowKeys.get(i)[startPos + common]) { + same = false; + break; + } + } + if (same) { + common++; + } else { + break; + } + } + return common; + } + + /** + * No common prefix split it. + * + */ + static void constructAndSplitChild(TokenizerNode node, List rowKeys, int start, + int end, int startPos) { + int middle = start; + for (int i = start + 1; i <= end; i++) { + if (startPos > rowKeys.get(i).length - 1) { + middle = i - 1; + break; + } + if (rowKeys.get(start)[startPos] != rowKeys.get(i)[startPos]) { + middle = i - 1; + break; + } + } + constructCommonNodeAndChild(node, rowKeys, start, middle, startPos); + if (middle + 1 <= end) { + // right + constructCommonNodeAndChild(node, rowKeys, middle + 1, end, startPos); + } + } + + /** + * Get max common prefix as node and build children. 
+ * + */ + static TokenizerNode constructCommonNodeAndChild(TokenizerNode node, List rowKeys, int start, + int end, int startPos) { + int common = maxCommonPrefix(rowKeys, start, end, startPos); + if (common > 0) { + TokenizerNode child = new TokenizerNode(); + child.parent = node; + node.children.add(child); + byte[] commonB = Bytes.copy(rowKeys.get(start), startPos, common); + child.nodeData = commonB; + int newStart = start; + for (int i = start; i <= end; i++) { + if (rowKeys.get(i).length == (startPos + common)) { + child.numOccurrences++; + if (child.index == null) { + child.index = new ArrayList<>(1); + } + child.index.add(i); + newStart = i + 1; + } else { + break; + } + } + if (start != end && newStart <= end) { + if (newStart == start) { + // no common prefix. + constructAndSplitChild(child, rowKeys, newStart, end, startPos + common); + } else { + // can have common prefix. + constructCommonNodeAndChild(child, rowKeys, newStart, end, startPos + common); + } + } + } else { + // no common prefix, split + constructAndSplitChild(node, rowKeys, start, end, startPos); + } + return node; + } + + public static void getNodeMetaInfo(TokenizerNode node, TokenizerNodeMeta meta) { + if (node.nodeData.length > meta.maxNodeDataLength) { + meta.maxNodeDataLength = node.nodeData.length; + } + meta.totalNodeDataLength += node.nodeData.length; + meta.countNodeDataNum++; + + if (node.children.size() > meta.maxChildNum) { + meta.maxChildNum = node.children.size(); + } + meta.totalChildNum += node.children.size(); + meta.countChildNum++; + + if (node.numOccurrences > meta.maxNumOccurrences) { + meta.maxNumOccurrences = node.numOccurrences; + } + meta.countNumOccurrences++; + if (node.index != null) { + for (Integer entry : node.index) { + if (entry > meta.maxIndex) { + meta.maxIndex = entry; + } + } + } + if (node.children.isEmpty()) { + meta.leafNodes.add(node); + meta.countIndexNum++; + } else { + meta.nonLeafNodes.add(node); + } + for (TokenizerNode child : node.children) { + getNodeMetaInfo(child, meta); + } + } + + public static void serializePrefixTree(TokenizerNode node, PrefixTreeDataWidth dataWidth, + ByteArrayOutputStream outputStream) + throws IOException { + TokenizerNodeMeta meta = new TokenizerNodeMeta(); + PrefixTreeUtil.getNodeMetaInfo(node, meta); + int totalLength = 0; + dataWidth.nodeDataLengthWidth = UFIntTool.numBytes(meta.maxNodeDataLength); + totalLength += meta.totalNodeDataLength; + totalLength += dataWidth.nodeDataLengthWidth * meta.countNodeDataNum; + + dataWidth.fanOutWidth = UFIntTool.numBytes(meta.maxChildNum); + // fan Out + totalLength += dataWidth.fanOutWidth * meta.countChildNum; + // fan Byte + totalLength += meta.totalChildNum; + + // nextnodeoffset + totalLength += 4 * meta.countChildNum; + + dataWidth.occurrencesWidth = UFIntTool.numBytes(meta.maxNumOccurrences); + totalLength += dataWidth.occurrencesWidth * meta.countNumOccurrences; + + dataWidth.indexWidth = UFIntTool.numBytes(meta.maxIndex); + totalLength += dataWidth.indexWidth * meta.countIndexNum; + + dataWidth.childNodeOffsetWidth = UFIntTool.numBytes(totalLength); + + // track the starting position of each node in final output + int negativeIndex = 0; + for (int i = meta.leafNodes.size() - 1; i >= 0; i--) { + TokenizerNode leaf = meta.leafNodes.get(i); + // no children + int leafNodeWidth = dataWidth.nodeDataLengthWidth + leaf.nodeData.length + dataWidth.fanOutWidth + + dataWidth.occurrencesWidth + leaf.numOccurrences * dataWidth.indexWidth; + negativeIndex += leafNodeWidth; + leaf.nodeWidth = 
leafNodeWidth; + leaf.negativeIndex = negativeIndex; + } + for (int i = meta.nonLeafNodes.size() - 1; i >= 0; i--) { + TokenizerNode nonLeaf = meta.nonLeafNodes.get(i); + int leafNodeWidth = dataWidth.nodeDataLengthWidth + nonLeaf.nodeData.length + dataWidth.fanOutWidth + + nonLeaf.children.size() + nonLeaf.children.size() * dataWidth.childNodeOffsetWidth + + dataWidth.occurrencesWidth + nonLeaf.numOccurrences * dataWidth.indexWidth; + negativeIndex += leafNodeWidth; + nonLeaf.nodeWidth = leafNodeWidth; + nonLeaf.negativeIndex = negativeIndex; + } + + for (int i = 0; i < meta.nonLeafNodes.size(); i++) { + serialize(meta.nonLeafNodes.get(i), outputStream, dataWidth); + } + for (int i = 0; i < meta.leafNodes.size(); i++) { + serialize(meta.leafNodes.get(i), outputStream, dataWidth); + } + } + + static void serialize(TokenizerNode node, ByteArrayOutputStream os, PrefixTreeDataWidth dataWidth) + throws IOException { + UFIntTool.writeBytes(dataWidth.nodeDataLengthWidth, node.nodeData.length, os); + os.write(node.nodeData, 0, node.nodeData.length); + UFIntTool.writeBytes(dataWidth.fanOutWidth, node.children.size(), os); + for (TokenizerNode child : node.children) { + // child's first byte. + os.write(child.nodeData[0]); + } + for (TokenizerNode child : node.children) { + UFIntTool.writeBytes(dataWidth.childNodeOffsetWidth, node.negativeIndex - child.negativeIndex, os); + } + UFIntTool.writeBytes(dataWidth.occurrencesWidth, node.numOccurrences, os); + for (int i = 0; i < node.numOccurrences; i++) { + UFIntTool.writeBytes(dataWidth.indexWidth, node.index.get(i), os); + } + } + + public static void serialize(DataOutput out, PrefixTreeDataWidth dataWidth) throws + IOException { + out.writeByte(dataWidth.nodeDataLengthWidth); + out.writeByte(dataWidth.fanOutWidth); + out.writeByte(dataWidth.occurrencesWidth); + out.writeByte(dataWidth.indexWidth); + out.writeByte(dataWidth.childNodeOffsetWidth); + } + + public static void deserialize(ByteBuff data, PrefixTreeDataWidth dataWidth) { + dataWidth.nodeDataLengthWidth = data.get(); + dataWidth.fanOutWidth = data.get(); + dataWidth.occurrencesWidth = data.get(); + dataWidth.indexWidth = data.get(); + dataWidth.childNodeOffsetWidth = data.get(); + } + + /** + * Get the node index, that search key >= index and search key < (index + 1) + * + */ + public static int search(ByteBuffer data, int bbStartPos, byte[] skey, int keyStartPos, + PrefixTreeDataWidth meta) { + int nodeDataLength = getNodeDataLength(data, bbStartPos, meta); + int cs = + ByteBufferUtils.compareTo(skey, keyStartPos, Math.min(skey.length - keyStartPos, nodeDataLength), + data, bbStartPos + meta.nodeDataLengthWidth, nodeDataLength); + + int pos = bbStartPos + meta.nodeDataLengthWidth + nodeDataLength; + int fanOut = getNodeFanOut(data, pos, meta); + pos += meta.fanOutWidth + fanOut + fanOut * meta.childNodeOffsetWidth; + int numOccurrences = getNodeNumOccurrences(data, pos, meta); + pos += meta.occurrencesWidth; + + if (cs == 0) { + // continue search + if (fanOut == 0) { + // no children, should be numOccurrences > 0 + int index = getNodeIndex(data, pos, 0, meta); + if (skey.length == keyStartPos + nodeDataLength) { + // == current node + return index; + } else { + // > current node. 
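+            // the search key is longer than this leaf's key and there is no child to descend
+            // into, so the key still falls inside the block indexed by this leaf.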
+ return index; + } + } + if (skey.length > keyStartPos + nodeDataLength) { + int fanOffset = bbStartPos + meta.nodeDataLengthWidth + nodeDataLength + meta.fanOutWidth; + byte searchForByte = skey[keyStartPos + nodeDataLength]; + + int fanIndexInBlock = + unsignedBinarySearch(data, fanOffset, fanOffset + fanOut, searchForByte); + int nodeOffsetStartPos = fanOffset + fanOut; + if (fanIndexInBlock >= 0) { + // found it, but need to adjust for position of fan in overall block + int fanIndex = fanIndexInBlock - fanOffset; + int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, fanIndex, meta); + return search(data, bbStartPos + nodeOffset, skey, keyStartPos + nodeDataLength, meta); + } else { + int fanIndex = fanIndexInBlock + fanOffset;// didn't find it, so compensate in reverse + int insertionPoint = (-fanIndex - 1) - 1; + if (insertionPoint < 0) { + // < first children + int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, 0, meta); + return getFirstLeafNode(data, bbStartPos + nodeOffset, meta) - 1; + } else { + int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, insertionPoint, meta); + return getLastLeafNode(data, bbStartPos + nodeOffset, meta); + } + } + } else { + //skey.length == keyStartPos + nodeDataLength + if (numOccurrences > 0) { + // == current node and current node is a leaf node. + return getNodeIndex(data, pos, 0, meta); + } else { + // need -1, == current node and current node not a leaf node. + return getFirstLeafNode(data, bbStartPos, meta) - 1; + } + } + } else if (cs > 0) { + // search key bigger than (>) current node, get biggest + if (fanOut == 0) { + if (numOccurrences > 0) { + if (numOccurrences == 1) { + return getNodeIndex(data, pos, 0, meta); + } else { + //TODO + throw new IllegalStateException( + "numOccurrences = " + numOccurrences + " > 1 not expected."); + } + } else { + throw new IllegalStateException( + "numOccurrences = " + numOccurrences + ", fanOut = " + fanOut + " not expected."); + } + } else { + return getLastLeafNode(data, bbStartPos, meta); + } + } else { + // search key small than (<) current node, get smallest. 
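+      // the answer is the entry just before this subtree: step back one from this node's own
+      // index if it carries an entry, otherwise from its first descendant leaf.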
+ if (numOccurrences > 0) { + return getNodeIndex(data, pos, 0, meta) - 1; + } else { + return getFirstLeafNode(data, bbStartPos, meta) - 1; + } + } + } + + static int getNodeDataLength(ByteBuffer data, int offset, PrefixTreeDataWidth meta) { + int dataLength = (int) UFIntTool.fromBytes(data, offset, meta.nodeDataLengthWidth); + return dataLength; + } + + static int getNodeFanOut(ByteBuffer data, int offset, PrefixTreeDataWidth meta) { + int fanOut = (int) UFIntTool.fromBytes(data, offset, meta.fanOutWidth); + return fanOut; + } + + static int getNodeNumOccurrences(ByteBuffer data, int offset, PrefixTreeDataWidth meta) { + int numOccurrences = (int) UFIntTool.fromBytes(data, offset, meta.occurrencesWidth); + return numOccurrences; + } + + static int getNodeOffset(ByteBuffer data, int offset, int index, PrefixTreeDataWidth meta) { + int nodeOffset = + (int) UFIntTool.fromBytes(data, offset + (index * meta.childNodeOffsetWidth), meta.childNodeOffsetWidth); + return nodeOffset; + } + + static int getNodeIndex(ByteBuffer data, int offset, int index, PrefixTreeDataWidth meta) { + int nodeIndex = + (int) UFIntTool.fromBytes(data, offset + (index * meta.indexWidth), meta.indexWidth); + return nodeIndex; + } + + /** + * Get the node's first leaf node + * + */ + static int getFirstLeafNode(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth meta) { + int dataLength = getNodeDataLength(data, bbStartPos, meta); + int pos = bbStartPos + meta.nodeDataLengthWidth + dataLength; + int fanOut = getNodeFanOut(data, pos, meta); + pos += meta.fanOutWidth + fanOut + fanOut * meta.childNodeOffsetWidth; + int numOccurrences = getNodeNumOccurrences(data, pos, meta); + pos += meta.occurrencesWidth; + if (numOccurrences > 0 || fanOut == 0) { + // return current node. + return getNodeIndex(data, pos, 0, meta); + } else { + int nodeOffsetStartPos = + bbStartPos + meta.nodeDataLengthWidth + dataLength + meta.fanOutWidth + fanOut; + int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, 0, meta); + return getFirstLeafNode(data, bbStartPos + nodeOffset, meta); + } + } + + /** + * Get the node's last leaf node + * + */ + static int getLastLeafNode(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth meta) { + int dataLength = getNodeDataLength(data, bbStartPos, meta); + int pos = bbStartPos + meta.nodeDataLengthWidth + dataLength; + int fanOut = getNodeFanOut(data, pos, meta); + pos += meta.fanOutWidth + fanOut + fanOut * meta.childNodeOffsetWidth; + //int numOccurrences = getNodeNumOccurrences(data, pos, meta); + pos += meta.occurrencesWidth; + if (fanOut == 0) { + return getNodeIndex(data, pos, 0, meta); + } else { + int nodeOffsetStartPos = + bbStartPos + meta.nodeDataLengthWidth + dataLength + meta.fanOutWidth + fanOut; + int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, fanOut - 1, meta); + return getLastLeafNode(data, bbStartPos + nodeOffset, meta); + } + } + + public static int unsignedBinarySearch(ByteBuffer a, int fromIndex, int toIndex, byte key) { + int unsignedKey = key & 0xff; + int low = fromIndex; + int high = toIndex - 1; + + while (low <= high) { + int mid = low + ((high - low) >> 1); + int midVal = a.get(mid) & 0xff; + + if (midVal < unsignedKey) { + low = mid + 1; + } else if (midVal > unsignedKey) { + high = mid - 1; + } else { + return mid; // key found + } + } + return -(low + 1); // key not found. 
+ } + + public static byte[] get(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth dataWidth, int index) { + return get(data, bbStartPos, dataWidth, index, new byte[0]); + } + + static byte[] get(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth meta, int index, byte[] prefix) { + int dataLength = getNodeDataLength(data, bbStartPos, meta); + byte[] bdata = new byte[dataLength]; + ByteBuffer dup = data.duplicate(); + dup.position(bbStartPos + meta.nodeDataLengthWidth); + dup.get(bdata, 0, dataLength); + bdata = Bytes.add(prefix, bdata); + + int pos = bbStartPos + meta.nodeDataLengthWidth + dataLength; + int fanOut = getNodeFanOut(data, pos, meta); + pos += meta.fanOutWidth + fanOut + fanOut * meta.childNodeOffsetWidth; + int numOccurrences = getNodeNumOccurrences(data, pos, meta); + pos += meta.occurrencesWidth; + if (numOccurrences > 0) { + int currentNodeIndex = getNodeIndex(data, pos, 0, meta); + if (currentNodeIndex == index) { + return bdata; + } + } + if (fanOut == 0) { + int currentNodeIndex = getNodeIndex(data, pos, 0, meta); + if (currentNodeIndex == index) { + return bdata; + } else { + throw new IllegalStateException( + "Unexpected, search index=" + index + ", but find to " + currentNodeIndex); + } + } else { + int nodeOffsetStartPos = + bbStartPos + meta.nodeDataLengthWidth + dataLength + meta.fanOutWidth + fanOut; + int locateIndex = locateWhichChild(data, bbStartPos, meta, index, fanOut, nodeOffsetStartPos); + int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, locateIndex, meta); + return get(data, bbStartPos + nodeOffset, meta, index, bdata); + } + } + + static int locateWhichChild(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth meta, int index, int fanOut, int nodeOffsetStartPos) { + for (int i = 0; i < fanOut; i++) { + int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, i, meta); + int lastLeafNode = getLastLeafNode(data, bbStartPos + nodeOffset, meta); + if (lastLeafNode >= index) { + return i; + } + } + throw new IllegalStateException("Unexpected unable to find index=" + index); + } + + public static class TokenizerNode { + + public byte[] nodeData = null; + + /** + * ref to parent trie node + */ + public TokenizerNode parent = null; + + /** + * child nodes. + */ + public ArrayList children = new ArrayList<>(); + + /* + * A count of occurrences in the input byte[]s, not the trie structure. 0 for branch nodes, 1+ for + * nubs and leaves. + */ + public int numOccurrences = 0; + + public List index = null; + + public List keys = null; + + /* + * A positive value indicating how many bytes before the end of the block this node will start. If + * the section is 55 bytes and negativeOffset is 9, then the node will start at 46. 
+ */ + public int negativeIndex = 0; + + public int nodeWidth = 0; + } + + public static class TokenizerNodeMeta { + + public int maxNodeDataLength = 0; + public int totalNodeDataLength = 0; + public int countNodeDataNum = 0; + + public int maxChildNum = 0; + public int totalChildNum = 0; + public int countChildNum = 0; + + public int maxNumOccurrences = 0; + public int countNumOccurrences = 0; + + public int maxIndex = 0; + public int countIndexNum = 0; + + public ArrayList nonLeafNodes = new ArrayList<>(); + + public ArrayList leafNodes = new ArrayList<>(); + } + + public static class PrefixTreeDataWidth { + public int nodeDataLengthWidth = 0; + + public int fanOutWidth = 0; + + public int occurrencesWidth = 0; + + public int indexWidth = 0; + + public int childNodeOffsetWidth = 0; + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/util/UFIntTool.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/util/UFIntTool.java new file mode 100644 index 000000000000..9d0ba8fbed77 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/util/UFIntTool.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.io.util; + +import org.apache.hadoop.hbase.nio.ByteBuff; +import org.apache.yetus.audience.InterfaceAudience; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; + +/** + * UFInt is an abbreviation for Unsigned Fixed-width Integer. + * + * This class converts between positive ints and 1-4 bytes that represent the int. All input ints + * must be positive. Max values stored in N bytes are: + * + * N=1: 2^8 => 256 + * N=2: 2^16 => 65,536 + * N=3: 2^24 => 16,777,216 + * N=4: 2^31 => 2,147,483,648 (Integer.MAX_VALUE) + * + * This was created to get most of the memory savings of a variable length integer when encoding + * an array of input integers, but to fix the number of bytes for each integer to the number needed + * to store the maximum integer in the array. This enables a binary search to be performed on the + * array of encoded integers. + * + * PrefixTree nodes often store offsets into a block that can fit into 1 or 2 bytes. Note that if + * the maximum value of an array of numbers needs 2 bytes, then it's likely that a majority of the + * numbers will also require 2 bytes. 
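+ *
+ * For example, for the values {3, 500, 70000} the width is driven by the maximum:
+ * numBytes(70000) returns 3, so every value in the array is written in 3 bytes.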
+ * + * warnings: + * * no input validation for max performance + * * no negatives + */ +@InterfaceAudience.Private +public class UFIntTool { + + private static final int NUM_BITS_IN_LONG = 64; + + public static long maxValueForNumBytes(int numBytes) { + return (1L << (numBytes * 8)) - 1; + } + + public static int numBytes(final long value) { + if (value == 0) {// 0 doesn't work with the formula below + return 1; + } + return (NUM_BITS_IN_LONG + 7 - Long.numberOfLeadingZeros(value)) / 8; + } + + public static byte[] getBytes(int outputWidth, final long value) { + byte[] bytes = new byte[outputWidth]; + writeBytes(outputWidth, value, bytes, 0); + return bytes; + } + + public static void writeBytes(int outputWidth, final long value, byte[] bytes, int offset) { + bytes[offset + outputWidth - 1] = (byte) value; + for (int i = outputWidth - 2; i >= 0; --i) { + bytes[offset + i] = (byte) (value >>> (outputWidth - i - 1) * 8); + } + } + + private static final long[] MASKS = new long[] { + (long) 255, + (long) 255 << 8, + (long) 255 << 16, + (long) 255 << 24, + (long) 255 << 32, + (long) 255 << 40, + (long) 255 << 48, + (long) 255 << 56 + }; + + public static void writeBytes(int outputWidth, final long value, OutputStream os) throws IOException { + for (int i = outputWidth - 1; i >= 0; --i) { + os.write((byte) ((value & MASKS[i]) >>> (8 * i))); + } + } + + public static long fromBytes(final byte[] bytes) { + long value = 0; + value |= bytes[0] & 0xff;// these seem to do ok without casting the byte to int + for (int i = 1; i < bytes.length; ++i) { + value <<= 8; + value |= bytes[i] & 0xff; + } + return value; + } + + public static long fromBytes(final byte[] bytes, final int offset, final int width) { + long value = 0; + value |= bytes[0 + offset] & 0xff;// these seem to do ok without casting the byte to int + for (int i = 1; i < width; ++i) { + value <<= 8; + value |= bytes[i + offset] & 0xff; + } + return value; + } + + public static long fromBytes(final ByteBuffer buffer, final int offset, final int width) { + long value = 0; + value |= buffer.get(0 + offset) & 0xff;// these seem to do ok without casting the byte to int + for (int i = 1; i < width; ++i) { + value <<= 8; + value |= buffer.get(i + offset) & 0xff; + } + return value; + } + + public static long fromBytes(final ByteBuff buffer, final int offset, final int width) { + long value = 0; + value |= buffer.get(0 + offset) & 0xff;// these seem to do ok without casting the byte to int + for (int i = 1; i < width; ++i) { + value <<= 8; + value |= buffer.get(i + offset) & 0xff; + } + return value; + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeUtil.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeUtil.java new file mode 100644 index 000000000000..399f5708b894 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeUtil.java @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.io.ByteArrayOutputStream; +import org.apache.hadoop.hbase.testclassification.IOTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Assert; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +@Category({ IOTests.class, SmallTests.class }) +public class TestPrefixTreeUtil { + private static final Logger LOG = LoggerFactory.getLogger(TestPrefixTreeUtil.class); + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestPrefixTreeUtil.class); + + @Test + public void testSearchPrefixTree() throws IOException { + List childs = new ArrayList<>(); + childs.add(Bytes.toBytes("00c7-202206201519-wx0t")); + childs.add(Bytes.toBytes("00c7-202206201519-wx0zcldi7lnsiyas-N")); + childs.add(Bytes.toBytes("00c7-202206201520-wx0re")); + childs.add(Bytes.toBytes("00c7-202206201520-wx0ulgrwi7d542tm-N")); + childs.add(Bytes.toBytes("00c7-202206201520-wx0x7")); + childs.add(Bytes.toBytes("00c7-202206201521")); + childs.add(Bytes.toBytes("00c7-202206201521-wx05xfbtw2mopyhs-C")); + childs.add(Bytes.toBytes("00c7-202206201521-wx08")); + childs.add(Bytes.toBytes("00c7-202206201521-wx0c")); + childs.add(Bytes.toBytes("00c7-202206201521-wx0go")); + childs.add(Bytes.toBytes("00c7-202206201522-wx0t")); + childs.add(Bytes.toBytes("00c8-202206200751-wx0ah4gnbwptdyna-F")); + + PrefixTreeUtil.TokenizerNode node = PrefixTreeUtil.buildPrefixTree(childs); + PrefixTreeUtil.PrefixTreeDataWidth dataWidth = new PrefixTreeUtil.PrefixTreeDataWidth(); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrefixTreeUtil.serializePrefixTree(node, dataWidth, outputStream); + byte[] data = outputStream.toByteArray(); + ByteBuffer prefixTreeNodeData = ByteBuffer.wrap(data); + for (int i = 0; i < childs.size(); i++) { + byte[] result = PrefixTreeUtil.get(prefixTreeNodeData, 0, dataWidth, i); + Assert.assertTrue(Bytes.compareTo(result, childs.get(i)) == 0); + } + + for (int i = 0; i < childs.size(); i++) { + int result = PrefixTreeUtil.search(prefixTreeNodeData, 0, childs.get(i), 0, dataWidth); + Assert.assertEquals(result, i); + } + + byte[] skey = Bytes.toBytes("00c7-202206201519"); + int result = PrefixTreeUtil.search(prefixTreeNodeData, 0, skey, 0, dataWidth); + Assert.assertEquals(-1, result); + + skey = Bytes.toBytes("00c7-202206201520"); + result = PrefixTreeUtil.search(prefixTreeNodeData, 0, skey, 0, dataWidth); + Assert.assertEquals(1, result); + + skey = Bytes.toBytes("00c7-202206201520-wx0x7-"); + result = PrefixTreeUtil.search(prefixTreeNodeData, 0, skey, 0, dataWidth); + Assert.assertEquals(4, result); + + skey = Bytes.toBytes("00c7-202206201521-wx0"); + result = PrefixTreeUtil.search(prefixTreeNodeData, 0, skey, 0, dataWidth); 
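+    // "00c7-202206201521-wx0" sorts after entry 5 ("00c7-202206201521") and before entry 6
+    // ("00c7-202206201521-wx05xfbtw2mopyhs-C"), so it resolves to block index 5.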
+ Assert.assertEquals(5, result); + + skey = Bytes.toBytes("00c8-202206200751-wx0ah4gnbwptdyna-F-"); + result = PrefixTreeUtil.search(prefixTreeNodeData, 0, skey, 0, dataWidth); + Assert.assertEquals(11, result); + } + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockIndexChunk.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockIndexChunk.java index 221b60bca927..679c33ed89a6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockIndexChunk.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockIndexChunk.java @@ -26,6 +26,10 @@ public interface BlockIndexChunk { List getBlockKeys(); + List getBlockOffsets(); + + List getOnDiskDataSizes(); + List getSecondaryIndexOffsetMarks(); int getEntryBySubEntry(long k); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java index 6e72890be120..b5b451e9a0d3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java @@ -1527,6 +1527,16 @@ public List getBlockKeys() { return blockKeys; } + @Override + public List getBlockOffsets() { + return blockOffsets; + } + + @Override + public List getOnDiskDataSizes() { + return onDiskDataSizes; + } + @Override public List getSecondaryIndexOffsetMarks() { return secondaryIndexOffsetMarks; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileIndexBlockEncoderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileIndexBlockEncoderImpl.java index f3bdb10b95c1..a8d983318fe2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileIndexBlockEncoderImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileIndexBlockEncoderImpl.java @@ -19,8 +19,16 @@ import java.io.DataOutput; import java.io.IOException; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoder; import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding; +import org.apache.hadoop.hbase.nio.ByteBuff; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.ClassSize; import org.apache.yetus.audience.InterfaceAudience; /** @@ -70,16 +78,200 @@ public IndexBlockEncoding getIndexBlockEncoding() { @Override public void encode(BlockIndexChunk blockIndexChunk, boolean rootIndexBlock, DataOutput out) throws IOException { - // TODO + IndexBlockEncoder encoder = this.indexBlockEncoding.getEncoder(); + encoder.startBlockEncoding(rootIndexBlock, out); + encoder.encode(blockIndexChunk.getBlockKeys(), blockIndexChunk.getBlockOffsets(), + blockIndexChunk.getOnDiskDataSizes(), out); + encoder.endBlockEncoding(out); } @Override public EncodedSeeker createSeeker() { - return null; + return new IndexBlockEncodedSeeker(this.indexBlockEncoding.getEncoder().createSeeker()); } @Override public String toString() { return getClass().getSimpleName() + "(indexBlockEncoding=" + indexBlockEncoding + ")"; } + + protected static class IndexBlockEncodedSeeker implements EncodedSeeker { + private int rootIndexNumEntries; + protected int searchTreeLevel; + private 
IndexBlockEncoder.IndexEncodedSeeker encodedSeeker; + + /** + * Pre-computed mid-key + */ + private AtomicReference midKey = new AtomicReference<>(); + + IndexBlockEncodedSeeker(IndexBlockEncoder.IndexEncodedSeeker encodedSeeker) { + this.encodedSeeker = encodedSeeker; + } + + @Override + public long heapSize() { + long heapSize = ClassSize.align(ClassSize.OBJECT); + + if (encodedSeeker != null) { + heapSize += ClassSize.REFERENCE; + heapSize += ClassSize.align(encodedSeeker.heapSize()); + } + + // the midkey atomicreference + heapSize += ClassSize.REFERENCE; + // rootIndexNumEntries searchTreeLevel + heapSize += 2 * Bytes.SIZEOF_INT; + return ClassSize.align(heapSize); + } + + @Override + public void initRootIndex(HFileBlock blk, int numEntries, CellComparator comparator, + int treeLevel) throws IOException { + this.rootIndexNumEntries = numEntries; + this.searchTreeLevel = treeLevel; + ByteBuff data = blk.getBufferWithoutHeader(); + encodedSeeker.initRootIndex(data, numEntries, comparator, treeLevel); + } + + @Override + public boolean isEmpty() { + return rootIndexNumEntries <= 0; + } + + @Override + public Cell getRootBlockKey(int i) { + return encodedSeeker.getRootBlockKey(i); + } + + @Override + public int getRootBlockCount() { + return rootIndexNumEntries; + } + + @Override + public Cell midkey(HFile.CachingBlockReader cachingBlockReader) throws IOException { + if (rootIndexNumEntries == 0) { + throw new IOException("HFile empty"); + } + + Cell targetMidKey = this.midKey.get(); + if (targetMidKey != null) { + return targetMidKey; + } + targetMidKey = getRootBlockKey(rootIndexNumEntries / 2); + this.midKey.set(targetMidKey); + return targetMidKey; + } + + @Override + public int rootBlockContainingKey(Cell key) { + return encodedSeeker.rootBlockContainingKey(key); + } + + @Override + public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock, + boolean cacheBlocks, boolean pread, boolean isCompaction, + DataBlockEncoding expectedDataBlockEncoding, HFile.CachingBlockReader cachingBlockReader) + throws IOException { + int rootLevelIndex = rootBlockContainingKey(key); + if (rootLevelIndex < 0 || rootLevelIndex >= rootIndexNumEntries) { + return null; + } + + // Read the next-level (intermediate or leaf) index block. + long currentOffset = encodedSeeker.rootBlockBlockOffsets(rootLevelIndex); + int currentOnDiskSize = encodedSeeker.rootBlockOnDiskDataSizes(rootLevelIndex); + + int lookupLevel = 1; // How many levels deep we are in our lookup. + IndexBlockEncoder.SearchResult searchResult = null; + + HFileBlock block = null; + while (true) { + try { + // Must initialize it with null here, because if don't and once an exception happen in + // readBlock, then we'll release the previous assigned block twice in the finally block. + // (See HBASE-22422) + block = null; + if (currentBlock != null && currentBlock.getOffset() == currentOffset) { + // Avoid reading the same block again, even with caching turned off. + // This is crucial for compaction-type workload which might have + // caching turned off. This is like a one-block cache inside the + // scanner. + block = currentBlock; + } else { + // Call HFile's caching block reader API. We always cache index + // blocks, otherwise we might get terrible performance. 
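+            // shouldCache stays true whenever the block about to be read is an index block
+            // (lookupLevel < searchTreeLevel); only the final data-block read honours the
+            // caller's cacheBlocks preference.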
+ boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel); + BlockType expectedBlockType; + if (lookupLevel < searchTreeLevel - 1) { + expectedBlockType = BlockType.INTERMEDIATE_INDEX; + } else if (lookupLevel == searchTreeLevel - 1) { + expectedBlockType = BlockType.LEAF_INDEX; + } else { + // this also accounts for ENCODED_DATA + expectedBlockType = BlockType.DATA; + } + block = + cachingBlockReader.readBlock(currentOffset, currentOnDiskSize, shouldCache, pread, + isCompaction, true, expectedBlockType, expectedDataBlockEncoding); + } + + if (block == null) { + throw new IOException( + "Failed to read block at offset " + currentOffset + ", onDiskSize=" + currentOnDiskSize); + } + + // Found a data block, break the loop and check our level in the tree. + if (block.getBlockType().isData()) { + break; + } + + // Not a data block. This must be a leaf-level or intermediate-level + // index block. We don't allow going deeper than searchTreeLevel. + if (++lookupLevel > searchTreeLevel) { + throw new IOException( + "Search Tree Level overflow: lookupLevel=" + lookupLevel + ", searchTreeLevel=" + + searchTreeLevel); + } + + // Locate the entry corresponding to the given key in the non-root + // (leaf or intermediate-level) index block. + ByteBuff buffer = block.getBufferWithoutHeader(); + searchResult = encodedSeeker.locateNonRootIndexEntry(buffer, key); + if (searchResult.entryIndex == -1) { + // This has to be changed + // For now change this to key value + throw new IOException("The key " + CellUtil.getCellKeyAsString(key) + " is before the" + + " first key of the non-root index block " + block); + } + + currentOffset = searchResult.offset; + currentOnDiskSize = searchResult.onDiskSize; + + } finally { + if (block != null && !block.getBlockType().isData()) { + // Release the block immediately if it is not the data block + block.release(); + } + } + } + + if (lookupLevel != searchTreeLevel) { + assert block.getBlockType().isData(); + // Though we have retrieved a data block we have found an issue + // in the retrieved data block. Hence returned the block so that + // the ref count can be decremented + if (block != null) { + block.release(); + } + throw new IOException( + "Reached a data block at level " + lookupLevel + " but the number of levels is " + + searchTreeLevel); + } + + // set the next indexed key for the current block. 
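+      // the prefix-tree encoded index does not track the next indexed key, so null is passed
+      // for it here.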
+ return new BlockWithScanInfo(block, null); + } + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java index 6b7cf3caaa39..e95a786b4c97 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java @@ -239,8 +239,14 @@ protected boolean checkKey(final Cell cell) throws IOException { throw new IOException("Key cannot be null or empty"); } if (lastCell != null) { - int keyComp = PrivateCellUtil.compareKeyIgnoresMvcc(this.hFileContext.getCellComparator(), - lastCell, cell); + int keyComp = 0; + if (hFileContext.getIndexBlockEncoding() == IndexBlockEncoding.PREFIX_TREE) { + keyComp = this.hFileContext.getCellComparator().compareRows(lastCell, cell); + } else { + keyComp = + PrivateCellUtil.compareKeyIgnoresMvcc(this.hFileContext.getCellComparator(), lastCell, + cell); + } if (keyComp > 0) { String message = getLexicalErrorMessage(cell); throw new IOException(message); From 6906fc5e893421ac084631e119f2098b03ec4ce9 Mon Sep 17 00:00:00 2001 From: binlijin Date: Wed, 14 Sep 2022 12:04:07 +0800 Subject: [PATCH 2/3] run spotless:apply --- .../hbase/io/encoding/IndexBlockEncoder.java | 9 +- .../encoding/PrefixTreeIndexBlockEncoder.java | 40 ++++++--- .../hbase/io/encoding/PrefixTreeUtil.java | 83 +++++++++---------- .../hadoop/hbase/io/util/UFIntTool.java | 53 ++++-------- .../hbase/io/encoding/TestPrefixTreeUtil.java | 15 ++-- .../io/hfile/HFileIndexBlockEncoderImpl.java | 19 ++--- .../hbase/io/hfile/HFileWriterImpl.java | 5 +- 7 files changed, 107 insertions(+), 117 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/IndexBlockEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/IndexBlockEncoder.java index 65dda5d02409..1f5f0c1ca3db 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/IndexBlockEncoder.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/IndexBlockEncoder.java @@ -17,14 +17,14 @@ */ package org.apache.hadoop.hbase.io.encoding; +import java.io.DataOutput; +import java.io.IOException; +import java.util.List; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.nio.ByteBuff; import org.apache.yetus.audience.InterfaceAudience; -import java.io.DataOutput; -import java.io.IOException; -import java.util.List; @InterfaceAudience.Private public interface IndexBlockEncoder { @@ -51,7 +51,8 @@ void encode(List blockKeys, List blockOffsets, List onDis IndexEncodedSeeker createSeeker(); /** - * An interface which enable to seek while underlying data is encoded. It works on one HFile Index Block. + * An interface which enable to seek while underlying data is encoded. It works on one HFile Index + * Block. 
*/ interface IndexEncodedSeeker extends HeapSize { /** diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeIndexBlockEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeIndexBlockEncoder.java index 99481a077d86..fe2c2984910b 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeIndexBlockEncoder.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeIndexBlockEncoder.java @@ -1,5 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.hbase.io.encoding; +import java.io.DataOutput; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellUtil; @@ -12,19 +34,13 @@ import org.apache.hadoop.hbase.util.ClassSize; import org.apache.hadoop.hbase.util.ObjectIntPair; import org.apache.yetus.audience.InterfaceAudience; -import java.io.DataOutput; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; @InterfaceAudience.Private public class PrefixTreeIndexBlockEncoder implements IndexBlockEncoder { private static byte VERSION = 0; @Override - public void startBlockEncoding(boolean rootIndexBlock, DataOutput out) - throws IOException { + public void startBlockEncoding(boolean rootIndexBlock, DataOutput out) throws IOException { } @Override @@ -89,7 +105,7 @@ public IndexEncodedSeeker createSeeker() { return new PrefixTreeIndexBlockEncodedSeeker(); } - static class PrefixTreeIndexBlockEncodedSeeker implements IndexEncodedSeeker{ + static class PrefixTreeIndexBlockEncodedSeeker implements IndexEncodedSeeker { private PrefixTreeUtil.PrefixTreeDataWidth dataWidth = new PrefixTreeUtil.PrefixTreeDataWidth(); private ByteBuffer prefixTreeNodeData = null; @@ -173,9 +189,8 @@ public long rootBlockBlockOffsets(int rootLevelIndex) { @Override public int rootBlockOnDiskDataSizes(int rootLevelIndex) { int pos = rootLevelIndex * (blockOffsetWidth + onDiskDataSizeWidth); - int currentOnDiskSize = - (int) UFIntTool.fromBytes(blockOffsetAndSizeData, pos + blockOffsetWidth, - onDiskDataSizeWidth) + minOnDiskDataSize; + int currentOnDiskSize = (int) UFIntTool.fromBytes(blockOffsetAndSizeData, + pos + blockOffsetWidth, onDiskDataSizeWidth) + minOnDiskDataSize; return currentOnDiskSize; } @@ -200,7 +215,8 @@ public SearchResult locateNonRootIndexEntry(ByteBuff nonRootBlock, Cell key) nonRootBlock.skip(prefixNodeLength); - int entryIndex = PrefixTreeUtil.search(prefixTreeNodeData, 0, CellUtil.cloneRow(key), 0, meta); + int entryIndex = + PrefixTreeUtil.search(prefixTreeNodeData, 0, 
CellUtil.cloneRow(key), 0, meta); SearchResult result = new SearchResult(); result.entryIndex = entryIndex; diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeUtil.java index 529608d7b636..04bb891a2954 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeUtil.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeUtil.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,6 +17,11 @@ */ package org.apache.hadoop.hbase.io.encoding; +import java.io.DataOutput; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.io.ByteArrayOutputStream; import org.apache.hadoop.hbase.io.util.UFIntTool; @@ -26,11 +31,6 @@ import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.DataOutput; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; @InterfaceAudience.Private public class PrefixTreeUtil { @@ -39,7 +39,6 @@ public class PrefixTreeUtil { /** * Build tree from begin - * * @return the tree */ public static TokenizerNode buildPrefixTree(List rowKeys) { @@ -73,10 +72,9 @@ public static TokenizerNode buildPrefixTree(List rowKeys) { /** * Calculate max common prefix - * * @return the max common prefix num bytes */ - static int maxCommonPrefix(List rowKeys, int start, int end, int startPos) { + static int maxCommonPrefix(List rowKeys, int start, int end, int startPos) { // only one entry. if (start == end) { return rowKeys.get(start).length - startPos; @@ -105,10 +103,9 @@ static int maxCommonPrefix(List rowKeys, int start, int end, int startP /** * No common prefix split it. - * */ - static void constructAndSplitChild(TokenizerNode node, List rowKeys, int start, - int end, int startPos) { + static void constructAndSplitChild(TokenizerNode node, List rowKeys, int start, int end, + int startPos) { int middle = start; for (int i = start + 1; i <= end; i++) { if (startPos > rowKeys.get(i).length - 1) { @@ -129,10 +126,9 @@ static void constructAndSplitChild(TokenizerNode node, List rowKeys, int /** * Get max common prefix as node and build children. 
- * */ - static TokenizerNode constructCommonNodeAndChild(TokenizerNode node, List rowKeys, int start, - int end, int startPos) { + static TokenizerNode constructCommonNodeAndChild(TokenizerNode node, List rowKeys, + int start, int end, int startPos) { int common = maxCommonPrefix(rowKeys, start, end, startPos); if (common > 0) { TokenizerNode child = new TokenizerNode(); @@ -205,8 +201,7 @@ public static void getNodeMetaInfo(TokenizerNode node, TokenizerNodeMeta meta) { } public static void serializePrefixTree(TokenizerNode node, PrefixTreeDataWidth dataWidth, - ByteArrayOutputStream outputStream) - throws IOException { + ByteArrayOutputStream outputStream) throws IOException { TokenizerNodeMeta meta = new TokenizerNodeMeta(); PrefixTreeUtil.getNodeMetaInfo(node, meta); int totalLength = 0; @@ -236,17 +231,19 @@ public static void serializePrefixTree(TokenizerNode node, PrefixTreeDataWidth d for (int i = meta.leafNodes.size() - 1; i >= 0; i--) { TokenizerNode leaf = meta.leafNodes.get(i); // no children - int leafNodeWidth = dataWidth.nodeDataLengthWidth + leaf.nodeData.length + dataWidth.fanOutWidth - + dataWidth.occurrencesWidth + leaf.numOccurrences * dataWidth.indexWidth; + int leafNodeWidth = + dataWidth.nodeDataLengthWidth + leaf.nodeData.length + dataWidth.fanOutWidth + + dataWidth.occurrencesWidth + leaf.numOccurrences * dataWidth.indexWidth; negativeIndex += leafNodeWidth; leaf.nodeWidth = leafNodeWidth; leaf.negativeIndex = negativeIndex; } for (int i = meta.nonLeafNodes.size() - 1; i >= 0; i--) { TokenizerNode nonLeaf = meta.nonLeafNodes.get(i); - int leafNodeWidth = dataWidth.nodeDataLengthWidth + nonLeaf.nodeData.length + dataWidth.fanOutWidth - + nonLeaf.children.size() + nonLeaf.children.size() * dataWidth.childNodeOffsetWidth - + dataWidth.occurrencesWidth + nonLeaf.numOccurrences * dataWidth.indexWidth; + int leafNodeWidth = + dataWidth.nodeDataLengthWidth + nonLeaf.nodeData.length + dataWidth.fanOutWidth + + nonLeaf.children.size() + nonLeaf.children.size() * dataWidth.childNodeOffsetWidth + + dataWidth.occurrencesWidth + nonLeaf.numOccurrences * dataWidth.indexWidth; negativeIndex += leafNodeWidth; nonLeaf.nodeWidth = leafNodeWidth; nonLeaf.negativeIndex = negativeIndex; @@ -270,7 +267,8 @@ static void serialize(TokenizerNode node, ByteArrayOutputStream os, PrefixTreeDa os.write(child.nodeData[0]); } for (TokenizerNode child : node.children) { - UFIntTool.writeBytes(dataWidth.childNodeOffsetWidth, node.negativeIndex - child.negativeIndex, os); + UFIntTool.writeBytes(dataWidth.childNodeOffsetWidth, node.negativeIndex - child.negativeIndex, + os); } UFIntTool.writeBytes(dataWidth.occurrencesWidth, node.numOccurrences, os); for (int i = 0; i < node.numOccurrences; i++) { @@ -278,8 +276,7 @@ static void serialize(TokenizerNode node, ByteArrayOutputStream os, PrefixTreeDa } } - public static void serialize(DataOutput out, PrefixTreeDataWidth dataWidth) throws - IOException { + public static void serialize(DataOutput out, PrefixTreeDataWidth dataWidth) throws IOException { out.writeByte(dataWidth.nodeDataLengthWidth); out.writeByte(dataWidth.fanOutWidth); out.writeByte(dataWidth.occurrencesWidth); @@ -297,14 +294,13 @@ public static void deserialize(ByteBuff data, PrefixTreeDataWidth dataWidth) { /** * Get the node index, that search key >= index and search key < (index + 1) - * */ public static int search(ByteBuffer data, int bbStartPos, byte[] skey, int keyStartPos, PrefixTreeDataWidth meta) { int nodeDataLength = getNodeDataLength(data, bbStartPos, meta); - int cs = - 
ByteBufferUtils.compareTo(skey, keyStartPos, Math.min(skey.length - keyStartPos, nodeDataLength), - data, bbStartPos + meta.nodeDataLengthWidth, nodeDataLength); + int cs = ByteBufferUtils.compareTo(skey, keyStartPos, + Math.min(skey.length - keyStartPos, nodeDataLength), data, + bbStartPos + meta.nodeDataLengthWidth, nodeDataLength); int pos = bbStartPos + meta.nodeDataLengthWidth + nodeDataLength; int fanOut = getNodeFanOut(data, pos, meta); @@ -350,7 +346,7 @@ public static int search(ByteBuffer data, int bbStartPos, byte[] skey, int keySt } } } else { - //skey.length == keyStartPos + nodeDataLength + // skey.length == keyStartPos + nodeDataLength if (numOccurrences > 0) { // == current node and current node is a leaf node. return getNodeIndex(data, pos, 0, meta); @@ -366,7 +362,7 @@ public static int search(ByteBuffer data, int bbStartPos, byte[] skey, int keySt if (numOccurrences == 1) { return getNodeIndex(data, pos, 0, meta); } else { - //TODO + // TODO throw new IllegalStateException( "numOccurrences = " + numOccurrences + " > 1 not expected."); } @@ -403,8 +399,8 @@ static int getNodeNumOccurrences(ByteBuffer data, int offset, PrefixTreeDataWidt } static int getNodeOffset(ByteBuffer data, int offset, int index, PrefixTreeDataWidth meta) { - int nodeOffset = - (int) UFIntTool.fromBytes(data, offset + (index * meta.childNodeOffsetWidth), meta.childNodeOffsetWidth); + int nodeOffset = (int) UFIntTool.fromBytes(data, offset + (index * meta.childNodeOffsetWidth), + meta.childNodeOffsetWidth); return nodeOffset; } @@ -416,7 +412,6 @@ static int getNodeIndex(ByteBuffer data, int offset, int index, PrefixTreeDataWi /** * Get the node's first leaf node - * */ static int getFirstLeafNode(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth meta) { int dataLength = getNodeDataLength(data, bbStartPos, meta); @@ -438,14 +433,13 @@ static int getFirstLeafNode(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth /** * Get the node's last leaf node - * */ static int getLastLeafNode(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth meta) { int dataLength = getNodeDataLength(data, bbStartPos, meta); int pos = bbStartPos + meta.nodeDataLengthWidth + dataLength; int fanOut = getNodeFanOut(data, pos, meta); pos += meta.fanOutWidth + fanOut + fanOut * meta.childNodeOffsetWidth; - //int numOccurrences = getNodeNumOccurrences(data, pos, meta); + // int numOccurrences = getNodeNumOccurrences(data, pos, meta); pos += meta.occurrencesWidth; if (fanOut == 0) { return getNodeIndex(data, pos, 0, meta); @@ -477,11 +471,13 @@ public static int unsignedBinarySearch(ByteBuffer a, int fromIndex, int toIndex, return -(low + 1); // key not found. 
} - public static byte[] get(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth dataWidth, int index) { + public static byte[] get(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth dataWidth, + int index) { return get(data, bbStartPos, dataWidth, index, new byte[0]); } - static byte[] get(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth meta, int index, byte[] prefix) { + static byte[] get(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth meta, int index, + byte[] prefix) { int dataLength = getNodeDataLength(data, bbStartPos, meta); byte[] bdata = new byte[dataLength]; ByteBuffer dup = data.duplicate(); @@ -517,7 +513,8 @@ static byte[] get(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth meta, int } } - static int locateWhichChild(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth meta, int index, int fanOut, int nodeOffsetStartPos) { + static int locateWhichChild(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth meta, int index, + int fanOut, int nodeOffsetStartPos) { for (int i = 0; i < fanOut; i++) { int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, i, meta); int lastLeafNode = getLastLeafNode(data, bbStartPos + nodeOffset, meta); @@ -543,8 +540,8 @@ public static class TokenizerNode { public ArrayList children = new ArrayList<>(); /* - * A count of occurrences in the input byte[]s, not the trie structure. 0 for branch nodes, 1+ for - * nubs and leaves. + * A count of occurrences in the input byte[]s, not the trie structure. 0 for branch nodes, 1+ + * for nubs and leaves. */ public int numOccurrences = 0; @@ -553,8 +550,8 @@ public static class TokenizerNode { public List keys = null; /* - * A positive value indicating how many bytes before the end of the block this node will start. If - * the section is 55 bytes and negativeOffset is 9, then the node will start at 46. + * A positive value indicating how many bytes before the end of the block this node will start. + * If the section is 55 bytes and negativeOffset is 9, then the node will start at 46. */ public int negativeIndex = 0; diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/util/UFIntTool.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/util/UFIntTool.java index 9d0ba8fbed77..262ed477e749 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/util/UFIntTool.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/util/UFIntTool.java @@ -15,39 +15,25 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.hadoop.hbase.io.util; -import org.apache.hadoop.hbase.nio.ByteBuff; -import org.apache.yetus.audience.InterfaceAudience; - import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; +import org.apache.hadoop.hbase.nio.ByteBuff; +import org.apache.yetus.audience.InterfaceAudience; /** - * UFInt is an abbreviation for Unsigned Fixed-width Integer. - * - * This class converts between positive ints and 1-4 bytes that represent the int. All input ints - * must be positive. Max values stored in N bytes are: - * - * N=1: 2^8 => 256 - * N=2: 2^16 => 65,536 - * N=3: 2^24 => 16,777,216 - * N=4: 2^31 => 2,147,483,648 (Integer.MAX_VALUE) - * - * This was created to get most of the memory savings of a variable length integer when encoding - * an array of input integers, but to fix the number of bytes for each integer to the number needed - * to store the maximum integer in the array. This enables a binary search to be performed on the - * array of encoded integers. 
- * - * PrefixTree nodes often store offsets into a block that can fit into 1 or 2 bytes. Note that if - * the maximum value of an array of numbers needs 2 bytes, then it's likely that a majority of the - * numbers will also require 2 bytes. - * - * warnings: - * * no input validation for max performance - * * no negatives + * UFInt is an abbreviation for Unsigned Fixed-width Integer. This class converts between positive + * ints and 1-4 bytes that represent the int. All input ints must be positive. Max values stored in + * N bytes are: N=1: 2^8 => 256 N=2: 2^16 => 65,536 N=3: 2^24 => 16,777,216 N=4: 2^31 => + * 2,147,483,648 (Integer.MAX_VALUE) This was created to get most of the memory savings of a + * variable length integer when encoding an array of input integers, but to fix the number of bytes + * for each integer to the number needed to store the maximum integer in the array. This enables a + * binary search to be performed on the array of encoded integers. PrefixTree nodes often store + * offsets into a block that can fit into 1 or 2 bytes. Note that if the maximum value of an array + * of numbers needs 2 bytes, then it's likely that a majority of the numbers will also require 2 + * bytes. warnings: * no input validation for max performance * no negatives */ @InterfaceAudience.Private public class UFIntTool { @@ -78,18 +64,11 @@ public static void writeBytes(int outputWidth, final long value, byte[] bytes, i } } - private static final long[] MASKS = new long[] { - (long) 255, - (long) 255 << 8, - (long) 255 << 16, - (long) 255 << 24, - (long) 255 << 32, - (long) 255 << 40, - (long) 255 << 48, - (long) 255 << 56 - }; + private static final long[] MASKS = new long[] { (long) 255, (long) 255 << 8, (long) 255 << 16, + (long) 255 << 24, (long) 255 << 32, (long) 255 << 40, (long) 255 << 48, (long) 255 << 56 }; - public static void writeBytes(int outputWidth, final long value, OutputStream os) throws IOException { + public static void writeBytes(int outputWidth, final long value, OutputStream os) + throws IOException { for (int i = outputWidth - 1; i >= 0; --i) { os.write((byte) ((value & MASKS[i]) >>> (8 * i))); } diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeUtil.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeUtil.java index 399f5708b894..365b4bc35a48 100644 --- a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeUtil.java +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeUtil.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -6,7 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 + * + * http://www.apache.org/licenses/LICENSE-2.0 + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,6 +17,10 @@ */ package org.apache.hadoop.hbase.io.encoding; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.io.ByteArrayOutputStream; import org.apache.hadoop.hbase.testclassification.IOTests; @@ -27,11 +33,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; - @Category({ IOTests.class, SmallTests.class }) public class TestPrefixTreeUtil { private static final Logger LOG = LoggerFactory.getLogger(TestPrefixTreeUtil.class); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileIndexBlockEncoderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileIndexBlockEncoderImpl.java index a8d983318fe2..358620ba91c5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileIndexBlockEncoderImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileIndexBlockEncoderImpl.java @@ -212,14 +212,13 @@ public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentB // this also accounts for ENCODED_DATA expectedBlockType = BlockType.DATA; } - block = - cachingBlockReader.readBlock(currentOffset, currentOnDiskSize, shouldCache, pread, - isCompaction, true, expectedBlockType, expectedDataBlockEncoding); + block = cachingBlockReader.readBlock(currentOffset, currentOnDiskSize, shouldCache, + pread, isCompaction, true, expectedBlockType, expectedDataBlockEncoding); } if (block == null) { - throw new IOException( - "Failed to read block at offset " + currentOffset + ", onDiskSize=" + currentOnDiskSize); + throw new IOException("Failed to read block at offset " + currentOffset + + ", onDiskSize=" + currentOnDiskSize); } // Found a data block, break the loop and check our level in the tree. @@ -230,9 +229,8 @@ public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentB // Not a data block. This must be a leaf-level or intermediate-level // index block. We don't allow going deeper than searchTreeLevel. if (++lookupLevel > searchTreeLevel) { - throw new IOException( - "Search Tree Level overflow: lookupLevel=" + lookupLevel + ", searchTreeLevel=" - + searchTreeLevel); + throw new IOException("Search Tree Level overflow: lookupLevel=" + lookupLevel + + ", searchTreeLevel=" + searchTreeLevel); } // Locate the entry corresponding to the given key in the non-root @@ -265,9 +263,8 @@ public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentB if (block != null) { block.release(); } - throw new IOException( - "Reached a data block at level " + lookupLevel + " but the number of levels is " - + searchTreeLevel); + throw new IOException("Reached a data block at level " + lookupLevel + + " but the number of levels is " + searchTreeLevel); } // set the next indexed key for the current block. 
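
Note (illustrative sketch, not part of the patch): the hunks above introduce the V1 PrefixTreeUtil API that the index seeker relies on -- buildPrefixTree() to tokenize sorted block keys into a trie, serializePrefixTree() to write it with the fixed byte widths recorded in PrefixTreeDataWidth, and search()/get() to locate and reconstruct entries. The small Java sketch below only strings those calls together using the signatures visible in this series; the sample row keys and the class name PrefixTreeIndexSketch are made up for illustration and assume the behavior described in the patch's own javadoc (search returns the index i with key(i) <= probe < key(i+1)).

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.io.ByteArrayOutputStream;
import org.apache.hadoop.hbase.io.encoding.PrefixTreeUtil;
import org.apache.hadoop.hbase.io.encoding.PrefixTreeUtil.PrefixTreeDataWidth;
import org.apache.hadoop.hbase.io.encoding.PrefixTreeUtil.TokenizerNode;
import org.apache.hadoop.hbase.util.Bytes;

public class PrefixTreeIndexSketch {
  public static void main(String[] args) throws Exception {
    // Sorted first row keys of consecutive index entries, as the block index writer sees them.
    List<byte[]> rowKeys = new ArrayList<>();
    rowKeys.add(Bytes.toBytes("row-0001"));
    rowKeys.add(Bytes.toBytes("row-0002"));
    rowKeys.add(Bytes.toBytes("row-0100"));

    // Tokenize the keys into a prefix tree; "row-0" becomes a shared branch node.
    TokenizerNode root = PrefixTreeUtil.buildPrefixTree(rowKeys);

    // Serialize the tree; dataWidth records the fixed field widths chosen during serialization.
    PrefixTreeDataWidth dataWidth = new PrefixTreeDataWidth();
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    PrefixTreeUtil.serializePrefixTree(root, dataWidth, out);
    ByteBuffer encoded = ByteBuffer.wrap(out.toByteArray());

    // search() picks the last entry whose key is <= the probe key, i.e. the data block
    // that may contain the key; get() walks the tree to rebuild that entry's row key.
    int entry = PrefixTreeUtil.search(encoded, 0, Bytes.toBytes("row-0002"), 0, dataWidth);
    byte[] roundTripped = PrefixTreeUtil.get(encoded, 0, dataWidth, entry);
    System.out.println(entry + " -> " + Bytes.toString(roundTripped));
  }
}

Because every offset, fan-out and index field is written with the fixed widths from PrefixTreeDataWidth (via UFIntTool), the seeker can jump directly to the i-th entry and binary-search fan-out bytes without decoding variable-length integers, which is the space/seek trade-off this encoding is built around.
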
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java index e95a786b4c97..6e373c67db34 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java @@ -243,9 +243,8 @@ protected boolean checkKey(final Cell cell) throws IOException { if (hFileContext.getIndexBlockEncoding() == IndexBlockEncoding.PREFIX_TREE) { keyComp = this.hFileContext.getCellComparator().compareRows(lastCell, cell); } else { - keyComp = - PrivateCellUtil.compareKeyIgnoresMvcc(this.hFileContext.getCellComparator(), lastCell, - cell); + keyComp = PrivateCellUtil.compareKeyIgnoresMvcc(this.hFileContext.getCellComparator(), + lastCell, cell); } if (keyComp > 0) { String message = getLexicalErrorMessage(cell); From d70022e144ccad4e36c9840c8342d818ff4eab34 Mon Sep 17 00:00:00 2001 From: binlijin Date: Thu, 29 Sep 2022 11:56:40 +0800 Subject: [PATCH 3/3] version 2 --- .../hbase/io/encoding/IndexBlockEncoding.java | 2 +- .../PrefixTreeIndexBlockEncoderV2.java | 239 +++++ .../hbase/io/encoding/PrefixTreeUtil.java | 26 +- .../hbase/io/encoding/PrefixTreeUtilV2.java | 848 ++++++++++++++++++ .../io/encoding/TestPrefixTreeUtilV2.java | 210 +++++ .../hbase/io/hfile/HFileWriterImpl.java | 9 +- 6 files changed, 1319 insertions(+), 15 deletions(-) create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeIndexBlockEncoderV2.java create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeUtilV2.java create mode 100644 hbase-common/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeUtilV2.java diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/IndexBlockEncoding.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/IndexBlockEncoding.java index 0f4908228a1e..936720790b07 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/IndexBlockEncoding.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/IndexBlockEncoding.java @@ -33,7 +33,7 @@ public enum IndexBlockEncoding { /** Disable index block encoding. */ NONE(0, null), // id 1 is reserved for the PREFIX_TREE algorithm to be added later - PREFIX_TREE(1, "org.apache.hadoop.hbase.io.encoding.PrefixTreeIndexBlockEncoder"); + PREFIX_TREE(1, "org.apache.hadoop.hbase.io.encoding.PrefixTreeIndexBlockEncoderV2"); private final short id; private final byte[] idInBytes; diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeIndexBlockEncoderV2.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeIndexBlockEncoderV2.java new file mode 100644 index 000000000000..79a4d719538b --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeIndexBlockEncoderV2.java @@ -0,0 +1,239 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +import java.io.DataOutput; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.PrivateCellUtil; +import org.apache.hadoop.hbase.io.ByteArrayOutputStream; +import org.apache.hadoop.hbase.io.encoding.PrefixTreeUtil.PrefixTreeDataWidth; +import org.apache.hadoop.hbase.io.encoding.PrefixTreeUtil.TokenizerNode; +import org.apache.hadoop.hbase.io.util.UFIntTool; +import org.apache.hadoop.hbase.nio.ByteBuff; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.ClassSize; +import org.apache.hadoop.hbase.util.ObjectIntPair; +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public class PrefixTreeIndexBlockEncoderV2 implements IndexBlockEncoder { + private static byte VERSION = 0; + + @Override + public void startBlockEncoding(boolean rootIndexBlock, DataOutput out) throws IOException { + } + + @Override + public void encode(List blockKeys, List blockOffsets, List onDiskDataSizes, + DataOutput out) throws IOException { + List rowKeys = new ArrayList<>(blockKeys.size()); + for (int i = 0; i < blockKeys.size(); i++) { + byte[] key = blockKeys.get(i); + KeyValue.KeyOnlyKeyValue rowKey = new KeyValue.KeyOnlyKeyValue(key, 0, key.length); + rowKeys.add(rowKey); + } + + TokenizerNode node = PrefixTreeUtilV2.buildPrefixTree(rowKeys); + PrefixTreeDataWidth dataWidth = new PrefixTreeDataWidth(); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrefixTreeUtilV2.serializePrefixTree(node, dataWidth, outputStream); + byte[] data = outputStream.toByteArray(); + + out.write(VERSION); + PrefixTreeUtilV2.serialize(out, dataWidth); + out.writeInt(blockKeys.size()); + out.writeInt(data.length); + out.write(data); + + long minBlockOffset = blockOffsets.get(0); + long maxBlockOffset = blockOffsets.get(blockOffsets.size() - 1); + int minOnDiskDataSize = Integer.MAX_VALUE; + int maxOnDiskDataSize = Integer.MIN_VALUE; + for (int i = 0; i < onDiskDataSizes.size(); ++i) { + if (minOnDiskDataSize > onDiskDataSizes.get(i)) { + minOnDiskDataSize = onDiskDataSizes.get(i); + } + if (maxOnDiskDataSize < onDiskDataSizes.get(i)) { + maxOnDiskDataSize = onDiskDataSizes.get(i); + } + } + + int blockOffsetWidth = UFIntTool.numBytes(maxBlockOffset - minBlockOffset); + int onDiskDataSizeWidth = UFIntTool.numBytes(maxOnDiskDataSize - minOnDiskDataSize); + + out.write(blockOffsetWidth); + out.write(onDiskDataSizeWidth); + out.writeLong(minBlockOffset); + out.writeInt(minOnDiskDataSize); + + outputStream.reset(); + for (int i = 0; i < blockOffsets.size(); ++i) { + UFIntTool.writeBytes(blockOffsetWidth, (blockOffsets.get(i) - minBlockOffset), outputStream); + UFIntTool.writeBytes(onDiskDataSizeWidth, (onDiskDataSizes.get(i) - minOnDiskDataSize), + outputStream); + } + data = outputStream.toByteArray(); + out.write(data); + } + + @Override + public void endBlockEncoding(DataOutput 
out) throws IOException { + } + + @Override + public IndexEncodedSeeker createSeeker() { + return new PrefixTreeIndexBlockEncodedSeeker(); + } + + static class PrefixTreeIndexBlockEncodedSeeker implements IndexEncodedSeeker { + + private PrefixTreeDataWidth dataWidth = new PrefixTreeDataWidth(); + private ByteBuffer prefixTreeNodeData = null; + private ByteBuffer blockOffsetAndSizeData = null; + private int blockOffsetWidth; + private int onDiskDataSizeWidth; + private long minBlockOffset; + private int minOnDiskDataSize; + + @Override + public long heapSize() { + long heapSize = ClassSize.align(ClassSize.OBJECT); + + if (prefixTreeNodeData != null) { + heapSize += ClassSize.align(ClassSize.BYTE_BUFFER + prefixTreeNodeData.capacity()); + } + if (blockOffsetAndSizeData != null) { + heapSize += ClassSize.align(ClassSize.BYTE_BUFFER + blockOffsetAndSizeData.capacity()); + } + + // dataWidth + heapSize += ClassSize.REFERENCE; + // blockOffsetWidth onDiskDataSizeWidth minOnDiskDataSize + heapSize += 3 * Bytes.SIZEOF_INT; + // PrefixTreeDataWidth's data. + heapSize += 5 * Bytes.SIZEOF_INT; + // minBlockOffset + heapSize += Bytes.SIZEOF_LONG; + return ClassSize.align(heapSize); + } + + @Override + public void initRootIndex(ByteBuff data, int numEntries, CellComparator comparator, + int treeLevel) throws IOException { + byte version = data.get(); + if (version != VERSION) { + throw new IOException("Corrupted data, version should be 0, but it is " + version); + } + PrefixTreeUtilV2.deserialize(data, dataWidth); + int numEntry = data.getInt(); + int prefixNodeLength = data.getInt(); + + ObjectIntPair tmpPair = new ObjectIntPair<>(); + data.asSubByteBuffer(data.position(), prefixNodeLength, tmpPair); + ByteBuffer dup = tmpPair.getFirst().duplicate(); + dup.position(tmpPair.getSecond()); + dup.limit(tmpPair.getSecond() + prefixNodeLength); + prefixTreeNodeData = dup.slice(); + + data.skip(prefixNodeLength); + blockOffsetWidth = data.get(); + onDiskDataSizeWidth = data.get(); + minBlockOffset = data.getLong(); + minOnDiskDataSize = data.getInt(); + int blockOffsetsAndonDiskDataSize = numEntry * (blockOffsetWidth + onDiskDataSizeWidth); + + data.asSubByteBuffer(data.position(), blockOffsetsAndonDiskDataSize, tmpPair); + dup = tmpPair.getFirst().duplicate(); + dup.position(tmpPair.getSecond()); + dup.limit(tmpPair.getSecond() + blockOffsetsAndonDiskDataSize); + blockOffsetAndSizeData = dup.slice(); + } + + @Override + public Cell getRootBlockKey(int i) { + byte[] row = PrefixTreeUtilV2.get(prefixTreeNodeData, 0, dataWidth, i); + return PrivateCellUtil.createFirstOnRow(row); + } + + @Override + public int rootBlockContainingKey(Cell key) { + return PrefixTreeUtilV2.search(prefixTreeNodeData, 0, key, 0, dataWidth); + } + + @Override + public long rootBlockBlockOffsets(int rootLevelIndex) { + int pos = rootLevelIndex * (blockOffsetWidth + onDiskDataSizeWidth); + return UFIntTool.fromBytes(blockOffsetAndSizeData, pos, blockOffsetWidth) + minBlockOffset; + } + + @Override + public int rootBlockOnDiskDataSizes(int rootLevelIndex) { + int pos = rootLevelIndex * (blockOffsetWidth + onDiskDataSizeWidth); + int currentOnDiskSize = (int) UFIntTool.fromBytes(blockOffsetAndSizeData, + pos + blockOffsetWidth, onDiskDataSizeWidth) + minOnDiskDataSize; + return currentOnDiskSize; + } + + @Override + public SearchResult locateNonRootIndexEntry(ByteBuff nonRootBlock, Cell key) + throws IOException { + PrefixTreeDataWidth meta = new PrefixTreeDataWidth(); + byte version = nonRootBlock.get(); + if (version != VERSION) { + 
throw new IOException("Corrupted data, version should be 0, but it is " + version); + } + PrefixTreeUtilV2.deserialize(nonRootBlock, meta); + int numEntry = nonRootBlock.getInt(); + int prefixNodeLength = nonRootBlock.getInt(); + + ObjectIntPair tmpPair = new ObjectIntPair<>(); + nonRootBlock.asSubByteBuffer(nonRootBlock.position(), prefixNodeLength, tmpPair); + ByteBuffer dup = tmpPair.getFirst().duplicate(); + dup.position(tmpPair.getSecond()); + dup.limit(tmpPair.getSecond() + prefixNodeLength); + ByteBuffer prefixTreeNodeData = dup.slice(); + + nonRootBlock.skip(prefixNodeLength); + + int entryIndex = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, key, 0, meta); + SearchResult result = new SearchResult(); + result.entryIndex = entryIndex; + + if (entryIndex >= 0 && entryIndex < numEntry) { + int blockOffsetWidth = nonRootBlock.get(); + int onDiskDataSizeWidth = nonRootBlock.get(); + long minBlockOffset = nonRootBlock.getLong(); + int minOnDiskDataSize = nonRootBlock.getInt(); + + int pos = nonRootBlock.position() + entryIndex * (blockOffsetWidth + onDiskDataSizeWidth); + result.offset = UFIntTool.fromBytes(nonRootBlock, pos, blockOffsetWidth) + minBlockOffset; + result.onDiskSize = + (int) UFIntTool.fromBytes(nonRootBlock, pos + blockOffsetWidth, onDiskDataSizeWidth) + + minOnDiskDataSize; + } + + return result; + } + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeUtil.java index 04bb891a2954..801225b0b556 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeUtil.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeUtil.java @@ -172,8 +172,8 @@ public static void getNodeMetaInfo(TokenizerNode node, TokenizerNodeMeta meta) { meta.totalNodeDataLength += node.nodeData.length; meta.countNodeDataNum++; - if (node.children.size() > meta.maxChildNum) { - meta.maxChildNum = node.children.size(); + if (node.children.size() > meta.maxFanOut) { + meta.maxFanOut = node.children.size(); } meta.totalChildNum += node.children.size(); meta.countChildNum++; @@ -191,7 +191,7 @@ public static void getNodeMetaInfo(TokenizerNode node, TokenizerNodeMeta meta) { } if (node.children.isEmpty()) { meta.leafNodes.add(node); - meta.countIndexNum++; + meta.totalIndexNum++; } else { meta.nonLeafNodes.add(node); } @@ -209,7 +209,7 @@ public static void serializePrefixTree(TokenizerNode node, PrefixTreeDataWidth d totalLength += meta.totalNodeDataLength; totalLength += dataWidth.nodeDataLengthWidth * meta.countNodeDataNum; - dataWidth.fanOutWidth = UFIntTool.numBytes(meta.maxChildNum); + dataWidth.fanOutWidth = UFIntTool.numBytes(meta.maxFanOut); // fan Out totalLength += dataWidth.fanOutWidth * meta.countChildNum; // fan Byte @@ -222,7 +222,7 @@ public static void serializePrefixTree(TokenizerNode node, PrefixTreeDataWidth d totalLength += dataWidth.occurrencesWidth * meta.countNumOccurrences; dataWidth.indexWidth = UFIntTool.numBytes(meta.maxIndex); - totalLength += dataWidth.indexWidth * meta.countIndexNum; + totalLength += dataWidth.indexWidth * meta.totalIndexNum; dataWidth.childNodeOffsetWidth = UFIntTool.numBytes(totalLength); @@ -525,6 +525,9 @@ static int locateWhichChild(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth throw new IllegalStateException("Unexpected unable to find index=" + index); } + /** + * Used only when serialize for build the prefix tree. 
+ */ public static class TokenizerNode { public byte[] nodeData = null; @@ -549,6 +552,9 @@ public static class TokenizerNode { public List keys = null; + public int qualifierLength = 0; + public int qualifierNum = 0; + /* * A positive value indicating how many bytes before the end of the block this node will start. * If the section is 55 bytes and negativeOffset is 9, then the node will start at 46. @@ -564,7 +570,7 @@ public static class TokenizerNodeMeta { public int totalNodeDataLength = 0; public int countNodeDataNum = 0; - public int maxChildNum = 0; + public int maxFanOut = 0; public int totalChildNum = 0; public int countChildNum = 0; @@ -572,7 +578,11 @@ public static class TokenizerNodeMeta { public int countNumOccurrences = 0; public int maxIndex = 0; - public int countIndexNum = 0; + public int totalIndexNum = 0; + + public int maxQualifierLength = 0; + public int countQualifierNum = 0; + public int totalQualifierLength = 0; public ArrayList nonLeafNodes = new ArrayList<>(); @@ -589,5 +599,7 @@ public static class PrefixTreeDataWidth { public int indexWidth = 0; public int childNodeOffsetWidth = 0; + + public int qualifierLengthWidth = 0; } } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeUtilV2.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeUtilV2.java new file mode 100644 index 000000000000..5e9e706e1cd9 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixTreeUtilV2.java @@ -0,0 +1,848 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.io.encoding; + +import java.io.DataOutput; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.hbase.ByteBufferExtendedCell; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.io.ByteArrayOutputStream; +import org.apache.hadoop.hbase.io.encoding.PrefixTreeUtil.PrefixTreeDataWidth; +import org.apache.hadoop.hbase.io.encoding.PrefixTreeUtil.TokenizerNode; +import org.apache.hadoop.hbase.io.encoding.PrefixTreeUtil.TokenizerNodeMeta; +import org.apache.hadoop.hbase.io.util.StreamUtils; +import org.apache.hadoop.hbase.io.util.UFIntTool; +import org.apache.hadoop.hbase.nio.ByteBuff; +import org.apache.hadoop.hbase.util.ByteBufferUtils; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@InterfaceAudience.Private +public class PrefixTreeUtilV2 { + + private static final Logger LOG = LoggerFactory.getLogger(PrefixTreeUtilV2.class); + + /** + * Build tree from begin + * @return the tree + */ + public static TokenizerNode buildPrefixTree(List rowKeys) { + // root node. + TokenizerNode node = new TokenizerNode(); + int start = 0; + // Get max common prefix + int common = maxCommonPrefix(rowKeys, 0, rowKeys.size() - 1, 0); + if (common > 0) { + byte[] commonB = + Bytes.copy(rowKeys.get(0).getRowArray(), rowKeys.get(0).getRowOffset(), common); + node.nodeData = commonB; + for (int i = 0; i < rowKeys.size(); i++) { + if (rowKeys.get(i).getRowLength() == common) { + node.numOccurrences++; + if (node.index == null) { + node.index = new ArrayList<>(1); + } + node.index.add(i); + if (node.keys == null) { + node.keys = new ArrayList<>(1); + } + node.keys.add(rowKeys.get(i)); + start = i + 1; + } else { + break; + } + } + } else { + // Only root node data can be empty. + node.nodeData = new byte[0]; + } + if (start <= rowKeys.size() - 1) { + constructAndSplitChild(node, rowKeys, start, rowKeys.size() - 1, common); + } + return node; + } + + /** + * Calculate max common prefix + * @return the max common prefix num bytes + */ + static int maxCommonPrefix(List rowKeys, int start, int end, + int startPos) { + // only one entry. + if (start == end) { + return rowKeys.get(start).getRowLength() - startPos; + } + int common = 0; + KeyValue.KeyOnlyKeyValue startRowKey = rowKeys.get(start); + for (int round = 0; round <= startRowKey.getRowLength() - startPos - 1; round++) { + boolean same = true; + for (int i = start + 1; i <= end; i++) { + KeyValue.KeyOnlyKeyValue rowKey = rowKeys.get(i); + if (startPos + common > rowKey.getRowLength() - 1) { + same = false; + break; + } + if ( + startRowKey.getRowArray()[startRowKey.getRowOffset() + startPos + common] + != rowKey.getRowArray()[rowKey.getRowOffset() + startPos + common] + ) { + same = false; + break; + } + } + if (same) { + common++; + } else { + break; + } + } + return common; + } + + /** + * No common prefix split it. 
+ */ + static void constructAndSplitChild(TokenizerNode node, List rowKeys, + int start, int end, int startPos) { + int middle = start; + KeyValue.KeyOnlyKeyValue startRowKey = rowKeys.get(start); + for (int i = start + 1; i <= end; i++) { + if (startPos > rowKeys.get(i).getRowLength() - 1) { + middle = i - 1; + break; + } + KeyValue.KeyOnlyKeyValue rowKey = rowKeys.get(i); + if ( + startRowKey.getRowArray()[startRowKey.getRowOffset() + startPos] + != rowKey.getRowArray()[rowKey.getRowOffset() + startPos] + ) { + middle = i - 1; + break; + } + if (i == end) { + middle = end; + } + } + constructCommonNodeAndChild(node, rowKeys, start, middle, startPos); + if (middle + 1 <= end) { + // right + constructCommonNodeAndChild(node, rowKeys, middle + 1, end, startPos); + } + } + + /** + * Get max common prefix as node and build children. + */ + static TokenizerNode constructCommonNodeAndChild(TokenizerNode node, + List rowKeys, int start, int end, int startPos) { + int common = maxCommonPrefix(rowKeys, start, end, startPos); + if (common > 0) { + TokenizerNode child = new TokenizerNode(); + child.parent = node; + node.children.add(child); + byte[] commonB = Bytes.copy(rowKeys.get(start).getRowArray(), + rowKeys.get(start).getRowOffset() + startPos, common); + child.nodeData = commonB; + int newStart = start; + for (int i = start; i <= end; i++) { + if (rowKeys.get(i).getRowLength() == (startPos + common)) { + child.numOccurrences++; + if (child.index == null) { + child.index = new ArrayList<>(1); + } + child.index.add(i); + if (child.keys == null) { + child.keys = new ArrayList<>(1); + } + child.keys.add(rowKeys.get(i)); + newStart = i + 1; + } else { + break; + } + } + if (start != end && newStart <= end) { + if (newStart == start) { + // no common prefix. + constructAndSplitChild(child, rowKeys, newStart, end, startPos + common); + } else { + // can have common prefix. 
+ constructCommonNodeAndChild(child, rowKeys, newStart, end, startPos + common); + } + } + } else { + // no common prefix, split + constructAndSplitChild(node, rowKeys, start, end, startPos); + } + return node; + } + + static void getNodeMetaInfo(TokenizerNode node, TokenizerNodeMeta meta) { + if (node.nodeData.length > meta.maxNodeDataLength) { + meta.maxNodeDataLength = node.nodeData.length; + } + meta.totalNodeDataLength += node.nodeData.length; + meta.countNodeDataNum++; + + if (node.children.size() > meta.maxFanOut) { + meta.maxFanOut = node.children.size(); + } + meta.totalChildNum += node.children.size(); + meta.countChildNum++; + + if (node.numOccurrences > meta.maxNumOccurrences) { + meta.maxNumOccurrences = node.numOccurrences; + } + meta.totalIndexNum += node.numOccurrences; + meta.countNumOccurrences++; + + if (node.index != null) { + for (Integer entry : node.index) { + if (entry > meta.maxIndex) { + meta.maxIndex = entry; + } + } + } + if (node.keys != null) { + for (KeyValue.KeyOnlyKeyValue keyValue : node.keys) { + int qualifierLength = keyValue.getQualifierLength(); + if (qualifierLength > 0) { + meta.countQualifierNum++; + if (qualifierLength > meta.maxQualifierLength) { + meta.maxQualifierLength = qualifierLength; + } + meta.totalQualifierLength += qualifierLength; + node.qualifierNum++; + node.qualifierLength += qualifierLength; + } + } + } + if (node.children.isEmpty()) { + meta.leafNodes.add(node); + } else { + meta.nonLeafNodes.add(node); + } + for (TokenizerNode child : node.children) { + getNodeMetaInfo(child, meta); + } + } + + public static void serializePrefixTree(TokenizerNode node, PrefixTreeDataWidth dataWidth, + ByteArrayOutputStream os) throws IOException { + TokenizerNodeMeta meta = new TokenizerNodeMeta(); + getNodeMetaInfo(node, meta); + + dataWidth.nodeDataLengthWidth = UFIntTool.numBytes(meta.maxNodeDataLength); + dataWidth.fanOutWidth = UFIntTool.numBytes(meta.maxFanOut); + dataWidth.occurrencesWidth = UFIntTool.numBytes(meta.maxNumOccurrences * 2 + 1); + dataWidth.indexWidth = UFIntTool.numBytes(meta.maxIndex); + dataWidth.qualifierLengthWidth = UFIntTool.numBytes(meta.maxQualifierLength); + + calculateSerializeInfo(meta, dataWidth); + + serialize(meta, os, dataWidth); + } + + static void calculateSerializeInfo(TokenizerNodeMeta meta, PrefixTreeDataWidth dataWidth) { + int totalLength = 0; + int nextNodeOffsetNum = 0; + for (TokenizerNode leafNode : meta.leafNodes) { + totalLength += dataWidth.nodeDataLengthWidth; + totalLength += leafNode.nodeData.length; + if (leafNode.parent != null) { + // exclude child's first bytes, child's first byte stored in parent node. 
+ totalLength = totalLength - 1; + } + // fan Out + totalLength += dataWidth.fanOutWidth; + // fan Byte + totalLength += leafNode.children.size(); + nextNodeOffsetNum += leafNode.children.size(); + + totalLength += dataWidth.occurrencesWidth; + totalLength += (leafNode.numOccurrences * dataWidth.indexWidth); + + if (leafNode.qualifierNum > 0) { + // qualifier + for (int i = 0; i < leafNode.numOccurrences; i++) { + int qualifierLength = leafNode.keys.get(i).getQualifierLength(); + if (qualifierLength > 0) { + totalLength += dataWidth.qualifierLengthWidth; + totalLength += qualifierLength; + totalLength += (Bytes.SIZEOF_BYTE + Bytes.SIZEOF_LONG); + } else { + totalLength += dataWidth.qualifierLengthWidth; + } + } + } + } + for (TokenizerNode nonLeafNode : meta.nonLeafNodes) { + totalLength += dataWidth.nodeDataLengthWidth; + totalLength += nonLeafNode.nodeData.length; + if (nonLeafNode.parent != null) { + // exclude child's first bytes, child's first byte stored in parent node. + totalLength = totalLength - 1; + } + // fan Out + totalLength += dataWidth.fanOutWidth; + // fan Byte + totalLength += nonLeafNode.children.size(); + nextNodeOffsetNum += nonLeafNode.children.size(); + + totalLength += dataWidth.occurrencesWidth; + totalLength += (nonLeafNode.numOccurrences * dataWidth.indexWidth); + + if (nonLeafNode.qualifierNum > 0) { + // qualifier + for (int i = 0; i < nonLeafNode.numOccurrences; i++) { + int qualifierLength = nonLeafNode.keys.get(i).getQualifierLength(); + if (qualifierLength > 0) { + totalLength += dataWidth.qualifierLengthWidth; + totalLength += qualifierLength; + totalLength += (Bytes.SIZEOF_BYTE + Bytes.SIZEOF_LONG); + } else { + totalLength += dataWidth.qualifierLengthWidth; + } + } + } + } + + int totalBytesWithoutOffsets = totalLength; + // figure out how wide our offset FInts are + int offsetWidth = 0; + while (true) { + ++offsetWidth; + int numBytesFinder = totalBytesWithoutOffsets + (offsetWidth * nextNodeOffsetNum); + if (numBytesFinder < UFIntTool.maxValueForNumBytes(offsetWidth)) { + totalLength = numBytesFinder; + break; + } // it fits + } + dataWidth.childNodeOffsetWidth = offsetWidth; + + // track the starting position of each node in final output + int negativeIndex = 0; + for (int i = meta.leafNodes.size() - 1; i >= 0; i--) { + TokenizerNode leaf = meta.leafNodes.get(i); + int leafNodeWidth = dataWidth.nodeDataLengthWidth + leaf.nodeData.length; + if (leaf.parent != null) { + // leaves store all but their first token byte + leafNodeWidth = leafNodeWidth - 1; + } + // leaf node, no children. 
+ leafNodeWidth += dataWidth.fanOutWidth; + // no fanOut bytes and nextNodeOffset + // index + leafNodeWidth += dataWidth.occurrencesWidth + leaf.numOccurrences * dataWidth.indexWidth; + if (leaf.qualifierNum > 0) { + // qualifier + for (int j = 0; j < leaf.numOccurrences; j++) { + int qualifierLength = leaf.keys.get(j).getQualifierLength(); + if (qualifierLength > 0) { + leafNodeWidth += dataWidth.qualifierLengthWidth; + leafNodeWidth += qualifierLength; + leafNodeWidth += (Bytes.SIZEOF_BYTE + Bytes.SIZEOF_LONG); + } else { + leafNodeWidth += dataWidth.qualifierLengthWidth; + } + } + } + negativeIndex += leafNodeWidth; + leaf.nodeWidth = leafNodeWidth; + leaf.negativeIndex = negativeIndex; + } + for (int i = meta.nonLeafNodes.size() - 1; i >= 0; i--) { + TokenizerNode nonLeaf = meta.nonLeafNodes.get(i); + int leafNodeWidth = dataWidth.nodeDataLengthWidth + nonLeaf.nodeData.length; + if (nonLeaf.parent != null) { + leafNodeWidth = leafNodeWidth - 1; + } + // fanOut, children's first byte, and children's offset. + leafNodeWidth += dataWidth.fanOutWidth + nonLeaf.children.size() + + nonLeaf.children.size() * dataWidth.childNodeOffsetWidth; + // index + leafNodeWidth += dataWidth.occurrencesWidth + nonLeaf.numOccurrences * dataWidth.indexWidth; + if (nonLeaf.qualifierNum > 0) { + // qualifier + for (int j = 0; j < nonLeaf.numOccurrences; j++) { + int qualifierLength = nonLeaf.keys.get(j).getQualifierLength(); + if (qualifierLength > 0) { + leafNodeWidth += dataWidth.qualifierLengthWidth; + leafNodeWidth += qualifierLength; + leafNodeWidth += (Bytes.SIZEOF_BYTE + Bytes.SIZEOF_LONG); + } else { + leafNodeWidth += dataWidth.qualifierLengthWidth; + } + } + } + negativeIndex += leafNodeWidth; + nonLeaf.nodeWidth = leafNodeWidth; + nonLeaf.negativeIndex = negativeIndex; + } + } + + static void serialize(TokenizerNodeMeta meta, ByteArrayOutputStream os, + PrefixTreeDataWidth dataWidth) throws IOException { + for (int i = 0; i < meta.nonLeafNodes.size(); i++) { + serialize(meta.nonLeafNodes.get(i), os, dataWidth); + } + for (int i = 0; i < meta.leafNodes.size(); i++) { + serialize(meta.leafNodes.get(i), os, dataWidth); + } + } + + static void serialize(TokenizerNode node, ByteArrayOutputStream os, PrefixTreeDataWidth dataWidth) + throws IOException { + if (node.parent != null) { + // The first byte do not need to store, it store in the parent. + if (node.nodeData.length - 1 > 0) { + UFIntTool.writeBytes(dataWidth.nodeDataLengthWidth, node.nodeData.length - 1, os); + os.write(node.nodeData, 1, node.nodeData.length - 1); + } else { + UFIntTool.writeBytes(dataWidth.nodeDataLengthWidth, 0, os); + } + } else { + UFIntTool.writeBytes(dataWidth.nodeDataLengthWidth, node.nodeData.length, os); + os.write(node.nodeData, 0, node.nodeData.length); + } + UFIntTool.writeBytes(dataWidth.fanOutWidth, node.children.size(), os); + for (TokenizerNode child : node.children) { + // child's first byte. 
+ os.write(child.nodeData[0]); + } + for (TokenizerNode child : node.children) { + UFIntTool.writeBytes(dataWidth.childNodeOffsetWidth, node.negativeIndex - child.negativeIndex, + os); + } + int occurrences = node.numOccurrences << 1; + if (node.qualifierNum > 0) { + occurrences = occurrences | 0x01; + } + UFIntTool.writeBytes(dataWidth.occurrencesWidth, occurrences, os); + for (int i = 0; i < node.numOccurrences; i++) { + UFIntTool.writeBytes(dataWidth.indexWidth, node.index.get(i), os); + } + if (node.qualifierNum > 0) { + for (int i = 0; i < node.numOccurrences; i++) { + KeyValue.KeyOnlyKeyValue keyOnlyKeyValue = node.keys.get(i); + if (keyOnlyKeyValue.getQualifierLength() > 0) { + UFIntTool.writeBytes(dataWidth.qualifierLengthWidth, keyOnlyKeyValue.getQualifierLength(), + os); + os.write(keyOnlyKeyValue.getQualifierArray(), keyOnlyKeyValue.getQualifierOffset(), + keyOnlyKeyValue.getQualifierLength()); + // write timestamp + StreamUtils.writeLong(os, keyOnlyKeyValue.getTimestamp()); + // write the type + os.write(keyOnlyKeyValue.getTypeByte()); + } else { + UFIntTool.writeBytes(dataWidth.qualifierLengthWidth, 0, os); + } + } + } + } + + public static void serialize(DataOutput out, PrefixTreeDataWidth dataWidth) throws IOException { + out.writeByte(dataWidth.nodeDataLengthWidth); + out.writeByte(dataWidth.fanOutWidth); + out.writeByte(dataWidth.occurrencesWidth); + out.writeByte(dataWidth.indexWidth); + out.writeByte(dataWidth.childNodeOffsetWidth); + out.writeByte(dataWidth.qualifierLengthWidth); + } + + public static void deserialize(ByteBuff data, PrefixTreeDataWidth dataWidth) { + dataWidth.nodeDataLengthWidth = data.get(); + dataWidth.fanOutWidth = data.get(); + dataWidth.occurrencesWidth = data.get(); + dataWidth.indexWidth = data.get(); + dataWidth.childNodeOffsetWidth = data.get(); + dataWidth.qualifierLengthWidth = data.get(); + } + + /** + * Get the node index, that search key >= index and search key < (index + 1) + */ + public static int search(ByteBuffer data, int bbStartPos, Cell skey, int keyStartPos, + PrefixTreeDataWidth meta) { + int nodeDataLength = getNodeDataLength(data, bbStartPos, meta); + int cs = 0; + if (nodeDataLength > 0) { + cs = compareTo(skey, keyStartPos, Math.min(skey.getRowLength() - keyStartPos, nodeDataLength), + data, bbStartPos + meta.nodeDataLengthWidth, nodeDataLength); + } + + int pos = bbStartPos + meta.nodeDataLengthWidth + nodeDataLength; + int fanOut = getNodeFanOut(data, pos, meta); + pos += meta.fanOutWidth + fanOut + fanOut * meta.childNodeOffsetWidth; + int tmpNumOccurrences = getNodeNumOccurrences(data, pos, meta); + int numOccurrences = tmpNumOccurrences >> 1; + int hasQualifier = tmpNumOccurrences & 0x01; + + pos += meta.occurrencesWidth; + + if (cs == 0) { + // continue search + if (fanOut == 0) { + // no children, should be numOccurrences > 0 + if (skey.getRowLength() == keyStartPos + nodeDataLength) { + if (hasQualifier == 0) { + // == current node + return getNodeIndex(data, pos, 0, meta); + } else { + // compare qualifier + int qualifierPos = pos + numOccurrences * meta.indexWidth; + if (skey.getQualifierLength() == 0) { + int firstQualifierLength = getQualifierLength(data, qualifierPos, meta); + if (firstQualifierLength == 0) { + return getNodeIndex(data, pos, 0, meta); + } else { + // search key has no qualifier, but first index node has. 
+ return getNodeIndex(data, pos, 0, meta) - 1; + } + } else { + for (int i = 0; i < numOccurrences; i++) { + int qualifierLength = getQualifierLength(data, qualifierPos, meta); + qualifierPos += meta.qualifierLengthWidth; + int qualifierCR = compareQualifierTo(skey, data, qualifierPos, qualifierLength); + if (qualifierCR == 0) { + // the same qualifier. + int timestampPos = qualifierPos + qualifierLength; + long timestamp = ByteBufferUtils.toLong(data, timestampPos); + byte byteType = ByteBufferUtils.toByte(data, timestampPos + Bytes.SIZEOF_LONG); + // higher numbers sort before those of lesser numbers. + if (skey.getTimestamp() > timestamp) { + return getNodeIndex(data, pos, i, meta) - 1; + } else if (skey.getTimestamp() < timestamp) { + return getNodeIndex(data, pos, i, meta); + } + // higher numbers sort before those of lesser numbers. + if ((0xff & skey.getTypeByte() - (0xff & byteType)) > 0) { + return getNodeIndex(data, pos, i, meta) - 1; + } else { + return getNodeIndex(data, pos, i, meta); + } + } else if (qualifierCR < 0) { + return getNodeIndex(data, pos, i, meta) - 1; + } + if (qualifierLength > 0) { + qualifierPos += (qualifierLength + Bytes.SIZEOF_LONG + Bytes.SIZEOF_BYTE); + } + } + return getNodeIndex(data, pos, numOccurrences - 1, meta); + } + } + } else { + // > current node. + return getNodeIndex(data, pos, numOccurrences - 1, meta); + } + } + if (skey.getRowLength() > keyStartPos + nodeDataLength) { + int fanOffset = bbStartPos + meta.nodeDataLengthWidth + nodeDataLength + meta.fanOutWidth; + byte searchForByte = getCellByte(skey, keyStartPos + nodeDataLength); + + int fanIndexInBlock = + unsignedBinarySearch(data, fanOffset, fanOffset + fanOut, searchForByte); + int nodeOffsetStartPos = fanOffset + fanOut; + if (fanIndexInBlock >= 0) { + // found it, but need to adjust for position of fan in overall block + int fanIndex = fanIndexInBlock - fanOffset; + int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, fanIndex, meta); + return search(data, bbStartPos + nodeOffset, skey, keyStartPos + nodeDataLength + 1, + meta); + } else { + int fanIndex = fanIndexInBlock + fanOffset;// didn't find it, so compensate in reverse + int insertionPoint = (-fanIndex - 1) - 1; + if (insertionPoint < 0) { + // < first children + int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, 0, meta); + return getFirstLeafNode(data, bbStartPos + nodeOffset, meta) - 1; + } else { + int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, insertionPoint, meta); + return getLastLeafNode(data, bbStartPos + nodeOffset, meta); + } + } + } else { + // skey.length == keyStartPos + nodeDataLength + if (numOccurrences > 0) { + // == current node and current node is a leaf node. + if (hasQualifier == 0) { + // == current node + return getNodeIndex(data, pos, 0, meta); + } else { + // need compare qualifier + int qualifierPos = pos + numOccurrences * meta.indexWidth; + if (skey.getQualifierLength() == 0) { + int firstQualifierLength = getQualifierLength(data, qualifierPos, meta); + if (firstQualifierLength == 0) { + return getNodeIndex(data, pos, 0, meta); + } else { + // search key has no qualifier, but first index node has. + return getNodeIndex(data, pos, 0, meta) - 1; + } + } else { + for (int i = 0; i < numOccurrences; i++) { + int qualifierLength = getQualifierLength(data, qualifierPos, meta); + qualifierPos += meta.qualifierLengthWidth; + int qualifierCR = compareQualifierTo(skey, data, qualifierPos, qualifierLength); + if (qualifierCR == 0) { + // the same qualifier. 
+ int timestampPos = qualifierPos + qualifierLength; + long timestamp = ByteBufferUtils.toLong(data, timestampPos); + byte byteType = ByteBufferUtils.toByte(data, timestampPos + Bytes.SIZEOF_LONG); + // higher numbers sort before those of lesser numbers. + if (skey.getTimestamp() > timestamp) { + return getNodeIndex(data, pos, i, meta) - 1; + } else if (skey.getTimestamp() < timestamp) { + return getNodeIndex(data, pos, i, meta); + } + // higher numbers sort before those of lesser numbers. + if ((0xff & skey.getTypeByte() - (0xff & byteType)) > 0) { + return getNodeIndex(data, pos, i, meta) - 1; + } else { + return getNodeIndex(data, pos, i, meta); + } + } else if (qualifierCR < 0) { + return getNodeIndex(data, pos, i, meta) - 1; + } + if (qualifierLength > 0) { + qualifierPos += (qualifierLength + Bytes.SIZEOF_LONG + Bytes.SIZEOF_BYTE); + } + } + return getNodeIndex(data, pos, numOccurrences - 1, meta); + } + } + } else { + // need -1, == current node and current node not a leaf node. + return getFirstLeafNode(data, bbStartPos, meta) - 1; + } + } + } else if (cs > 0) { + // search key bigger than (>) current node, get biggest + if (fanOut == 0) { + if (numOccurrences > 0) { + return getNodeIndex(data, pos, numOccurrences - 1, meta); + } else { + throw new IllegalStateException( + "numOccurrences = " + numOccurrences + ", fanOut = " + fanOut + " not expected."); + } + } else { + return getLastLeafNode(data, bbStartPos, meta); + } + } else { + // search key small than (<) current node, get smallest. + if (numOccurrences > 0) { + return getNodeIndex(data, pos, 0, meta) - 1; + } else { + return getFirstLeafNode(data, bbStartPos, meta) - 1; + } + } + } + + static int compareTo(Cell skey, int o1, int l1, ByteBuffer data, int o2, int l2) { + if (skey instanceof ByteBufferExtendedCell) { + ByteBufferExtendedCell byteBufferExtendedCell = ((ByteBufferExtendedCell) skey); + return ByteBufferUtils.compareTo(byteBufferExtendedCell.getRowByteBuffer(), + byteBufferExtendedCell.getRowPosition() + o1, l1, data, o2, l2); + } + return ByteBufferUtils.compareTo(skey.getRowArray(), skey.getRowOffset() + o1, l1, data, o2, + l2); + } + + static int compareQualifierTo(Cell skey, ByteBuffer data, int o2, int l2) { + if (skey instanceof ByteBufferExtendedCell) { + ByteBufferExtendedCell byteBufferExtendedCell = ((ByteBufferExtendedCell) skey); + return ByteBufferUtils.compareTo(byteBufferExtendedCell.getQualifierByteBuffer(), + byteBufferExtendedCell.getQualifierPosition(), byteBufferExtendedCell.getQualifierLength(), + data, o2, l2); + } + return ByteBufferUtils.compareTo(skey.getQualifierArray(), skey.getQualifierOffset(), + skey.getQualifierLength(), data, o2, l2); + } + + static byte getCellByte(Cell skey, int position) { + if (skey instanceof ByteBufferExtendedCell) { + ByteBufferExtendedCell byteBufferExtendedCell = ((ByteBufferExtendedCell) skey); + return byteBufferExtendedCell.getRowByteBuffer() + .get(byteBufferExtendedCell.getRowPosition() + position); + } + return skey.getRowArray()[skey.getRowOffset() + position]; + } + + static int getNodeDataLength(ByteBuffer data, int offset, PrefixTreeDataWidth meta) { + int dataLength = (int) UFIntTool.fromBytes(data, offset, meta.nodeDataLengthWidth); + return dataLength; + } + + static int getNodeFanOut(ByteBuffer data, int offset, PrefixTreeDataWidth meta) { + int fanOut = (int) UFIntTool.fromBytes(data, offset, meta.fanOutWidth); + return fanOut; + } + + static int getNodeNumOccurrences(ByteBuffer data, int offset, PrefixTreeDataWidth meta) { + int 
numOccurrences = (int) UFIntTool.fromBytes(data, offset, meta.occurrencesWidth); + return numOccurrences; + } + + static int getNodeOffset(ByteBuffer data, int offset, int index, PrefixTreeDataWidth meta) { + int nodeOffset = (int) UFIntTool.fromBytes(data, offset + (index * meta.childNodeOffsetWidth), + meta.childNodeOffsetWidth); + return nodeOffset; + } + + static int getNodeIndex(ByteBuffer data, int offset, int index, PrefixTreeDataWidth meta) { + int nodeIndex = + (int) UFIntTool.fromBytes(data, offset + (index * meta.indexWidth), meta.indexWidth); + return nodeIndex; + } + + static int getQualifierLength(ByteBuffer data, int offset, PrefixTreeDataWidth meta) { + int nodeIndex = (int) UFIntTool.fromBytes(data, offset, meta.qualifierLengthWidth); + return nodeIndex; + } + + /** + * Get the node's first leaf node + */ + static int getFirstLeafNode(ByteBuffer data, int nodeStartPos, PrefixTreeDataWidth meta) { + int dataLength = getNodeDataLength(data, nodeStartPos, meta); + int pos = nodeStartPos + meta.nodeDataLengthWidth + dataLength; + int fanOut = getNodeFanOut(data, pos, meta); + pos += meta.fanOutWidth + fanOut + fanOut * meta.childNodeOffsetWidth; + int tmpNumOccurrences = getNodeNumOccurrences(data, pos, meta); + int numOccurrences = tmpNumOccurrences >> 1; + pos += meta.occurrencesWidth; + if (numOccurrences > 0 || fanOut == 0) { + // return current node. + return getNodeIndex(data, pos, 0, meta); + } else { + int nodeOffsetStartPos = + nodeStartPos + meta.nodeDataLengthWidth + dataLength + meta.fanOutWidth + fanOut; + int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, 0, meta); + return getFirstLeafNode(data, nodeStartPos + nodeOffset, meta); + } + } + + /** + * Get the node's last leaf node + */ + static int getLastLeafNode(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth meta) { + int dataLength = getNodeDataLength(data, bbStartPos, meta); + int pos = bbStartPos + meta.nodeDataLengthWidth + dataLength; + int fanOut = getNodeFanOut(data, pos, meta); + pos += meta.fanOutWidth + fanOut + fanOut * meta.childNodeOffsetWidth; + int tmpNumOccurrences = getNodeNumOccurrences(data, pos, meta); + int numOccurrences = tmpNumOccurrences >> 1; + pos += meta.occurrencesWidth; + if (fanOut == 0) { + return getNodeIndex(data, pos, numOccurrences - 1, meta); + } else { + int nodeOffsetStartPos = + bbStartPos + meta.nodeDataLengthWidth + dataLength + meta.fanOutWidth + fanOut; + int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, fanOut - 1, meta); + return getLastLeafNode(data, bbStartPos + nodeOffset, meta); + } + } + + public static int unsignedBinarySearch(ByteBuffer a, int fromIndex, int toIndex, byte key) { + int unsignedKey = key & 0xff; + int low = fromIndex; + int high = toIndex - 1; + + while (low <= high) { + int mid = low + ((high - low) >> 1); + int midVal = a.get(mid) & 0xff; + + if (midVal < unsignedKey) { + low = mid + 1; + } else if (midVal > unsignedKey) { + high = mid - 1; + } else { + return mid; // key found + } + } + return -(low + 1); // key not found. 
+ } + + public static byte[] get(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth dataWidth, + int index) { + return get(data, bbStartPos, dataWidth, index, new byte[0]); + } + + static byte[] get(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth meta, int index, + byte[] prefix) { + int dataLength = getNodeDataLength(data, bbStartPos, meta); + byte[] bdata = new byte[dataLength]; + ByteBuffer dup = data.duplicate(); + dup.position(bbStartPos + meta.nodeDataLengthWidth); + dup.get(bdata, 0, dataLength); + bdata = Bytes.add(prefix, bdata); + + int pos = bbStartPos + meta.nodeDataLengthWidth + dataLength; + int fanOut = getNodeFanOut(data, pos, meta); + pos += meta.fanOutWidth + fanOut + fanOut * meta.childNodeOffsetWidth; + int tmpNumOccurrences = getNodeNumOccurrences(data, pos, meta); + int numOccurrences = tmpNumOccurrences >> 1; + // int hasQualifier = tmpNumOccurrences &= 0x01; + pos += meta.occurrencesWidth; + if (numOccurrences > 0) { + for (int i = 0; i < numOccurrences; i++) { + int currentNodeIndex = getNodeIndex(data, pos, i, meta); + if (currentNodeIndex == index) { + return bdata; + } + } + } + if (fanOut == 0) { + for (int i = 0; i < numOccurrences; i++) { + int currentNodeIndex = getNodeIndex(data, pos, i, meta); + if (currentNodeIndex == index) { + return bdata; + } + } + throw new IllegalStateException("Unexpected, not find index=" + index + " node's data."); + } else { + int nodeOffsetStartPos = + bbStartPos + meta.nodeDataLengthWidth + dataLength + meta.fanOutWidth + fanOut; + int locateIndex = locateWhichChild(data, bbStartPos, meta, index, fanOut, nodeOffsetStartPos); + int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, locateIndex, meta); + byte[] childBytes = new byte[1]; + childBytes[0] = data + .get(bbStartPos + meta.nodeDataLengthWidth + dataLength + meta.fanOutWidth + locateIndex); + bdata = Bytes.add(bdata, childBytes); + return get(data, bbStartPos + nodeOffset, meta, index, bdata); + } + } + + static int locateWhichChild(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth meta, int index, + int fanOut, int nodeOffsetStartPos) { + for (int i = 0; i < fanOut; i++) { + int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, i, meta); + int lastLeafNode = getLastLeafNode(data, bbStartPos + nodeOffset, meta); + if (lastLeafNode >= index) { + return i; + } + } + throw new IllegalStateException("Unexpected unable to find index=" + index); + } + +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeUtilV2.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeUtilV2.java new file mode 100644 index 000000000000..1d3258a24a18 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeUtilV2.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.PrivateCellUtil; +import org.apache.hadoop.hbase.io.ByteArrayOutputStream; +import org.apache.hadoop.hbase.io.encoding.PrefixTreeUtil.PrefixTreeDataWidth; +import org.apache.hadoop.hbase.io.encoding.PrefixTreeUtil.TokenizerNode; +import org.apache.hadoop.hbase.testclassification.IOTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Assert; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Category({ IOTests.class, SmallTests.class }) +public class TestPrefixTreeUtilV2 { + private static final Logger LOG = LoggerFactory.getLogger(TestPrefixTreeUtilV2.class); + private static byte[] FAM = Bytes.toBytes("cf"); + + @Test + public void testSearchPrefixTree() throws IOException { + List rows = new ArrayList<>(); + + rows.add(new KeyValue(Bytes.toBytes("00c7-202206201519-wx0t"), FAM, Bytes.toBytes("qh"))); + rows.add(new KeyValue(Bytes.toBytes("00c7-202206201519-wx0zcldi7lnsiyas-N"), FAM, + Bytes.toBytes("qh"))); + rows.add(new KeyValue(Bytes.toBytes("00c7-202206201520-wx0re"), FAM, Bytes.toBytes("qh"))); + rows.add(new KeyValue(Bytes.toBytes("00c7-202206201520-wx0x7"), FAM, Bytes.toBytes("qh"))); + rows.add(new KeyValue(Bytes.toBytes("00c7-202206201521"), FAM, Bytes.toBytes("qh"))); + rows.add(new KeyValue(Bytes.toBytes("00c7-202206201521-wx05xfbtw2mopyhs-C"), FAM, + Bytes.toBytes("qh"))); + rows.add(new KeyValue(Bytes.toBytes("00c7-202206201521-wx08"), FAM, Bytes.toBytes("qh"))); + rows.add(new KeyValue(Bytes.toBytes("00c7-202206201521-wx0c"), FAM, Bytes.toBytes("qh"))); + rows.add(new KeyValue(Bytes.toBytes("00c7-202206201522-wx0t"), FAM, Bytes.toBytes("qh"))); + rows.add(new KeyValue(Bytes.toBytes("00c8-202206200751-wx0ah4gnbwptdyna-F"), FAM, + Bytes.toBytes("qh"))); + + List childs = new ArrayList<>(15); + childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil + .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(0))))); + childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil + .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(1))))); + childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil + .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(2))))); + + childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil + .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRowCol(rows.get(3))))); + childs.add(new KeyValue.KeyOnlyKeyValue( + PrivateCellUtil.getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRowCol( + new KeyValue(Bytes.toBytes("00c7-202206201520-wx0x7"), FAM, Bytes.toBytes("qo")))))); + + childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil + .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(4))))); + childs.add(new KeyValue.KeyOnlyKeyValue( + PrivateCellUtil.getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRowCol( + new KeyValue(Bytes.toBytes("00c7-202206201521"), FAM, Bytes.toBytes("qb")))))); + childs.add(new KeyValue.KeyOnlyKeyValue( + 
PrivateCellUtil.getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRowCol( + new KeyValue(Bytes.toBytes("00c7-202206201521"), FAM, Bytes.toBytes("qf")))))); + childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil + .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRowCol(rows.get(4))))); + + childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil + .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(5))))); + childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil + .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(6))))); + childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil + .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(7))))); + childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil + .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(8))))); + childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil + .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(9))))); + + TokenizerNode node = PrefixTreeUtilV2.buildPrefixTree(childs); + + PrefixTreeDataWidth dataWidth = new PrefixTreeDataWidth(); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrefixTreeUtilV2.serializePrefixTree(node, dataWidth, outputStream); + byte[] data = outputStream.toByteArray(); + ByteBuffer prefixTreeNodeData = ByteBuffer.wrap(data); + for (int i = 0; i < childs.size(); i++) { + byte[] result = PrefixTreeUtilV2.get(prefixTreeNodeData, 0, dataWidth, i); + Assert.assertTrue(Bytes.compareTo(result, CellUtil.cloneRow(childs.get(i))) == 0); + } + + for (int i = 0; i < childs.size(); i++) { + int result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, childs.get(i), 0, dataWidth); + Assert.assertEquals(i, result); + } + + Cell skey = PrivateCellUtil.createFirstOnRow(Bytes.toBytes("00c7-202206201519")); + int result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth); + Assert.assertEquals(-1, result); + + skey = PrivateCellUtil.createFirstOnRow(Bytes.toBytes("00c7-202206201520")); + result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth); + Assert.assertEquals(1, result); + + skey = PrivateCellUtil.createFirstOnRowCol( + new KeyValue(Bytes.toBytes("00c7-202206201520-wx0x7"), FAM, Bytes.toBytes("qa"))); + result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth); + Assert.assertEquals(2, result); + + skey = PrivateCellUtil.createFirstOnRowCol( + new KeyValue(Bytes.toBytes("00c7-202206201520-wx0x7"), FAM, Bytes.toBytes("qm"))); + result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth); + Assert.assertEquals(3, result); + + skey = PrivateCellUtil.createFirstOnRowCol( + new KeyValue(Bytes.toBytes("00c7-202206201520-wx0x7"), FAM, Bytes.toBytes("qs"))); + result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth); + Assert.assertEquals(4, result); + + skey = PrivateCellUtil.createFirstOnRow(Bytes.toBytes("00c7-202206201520-wx0x7-")); + result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth); + Assert.assertEquals(4, result); + + skey = PrivateCellUtil.createFirstOnRowCol( + new KeyValue(Bytes.toBytes("00c7-202206201521"), FAM, Bytes.toBytes("qa"))); + result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth); + Assert.assertEquals(5, result); + + skey = PrivateCellUtil.createFirstOnRowCol( + new KeyValue(Bytes.toBytes("00c7-202206201521"), FAM, Bytes.toBytes("qe"))); + result = 
PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth); + Assert.assertEquals(6, result); + + skey = PrivateCellUtil.createFirstOnRowCol( + new KeyValue(Bytes.toBytes("00c7-202206201521"), FAM, Bytes.toBytes("qg"))); + result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth); + Assert.assertEquals(7, result); + + skey = PrivateCellUtil.createFirstOnRowCol( + new KeyValue(Bytes.toBytes("00c7-202206201521"), FAM, Bytes.toBytes("qu"))); + result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth); + Assert.assertEquals(8, result); + + skey = PrivateCellUtil.createFirstOnRow(Bytes.toBytes("00c7-202206201521-wx0")); + result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth); + Assert.assertEquals(8, result); + + skey = PrivateCellUtil.createFirstOnRow(Bytes.toBytes("00c8-202206200751-wx0ah4gnbwptdyna-F-")); + result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth); + Assert.assertEquals(13, result); + } + + @Test + public void testSearchPrefixTreeWithTimeStampType() throws IOException { + List rows = new ArrayList<>(); + + rows.add(new KeyValue(Bytes.toBytes("00073123012802202_121_9223370375843575807"), FAM, + Bytes.toBytes("bg_id"), 1661023473524L, KeyValue.Type.Put)); + rows.add(new KeyValue(Bytes.toBytes("00073124151608102_121_9223370375238775807"), FAM, + Bytes.toBytes("cur_run_date"), 1661713633365L, KeyValue.Type.Put)); + rows.add(new KeyValue(Bytes.toBytes("00073124151608102_121_9223370375670775807"), FAM, + Bytes.toBytes("run"), Long.MAX_VALUE, KeyValue.Type.Maximum)); + + List childs = new ArrayList<>(3); + childs.add( + new KeyValue.KeyOnlyKeyValue(PrivateCellUtil.getCellKeySerializedAsKeyValueKey(rows.get(0)))); + childs.add( + new KeyValue.KeyOnlyKeyValue(PrivateCellUtil.getCellKeySerializedAsKeyValueKey(rows.get(1)))); + childs.add( + new KeyValue.KeyOnlyKeyValue(PrivateCellUtil.getCellKeySerializedAsKeyValueKey(rows.get(2)))); + + TokenizerNode node = PrefixTreeUtilV2.buildPrefixTree(childs); + + PrefixTreeDataWidth dataWidth = new PrefixTreeDataWidth(); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrefixTreeUtilV2.serializePrefixTree(node, dataWidth, outputStream); + byte[] data = outputStream.toByteArray(); + ByteBuffer prefixTreeNodeData = ByteBuffer.wrap(data); + + for (int i = 0; i < childs.size(); i++) { + byte[] result = PrefixTreeUtilV2.get(prefixTreeNodeData, 0, dataWidth, i); + Assert.assertTrue(Bytes.compareTo(result, CellUtil.cloneRow(childs.get(i))) == 0); + } + + for (int i = 0; i < childs.size(); i++) { + int result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, childs.get(i), 0, dataWidth); + Assert.assertEquals(i, result); + } + } + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java index 6e373c67db34..6b7cf3caaa39 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java @@ -239,13 +239,8 @@ protected boolean checkKey(final Cell cell) throws IOException { throw new IOException("Key cannot be null or empty"); } if (lastCell != null) { - int keyComp = 0; - if (hFileContext.getIndexBlockEncoding() == IndexBlockEncoding.PREFIX_TREE) { - keyComp = this.hFileContext.getCellComparator().compareRows(lastCell, cell); - } else { - keyComp = 
PrivateCellUtil.compareKeyIgnoresMvcc(this.hFileContext.getCellComparator(), - lastCell, cell); - } + int keyComp = PrivateCellUtil.compareKeyIgnoresMvcc(this.hFileContext.getCellComparator(), + lastCell, cell); if (keyComp > 0) { String message = getLexicalErrorMessage(cell); throw new IOException(message);
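
Note (not part of the patch): a minimal sketch of how a reader-side caller might interpret the value returned by PrefixTreeUtilV2.search(), based on the assertions in TestPrefixTreeUtilV2 above. The helper name locateIndexEntryRow is hypothetical; it assumes the caller sits in the same package as PrefixTreeUtilV2 (as the test does) and uses the same imports as TestPrefixTreeUtilV2. Only search() and get(), as exercised by the test, are used.

    // prefixTreeNodeData/dataWidth are the block bytes and widths produced by
    // PrefixTreeUtilV2.serializePrefixTree(), exactly as set up in TestPrefixTreeUtilV2.
    static byte[] locateIndexEntryRow(ByteBuffer prefixTreeNodeData, PrefixTreeDataWidth dataWidth,
        Cell searchKey) throws IOException {
      int idx = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, searchKey, 0, dataWidth);
      if (idx < 0) {
        // searchKey sorts before the first index entry, so no entry covers it
        // (the test asserts -1 for a row before the first indexed key).
        return null;
      }
      // Entry idx is the last index entry that sorts at or before searchKey in
      // KeyValue key order, i.e. the entry whose block may contain searchKey.
      // get() returns the row bytes stored for that entry, which the test
      // verifies against CellUtil.cloneRow().
      return PrefixTreeUtilV2.get(prefixTreeNodeData, 0, dataWidth, idx);
    }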