From 05ba4b4506863032b117bd9bb580173d8dd84377 Mon Sep 17 00:00:00 2001
From: Wellington Chevreuil
Date: Thu, 21 Jul 2022 13:36:51 +0100
Subject: [PATCH 1/6] HBASE-27232 Fix checking for encoded block size when deciding if block should be closed

---
 .../hadoop/hbase/io/hfile/HFileBlock.java     |  2 +-
 .../hbase/io/hfile/HFileWriterImpl.java       | 21 ++++---
 .../hbase/regionserver/TestHStoreFile.java    | 63 +++++++++++++++++++
 3 files changed, 77 insertions(+), 9 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index fe2e0c7fab9a..d33f6c9bc8d3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -449,7 +449,7 @@ public int getOnDiskSizeWithHeader() {
   }
 
   /** Returns the on-disk size of the data part + checksum (header excluded). */
-  int getOnDiskSizeWithoutHeader() {
+  public int getOnDiskSizeWithoutHeader() {
     return onDiskSizeWithoutHeader;
   }
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
index a638a443fc48..a4557095674f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
@@ -172,8 +172,10 @@ public HFileWriterImpl(final Configuration conf, CacheConfig cacheConf, Path pat
     }
     closeOutputStream = path != null;
     this.cacheConf = cacheConf;
-    float encodeBlockSizeRatio = conf.getFloat(UNIFIED_ENCODED_BLOCKSIZE_RATIO, 1f);
-    this.encodedBlockSizeLimit = (int) (hFileContext.getBlocksize() * encodeBlockSizeRatio);
+    float encodeBlockSizeRatio = conf.getFloat(UNIFIED_ENCODED_BLOCKSIZE_RATIO, 0f);
+    this.encodedBlockSizeLimit = encodeBlockSizeRatio >0 ?
+      (int) (hFileContext.getBlocksize() * encodeBlockSizeRatio) : 0;
+
     finishInit(conf);
     if (LOG.isTraceEnabled()) {
       LOG.trace("Writer" + (path != null ? " for " + path : "") + " initialized with cacheConf: "
@@ -309,12 +311,15 @@ protected void finishInit(final Configuration conf) {
    * At a block boundary, write all the inline blocks and opens new block.
    */
   protected void checkBlockBoundary() throws IOException {
-    // For encoder like prefixTree, encoded size is not available, so we have to compare both
-    // encoded size and unencoded size to blocksize limit.
-    if (
-      blockWriter.encodedBlockSizeWritten() >= encodedBlockSizeLimit
-        || blockWriter.blockSizeWritten() >= hFileContext.getBlocksize()
-    ) {
+    boolean shouldFinishBlock = false;
+    //This means hbase.writer.unified.encoded.blocksize.ratio was set to something different from 1
+    //and we should use the encoding ratio
+    if (encodedBlockSizeLimit > 0){
+      shouldFinishBlock = blockWriter.encodedBlockSizeWritten() >= encodedBlockSizeLimit;
+    } else {
+      shouldFinishBlock = blockWriter.blockSizeWritten() >= hFileContext.getBlocksize();
+    }
+    if(shouldFinishBlock) {
       finishBlock();
       writeInlineBlocks(false);
       newBlock();
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
index 212326f95a23..7572b81f8735 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
@@ -66,6 +66,9 @@
 import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
 import org.apache.hadoop.hbase.io.hfile.CacheStats;
+import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileBlock;
 import org.apache.hadoop.hbase.io.hfile.HFileContext;
 import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
 import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
@@ -1141,4 +1144,64 @@ public void testDataBlockEncodingMetaData() throws IOException {
     byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
     assertArrayEquals(dataBlockEncoderAlgo.getNameInBytes(), value);
   }
+
+  @Test
+  public void testDataBlockSizeEncoded() throws Exception {
+    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
+    Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
+    Path path = new Path(dir, "1234567890");
+
+    DataBlockEncoding dataBlockEncoderAlgo =
+      DataBlockEncoding.FAST_DIFF;
+
+    conf.setDouble("hbase.writer.unified.encoded.blocksize.ratio", 1);
+
+    cacheConf = new CacheConfig(conf);
+    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
+      .withChecksumType(CKTYPE)
+      .withBytesPerCheckSum(CKBYTES)
+      .withDataBlockEncoding(dataBlockEncoderAlgo)
+      .build();
+    // Make a store file and write data to it.
+    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
+      .withFilePath(path)
+      .withMaxKeyCount(2000)
+      .withFileContext(meta)
+      .build();
+    writeStoreFile(writer);
+    //writer.close();
+
+    HStoreFile storeFile = new HStoreFile(fs, writer.getPath(), conf,
+      cacheConf, BloomType.NONE, true);
+    storeFile.initReader();
+    StoreFileReader reader = storeFile.getReader();
+
+    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
+    byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
+    assertEquals(dataBlockEncoderAlgo.name(), Bytes.toString(value));
+
+    HFile.Reader fReader = HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(),
+      true, conf);
+
+    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath());
+    long fileSize = fs.getFileStatus(writer.getPath()).getLen();
+    FixedFileTrailer trailer =
+      FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
+    long offset = trailer.getFirstDataBlockOffset(),
+      max = trailer.getLastDataBlockOffset();
+    HFileBlock block;
+    int blockCount = 0;
+    while (offset <= max) {
+      block = fReader.readBlock(offset, -1, /* cacheBlock */
+        false, /* pread */ false,
+        /* isCompaction */ false, /* updateCacheMetrics */
+        false, null, null);
+      offset += block.getOnDiskSizeWithHeader();
+      blockCount += 1;
+      double diff = block.getOnDiskSizeWithHeader() - BLOCKSIZE_SMALL;
+      if(offset <= max)
+        assertTrue(diff >=0 && diff < (BLOCKSIZE_SMALL*0.05));
+    }
+  }
 }

From 79fd61e8764bd2ba37043f9a34df15f090521f49 Mon Sep 17 00:00:00 2001
From: Wellington Chevreuil
Date: Thu, 21 Jul 2022 13:39:46 +0100
Subject: [PATCH 2/6] fixing comment

---
 .../java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
index a4557095674f..8b9edb5c592b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
@@ -312,7 +312,7 @@ protected void finishInit(final Configuration conf) {
    */
   protected void checkBlockBoundary() throws IOException {
     boolean shouldFinishBlock = false;
-    //This means hbase.writer.unified.encoded.blocksize.ratio was set to something different from 1
+    //This means hbase.writer.unified.encoded.blocksize.ratio was set to something different from 0
     //and we should use the encoding ratio
     if (encodedBlockSizeLimit > 0){
       shouldFinishBlock = blockWriter.encodedBlockSizeWritten() >= encodedBlockSizeLimit;

From 4bf5d4951791563f22f2978b9d3bfd72aa7ba3ce Mon Sep 17 00:00:00 2001
From: Wellington Chevreuil
Date: Thu, 21 Jul 2022 17:38:17 +0100
Subject: [PATCH 3/6] addressing review comments & spotless checks

---
 .../hbase/io/hfile/HFileWriterImpl.java       | 11 +++---
 .../hbase/regionserver/TestHStoreFile.java    | 39 +++++++-------------
 2 files changed, 18 insertions(+), 32 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
index 8b9edb5c592b..0d82fbb9031a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
@@ -173,8 +173,7 @@ public HFileWriterImpl(final Configuration conf, CacheConfig cacheConf, Path pat
     closeOutputStream = path != null;
     this.cacheConf = cacheConf;
     float encodeBlockSizeRatio = conf.getFloat(UNIFIED_ENCODED_BLOCKSIZE_RATIO, 0f);
-    this.encodedBlockSizeLimit = encodeBlockSizeRatio >0 ?
-      (int) (hFileContext.getBlocksize() * encodeBlockSizeRatio) : 0;
+    this.encodedBlockSizeLimit = (int) (hFileContext.getBlocksize() * encodeBlockSizeRatio);
 
     finishInit(conf);
     if (LOG.isTraceEnabled()) {
@@ -312,14 +311,14 @@ protected void finishInit(final Configuration conf) {
    */
   protected void checkBlockBoundary() throws IOException {
     boolean shouldFinishBlock = false;
-    //This means hbase.writer.unified.encoded.blocksize.ratio was set to something different from 0
-    //and we should use the encoding ratio
-    if (encodedBlockSizeLimit > 0){
+    // This means hbase.writer.unified.encoded.blocksize.ratio was set to something different from 0
+    // and we should use the encoding ratio
+    if (encodedBlockSizeLimit > 0) {
       shouldFinishBlock = blockWriter.encodedBlockSizeWritten() >= encodedBlockSizeLimit;
     } else {
       shouldFinishBlock = blockWriter.blockSizeWritten() >= hFileContext.getBlocksize();
     }
-    if(shouldFinishBlock) {
+    if (shouldFinishBlock) {
       finishBlock();
       writeInlineBlocks(false);
       newBlock();
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
index 7572b81f8735..40039de4d75d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
@@ -1151,28 +1151,21 @@ public void testDataBlockSizeEncoded() throws Exception {
     Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
     Path path = new Path(dir, "1234567890");
 
-    DataBlockEncoding dataBlockEncoderAlgo =
-      DataBlockEncoding.FAST_DIFF;
+    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
 
     conf.setDouble("hbase.writer.unified.encoded.blocksize.ratio", 1);
 
     cacheConf = new CacheConfig(conf);
-    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
-      .withChecksumType(CKTYPE)
-      .withBytesPerCheckSum(CKBYTES)
-      .withDataBlockEncoding(dataBlockEncoderAlgo)
-      .build();
+    HFileContext meta =
+      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
+        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build();
     // Make a store file and write data to it.
-    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
-      .withFilePath(path)
-      .withMaxKeyCount(2000)
-      .withFileContext(meta)
-      .build();
+    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
+      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
     writeStoreFile(writer);
-    //writer.close();
 
-    HStoreFile storeFile = new HStoreFile(fs, writer.getPath(), conf,
-      cacheConf, BloomType.NONE, true);
+    HStoreFile storeFile =
+      new HStoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
     storeFile.initReader();
     StoreFileReader reader = storeFile.getReader();
@@ -1180,27 +1173,21 @@ public void testDataBlockSizeEncoded() throws Exception {
     byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
     assertEquals(dataBlockEncoderAlgo.name(), Bytes.toString(value));
 
-    HFile.Reader fReader = HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(),
-      true, conf);
+    HFile.Reader fReader =
+      HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true, conf);
 
     FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath());
     long fileSize = fs.getFileStatus(writer.getPath()).getLen();
-    FixedFileTrailer trailer =
-      FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
-    long offset = trailer.getFirstDataBlockOffset(),
-      max = trailer.getLastDataBlockOffset();
+    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
+    long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset();
     HFileBlock block;
-    int blockCount = 0;
     while (offset <= max) {
       block = fReader.readBlock(offset, -1, /* cacheBlock */
-        false, /* pread */ false,
-        /* isCompaction */ false, /* updateCacheMetrics */
+        false, /* pread */ false, /* isCompaction */ false, /* updateCacheMetrics */
         false, null, null);
       offset += block.getOnDiskSizeWithHeader();
-      blockCount += 1;
       double diff = block.getOnDiskSizeWithHeader() - BLOCKSIZE_SMALL;
-      if(offset <= max)
-        assertTrue(diff >=0 && diff < (BLOCKSIZE_SMALL*0.05));
+      if (offset <= max) assertTrue(diff >= 0 && diff < (BLOCKSIZE_SMALL * 0.05));
     }
   }
 

From 1b460551f81f278ebc88b9676784dada0f1f35ff Mon Sep 17 00:00:00 2001
From: Wellington Chevreuil
Date: Fri, 22 Jul 2022 10:29:57 +0100
Subject: [PATCH 4/6] fixing UT error and checkstyle

---
 .../org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java    | 3 ++-
 .../org/apache/hadoop/hbase/regionserver/TestHStoreFile.java | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
index 0d82fbb9031a..07fd86d03f37 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
@@ -316,7 +316,8 @@ protected void checkBlockBoundary() throws IOException {
     if (encodedBlockSizeLimit > 0) {
       shouldFinishBlock = blockWriter.encodedBlockSizeWritten() >= encodedBlockSizeLimit;
     } else {
-      shouldFinishBlock = blockWriter.blockSizeWritten() >= hFileContext.getBlocksize();
+      shouldFinishBlock = blockWriter.encodedBlockSizeWritten() >= hFileContext.getBlocksize() ||
+        blockWriter.blockSizeWritten() >= hFileContext.getBlocksize();
     }
     if (shouldFinishBlock) {
       finishBlock();
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
index 40039de4d75d..7eff766c0b25 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
@@ -1187,7 +1187,9 @@ public void testDataBlockSizeEncoded() throws Exception {
         false, null, null);
       offset += block.getOnDiskSizeWithHeader();
       double diff = block.getOnDiskSizeWithHeader() - BLOCKSIZE_SMALL;
-      if (offset <= max) assertTrue(diff >= 0 && diff < (BLOCKSIZE_SMALL * 0.05));
+      if (offset <= max) {
+        assertTrue(diff >= 0 && diff < (BLOCKSIZE_SMALL * 0.05));
+      }
     }
   }

From 8eb91cc8e44aaacfd3d0a1974eda2ae5a11206af Mon Sep 17 00:00:00 2001
From: Wellington Chevreuil
Date: Fri, 22 Jul 2022 16:23:52 +0100
Subject: [PATCH 5/6] spotless

---
 .../org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
index 07fd86d03f37..80e333050c6b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
@@ -316,8 +316,8 @@ protected void checkBlockBoundary() throws IOException {
     if (encodedBlockSizeLimit > 0) {
       shouldFinishBlock = blockWriter.encodedBlockSizeWritten() >= encodedBlockSizeLimit;
     } else {
-      shouldFinishBlock = blockWriter.encodedBlockSizeWritten() >= hFileContext.getBlocksize() ||
-        blockWriter.blockSizeWritten() >= hFileContext.getBlocksize();
+      shouldFinishBlock = blockWriter.encodedBlockSizeWritten() >= hFileContext.getBlocksize()
+        || blockWriter.blockSizeWritten() >= hFileContext.getBlocksize();
     }
     if (shouldFinishBlock) {
       finishBlock();

From 3b57fe6c16c3a12622dbb550b5e1b8d4826d7a65 Mon Sep 17 00:00:00 2001
From: Wellington Chevreuil
Date: Fri, 22 Jul 2022 17:41:55 +0100
Subject: [PATCH 6/6] review changes

---
 .../main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index d33f6c9bc8d3..fe2e0c7fab9a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -449,7 +449,7 @@ public int getOnDiskSizeWithHeader() {
   }
 
   /** Returns the on-disk size of the data part + checksum (header excluded). */
-  public int getOnDiskSizeWithoutHeader() {
+  int getOnDiskSizeWithoutHeader() {
     return onDiskSizeWithoutHeader;
   }
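
Note: the sketch below is a minimal, standalone Java illustration of the block-boundary decision this series converges on (patches 1, 4 and 5). BoundaryPolicy, encodedSize and rawSize are hypothetical stand-ins, not the actual HFileWriterImpl or HFileBlock.Writer API; only the decision logic mirrors the final diffs.

// Sketch of the final checkBlockBoundary() behaviour, under the assumption
// that hbase.writer.unified.encoded.blocksize.ratio now defaults to 0
// ("unset"), so encodedBlockSizeLimit is only positive when a ratio is
// explicitly configured.
final class BoundaryPolicy {
  private final int blockSize;             // target HFile block size
  private final int encodedBlockSizeLimit; // blockSize * ratio, 0 when ratio unset

  BoundaryPolicy(int blockSize, float encodedBlockSizeRatio) {
    this.blockSize = blockSize;
    this.encodedBlockSizeLimit = (int) (blockSize * encodedBlockSizeRatio);
  }

  // encodedSize stands in for blockWriter.encodedBlockSizeWritten(),
  // rawSize for blockWriter.blockSizeWritten().
  boolean shouldFinishBlock(int encodedSize, int rawSize) {
    if (encodedBlockSizeLimit > 0) {
      // A ratio was configured: the encoded size alone decides the boundary.
      return encodedSize >= encodedBlockSizeLimit;
    }
    // No ratio configured: close the block once either the encoded or the
    // raw size reaches the target block size (the patch 4/5 refinement).
    return encodedSize >= blockSize || rawSize >= blockSize;
  }
}

With the ratio set to 1, as in testDataBlockSizeEncoded, a block closes when its encoded size reaches the configured block size, which is why the test can assert that every data block's on-disk size stays within roughly 5% above BLOCKSIZE_SMALL.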