HBASE-27232 Fix checking for encoded block size when deciding if bloc… #4640
In the HFileWriterImpl constructor:

```diff
@@ -172,8 +172,10 @@ public HFileWriterImpl(final Configuration conf, CacheConfig cacheConf, Path pat
     }
     closeOutputStream = path != null;
     this.cacheConf = cacheConf;
-    float encodeBlockSizeRatio = conf.getFloat(UNIFIED_ENCODED_BLOCKSIZE_RATIO, 1f);
-    this.encodedBlockSizeLimit = (int) (hFileContext.getBlocksize() * encodeBlockSizeRatio);
+    float encodeBlockSizeRatio = conf.getFloat(UNIFIED_ENCODED_BLOCKSIZE_RATIO, 0f);
+    this.encodedBlockSizeLimit = encodeBlockSizeRatio > 0 ?
+      (int) (hFileContext.getBlocksize() * encodeBlockSizeRatio) : 0;
+
     finishInit(conf);
     if (LOG.isTraceEnabled()) {
       LOG.trace("Writer" + (path != null ? " for " + path : "") + " initialized with cacheConf: "
```

Reviewer: So this changes the default behavior, I believe. Do you think this change is applicable to most users (i.e. a net positive for them)? Not against it, just asking... Alternatively, we could add handling of the 0 value below and leave the default. I have no opinion since I don't fully grasp the feature yet.

Author: It doesn't change the default behaviour, in the sense that if "hbase.writer.unified.encoded.blocksize.ratio" isn't set, we consider only the unencoded size when calculating the block limit, which matches the previous if condition in the checkBlockBoundary method. The difference is when "hbase.writer.unified.encoded.blocksize.ratio" is set, as we can now have 64KB of encoded data (which would never have been possible before).
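To make the new default concrete, here is a minimal sketch (not HBase code; class and method names are invented for illustration) of the limit selection the constructor now performs: a configured ratio yields an encoded-size limit, while the unset default of 0 yields a limit of 0, which later disables the encoded-size check entirely.

```java
// Toy sketch of the limit computation introduced above; values are illustrative.
public class BlockLimitSketch {
  static final float DEFAULT_RATIO = 0f; // new default: ratio not configured

  static int encodedBlockSizeLimit(int blockSize, float ratio) {
    // When the ratio is configured (> 0), derive an encoded-size limit;
    // otherwise return 0, signalling "use the unencoded size only".
    return ratio > 0 ? (int) (blockSize * ratio) : 0;
  }

  public static void main(String[] args) {
    int blockSize = 64 * 1024; // a typical 64KB HFile block size
    System.out.println(encodedBlockSizeLimit(blockSize, DEFAULT_RATIO)); // 0     -> unencoded check
    System.out.println(encodedBlockSizeLimit(blockSize, 1f));            // 65536 -> encoded check
  }
}
```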
In HFileWriterImpl#checkBlockBoundary:

```diff
@@ -309,12 +311,15 @@ protected void finishInit(final Configuration conf) {
    * At a block boundary, write all the inline blocks and opens new block.
    */
   protected void checkBlockBoundary() throws IOException {
-    // For encoder like prefixTree, encoded size is not available, so we have to compare both
-    // encoded size and unencoded size to blocksize limit.
-    if (
-      blockWriter.encodedBlockSizeWritten() >= encodedBlockSizeLimit
-        || blockWriter.blockSizeWritten() >= hFileContext.getBlocksize()
-    ) {
+    boolean shouldFinishBlock = false;
+    // This means hbase.writer.unified.encoded.blocksize.ratio was set to something different
+    // from 0 and we should use the encoding ratio.
+    if (encodedBlockSizeLimit > 0) {
+      shouldFinishBlock = blockWriter.encodedBlockSizeWritten() >= encodedBlockSizeLimit;
+    } else {
+      shouldFinishBlock = blockWriter.blockSizeWritten() >= hFileContext.getBlocksize();
+    }
+    if (shouldFinishBlock) {
       finishBlock();
       writeInlineBlocks(false);
       newBlock();
```

Reviewer: Any implication for the comment that was here before, which says we need to compare both? Granted, I think prefixTree has been removed.

Author: Yeah, we don't have prefix tree anymore. Also, with the previous "||" condition, we could still fail to honour the desired encoded size if the data shrinkage from encoding is greater than the configured "hbase.writer.unified.encoded.blocksize.ratio" value. This change also allows defining a 1:1 ratio, where we would then use the encoded size for the block limit.
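The author's point about shrinkage can be shown with a toy calculation (assuming, for illustration, a 2:1 encoding shrinkage, which is not from the PR): under the old "||" condition the unencoded check trips first, so the encoded-size target is never reached.

```java
// Toy demo of why the old "||" check could close blocks before the
// encoded-size target was reached. The 2:1 shrinkage is an assumption.
public class BoundaryCheckDemo {
  public static void main(String[] args) {
    int blockSize = 64 * 1024;
    int encodedLimit = blockSize;              // ratio configured as 1.0
    int unencodedWritten = 64 * 1024;          // raw bytes buffered so far
    int encodedWritten = unencodedWritten / 2; // assumed 2:1 encoding shrinkage

    // Old behavior: the block closes because the unencoded size hit the block
    // size, even though only 32KB of encoded data was produced.
    boolean oldCheck = encodedWritten >= encodedLimit || unencodedWritten >= blockSize;

    // New behavior: with a ratio configured, only the encoded size is compared,
    // so the block keeps filling until 64KB of encoded data is written.
    boolean newCheck = encodedWritten >= encodedLimit;

    System.out.println("old condition closes block: " + oldCheck); // true
    System.out.println("new condition closes block: " + newCheck); // false
  }
}
```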
Reviewer (on the test changes): I'm not sure if it's feasible, but if you move the test to TestHFile, for instance, you don't need to make this method public.
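For illustration, a minimal, hypothetical sketch of the pattern the reviewer is suggesting: keep the method package-private and exercise it from a test class in the same package, rather than widening it to public. The package, class, and method names below are invented for the example.

```java
// Hypothetical production class: the method stays package-private.
package org.example.hfile;

class Writer {
  // Package-private: visible to tests in the same package, not public API.
  void checkBoundary() {
    // ... boundary logic under test ...
  }
}
```

```java
// Hypothetical test in the same package: it can call checkBoundary() directly
// without the production method being made public.
package org.example.hfile;

import org.junit.Test;

public class WriterTest {
  @Test
  public void testCheckBoundary() {
    new Writer().checkBoundary(); // compiles because both classes share a package
  }
}
```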