From 29f78b1c1810a5f8d52063484e7e86b9cf2c725b Mon Sep 17 00:00:00 2001 From: Vinayak Hegde Date: Thu, 4 Apr 2024 17:56:49 +0530 Subject: [PATCH 1/4] Implemented Data Tiering Framework --- .../regionserver/DataTieringException.java | 27 ++ .../regionserver/DataTieringManager.java | 170 ++++++++ .../hbase/regionserver/DataTieringType.java | 26 ++ .../hbase/regionserver/HRegionServer.java | 1 + .../regionserver/TestDataTieringManager.java | 378 ++++++++++++++++++ 5 files changed, 602 insertions(+) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringException.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringType.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringException.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringException.java new file mode 100644 index 000000000000..8d356422f6e0 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringException.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public class DataTieringException extends Exception { + DataTieringException(String reason) { + super(reason); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java new file mode 100644 index 000000000000..09890b563774 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.util.HashSet; +import java.util.Map; +import java.util.OptionalLong; +import java.util.Set; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@InterfaceAudience.Private +public class DataTieringManager { + private static final Logger LOG = LoggerFactory.getLogger(DataTieringManager.class); + public static final String DATATIERING_KEY = "hbase.hstore.datatiering.type"; + public static final String DATATIERING_HOT_DATA_AGE_KEY = + "hbase.hstore.datatiering.hot.age.millis"; + public static final DataTieringType DEFAULT_DATATIERING = DataTieringType.NONE; + public static final long DEFAULT_DATATIERING_HOT_DATA_AGE = 7 * 24 * 60 * 60 * 1000; // 7 Days + private static DataTieringManager instance; + private final Map onlineRegions; + + private DataTieringManager(Map onlineRegions) { + this.onlineRegions = onlineRegions; + } + + public static synchronized void instantiate(Map onlineRegions) { + if (instance == null) { + instance = new DataTieringManager(onlineRegions); + LOG.info("DataTieringManager instantiated successfully."); + } else { + LOG.warn("DataTieringManager is already instantiated."); + } + } + + public static synchronized DataTieringManager getInstance() { + if (instance == null) { + throw new IllegalStateException( + "DataTieringManager has not been instantiated. Call instantiate() first."); + } + return instance; + } + + public boolean isDataTieringEnabled(BlockCacheKey key) throws DataTieringException { + Path hFilePath = key.getFilePath(); + if (hFilePath == null) { + throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path"); + } + return isDataTieringEnabled(hFilePath); + } + + public boolean isDataTieringEnabled(Path hFilePath) throws DataTieringException { + Configuration configuration = getConfiguration(hFilePath); + DataTieringType dataTieringType = getDataTieringType(configuration); + return !dataTieringType.equals(DataTieringType.NONE); + } + + public boolean isHotData(BlockCacheKey key) throws DataTieringException { + Path hFilePath = key.getFilePath(); + if (hFilePath == null) { + throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path"); + } + return isHotData(hFilePath); + } + + public boolean isHotData(Path hFilePath) throws DataTieringException { + Configuration configuration = getConfiguration(hFilePath); + DataTieringType dataTieringType = getDataTieringType(configuration); + + if (dataTieringType.equals(DataTieringType.TIME_RANGE)) { + long hotDataAge = getDataTieringHotDataAge(configuration); + + HStoreFile hStoreFile = getHStoreFile(hFilePath); + if (hStoreFile == null) { + throw new DataTieringException( + "HStoreFile corresponding to " + hFilePath + " doesn't exist"); + } + OptionalLong maxTimestamp = hStoreFile.getMaximumTimestamp(); + if (!maxTimestamp.isPresent()) { + throw new DataTieringException("Maximum timestamp not present for " + hFilePath); + } + + long currentTimestamp = EnvironmentEdgeManager.getDelegate().currentTime(); + long diff = currentTimestamp - maxTimestamp.getAsLong(); + return diff <= hotDataAge; + } + return false; + } + + public Set getColdDataFiles(Set allCachedBlocks) + throws DataTieringException { + Set coldHFiles = new HashSet<>(); + for (BlockCacheKey key : allCachedBlocks) { + if (coldHFiles.contains(key.getHfileName())) { + continue; + } + if (isDataTieringEnabled(key) && !isHotData(key)) { + coldHFiles.add(key.getHfileName()); + } + } + return coldHFiles; + } + + private HRegion getHRegion(Path hFilePath) throws DataTieringException { + if (hFilePath.getParent() == null || hFilePath.getParent().getParent() == null) { + throw new DataTieringException("Incorrect HFile Path: " + hFilePath); + } + String regionId = hFilePath.getParent().getParent().getName(); + HRegion hRegion = this.onlineRegions.get(regionId); + if (hRegion == null) { + throw new DataTieringException("HRegion corresponding to " + hFilePath + " doesn't exist"); + } + return hRegion; + } + + private HStore getHStore(Path hFilePath) throws DataTieringException { + HRegion hRegion = getHRegion(hFilePath); + String columnFamily = hFilePath.getParent().getName(); + HStore hStore = hRegion.getStore(Bytes.toBytes(columnFamily)); + if (hStore == null) { + throw new DataTieringException("HStore corresponding to " + hFilePath + " doesn't exist"); + } + return hStore; + } + + private HStoreFile getHStoreFile(Path hFilePath) throws DataTieringException { + HStore hStore = getHStore(hFilePath); + for (HStoreFile file : hStore.getStorefiles()) { + if (file.getPath().equals(hFilePath)) { + return file; + } + } + return null; + } + + private Configuration getConfiguration(Path hFilePath) throws DataTieringException { + HStore hStore = getHStore(hFilePath); + return hStore.getReadOnlyConfiguration(); + } + + private DataTieringType getDataTieringType(Configuration conf) { + return DataTieringType.valueOf(conf.get(DATATIERING_KEY, DEFAULT_DATATIERING.name())); + } + + private long getDataTieringHotDataAge(Configuration conf) { + return Long.parseLong( + conf.get(DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(DEFAULT_DATATIERING_HOT_DATA_AGE))); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringType.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringType.java new file mode 100644 index 000000000000..ee54576a6487 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringType.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Public +public enum DataTieringType { + NONE, + TIME_RANGE +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 88863c06e4bd..e4da74f78cf2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -531,6 +531,7 @@ public HRegionServer(final Configuration conf) throws IOException { regionServerAccounting = new RegionServerAccounting(conf); blockCache = BlockCacheFactory.createBlockCache(conf); + DataTieringManager.instantiate(onlineRegions); mobFileCache = new MobFileCache(conf); rsSnapshotVerifier = new RSSnapshotVerifier(conf); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java new file mode 100644 index 000000000000..c472791c5a51 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java @@ -0,0 +1,378 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.fs.HFileSystem; +import org.apache.hadoop.hbase.io.hfile.BlockCache; +import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory; +import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; +import org.apache.hadoop.hbase.io.hfile.BlockType; +import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CommonFSUtils; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * This class is used to test the functionality of the DataTieringManager. + * + * The mock online regions are stored in {@link TestDataTieringManager#testOnlineRegions}. + * For all tests, the setup of {@link TestDataTieringManager#testOnlineRegions} occurs only once. + * Please refer to {@link TestDataTieringManager#setupOnlineRegions()} for the structure. + * Additionally, a list of all store files is maintained in {@link TestDataTieringManager#hStoreFiles}. + * The characteristics of these store files are listed below: + * ## HStoreFile Information + * + * | HStoreFile | Region | Store | DataTiering | isHot | + * |------------------|--------------------|---------------------|-----------------------|-------| + * | hStoreFile0 | region1 | hStore11 | TIME_RANGE | true | + * | hStoreFile1 | region1 | hStore12 | NONE | false | + * | hStoreFile2 | region2 | hStore21 | TIME_RANGE | true | + * | hStoreFile3 | region2 | hStore22 | TIME_RANGE | false | + */ + +@Category({ RegionServerTests.class, SmallTests.class }) +public class TestDataTieringManager { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestDataTieringManager.class); + + private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); + private static Configuration defaultConf; + private static FileSystem fs; + private static CacheConfig cacheConf; + private static Path testDir; + private static Map testOnlineRegions; + + private static DataTieringManager dataTieringManager; + private static List hStoreFiles; + + @BeforeClass + public static void setupBeforeClass() throws Exception { + testDir = TEST_UTIL.getDataTestDir(TestDataTieringManager.class.getSimpleName()); + defaultConf = TEST_UTIL.getConfiguration(); + fs = HFileSystem.get(defaultConf); + BlockCache blockCache = BlockCacheFactory.createBlockCache(defaultConf); + cacheConf = new CacheConfig(defaultConf, blockCache); + setupOnlineRegions(); + DataTieringManager.instantiate(testOnlineRegions); + dataTieringManager = DataTieringManager.getInstance(); + } + + @FunctionalInterface + interface DataTieringMethodCallerWithPath { + boolean call(DataTieringManager manager, Path path) throws DataTieringException; + } + + @FunctionalInterface + interface DataTieringMethodCallerWithKey { + boolean call(DataTieringManager manager, BlockCacheKey key) throws DataTieringException; + } + + @Test + public void testDataTieringEnabledWithKey() { + DataTieringMethodCallerWithKey methodCallerWithKey = DataTieringManager::isDataTieringEnabled; + + // Test with valid key + BlockCacheKey key = new BlockCacheKey(hStoreFiles.get(0).getPath(), 0, true, BlockType.DATA); + testDataTieringMethodWithKeyNoException(methodCallerWithKey, key, true); + + // Test with another valid key + key = new BlockCacheKey(hStoreFiles.get(1).getPath(), 0, true, BlockType.DATA); + testDataTieringMethodWithKeyNoException(methodCallerWithKey, key, false); + + // Test with valid key with no HFile Path + key = new BlockCacheKey(hStoreFiles.get(0).getPath().getName(), 0); + testDataTieringMethodWithKeyExpectingException(methodCallerWithKey, key, + new DataTieringException("BlockCacheKey Doesn't Contain HFile Path")); + } + + @Test + public void testDataTieringEnabledWithPath() { + DataTieringMethodCallerWithPath methodCallerWithPath = DataTieringManager::isDataTieringEnabled; + + // Test with valid path + Path hFilePath = hStoreFiles.get(1).getPath(); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, false); + + // Test with another valid path + hFilePath = hStoreFiles.get(3).getPath(); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, true); + + // Test with an incorrect path + hFilePath = new Path("incorrectPath"); + testDataTieringMethodWithPathExpectingException(methodCallerWithPath, hFilePath, + new DataTieringException("Incorrect HFile Path: " + hFilePath)); + + // Test with a non-existing HRegion path + Path basePath = hStoreFiles.get(0).getPath().getParent().getParent().getParent(); + hFilePath = new Path(basePath, "incorrectRegion/cf1/filename"); + testDataTieringMethodWithPathExpectingException(methodCallerWithPath, hFilePath, + new DataTieringException("HRegion corresponding to " + hFilePath + " doesn't exist")); + + // Test with a non-existing HStore path + basePath = hStoreFiles.get(0).getPath().getParent().getParent(); + hFilePath = new Path(basePath, "incorrectCf/filename"); + testDataTieringMethodWithPathExpectingException(methodCallerWithPath, hFilePath, + new DataTieringException("HStore corresponding to " + hFilePath + " doesn't exist")); + } + + @Test + public void testHotDataWithKey() { + DataTieringMethodCallerWithKey methodCallerWithKey = DataTieringManager::isHotData; + + // Test with valid key + BlockCacheKey key = new BlockCacheKey(hStoreFiles.get(0).getPath(), 0, true, BlockType.DATA); + testDataTieringMethodWithKeyNoException(methodCallerWithKey, key, true); + + // Test with another valid key + key = new BlockCacheKey(hStoreFiles.get(1).getPath(), 0, true, BlockType.DATA); + testDataTieringMethodWithKeyNoException(methodCallerWithKey, key, false); + } + + @Test + public void testHotDataWithPath() { + DataTieringMethodCallerWithPath methodCallerWithPath = DataTieringManager::isHotData; + + // Test with valid path + Path hFilePath = hStoreFiles.get(2).getPath(); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, true); + + // Test with another valid path + hFilePath = hStoreFiles.get(3).getPath(); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, false); + + // Test with an incorrect filename + hFilePath = new Path(hStoreFiles.get(0).getPath().getParent(), "incorrectFileName"); + testDataTieringMethodWithPathExpectingException(methodCallerWithPath, hFilePath, + new DataTieringException("HStoreFile corresponding to " + hFilePath + " doesn't exist")); + } + + private void testDataTieringMethodWithPath(DataTieringMethodCallerWithPath caller, Path path, + boolean expectedResult, DataTieringException exception) { + try { + boolean value = caller.call(dataTieringManager, path); + if (exception != null) { + fail("Expected DataTieringException to be thrown"); + } + assertEquals(value, expectedResult); + } catch (DataTieringException e) { + if (exception == null) { + fail("Unexpected DataTieringException: " + e.getMessage()); + } + assertEquals(exception.getMessage(), e.getMessage()); + } + } + + private void testDataTieringMethodWithKey(DataTieringMethodCallerWithKey caller, + BlockCacheKey key, boolean expectedResult, DataTieringException exception) { + try { + boolean value = caller.call(dataTieringManager, key); + if (exception != null) { + fail("Expected DataTieringException to be thrown"); + } + assertEquals(value, expectedResult); + } catch (DataTieringException e) { + if (exception == null) { + fail("Unexpected DataTieringException: " + e.getMessage()); + } + assertEquals(exception.getMessage(), e.getMessage()); + } + } + + private void testDataTieringMethodWithPathExpectingException( + DataTieringMethodCallerWithPath caller, Path path, DataTieringException exception) { + testDataTieringMethodWithPath(caller, path, false, exception); + } + + private void testDataTieringMethodWithPathNoException(DataTieringMethodCallerWithPath caller, + Path path, boolean expectedResult) { + testDataTieringMethodWithPath(caller, path, expectedResult, null); + } + + private void testDataTieringMethodWithKeyExpectingException(DataTieringMethodCallerWithKey caller, + BlockCacheKey key, DataTieringException exception) { + testDataTieringMethodWithKey(caller, key, false, exception); + } + + private void testDataTieringMethodWithKeyNoException(DataTieringMethodCallerWithKey caller, + BlockCacheKey key, boolean expectedResult) { + testDataTieringMethodWithKey(caller, key, expectedResult, null); + } + + @Test + public void testColdDataFiles() { + Set allCachedBlocks = new HashSet<>(); + for (HStoreFile file : hStoreFiles) { + allCachedBlocks.add(new BlockCacheKey(file.getPath(), 0, true, BlockType.DATA)); + } + + try { + Set coldFilePaths = dataTieringManager.getColdDataFiles(allCachedBlocks); + assertEquals(1, coldFilePaths.size()); + } catch (DataTieringException e) { + fail("Unexpected DataTieringException: " + e.getMessage()); + } + } + + private static void setupOnlineRegions() throws IOException { + testOnlineRegions = new HashMap<>(); + hStoreFiles = new ArrayList<>(); + + long day = 24 * 60 * 60 * 1000; + long currentTime = System.currentTimeMillis(); + + HRegion region1 = createHRegion("table1"); + + HStore hStore11 = createHStore(region1, "cf1", getConfWithTimeRangeDataTieringEnabled(day)); + hStoreFiles + .add(createHStoreFile(hStore11.getStoreContext().getFamilyStoreDirectoryPath(), currentTime)); + hStore11.refreshStoreFiles(); + HStore hStore12 = createHStore(region1, "cf2"); + hStoreFiles.add(createHStoreFile(hStore12.getStoreContext().getFamilyStoreDirectoryPath(), + currentTime - day)); + hStore12.refreshStoreFiles(); + + region1.stores.put(Bytes.toBytes("cf1"), hStore11); + region1.stores.put(Bytes.toBytes("cf2"), hStore12); + + HRegion region2 = + createHRegion("table2", getConfWithTimeRangeDataTieringEnabled((long) (2.5 * day))); + + HStore hStore21 = createHStore(region2, "cf1"); + hStoreFiles.add(createHStoreFile(hStore21.getStoreContext().getFamilyStoreDirectoryPath(), + currentTime - 2 * day)); + hStore21.refreshStoreFiles(); + HStore hStore22 = createHStore(region2, "cf2"); + hStoreFiles.add(createHStoreFile(hStore22.getStoreContext().getFamilyStoreDirectoryPath(), + currentTime - 3 * day)); + hStore22.refreshStoreFiles(); + + region2.stores.put(Bytes.toBytes("cf1"), hStore21); + region2.stores.put(Bytes.toBytes("cf2"), hStore22); + + for (HStoreFile file : hStoreFiles) { + file.initReader(); + } + + testOnlineRegions.put(region1.getRegionInfo().getEncodedName(), region1); + testOnlineRegions.put(region2.getRegionInfo().getEncodedName(), region2); + } + + private static HRegion createHRegion(String table) throws IOException { + return createHRegion(table, defaultConf); + } + + private static HRegion createHRegion(String table, Configuration conf) throws IOException { + TableName tableName = TableName.valueOf(table); + + TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) + .setValue(DataTieringManager.DATATIERING_KEY, conf.get(DataTieringManager.DATATIERING_KEY)) + .setValue(DataTieringManager.DATATIERING_HOT_DATA_AGE_KEY, + conf.get(DataTieringManager.DATATIERING_HOT_DATA_AGE_KEY)) + .build(); + RegionInfo hri = RegionInfoBuilder.newBuilder(tableName).build(); + + Configuration testConf = new Configuration(conf); + CommonFSUtils.setRootDir(testConf, testDir); + HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs, + CommonFSUtils.getTableDir(testDir, hri.getTable()), hri); + + return new HRegion(regionFs, null, conf, htd, null); + } + + private static HStore createHStore(HRegion region, String columnFamily) throws IOException { + return createHStore(region, columnFamily, defaultConf); + } + + private static HStore createHStore(HRegion region, String columnFamily, Configuration conf) + throws IOException { + ColumnFamilyDescriptor columnFamilyDescriptor = + ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(columnFamily)) + .setValue(DataTieringManager.DATATIERING_KEY, conf.get(DataTieringManager.DATATIERING_KEY)) + .setValue(DataTieringManager.DATATIERING_HOT_DATA_AGE_KEY, + conf.get(DataTieringManager.DATATIERING_HOT_DATA_AGE_KEY)) + .build(); + + return new HStore(region, columnFamilyDescriptor, conf, false); + } + + private static Configuration getConfWithTimeRangeDataTieringEnabled(long hotDataAge) { + Configuration conf = new Configuration(defaultConf); + conf.set(DataTieringManager.DATATIERING_KEY, DataTieringType.TIME_RANGE.name()); + conf.set(DataTieringManager.DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(hotDataAge)); + return conf; + } + + private static HStoreFile createHStoreFile(Path storeDir, long timestamp) throws IOException { + String columnFamily = storeDir.getName(); + + StoreFileWriter storeFileWriter = new StoreFileWriter.Builder(defaultConf, cacheConf, fs) + .withOutputDir(storeDir).withFileContext(new HFileContextBuilder().build()).build(); + + writeStoreFileRandomData(storeFileWriter, Bytes.toBytes(columnFamily), Bytes.toBytes("random"), + timestamp); + + return new HStoreFile(fs, storeFileWriter.getPath(), defaultConf, cacheConf, BloomType.NONE, + true); + } + + private static void writeStoreFileRandomData(final StoreFileWriter writer, byte[] columnFamily, + byte[] qualifier, long timestamp) throws IOException { + try { + for (char d = 'a'; d <= 'z'; d++) { + for (char e = 'a'; e <= 'z'; e++) { + byte[] b = new byte[] { (byte) d, (byte) e }; + writer.append(new KeyValue(b, columnFamily, qualifier, timestamp, b)); + } + } + } finally { + writer.appendTrackedTimestampsToMetadata(); + writer.close(); + } + } +} From f484532250672cc486675a8692da26238d559703 Mon Sep 17 00:00:00 2001 From: Vinayak Hegde Date: Fri, 5 Apr 2024 20:03:21 +0530 Subject: [PATCH 2/4] addressed review comments --- .../regionserver/DataTieringManager.java | 60 +++++++++++++++++-- .../regionserver/TestDataTieringManager.java | 50 +++++++++------- 2 files changed, 86 insertions(+), 24 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java index 09890b563774..0bc04ddc428b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java @@ -30,6 +30,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * The DataTieringManager class categorizes data into hot data and cold data based on the specified + * {@link DataTieringType} when DataTiering is enabled. DataTiering is disabled by default with + * {@link DataTieringType} set to {@link DataTieringType#NONE}. The {@link DataTieringType} + * determines the logic for distinguishing data into hot or cold. By default, all data is considered + * as hot. + */ @InterfaceAudience.Private public class DataTieringManager { private static final Logger LOG = LoggerFactory.getLogger(DataTieringManager.class); @@ -45,6 +52,10 @@ private DataTieringManager(Map onlineRegions) { this.onlineRegions = onlineRegions; } + /** + * Initializes the DataTieringManager instance with the provided map of online regions. + * @param onlineRegions A map containing online regions. + */ public static synchronized void instantiate(Map onlineRegions) { if (instance == null) { instance = new DataTieringManager(onlineRegions); @@ -54,6 +65,11 @@ public static synchronized void instantiate(Map onlineRegions) } } + /** + * Retrieves the instance of DataTieringManager. + * @return The instance of DataTieringManager. + * @throws IllegalStateException if DataTieringManager has not been instantiated. + */ public static synchronized DataTieringManager getInstance() { if (instance == null) { throw new IllegalStateException( @@ -62,6 +78,13 @@ public static synchronized DataTieringManager getInstance() { return instance; } + /** + * Determines whether data tiering is enabled for the given block cache key. + * @param key the block cache key + * @return {@code true} if data tiering is enabled for the HFile associated with the key, + * {@code false} otherwise + * @throws DataTieringException if there is an error retrieving the HFile path or configuration + */ public boolean isDataTieringEnabled(BlockCacheKey key) throws DataTieringException { Path hFilePath = key.getFilePath(); if (hFilePath == null) { @@ -70,12 +93,25 @@ public boolean isDataTieringEnabled(BlockCacheKey key) throws DataTieringExcepti return isDataTieringEnabled(hFilePath); } + /** + * Determines whether data tiering is enabled for the given HFile path. + * @param hFilePath the path to the HFile + * @return {@code true} if data tiering is enabled, {@code false} otherwise + * @throws DataTieringException if there is an error retrieving the configuration + */ public boolean isDataTieringEnabled(Path hFilePath) throws DataTieringException { Configuration configuration = getConfiguration(hFilePath); DataTieringType dataTieringType = getDataTieringType(configuration); return !dataTieringType.equals(DataTieringType.NONE); } + /** + * Determines whether the data associated with the given block cache key is considered hot. + * @param key the block cache key + * @return {@code true} if the data is hot, {@code false} otherwise + * @throws DataTieringException if there is an error retrieving data tiering information or the + * HFile maximum timestamp + */ public boolean isHotData(BlockCacheKey key) throws DataTieringException { Path hFilePath = key.getFilePath(); if (hFilePath == null) { @@ -84,6 +120,14 @@ public boolean isHotData(BlockCacheKey key) throws DataTieringException { return isHotData(hFilePath); } + /** + * Determines whether the data in the HFile at the given path is considered hot based on the + * configured data tiering type and hot data age. + * @param hFilePath the path to the HFile + * @return {@code true} if the data is hot, {@code false} otherwise + * @throws DataTieringException if there is an error retrieving data tiering information or the + * HFile maximum timestamp + */ public boolean isHotData(Path hFilePath) throws DataTieringException { Configuration configuration = getConfiguration(hFilePath); DataTieringType dataTieringType = getDataTieringType(configuration); @@ -93,8 +137,8 @@ public boolean isHotData(Path hFilePath) throws DataTieringException { HStoreFile hStoreFile = getHStoreFile(hFilePath); if (hStoreFile == null) { - throw new DataTieringException( - "HStoreFile corresponding to " + hFilePath + " doesn't exist"); + LOG.error("HStoreFile corresponding to " + hFilePath + " doesn't exist"); + return false; } OptionalLong maxTimestamp = hStoreFile.getMaximumTimestamp(); if (!maxTimestamp.isPresent()) { @@ -105,9 +149,17 @@ public boolean isHotData(Path hFilePath) throws DataTieringException { long diff = currentTimestamp - maxTimestamp.getAsLong(); return diff <= hotDataAge; } - return false; + // DataTieringType.NONE or other types are considered hot by default + return true; } + /** + * Returns a set of cold data filenames from the given set of cached blocks. Cold data is + * determined by the configured data tiering type and hot data age. + * @param allCachedBlocks a set of all cached block cache keys + * @return a set of cold data filenames + * @throws DataTieringException if there is an error determining whether a block is hot + */ public Set getColdDataFiles(Set allCachedBlocks) throws DataTieringException { Set coldHFiles = new HashSet<>(); @@ -115,7 +167,7 @@ public Set getColdDataFiles(Set allCachedBlocks) if (coldHFiles.contains(key.getHfileName())) { continue; } - if (isDataTieringEnabled(key) && !isHotData(key)) { + if (!isHotData(key)) { coldHFiles.add(key.getHfileName()); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java index c472791c5a51..a207a16251a2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java @@ -69,7 +69,7 @@ * | HStoreFile | Region | Store | DataTiering | isHot | * |------------------|--------------------|---------------------|-----------------------|-------| * | hStoreFile0 | region1 | hStore11 | TIME_RANGE | true | - * | hStoreFile1 | region1 | hStore12 | NONE | false | + * | hStoreFile1 | region1 | hStore12 | NONE | true | * | hStoreFile2 | region2 | hStore21 | TIME_RANGE | true | * | hStoreFile3 | region2 | hStore22 | TIME_RANGE | false | */ @@ -170,7 +170,7 @@ public void testHotDataWithKey() { testDataTieringMethodWithKeyNoException(methodCallerWithKey, key, true); // Test with another valid key - key = new BlockCacheKey(hStoreFiles.get(1).getPath(), 0, true, BlockType.DATA); + key = new BlockCacheKey(hStoreFiles.get(3).getPath(), 0, true, BlockType.DATA); testDataTieringMethodWithKeyNoException(methodCallerWithKey, key, false); } @@ -186,10 +186,35 @@ public void testHotDataWithPath() { hFilePath = hStoreFiles.get(3).getPath(); testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, false); - // Test with an incorrect filename + // Test with a filename where corresponding HStoreFile in not present hFilePath = new Path(hStoreFiles.get(0).getPath().getParent(), "incorrectFileName"); - testDataTieringMethodWithPathExpectingException(methodCallerWithPath, hFilePath, - new DataTieringException("HStoreFile corresponding to " + hFilePath + " doesn't exist")); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, false); + } + + @Test + public void testColdDataFiles() { + Set allCachedBlocks = new HashSet<>(); + for (HStoreFile file : hStoreFiles) { + allCachedBlocks.add(new BlockCacheKey(file.getPath(), 0, true, BlockType.DATA)); + } + + // Verify hStoreFile3 is identified as cold data + DataTieringMethodCallerWithPath methodCallerWithPath = DataTieringManager::isHotData; + Path hFilePath = hStoreFiles.get(3).getPath(); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, false); + + // Verify all the other files in hStoreFiles are hot data + for (int i = 0; i < hStoreFiles.size() - 1; i++) { + hFilePath = hStoreFiles.get(i).getPath(); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, true); + } + + try { + Set coldFilePaths = dataTieringManager.getColdDataFiles(allCachedBlocks); + assertEquals(1, coldFilePaths.size()); + } catch (DataTieringException e) { + fail("Unexpected DataTieringException: " + e.getMessage()); + } } private void testDataTieringMethodWithPath(DataTieringMethodCallerWithPath caller, Path path, @@ -244,21 +269,6 @@ private void testDataTieringMethodWithKeyNoException(DataTieringMethodCallerWith testDataTieringMethodWithKey(caller, key, expectedResult, null); } - @Test - public void testColdDataFiles() { - Set allCachedBlocks = new HashSet<>(); - for (HStoreFile file : hStoreFiles) { - allCachedBlocks.add(new BlockCacheKey(file.getPath(), 0, true, BlockType.DATA)); - } - - try { - Set coldFilePaths = dataTieringManager.getColdDataFiles(allCachedBlocks); - assertEquals(1, coldFilePaths.size()); - } catch (DataTieringException e) { - fail("Unexpected DataTieringException: " + e.getMessage()); - } - } - private static void setupOnlineRegions() throws IOException { testOnlineRegions = new HashMap<>(); hStoreFiles = new ArrayList<>(); From 438ddb097ca9f30ecce851f43980ec43406e1b95 Mon Sep 17 00:00:00 2001 From: Vinayak Hegde Date: Fri, 5 Apr 2024 22:17:57 +0530 Subject: [PATCH 3/4] addressed review comments --- .../hadoop/hbase/regionserver/TestDataTieringManager.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java index a207a16251a2..886507bc316f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java @@ -224,7 +224,7 @@ private void testDataTieringMethodWithPath(DataTieringMethodCallerWithPath calle if (exception != null) { fail("Expected DataTieringException to be thrown"); } - assertEquals(value, expectedResult); + assertEquals(expectedResult, value); } catch (DataTieringException e) { if (exception == null) { fail("Unexpected DataTieringException: " + e.getMessage()); @@ -240,7 +240,7 @@ private void testDataTieringMethodWithKey(DataTieringMethodCallerWithKey caller, if (exception != null) { fail("Expected DataTieringException to be thrown"); } - assertEquals(value, expectedResult); + assertEquals(expectedResult, value); } catch (DataTieringException e) { if (exception == null) { fail("Unexpected DataTieringException: " + e.getMessage()); From 843e836ea9574ec90516d75ff4b3f5459c208092 Mon Sep 17 00:00:00 2001 From: Vinayak Hegde Date: Mon, 8 Apr 2024 15:18:49 +0530 Subject: [PATCH 4/4] removed spotless errors --- .../hadoop/hbase/regionserver/TestDataTieringManager.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java index 886507bc316f..afb5862a8a46 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java @@ -64,14 +64,15 @@ * Please refer to {@link TestDataTieringManager#setupOnlineRegions()} for the structure. * Additionally, a list of all store files is maintained in {@link TestDataTieringManager#hStoreFiles}. * The characteristics of these store files are listed below: - * ## HStoreFile Information + * @formatter:off ## HStoreFile Information * * | HStoreFile | Region | Store | DataTiering | isHot | * |------------------|--------------------|---------------------|-----------------------|-------| * | hStoreFile0 | region1 | hStore11 | TIME_RANGE | true | - * | hStoreFile1 | region1 | hStore12 | NONE | true | + * | hStoreFile1 | region1 | hStore12 | NONE | true | * | hStoreFile2 | region2 | hStore21 | TIME_RANGE | true | * | hStoreFile3 | region2 | hStore22 | TIME_RANGE | false | + * @formatter:on */ @Category({ RegionServerTests.class, SmallTests.class })