
Commit

Addressing latest review suggestions:
1) Changed split/merge to load the files from the tracker, instead of listing them from the FS;
2) UTs for the above;
3) Changed the tracker factory to load the tracker impl from the CF first, then the Table, then the Configuration;

Change-Id: I3a09afd28afe5fe6297b28dafa5dec0eca4c323c
Wellington Chevreuil committed Aug 27, 2021
1 parent 888f140 commit c39ad64
Showing 8 changed files with 217 additions and 39 deletions.
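In short, the core change: instead of listing each family's directory on the filesystem, split and merge now ask the configured StoreFileTracker for the set of store files. A condensed sketch of the new lookup path, using names that appear in the hunks below (env, htd, hcd, family and regionFs are assumed from the enclosing procedure method):

    // Before this commit: enumerate the family directory directly.
    //   Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family);

    // After: resolve the tracker implementation (column family setting first,
    // then table, then global configuration) and let it report the store files.
    Configuration trackerConfig =
        StoreFileTrackerFactory.mergeConfigurations(env.getMasterConfiguration(), htd, hcd);
    StoreFileTracker tracker =
        StoreFileTrackerFactory.create(trackerConfig, htd.getTableName(), true, family, regionFs);
    Collection<StoreFileInfo> storeFiles = tracker.load();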
@@ -24,13 +24,16 @@
import java.util.Collections;
import java.util.List;
import java.util.stream.Stream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.MetaMutationAnnotation;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
import org.apache.hadoop.hbase.client.MasterSwitchType;
import org.apache.hadoop.hbase.client.Mutation;
@@ -52,7 +55,10 @@
import org.apache.hadoop.hbase.quotas.QuotaExceededException;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.StoreContext;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.wal.WALSplitUtil;
@@ -594,7 +600,7 @@ private void createMergedRegion(final MasterProcedureEnv env) throws IOException
mergedFiles.addAll(mergeStoreFiles(env, regionFs, mergeRegionFs, mergedRegion));
}
assert mergeRegionFs != null;
- mergeRegionFs.commitMergedRegion(mergedFiles);
+ mergeRegionFs.commitMergedRegion(mergedFiles, env);

// Prepare to create merged regions
env.getAssignmentManager().getRegionStates().
@@ -608,7 +614,11 @@ private List<Path> mergeStoreFiles(MasterProcedureEnv env, HRegionFileSystem reg
List<Path> mergedFiles = new ArrayList<>();
for (ColumnFamilyDescriptor hcd : htd.getColumnFamilies()) {
String family = hcd.getNameAsString();
- final Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family);
+ Configuration trackerConfig =
+   StoreFileTrackerFactory.mergeConfigurations(env.getMasterConfiguration(), htd, hcd);
+ StoreFileTracker tracker = StoreFileTrackerFactory.create(trackerConfig,
+   mergedRegion.getTable(), true, family, regionFs);
+ final Collection<StoreFileInfo> storeFiles = tracker.load();
if (storeFiles != null && storeFiles.size() > 0) {
for (StoreFileInfo storeFileInfo : storeFiles) {
// Create reference file(s) to parent region file here in mergedDir.
@@ -64,6 +64,8 @@
import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
import org.apache.hadoop.hbase.regionserver.RegionSplitRestriction;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -624,13 +626,13 @@ public void createDaughterRegions(final MasterProcedureEnv env) throws IOException

assertReferenceFileCount(fs, expectedReferences.getFirst().size(),
regionFs.getSplitsDir(daughterOneRI));
- regionFs.commitDaughterRegion(daughterOneRI, expectedReferences.getFirst());
+ regionFs.commitDaughterRegion(daughterOneRI, expectedReferences.getFirst(), env);
assertReferenceFileCount(fs, expectedReferences.getFirst().size(),
new Path(tabledir, daughterOneRI.getEncodedName()));

assertReferenceFileCount(fs, expectedReferences.getSecond().size(),
regionFs.getSplitsDir(daughterTwoRI));
- regionFs.commitDaughterRegion(daughterTwoRI, expectedReferences.getSecond());
+ regionFs.commitDaughterRegion(daughterTwoRI, expectedReferences.getSecond(), env);
assertReferenceFileCount(fs, expectedReferences.getSecond().size(),
new Path(tabledir, daughterTwoRI.getEncodedName()));
}
@@ -664,7 +666,11 @@ private Pair<List<Path>, List<Path>> splitStoreFiles(final MasterProcedureEnv env,
new HashMap<String, Collection<StoreFileInfo>>(htd.getColumnFamilyCount());
for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) {
String family = cfd.getNameAsString();
- Collection<StoreFileInfo> sfis = regionFs.getStoreFiles(family);
+ Configuration trackerConfig = StoreFileTrackerFactory.
+   mergeConfigurations(env.getMasterConfiguration(), htd, htd.getColumnFamily(cfd.getName()));
+ StoreFileTracker tracker = StoreFileTrackerFactory.create(trackerConfig, htd.getTableName(),
+   true, family, regionFs);
+ Collection<StoreFileInfo> sfis = tracker.load();
if (sfis == null) {
continue;
}
@@ -41,13 +41,15 @@
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.HFileArchiver;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.Reference;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
import org.apache.hadoop.hbase.util.Bytes;
@@ -597,31 +599,36 @@ void cleanupDaughterRegion(final RegionInfo regionInfo) throws IOException {
* @param regionInfo daughter {@link org.apache.hadoop.hbase.client.RegionInfo}
* @throws IOException
*/
- public Path commitDaughterRegion(final RegionInfo regionInfo, List<Path> allRegionFiles)
-   throws IOException {
+ public Path commitDaughterRegion(final RegionInfo regionInfo, List<Path> allRegionFiles,
+   MasterProcedureEnv env) throws IOException {
Path regionDir = this.getSplitsDir(regionInfo);
if (fs.exists(regionDir)) {
// Write HRI to a file in case we need to recover hbase:meta
Path regionInfoFile = new Path(regionDir, REGION_INFO_FILE);
byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
- loadRegionFilesIntoStoreTracker(allRegionFiles);
+ HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
+   env.getMasterConfiguration(), fs, getTableDir(), regionInfo, false);
+ loadRegionFilesIntoStoreTracker(allRegionFiles, env, regionFs);
}
return regionDir;
}

- private void loadRegionFilesIntoStoreTracker(List<Path> allFiles) throws IOException {
+ private void loadRegionFilesIntoStoreTracker(List<Path> allFiles, MasterProcedureEnv env,
+   HRegionFileSystem regionFs) throws IOException {
+   TableDescriptor tblDesc = env.getMasterServices().getTableDescriptors().
+     get(regionInfo.getTable());
//we need to map trackers per store
Map<String, StoreFileTracker> trackerMap = new HashMap<>();
//we need to map store files per store
Map<String, List<StoreFileInfo>> fileInfoMap = new HashMap<>();
for(Path file : allFiles) {
String familyName = file.getParent().getName();
trackerMap.computeIfAbsent(familyName, t -> {
-   ColumnFamilyDescriptorBuilder fDescBuilder =
-     ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(familyName));
-   return StoreFileTrackerFactory.create(conf, regionInfo.getTable(), true,
-     StoreContext.getBuilder().withColumnFamilyDescriptor(fDescBuilder.build()).build());
+   Configuration config = StoreFileTrackerFactory.mergeConfigurations(conf, tblDesc,
+     tblDesc.getColumnFamily(Bytes.toBytes(familyName)));
+   return StoreFileTrackerFactory.
+     create(config, regionInfo.getTable(), true, familyName, regionFs);
});
fileInfoMap.computeIfAbsent(familyName, l -> new ArrayList<>());
List<StoreFileInfo> infos = fileInfoMap.get(familyName);
@@ -782,14 +789,16 @@ public Path mergeStoreFile(RegionInfo mergingRegion, String familyName, HStoreFi
* Commit a merged region, making it ready for use.
* @throws IOException
*/
- public void commitMergedRegion(List<Path> allMergedFiles) throws IOException {
+ public void commitMergedRegion(List<Path> allMergedFiles, MasterProcedureEnv env)
+   throws IOException {
Path regionDir = getMergesDir(regionInfoForFs);
+ TableName tableName = TableName.valueOf(regionDir.getParent().getName());
if (regionDir != null && fs.exists(regionDir)) {
// Write HRI to a file in case we need to recover hbase:meta
Path regionInfoFile = new Path(regionDir, REGION_INFO_FILE);
byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
- loadRegionFilesIntoStoreTracker(allMergedFiles);
+ loadRegionFilesIntoStoreTracker(allMergedFiles, env, this);
}
}
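Both commit paths now take the MasterProcedureEnv, which is how the table descriptor, and with it the per-family tracker configuration, is reached. A minimal call-site sketch, assuming a running master procedure (variable names are illustrative, not from the patch):

    // Sketch only: the env comes from the active master's procedure executor.
    MasterProcedureEnv env = master.getMasterProcedureExecutor().getEnvironment();

    // Split: commit each daughter together with the environment.
    regionFs.commitDaughterRegion(daughterRegionInfo, daughterFiles, env);

    // Merge: same for the merged region's files.
    mergeRegionFs.commitMergedRegion(mergedFiles, env);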

@@ -17,10 +17,18 @@
*/
package org.apache.hadoop.hbase.regionserver.storefiletracker;

- import static org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker.STORE_FILE_TRACKER;
+ import static org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker.
+   STORE_FILE_TRACKER;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreContext;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ReflectionUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
@@ -47,4 +55,25 @@ public static StoreFileTracker create(Configuration conf, TableName tableName,
throw new RuntimeException(e);
}
}

+ public static StoreFileTracker create(Configuration conf, TableName tableName,
+     boolean isPrimaryReplica, String family, HRegionFileSystem regionFs) {
+   ColumnFamilyDescriptorBuilder fDescBuilder =
+     ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(family));
+   StoreContext ctx = StoreContext.getBuilder().
+     withColumnFamilyDescriptor(fDescBuilder.build()).
+     withRegionFileSystem(regionFs).
+     build();
+   return StoreFileTrackerFactory.create(conf, tableName, isPrimaryReplica, ctx);
+ }
+
+ public static Configuration mergeConfigurations(Configuration global,
+     TableDescriptor table, ColumnFamilyDescriptor family) {
+   if (!StringUtils.isEmpty(family.getConfigurationValue(STORE_FILE_TRACKER))) {
+     global.set(STORE_FILE_TRACKER, family.getConfigurationValue(STORE_FILE_TRACKER));
+   } else if (!StringUtils.isEmpty(table.getValue(STORE_FILE_TRACKER))) {
+     global.set(STORE_FILE_TRACKER, table.getValue(STORE_FILE_TRACKER));
+   }
+   return global;
+ }
}
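A hedged usage sketch of the precedence mergeConfigurations implements: a tracker set on the column family wins over one set on the table, which wins over the global configuration. The tracker class names below are placeholders, and the example assumes the STORE_FILE_TRACKER key is visible to the caller; the builder calls are standard HBase client API.

    Configuration global = new Configuration();
    global.set(StoreFileTracker.STORE_FILE_TRACKER, "org.example.GlobalTracker");

    TableDescriptor table = TableDescriptorBuilder
        .newBuilder(TableName.valueOf("example_table"))
        .setValue(StoreFileTracker.STORE_FILE_TRACKER, "org.example.TableTracker")
        .build();

    ColumnFamilyDescriptor family = ColumnFamilyDescriptorBuilder
        .newBuilder(Bytes.toBytes("f"))
        .setConfiguration(StoreFileTracker.STORE_FILE_TRACKER, "org.example.CfTracker")
        .build();

    // The column family value takes precedence over the table and global values.
    Configuration merged = StoreFileTrackerFactory.mergeConfigurations(global, table, family);
    assert "org.example.CfTracker".equals(merged.get(StoreFileTracker.STORE_FILE_TRACKER));

Note that mergeConfigurations mutates and returns the Configuration passed in rather than copying it.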
@@ -33,6 +33,7 @@
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.assignment.SplitTableRegionProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
@@ -140,7 +141,9 @@ public void testCommitDaughterRegionNoFiles() throws Exception {
setRegionId(region.getRegionInfo().getRegionId() +
EnvironmentEdgeManager.currentTime()).build();
Path splitDir = regionFS.getSplitsDir(daughterA);
- Path result = regionFS.commitDaughterRegion(daughterA, new ArrayList<>());
+ MasterProcedureEnv env = TEST_UTIL.getMiniHBaseCluster().getMaster().
+   getMasterProcedureExecutor().getEnvironment();
+ Path result = regionFS.commitDaughterRegion(daughterA, new ArrayList<>(), env);
assertEquals(splitDir, result);
}

@@ -171,8 +174,10 @@ public void testCommitDaughterRegionWithFiles() throws Exception {
filesB.add(regionFS
.splitStoreFile(daughterB, Bytes.toString(FAMILY_NAME), file,
Bytes.toBytes("002"), true, region.getSplitPolicy()));
- Path resultA = regionFS.commitDaughterRegion(daughterA, filesA);
- Path resultB = regionFS.commitDaughterRegion(daughterB, filesB);
+ MasterProcedureEnv env = TEST_UTIL.getMiniHBaseCluster().getMaster().
+   getMasterProcedureExecutor().getEnvironment();
+ Path resultA = regionFS.commitDaughterRegion(daughterA, filesA, env);
+ Path resultB = regionFS.commitDaughterRegion(daughterB, filesB, env);
assertEquals(splitDirA, resultA);
assertEquals(splitDirB, resultB);
}
@@ -208,7 +213,9 @@ public void testCommitMergedRegion() throws Exception {
file = (HStoreFile) second.getStore(FAMILY_NAME).getStorefiles().toArray()[0];
List<Path> mergedFiles = new ArrayList<>();
mergedFiles.add(mergeFileFromRegion(mergeRegionFs, second, file));
- mergeRegionFs.commitMergedRegion(mergedFiles);
+ MasterProcedureEnv env = TEST_UTIL.getMiniHBaseCluster().getMaster().
+   getMasterProcedureExecutor().getEnvironment();
+ mergeRegionFs.commitMergedRegion(mergedFiles, env);
}

private void waitForSplitProcComplete(int attempts, int waitTime) throws Exception {
@@ -24,6 +24,7 @@
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

@@ -49,12 +50,14 @@
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
@@ -69,6 +72,8 @@
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.io.hfile.ReaderContext;
import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
@@ -1057,7 +1062,17 @@ private Path splitStoreFile(final HRegionFileSystem regionFs, final RegionInfo hri
}
List<Path> splitFiles = new ArrayList<>();
splitFiles.add(path);
- Path regionDir = regionFs.commitDaughterRegion(hri, splitFiles);
+ MasterProcedureEnv mockEnv = mock(MasterProcedureEnv.class);
+ MasterServices mockServices = mock(MasterServices.class);
+ when(mockEnv.getMasterServices()).thenReturn(mockServices);
+ when(mockEnv.getMasterConfiguration()).thenReturn(new Configuration());
+ TableDescriptors mockTblDescs = mock(TableDescriptors.class);
+ when(mockServices.getTableDescriptors()).thenReturn(mockTblDescs);
+ TableDescriptor mockTblDesc = mock(TableDescriptor.class);
+ when(mockTblDescs.get(any())).thenReturn(mockTblDesc);
+ ColumnFamilyDescriptor mockCfDesc = mock(ColumnFamilyDescriptor.class);
+ when(mockTblDesc.getColumnFamily(any())).thenReturn(mockCfDesc);
+ Path regionDir = regionFs.commitDaughterRegion(hri, splitFiles, mockEnv);
return new Path(new Path(regionDir, family), path.getName());
}
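For reference, the mock chain above mirrors the lookups the new tracker path performs inside loadRegionFilesIntoStoreTracker, roughly:

    // Lookup chain the mocks satisfy (names from the hunks above):
    TableDescriptor tblDesc = env.getMasterServices().getTableDescriptors()
        .get(regionInfo.getTable());
    ColumnFamilyDescriptor cfDesc = tblDesc.getColumnFamily(Bytes.toBytes(familyName));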
