From 5cec62f0f9dff7bfea639dbe478fab31b8344bd7 Mon Sep 17 00:00:00 2001 From: Guanghao Zhang Date: Wed, 7 Aug 2019 19:17:26 -0500 Subject: [PATCH] HBASE-22807 HBCK Report showed wrong orphans regions on FileSystem (#461) Signed-off-by: Sakthi --- .../apache/hadoop/hbase/master/HbckChore.java | 38 +++++++++---------- .../hadoop/hbase/util/HbckRegionInfo.java | 3 +- .../resources/hbase-webapps/master/hbck.jsp | 3 +- .../master/assignment/TestHbckChore.java | 25 +++++++++++- 4 files changed, 46 insertions(+), 23 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java index 6a69bae09c9b..69a8d536dcf2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java @@ -19,13 +19,13 @@ import java.io.IOException; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.locks.ReentrantReadWriteLock; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.ScheduledChore; @@ -40,8 +40,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hbase.thirdparty.com.google.common.collect.Lists; - /** * Used to do the hbck checking job at master side. */ @@ -69,7 +67,7 @@ public class HbckChore extends ScheduledChore { /** * The regions have directory on FileSystem, but no region info in meta. */ - private final List orphanRegionsOnFS = new LinkedList<>(); + private final Set orphanRegionsOnFS = new HashSet<>(); /** * The inconsistent regions. There are three case: * case 1. Master thought this region opened, but no regionserver reported it. @@ -83,7 +81,7 @@ public class HbckChore extends ScheduledChore { * The "snapshot" is used to save the last round's HBCK checking report. */ private final Map orphanRegionsOnRSSnapshot = new HashMap<>(); - private final List orphanRegionsOnFSSnapshot = new LinkedList<>(); + private final Set orphanRegionsOnFSSnapshot = new HashSet<>(); private final Map>> inconsistentRegionsSnapshot = new HashMap<>(); @@ -153,9 +151,11 @@ private void loadRegionsFromInMemoryState() { regionState.getStamp()); regionInfoMap.put(regionInfo.getEncodedName(), new HbckRegionInfo(metaEntry)); } + LOG.info("Loaded {} regions from in-memory state of AssignmentManager", regionStates.size()); } private void loadRegionsFromRSReport() { + int numRegions = 0; Map> rsReports = master.getAssignmentManager().getRSReports(); for (Map.Entry> entry : rsReports.entrySet()) { ServerName serverName = entry.getKey(); @@ -168,7 +168,10 @@ private void loadRegionsFromRSReport() { } hri.addServer(hri.getMetaEntry(), serverName); } + numRegions += entry.getValue().size(); } + LOG.info("Loaded {} regions from {} regionservers' reports and found {} orphan regions", + numRegions, rsReports.size(), orphanRegionsOnFS.size()); for (Map.Entry entry : regionInfoMap.entrySet()) { String encodedRegionName = entry.getKey(); @@ -191,27 +194,24 @@ private void loadRegionsFromFS() throws IOException { Path rootDir = master.getMasterFileSystem().getRootDir(); FileSystem fs = master.getMasterFileSystem().getFileSystem(); - // list all tables from HDFS - List tableDirs = Lists.newArrayList(); - List paths = FSUtils.getTableDirs(fs, rootDir); - for (Path path : paths) { - tableDirs.add(fs.getFileStatus(path)); - } - - for (FileStatus tableDir : tableDirs) { - FileStatus[] regionDirs = fs.listStatus(tableDir.getPath()); - for (FileStatus regionDir : regionDirs) { - String encodedRegionName = regionDir.getPath().getName(); + int numRegions = 0; + List tableDirs = FSUtils.getTableDirs(fs, rootDir); + for (Path tableDir : tableDirs) { + List regionDirs = FSUtils.getRegionDirs(fs, tableDir); + for (Path regionDir : regionDirs) { + String encodedRegionName = regionDir.getName(); HbckRegionInfo hri = regionInfoMap.get(encodedRegionName); if (hri == null) { orphanRegionsOnFS.add(encodedRegionName); continue; } - HbckRegionInfo.HdfsEntry hdfsEntry = - new HbckRegionInfo.HdfsEntry(regionDir.getPath(), regionDir.getModificationTime()); + HbckRegionInfo.HdfsEntry hdfsEntry = new HbckRegionInfo.HdfsEntry(regionDir); hri.setHdfsEntry(hdfsEntry); } + numRegions += regionDirs.size(); } + LOG.info("Loaded {} tables {} regions from filesyetem and found {} orphan regions", + tableDirs.size(), numRegions, orphanRegionsOnFS.size()); } /** @@ -237,7 +237,7 @@ public Map getOrphanRegionsOnRS() { /** * @return the regions have directory on FileSystem, but no region info in meta. */ - public List getOrphanRegionsOnFS() { + public Set getOrphanRegionsOnFS() { // Need synchronized here, as this "snapshot" may be changed after checking. rwLock.readLock().lock(); try { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HbckRegionInfo.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HbckRegionInfo.java index 6204071f69f8..de1d17942fcc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HbckRegionInfo.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HbckRegionInfo.java @@ -330,9 +330,8 @@ public static class HdfsEntry { HdfsEntry() { } - public HdfsEntry(Path regionDir, long regionDirModTime) { + public HdfsEntry(Path regionDir) { this.regionDir = regionDir; - this.regionDirModTime = regionDirModTime; } } diff --git a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp index fcc07329dd88..183740bb4e20 100644 --- a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp +++ b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp @@ -23,6 +23,7 @@ import="java.util.Date" import="java.util.List" import="java.util.Map" + import="java.util.Set" import="java.util.stream.Collectors" import="java.time.ZonedDateTime" import="java.time.format.DateTimeFormatter" @@ -41,7 +42,7 @@ HbckChore hbckChore = master.getHbckChore(); Map>> inconsistentRegions = null; Map orphanRegionsOnRS = null; - List orphanRegionsOnFS = null; + Set orphanRegionsOnFS = null; long startTimestamp = 0; long endTimestamp = 0; if (hbckChore != null) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestHbckChore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestHbckChore.java index 2353e7a54707..7dfe9790e2c4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestHbckChore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestHbckChore.java @@ -26,14 +26,18 @@ import java.util.Map; import java.util.concurrent.Future; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfoBuilder; import org.apache.hadoop.hbase.master.HbckChore; +import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.Pair; import org.junit.Before; import org.junit.ClassRule; @@ -141,4 +145,23 @@ public void testForUserTable() throws Exception { inconsistentRegions = hbckChore.getInconsistentRegions(); assertFalse(inconsistentRegions.containsKey(regionName)); } -} \ No newline at end of file + + @Test + public void testOrphanRegionsOnFS() throws Exception { + TableName tableName = TableName.valueOf("testOrphanRegionsOnFS"); + RegionInfo regionInfo = RegionInfoBuilder.newBuilder(tableName).build(); + Configuration conf = UTIL.getConfiguration(); + + hbckChore.choreForTesting(); + assertEquals(0, hbckChore.getOrphanRegionsOnFS().size()); + + HRegion.createRegionDir(conf, regionInfo, FSUtils.getRootDir(conf)); + hbckChore.choreForTesting(); + assertEquals(1, hbckChore.getOrphanRegionsOnFS().size()); + assertTrue(hbckChore.getOrphanRegionsOnFS().contains(regionInfo.getEncodedName())); + + FSUtils.deleteRegionDir(conf, new HRegionInfo(regionInfo)); + hbckChore.choreForTesting(); + assertEquals(0, hbckChore.getOrphanRegionsOnFS().size()); + } +}