From cf8114a82e3b5499af014b1d7b05218d14c42118 Mon Sep 17 00:00:00 2001 From: Guanghao Zhang Date: Sat, 27 Jul 2019 18:23:37 +0800 Subject: [PATCH] HBASE-22709 Add a chore thread in master to do hbck checking (#404) Signed-off-by: stack --- .../master/AssignmentManagerStatusTmpl.jamon | 76 ----- .../hbase/tmpl/master/MasterStatusTmpl.jamon | 3 +- .../apache/hadoop/hbase/master/HMaster.java | 8 + .../hadoop/hbase/master/HbckChecker.java | 282 ++++++++++++++++++ .../master/assignment/AssignmentManager.java | 54 +--- .../resources/hbase-webapps/master/hbck.jsp | 153 ++++++++++ ...maticRegions.java => TestHbckChecker.java} | 65 ++-- 7 files changed, 497 insertions(+), 144 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChecker.java create mode 100644 hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp rename hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/{TestAMProblematicRegions.java => TestHbckChecker.java} (70%) diff --git a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon index b83f711d9504..c7b305c525b9 100644 --- a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon +++ b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon @@ -42,84 +42,8 @@ int limit = 100; <%java> SortedSet rit = assignmentManager.getRegionStates() .getRegionsInTransitionOrderedByTimestamp(); -Map>> problematicRegions = assignmentManager - .getProblematicRegions(); -<%if !problematicRegions.isEmpty() %> -<%java> -int totalSize = problematicRegions.size(); -int sizePerPage = Math.min(10, totalSize); -int numOfPages = (int) Math.ceil(totalSize * 1.0 / sizePerPage); - -
-

Problematic Regions

-

- - <% problematicRegions.size() %> problematic region(s). There are three case: 1. Master - thought this region opened, but no regionserver reported it. 2. Master thought this - region opened on Server1, but regionserver reported Server2. 3. More than one - regionservers reported opened this region. Notice: the reported online regionservers - may be not right when there are regions in transition. Please check them in - regionserver's web UI. - -

-
-
- <%java int recordItr = 0; %> - <%for Map.Entry>> entry : problematicRegions.entrySet() %> - <%if (recordItr % sizePerPage) == 0 %> - <%if recordItr == 0 %> -
- <%else> -
- - - - - - - - - - - - - - - <%java recordItr++; %> - <%if (recordItr % sizePerPage) == 0 %> -
RegionLocation in METAReported Online Region Servers
<% entry.getKey() %><% entry.getValue().getFirst() %><% entry.getValue().getSecond().stream().map(ServerName::getServerName) - .collect(Collectors.joining(", ")) %>
-
- - - - <%if (recordItr % sizePerPage) != 0 %> - <%for ; (recordItr % sizePerPage) != 0 ; recordItr++ %> - - - -
- - -
- -
-
- - <%if !rit.isEmpty() %> <%java> long currentTime = System.currentTimeMillis(); diff --git a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon index 5c5f7fe7cf0b..f30cf5de26ca 100644 --- a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon +++ b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon @@ -149,7 +149,8 @@ AssignmentManager assignmentManager = master.getAssignmentManager();
  • Home
  • Table Details
  • <%if master.isActiveMaster() %> -
  • Procedures & Locks
  • +
  • Procedures & Locks
  • +
  • HBCK Report
  • Process Metrics
  • Local Logs
  • diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 31587350bf56..8f11e0c6b317 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -385,6 +385,7 @@ public void run() { private ClusterStatusPublisher clusterStatusPublisherChore = null; private SnapshotCleanerChore snapshotCleanerChore = null; + private HbckChecker hbckChecker; CatalogJanitor catalogJanitorChore; private LogCleaner logCleaner; private HFileCleaner hfileCleaner; @@ -1108,6 +1109,8 @@ private void finishActiveMasterInitialization(MonitoredTask status) throws IOExc getChoreService().scheduleChore(normalizerChore); this.catalogJanitorChore = new CatalogJanitor(this); getChoreService().scheduleChore(catalogJanitorChore); + this.hbckChecker = new HbckChecker(this); + getChoreService().scheduleChore(hbckChecker); this.serverManager.startChore(); // Only for rolling upgrade, where we need to migrate the data in namespace table to meta table. 
@@ -1587,6 +1590,7 @@ private void stopChores() { choreService.cancelChore(this.hfileCleaner); choreService.cancelChore(this.replicationBarrierCleaner); choreService.cancelChore(this.snapshotCleanerChore); + choreService.cancelChore(this.hbckChecker); } } @@ -3756,4 +3760,8 @@ public Map getWalGroupsReplicationStatus() { } return super.getWalGroupsReplicationStatus(); } + + public HbckChecker getHbckChecker() { + return this.hbckChecker; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChecker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChecker.java new file mode 100644 index 000000000000..fbc2c7072755 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChecker.java @@ -0,0 +1,282 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master; + +import java.io.IOException; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.ScheduledChore; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.HbckRegionInfo; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hbase.thirdparty.com.google.common.collect.Lists; + +/** + * Used to do the hbck checking job at master side. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class HbckChecker extends ScheduledChore { + private static final Logger LOG = LoggerFactory.getLogger(HbckChecker.class.getName()); + + private static final String HBCK_CHECKER_INTERVAL = "hbase.master.hbck.checker.interval"; + private static final int DEFAULT_HBCK_CHECKER_INTERVAL = 60 * 60 * 1000; + + private final MasterServices master; + + /** + * This map contains the state of all hbck items. It maps from encoded region + * name to HbckRegionInfo structure. The information contained in HbckRegionInfo is used + * to detect and correct consistency (hdfs/meta/deployment) problems. + */ + private final Map regionInfoMap = new HashMap<>(); + + /** + * The regions only opened on RegionServers, but no region info in meta. + */ + private final Map orphanRegionsOnRS = new HashMap<>(); + /** + * The regions have directory on FileSystem, but no region info in meta. 
+ */ + private final List orphanRegionsOnFS = new LinkedList<>(); + /** + * The inconsistent regions. There are three case: + * case 1. Master thought this region opened, but no regionserver reported it. + * case 2. Master thought this region opened on Server1, but regionserver reported Server2 + * case 3. More than one regionservers reported opened this region + */ + private final Map>> inconsistentRegions = + new HashMap<>(); + + /** + * The "snapshot" is used to save the last round's HBCK checking report. + */ + private final Map orphanRegionsOnRSSnapshot = new HashMap<>(); + private final List orphanRegionsOnFSSnapshot = new LinkedList<>(); + private final Map>> inconsistentRegionsSnapshot = + new HashMap<>(); + + /** + * The "snapshot" may be changed after checking. And this checking report "snapshot" may be + * accessed by web ui. Use this rwLock to synchronize. + */ + ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock(); + + /** + * When running, the "snapshot" may be changed when this round's checking finish. + */ + private volatile boolean running = false; + private volatile long checkingStartTimestamp = 0; + private volatile long checkingEndTimestamp = 0; + + public HbckChecker(MasterServices master) { + super("HbckChecker-", master, + master.getConfiguration().getInt(HBCK_CHECKER_INTERVAL, DEFAULT_HBCK_CHECKER_INTERVAL)); + this.master = master; + } + + @Override + protected void chore() { + running = true; + regionInfoMap.clear(); + orphanRegionsOnRS.clear(); + orphanRegionsOnFS.clear(); + inconsistentRegions.clear(); + checkingStartTimestamp = EnvironmentEdgeManager.currentTime(); + loadRegionsFromInMemoryState(); + loadRegionsFromRSReport(); + try { + loadRegionsFromFS(); + } catch (IOException e) { + LOG.warn("Failed to load the regions from filesystem", e); + } + saveCheckResultToSnapshot(); + running = false; + } + + private void saveCheckResultToSnapshot() { + // Need synchronized here, as this "snapshot" may be access by web ui. 
+ rwLock.writeLock().lock(); + try { + orphanRegionsOnRSSnapshot.clear(); + orphanRegionsOnRS.entrySet() + .forEach(e -> orphanRegionsOnRSSnapshot.put(e.getKey(), e.getValue())); + orphanRegionsOnFSSnapshot.clear(); + orphanRegionsOnFSSnapshot.addAll(orphanRegionsOnFS); + inconsistentRegionsSnapshot.clear(); + inconsistentRegions.entrySet() + .forEach(e -> inconsistentRegionsSnapshot.put(e.getKey(), e.getValue())); + checkingEndTimestamp = EnvironmentEdgeManager.currentTime(); + } finally { + rwLock.writeLock().unlock(); + } + } + + private void loadRegionsFromInMemoryState() { + List regionStates = + master.getAssignmentManager().getRegionStates().getRegionStates(); + for (RegionState regionState : regionStates) { + RegionInfo regionInfo = regionState.getRegion(); + HbckRegionInfo.MetaEntry metaEntry = + new HbckRegionInfo.MetaEntry(regionInfo, regionState.getServerName(), + regionState.getStamp()); + regionInfoMap.put(regionInfo.getEncodedName(), new HbckRegionInfo(metaEntry)); + } + } + + private void loadRegionsFromRSReport() { + Map> rsReports = master.getAssignmentManager().getRSReports(); + for (Map.Entry> entry : rsReports.entrySet()) { + ServerName serverName = entry.getKey(); + for (byte[] regionName : entry.getValue()) { + String encodedRegionName = RegionInfo.encodeRegionName(regionName); + HbckRegionInfo hri = regionInfoMap.get(encodedRegionName); + if (hri == null) { + orphanRegionsOnRS.put(encodedRegionName, serverName); + continue; + } + hri.addServer(hri.getMetaEntry(), serverName); + } + } + + for (Map.Entry entry : regionInfoMap.entrySet()) { + String encodedRegionName = entry.getKey(); + HbckRegionInfo hri = entry.getValue(); + ServerName locationInMeta = hri.getMetaEntry().getRegionServer(); + if (hri.getDeployedOn().size() == 0) { + // Master thought this region opened, but no regionserver reported it. 
+ inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, new LinkedList<>())); + } else if (hri.getDeployedOn().size() > 1) { + // More than one regionserver reported opened this region + inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, hri.getDeployedOn())); + } else if (!hri.getDeployedOn().get(0).equals(locationInMeta)) { + // Master thought this region opened on Server1, but regionserver reported Server2 + inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, hri.getDeployedOn())); + } + } + } + + private void loadRegionsFromFS() throws IOException { + Path rootDir = master.getMasterFileSystem().getRootDir(); + FileSystem fs = master.getMasterFileSystem().getFileSystem(); + + // list all tables from HDFS + List tableDirs = Lists.newArrayList(); + List paths = FSUtils.getTableDirs(fs, rootDir); + for (Path path : paths) { + tableDirs.add(fs.getFileStatus(path)); + } + + for (FileStatus tableDir : tableDirs) { + FileStatus[] regionDirs = fs.listStatus(tableDir.getPath()); + for (FileStatus regionDir : regionDirs) { + String encodedRegionName = regionDir.getPath().getName(); + HbckRegionInfo hri = regionInfoMap.get(encodedRegionName); + if (hri == null) { + orphanRegionsOnFS.add(encodedRegionName); + continue; + } + HbckRegionInfo.HdfsEntry hdfsEntry = + new HbckRegionInfo.HdfsEntry(regionDir.getPath(), regionDir.getModificationTime()); + hri.setHdfsEntry(hdfsEntry); + } + } + } + + /** + * When running, the HBCK report may be changed later. + */ + public boolean isRunning() { + return running; + } + + /** + * @return the regions only opened on RegionServers, but no region info in meta. + */ + public Map getOrphanRegionsOnRS() { + // Need synchronized here, as this "snapshot" may be changed after checking. 
+ rwLock.readLock().lock(); + try { + return this.orphanRegionsOnRSSnapshot; + } finally { + rwLock.readLock().unlock(); + } + } + + /** + * @return the regions have directory on FileSystem, but no region info in meta. + */ + public List getOrphanRegionsOnFS() { + // Need synchronized here, as this "snapshot" may be changed after checking. + rwLock.readLock().lock(); + try { + return this.orphanRegionsOnFSSnapshot; + } finally { + rwLock.readLock().unlock(); + } + } + + /** + * Found the inconsistent regions. There are three case: + * case 1. Master thought this region opened, but no regionserver reported it. + * case 2. Master thought this region opened on Server1, but regionserver reported Server2 + * case 3. More than one regionservers reported opened this region + * + * @return the map of inconsistent regions. Key is the region name. Value is a pair of location in + * meta and the regionservers which reported opened this region. + */ + public Map>> getInconsistentRegions() { + // Need synchronized here, as this "snapshot" may be changed after checking. + rwLock.readLock().lock(); + try { + return this.inconsistentRegionsSnapshot; + } finally { + rwLock.readLock().unlock(); + } + } + + /** + * Used for web ui to show when the HBCK checking started. + */ + public long getCheckingStartTimestamp() { + return this.checkingStartTimestamp; + } + + /** + * Used for web ui to show when the HBCK checking report generated. 
+ */ + public long getCheckingEndTimestamp() { + return this.checkingEndTimestamp; + } +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index a71e85d4391a..cce3d3650f55 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -1467,6 +1467,12 @@ public long submitServerCrash(ServerName serverName, boolean shouldSplitWal) { LOG.info("Skip to add SCP for {} since this server should be OFFLINE already", serverName); return -1; } + + // Remove the in-memory rsReports result + synchronized (rsReports) { + rsReports.remove(serverName); + } + // we hold the write lock here for fencing on reportRegionStateTransition. Once we set the // server state to CRASHED, we will no longer accept the reportRegionStateTransition call from // this server. This is used to simplify the implementation for TRSP and SCP, where we can make @@ -2037,51 +2043,13 @@ MasterServices getMaster() { } /** - * Found the potentially problematic opened regions. There are three case: - * case 1. Master thought this region opened, but no regionserver reported it. - * case 2. Master thought this region opened on Server1, but regionserver reported Server2 - * case 3. More than one regionservers reported opened this region - * - * @return the map of potentially problematic opened regions. Key is the region name. Value is - * a pair of location in meta and the regionservers which reported opened this region.
+ * @return a snapshot of rsReports */ - public Map>> getProblematicRegions() { - Map> reportedOnlineRegions = new HashMap<>(); + public Map> getRSReports() { + Map> rsReportsSnapshot = new HashMap<>(); synchronized (rsReports) { - for (Map.Entry> entry : rsReports.entrySet()) { - for (byte[] regionName : entry.getValue()) { - reportedOnlineRegions - .computeIfAbsent(RegionInfo.getRegionNameAsString(regionName), r -> new HashSet<>()) - .add(entry.getKey()); - } - } + rsReports.entrySet().forEach(e -> rsReportsSnapshot.put(e.getKey(), e.getValue())); } - - Map>> problematicRegions = new HashMap<>(); - List rits = regionStates.getRegionsStateInTransition(); - for (RegionState regionState : regionStates.getRegionStates()) { - // Only consider the opened region and not in transition - if (!rits.contains(regionState) && regionState.isOpened()) { - String regionName = regionState.getRegion().getRegionNameAsString(); - ServerName serverName = regionState.getServerName(); - if (reportedOnlineRegions.containsKey(regionName)) { - Set reportedServers = reportedOnlineRegions.get(regionName); - if (reportedServers.contains(serverName)) { - if (reportedServers.size() > 1) { - // More than one regionserver reported opened this region - problematicRegions.put(regionName, new Pair<>(serverName, reportedServers)); - } - } else { - // Master thought this region opened on Server1, but regionserver reported Server2 - problematicRegions.put(regionName, new Pair<>(serverName, reportedServers)); - } - } else { - // Master thought this region opened, but no regionserver reported it. 
- problematicRegions.put(regionName, new Pair<>(serverName, new HashSet<>())); - } - } - } - - return problematicRegions; + return rsReportsSnapshot; } } diff --git a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp new file mode 100644 index 000000000000..0245d4771da7 --- /dev/null +++ b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp @@ -0,0 +1,153 @@ +<%-- +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +--%> +<%@ page contentType="text/html;charset=UTF-8" + import="java.util.Date" + import="java.util.List" + import="java.util.Map" + import="java.util.stream.Collectors" +%> +<%@ page import="org.apache.hadoop.hbase.master.HbckChecker" %> +<%@ page import="org.apache.hadoop.hbase.master.HMaster" %> +<%@ page import="org.apache.hadoop.hbase.ServerName" %> +<%@ page import="org.apache.hadoop.hbase.util.Pair" %> +<% + HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER); + pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName()); + HbckChecker hbckChecker = master.getHbckChecker(); + Map>> inconsistentRegions = null; + Map orphanRegionsOnRS = null; + List orphanRegionsOnFS = null; + long startTimestamp = 0; + long endTimestamp = 0; + if (hbckChecker != null) { + inconsistentRegions = hbckChecker.getInconsistentRegions(); + orphanRegionsOnRS = hbckChecker.getOrphanRegionsOnRS(); + orphanRegionsOnFS = hbckChecker.getOrphanRegionsOnFS(); + startTimestamp = hbckChecker.getCheckingStartTimestamp(); + endTimestamp = hbckChecker.getCheckingEndTimestamp(); + } +%> + + + + +
    + + <% if (!master.isInitialized()) { %> +
    + +
    + + <% } else { %> + +
    + +
    + +
    + +
    + + <% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %> + + + + + + + <% for (Map.Entry>> entry : inconsistentRegions.entrySet()) { %> + + + + + + <% } %> + +

    <%= inconsistentRegions.size() %> region(s) in set.

    +
    RegionLocation in METAReported Online RegionServers
    <%= entry.getKey() %><%= entry.getValue().getFirst() %><%= entry.getValue().getSecond().stream().map(ServerName::getServerName) + .collect(Collectors.joining(", ")) %>
    + <% } %> + +
    + +
    + + <% if (orphanRegionsOnRS != null && orphanRegionsOnRS.size() > 0) { %> + + + + + + <% for (Map.Entry entry : orphanRegionsOnRS.entrySet()) { %> + + + + + <% } %> + +

    <%= orphanRegionsOnRS.size() %> region(s) in set.

    +
    RegionReported Online RegionServer
    <%= entry.getKey() %><%= entry.getValue() %>
    + <% } %> + +
    + +
    + + <% if (orphanRegionsOnFS != null && orphanRegionsOnFS.size() > 0) { %> + + + + + <% for (String region : orphanRegionsOnFS) { %> + + + + <% } %> + +

    <%= orphanRegionsOnFS.size() %> region(s) in set.

    +
    Region
    <%= region %>
    + <% } %> + + <% } %> +
    + + \ No newline at end of file diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAMProblematicRegions.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestHbckChecker.java similarity index 70% rename from hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAMProblematicRegions.java rename to hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestHbckChecker.java index 2c86a09bfcc1..0bb4dc41c838 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAMProblematicRegions.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestHbckChecker.java @@ -24,7 +24,6 @@ import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.concurrent.Future; import org.apache.hadoop.hbase.HBaseClassTestRule; @@ -32,9 +31,11 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.master.HbckChecker; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Pair; +import org.junit.Before; import org.junit.ClassRule; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -42,41 +43,52 @@ import org.slf4j.LoggerFactory; @Category({ MasterTests.class, MediumTests.class }) -public class TestAMProblematicRegions extends TestAssignmentManagerBase { - private static final Logger LOG = LoggerFactory.getLogger(TestAMProblematicRegions.class); +public class TestHbckChecker extends TestAssignmentManagerBase { + private static final Logger LOG = LoggerFactory.getLogger(TestHbckChecker.class); @ClassRule public static final HBaseClassTestRule CLASS_RULE = - HBaseClassTestRule.forClass(TestAMProblematicRegions.class); + 
HBaseClassTestRule.forClass(TestHbckChecker.class); + + private HbckChecker hbckChecker; + + @Before + public void setUp() throws Exception { + super.setUp(); + hbckChecker = new HbckChecker(master); + } @Test public void testForMeta() { byte[] metaRegionNameAsBytes = RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionName(); - String metaRegionName = RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionNameAsString(); + String metaRegionName = RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedName(); List serverNames = master.getServerManager().getOnlineServersList(); assertEquals(NSERVERS, serverNames.size()); - Map>> problematicRegions = am.getProblematicRegions(); + hbckChecker.choreForTesting(); + Map>> inconsistentRegions = + hbckChecker.getInconsistentRegions(); // Test for case1: Master thought this region opened, but no regionserver reported it. - assertTrue(problematicRegions.containsKey(metaRegionName)); - Pair> pair = problematicRegions.get(metaRegionName); + assertTrue(inconsistentRegions.containsKey(metaRegionName)); + Pair> pair = inconsistentRegions.get(metaRegionName); ServerName locationInMeta = pair.getFirst(); - Set reportedRegionServers = pair.getSecond(); + List reportedRegionServers = pair.getSecond(); assertTrue(serverNames.contains(locationInMeta)); assertEquals(0, reportedRegionServers.size()); // Reported right region location. Then not in problematic regions. 
am.reportOnlineRegions(locationInMeta, Collections.singleton(metaRegionNameAsBytes)); - problematicRegions = am.getProblematicRegions(); - assertFalse(problematicRegions.containsKey(metaRegionName)); + hbckChecker.choreForTesting(); + inconsistentRegions = hbckChecker.getInconsistentRegions(); + assertFalse(inconsistentRegions.containsKey(metaRegionName)); } @Test public void testForUserTable() throws Exception { TableName tableName = TableName.valueOf("testForUserTable"); RegionInfo hri = createRegionInfo(tableName, 1); - String regionName = hri.getRegionNameAsString(); + String regionName = hri.getEncodedName(); rsDispatcher.setMockRsExecutor(new GoodRsExecutor()); Future future = submitProcedure(createAssignProcedure(hri)); waitOnFuture(future); @@ -85,11 +97,13 @@ public void testForUserTable() throws Exception { assertEquals(NSERVERS, serverNames.size()); // Test for case1: Master thought this region opened, but no regionserver reported it. - Map>> problematicRegions = am.getProblematicRegions(); - assertTrue(problematicRegions.containsKey(regionName)); - Pair> pair = problematicRegions.get(regionName); + hbckChecker.choreForTesting(); + Map>> inconsistentRegions = + hbckChecker.getInconsistentRegions(); + assertTrue(inconsistentRegions.containsKey(regionName)); + Pair> pair = inconsistentRegions.get(regionName); ServerName locationInMeta = pair.getFirst(); - Set reportedRegionServers = pair.getSecond(); + List reportedRegionServers = pair.getSecond(); assertTrue(serverNames.contains(locationInMeta)); assertEquals(0, reportedRegionServers.size()); @@ -99,9 +113,10 @@ public void testForUserTable() throws Exception { final ServerName anotherServer = serverNames.stream().filter(s -> !s.equals(tempLocationInMeta)).findFirst().get(); am.reportOnlineRegions(anotherServer, Collections.singleton(hri.getRegionName())); - problematicRegions = am.getProblematicRegions(); - assertTrue(problematicRegions.containsKey(regionName)); - pair = 
problematicRegions.get(regionName); + hbckChecker.choreForTesting(); + inconsistentRegions = hbckChecker.getInconsistentRegions(); + assertTrue(inconsistentRegions.containsKey(regionName)); + pair = inconsistentRegions.get(regionName); locationInMeta = pair.getFirst(); reportedRegionServers = pair.getSecond(); assertEquals(1, reportedRegionServers.size()); @@ -110,9 +125,10 @@ public void testForUserTable() throws Exception { // Test for case3: More than one regionservers reported opened this region. am.reportOnlineRegions(locationInMeta, Collections.singleton(hri.getRegionName())); - problematicRegions = am.getProblematicRegions(); - assertTrue(problematicRegions.containsKey(regionName)); - pair = problematicRegions.get(regionName); + hbckChecker.choreForTesting(); + inconsistentRegions = hbckChecker.getInconsistentRegions(); + assertTrue(inconsistentRegions.containsKey(regionName)); + pair = inconsistentRegions.get(regionName); locationInMeta = pair.getFirst(); reportedRegionServers = pair.getSecond(); assertEquals(2, reportedRegionServers.size()); @@ -121,7 +137,8 @@ public void testForUserTable() throws Exception { // Reported right region location. Then not in problematic regions. am.reportOnlineRegions(anotherServer, Collections.EMPTY_SET); - problematicRegions = am.getProblematicRegions(); - assertFalse(problematicRegions.containsKey(regionName)); + hbckChecker.choreForTesting(); + inconsistentRegions = hbckChecker.getInconsistentRegions(); + assertFalse(inconsistentRegions.containsKey(regionName)); } } \ No newline at end of file