From 4e5cb0f7aaa0b839581b95dfd5bafd5126d5fc80 Mon Sep 17 00:00:00 2001 From: haxiaolin Date: Thu, 1 Aug 2019 10:13:49 +0800 Subject: [PATCH] HBASE-22767 System table RIT STUCK if their RSGroup has no highest version RSes Signed-off-by: stack --- .../apache/hadoop/hbase/util/VersionInfo.java | 4 ++ hbase-common/src/saveVersion.sh | 4 +- .../hbase/rsgroup/TestRSGroupsKillRS.java | 54 +++++++++++++++++++ .../master/assignment/AssignmentManager.java | 25 ++++++++- 4 files changed, 84 insertions(+), 3 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/VersionInfo.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/VersionInfo.java index b57255aeed6a..bc5b6dcdf2ea 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/VersionInfo.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/VersionInfo.java @@ -166,6 +166,10 @@ private static String[] getVersionComponents(final String version) { return comps; } + public static int getMajorVersion(String version) { + return Integer.parseInt(version.split("\\.")[0]); + } + public static void main(String[] args) { writeTo(System.out); } diff --git a/hbase-common/src/saveVersion.sh b/hbase-common/src/saveVersion.sh index 507bbb020001..8fda48c316bb 100644 --- a/hbase-common/src/saveVersion.sh +++ b/hbase-common/src/saveVersion.sh @@ -70,8 +70,10 @@ package org.apache.hadoop.hbase; import org.apache.yetus.audience.InterfaceAudience; @InterfaceAudience.Private +@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="DM_STRING_CTOR", + justification="Intentional; to be modified in test") public class Version { - public static final String version = "$version"; + public static final String version = new String("$version"); public static final String revision = "$revision"; public static final String user = "$user"; public static final String date = "$date"; diff --git a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsKillRS.java 
b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsKillRS.java index 503a1a69f61d..539f351e1380 100644 --- a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsKillRS.java +++ b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsKillRS.java @@ -21,6 +21,8 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -30,6 +32,7 @@ import org.apache.hadoop.hbase.NamespaceDescriptor; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.Version; import org.apache.hadoop.hbase.Waiter; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; import org.apache.hadoop.hbase.client.RegionInfo; @@ -40,6 +43,7 @@ import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.JVMClusterUtil; +import org.apache.hadoop.hbase.util.VersionInfo; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; @@ -224,4 +228,54 @@ public void testKillAllRSInGroup() throws Exception { // wait and check if table regions are online TEST_UTIL.waitTableAvailable(tableName, 30000); } + + @Test + public void testLowerMetaGroupVersion() throws Exception{ + // create a rsgroup and move one regionserver to it + String groupName = "meta_group"; + int groupRSCount = 1; + addGroup(groupName, groupRSCount); + + // move hbase:meta to meta_group + tableName = TableName.META_TABLE_NAME; + Set<TableName> toAddTables = new HashSet<>(); + toAddTables.add(tableName); + rsGroupAdmin.moveTables(toAddTables, groupName); + assertTrue(rsGroupAdmin.getRSGroupInfo(groupName).getTables().contains(tableName)); + TEST_UTIL.waitTableAvailable(tableName, 30000); + + // restart the regionserver in meta_group, and lower its
version + String originVersion = ""; + Set<Address>
servers = new HashSet<>(); + for(Address addr : rsGroupAdmin.getRSGroupInfo(groupName).getServers()) { + servers.add(addr); + TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr)); + originVersion = master.getRegionServerVersion(getServerName(addr)); + } + // better wait for a while for region reassign + sleep(10000); + assertEquals(NUM_SLAVES_BASE - groupRSCount, + TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size()); + Address address = servers.iterator().next(); + int majorVersion = VersionInfo.getMajorVersion(originVersion); + assertTrue(majorVersion >= 1); + String lowerVersion = String.valueOf(majorVersion - 1) + originVersion.split("\\.")[1]; + setFinalStatic(Version.class.getField("version"), lowerVersion); + TEST_UTIL.getMiniHBaseCluster().startRegionServer(address.getHostname(), + address.getPort()); + assertEquals(NUM_SLAVES_BASE, + TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size()); + assertTrue(VersionInfo.compareVersion(originVersion, + master.getRegionServerVersion(getServerName(servers.iterator().next()))) > 0); + LOG.debug("wait for META assigned..."); + TEST_UTIL.waitTableAvailable(tableName, 30000); + } + + private static void setFinalStatic(Field field, Object newValue) throws Exception { + field.setAccessible(true); + Field modifiersField = Field.class.getDeclaredField("modifiers"); + modifiersField.setAccessible(true); + modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL); + field.set(null, newValue); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index 1512674b1ce0..787b0d2352b9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -1934,12 +1934,24 @@ private void 
processAssignQueue() { LOG.debug("Processing assignQueue; systemServersCount=" + serversForSysTables.size() + ", allServersCount=" + servers.size()); processAssignmentPlans(regions, null, systemHRIs, - serversForSysTables.isEmpty()? servers: serversForSysTables); + serversForSysTables.isEmpty() && !containsBogusAssignments(regions, systemHRIs) ? + servers: serversForSysTables); } processAssignmentPlans(regions, retainMap, userHRIs, servers); } + private boolean containsBogusAssignments(Map<RegionInfo, RegionStateNode> regions, + List<RegionInfo> hirs) { + for (RegionInfo ri : hirs) { + if (regions.get(ri).getRegionLocation() != null && + regions.get(ri).getRegionLocation().equals(LoadBalancer.BOGUS_SERVER_NAME)){ + return true; + } + } + return false; + } + private void processAssignmentPlans(final HashMap regions, final HashMap retainMap, final List hris, final List servers) { @@ -1995,7 +2007,16 @@ private void acceptPlan(final HashMap regions, for (RegionInfo hri: entry.getValue()) { final RegionStateNode regionNode = regions.get(hri); regionNode.setRegionLocation(server); - events[evcount++] = regionNode.getProcedureEvent(); + if (server.equals(LoadBalancer.BOGUS_SERVER_NAME) && regionNode.isSystemTable()) { + assignQueueLock.lock(); + try { + pendingAssignQueue.add(regionNode); + } finally { + assignQueueLock.unlock(); + } + }else { + events[evcount++] = regionNode.getProcedureEvent(); + } } } ProcedureEvent.wakeEvents(getProcedureScheduler(), events);