From cb04c6c1a678bb274e75dc4ca8f3099ddcf202c2 Mon Sep 17 00:00:00 2001 From: Guanghao Zhang Date: Sat, 21 Sep 2019 21:11:05 +0800 Subject: [PATCH] HBASE-23035 Retain region to the last RegionServer make the failover slower (#631) Signed-off-by: Duo Zhang --- .../assignment/TransitRegionStateProcedure.java | 14 ++++++++++++-- .../master/procedure/ServerCrashProcedure.java | 4 +++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java index 6bf5dc239c14..ae2c993180a0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java @@ -398,6 +398,11 @@ public void reportTransition(MasterProcedureEnv env, RegionStateNode regionNode, // Should be called with RegionStateNode locked public void serverCrashed(MasterProcedureEnv env, RegionStateNode regionNode, ServerName serverName) throws IOException { + // force to assign to a new candidate server + // TODO: the forceNewPlan flag is not persisted, so it will be lost if the master crashes. + // But assigning to the old server is not a big deal because it does not affect correctness. + // See HBASE-23035 for more details. + forceNewPlan = true; if (remoteProc != null) { // this means we are waiting for the sub procedure, so wake it up remoteProc.serverCrashed(env, regionNode, serverName); @@ -545,8 +550,13 @@ public enum TransitionType { // anything. See the comment in executeFromState to find out why we need this assumption. 
public static TransitRegionStateProcedure assign(MasterProcedureEnv env, RegionInfo region, @Nullable ServerName targetServer) { - return setOwner(env, - new TransitRegionStateProcedure(env, region, targetServer, false, TransitionType.ASSIGN)); + return assign(env, region, false, targetServer); + } + + public static TransitRegionStateProcedure assign(MasterProcedureEnv env, RegionInfo region, + boolean forceNewPlan, @Nullable ServerName targetServer) { + return setOwner(env, new TransitRegionStateProcedure(env, region, targetServer, forceNewPlan, + TransitionType.ASSIGN)); } public static TransitRegionStateProcedure unassign(MasterProcedureEnv env, RegionInfo region) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java index 292fa4547635..9d06e6f13afa 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java @@ -464,7 +464,9 @@ private void assignRegions(MasterProcedureEnv env, List regions) thr TableState.State.DISABLING, TableState.State.DISABLED)) { continue; } - TransitRegionStateProcedure proc = TransitRegionStateProcedure.assign(env, region, null); + // force to assign to a new candidate server, see HBASE-23035 for more details. + TransitRegionStateProcedure proc = + TransitRegionStateProcedure.assign(env, region, true, null); regionNode.setProcedure(proc); addChildProcedure(proc); }