apache · bbeaudreault · Dec 4, 2023 · Nov 9, 2023 · Nov 22, 2023 · Nov 28, 2023
diff --git a/...e-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java b/...e-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java
@@ -17,6 +17,11 @@
  */
 package org.apache.hadoop.hbase.master.procedure;
 
+import static org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure.REOPEN_BATCH_BACKOFF_MILLIS_DEFAULT;
+import static org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure.REOPEN_BATCH_BACKOFF_MILLIS_KEY;
+import static org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure.REOPEN_BATCH_SIZE_DEFAULT;
+import static org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure.REOPEN_BATCH_SIZE_KEY;
+
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collections;
@@ -25,6 +30,7 @@
 import java.util.function.Supplier;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.ConcurrentTableModificationException;
 import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.HBaseIOException;
@@ -147,7 +153,12 @@ protected Flow executeFromState(final MasterProcedureEnv env, final ModifyTableS
           break;
         case MODIFY_TABLE_REOPEN_ALL_REGIONS:
           if (isTableEnabled(env)) {
-            addChildProcedure(new ReopenTableRegionsProcedure(getTableName()));
+            Configuration conf = env.getMasterConfiguration();
+            long backoffMillis =
+              conf.getLong(REOPEN_BATCH_BACKOFF_MILLIS_KEY, REOPEN_BATCH_BACKOFF_MILLIS_DEFAULT);
+            int batchSize = conf.getInt(REOPEN_BATCH_SIZE_KEY, REOPEN_BATCH_SIZE_DEFAULT);
+            addChildProcedure(
+              new ReopenTableRegionsProcedure(getTableName(), backoffMillis, batchSize));
           }
           setNextState(ModifyTableState.MODIFY_TABLE_ASSIGN_NEW_REPLICAS);
           break;

diff --git a/...r/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java b/...r/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
@@ -17,10 +17,12 @@
  */
 package org.apache.hadoop.hbase.master.procedure;
 
+import com.google.errorprone.annotations.RestrictedApi;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.Set;
 import java.util.stream.Collectors;
 import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.TableName;
@@ -53,6 +55,12 @@ public class ReopenTableRegionsProcedure
 
   private static final Logger LOG = LoggerFactory.getLogger(ReopenTableRegionsProcedure.class);
 
+  public static final String REOPEN_BATCH_BACKOFF_MILLIS_KEY =
+    "hbase.table.regions.reopen.batch.backoff.ms";
+  public static final long REOPEN_BATCH_BACKOFF_MILLIS_DEFAULT = 0L;
+  public static final String REOPEN_BATCH_SIZE_KEY = "hbase.table.regions.reopen.batch.size";
+  public static final int REOPEN_BATCH_SIZE_DEFAULT = Integer.MAX_VALUE;
+
   private TableName tableName;
 
   // Specify specific regions of a table to reopen.
@@ -61,20 +69,36 @@ public class ReopenTableRegionsProcedure
 
   private List<HRegionLocation> regions = Collections.emptyList();
 
+  private List<HRegionLocation> currentRegionBatch = Collections.emptyList();
+
   private RetryCounter retryCounter;
 
+  private long reopenBatchBackoffMillis;
+  private int reopenBatchSize;
+
   public ReopenTableRegionsProcedure() {
-    regionNames = Collections.emptyList();
+    this(null);
   }
 
   public ReopenTableRegionsProcedure(TableName tableName) {
-    this.tableName = tableName;
-    this.regionNames = Collections.emptyList();
+    this(tableName, Collections.emptyList());
   }
 
   public ReopenTableRegionsProcedure(final TableName tableName, final List<byte[]> regionNames) {
+    this(tableName, regionNames, REOPEN_BATCH_BACKOFF_MILLIS_DEFAULT, REOPEN_BATCH_SIZE_DEFAULT);
+  }
+
+  public ReopenTableRegionsProcedure(final TableName tableName, long reopenBatchBackoffMillis,
+    int reopenBatchSize) {
+    this(tableName, Collections.emptyList(), reopenBatchBackoffMillis, reopenBatchSize);
+  }
+
+  public ReopenTableRegionsProcedure(final TableName tableName, final List<byte[]> regionNames,
+    long reopenBatchBackoffMillis, int reopenBatchSize) {
     this.tableName = tableName;
     this.regionNames = regionNames;
+    this.reopenBatchBackoffMillis = reopenBatchBackoffMillis;
+    this.reopenBatchSize = Math.max(1, reopenBatchSize); // a batch < 1 could never make progress
   }
 
   @Override
@@ -87,6 +111,12 @@ public TableOperationType getTableOperationType() {
     return TableOperationType.REGION_EDIT;
   }
 
+  @RestrictedApi(explanation = "Should only be called in tests", link = "",
+      allowedOnPath = ".*/src/test/.*")
+  public List<HRegionLocation> getCurrentRegionBatch() {
+    return new ArrayList<>(currentRegionBatch);
+  }
+
   private boolean canSchedule(MasterProcedureEnv env, HRegionLocation loc) {
     if (loc.getSeqNum() < 0) {
       return false;
@@ -114,7 +144,8 @@ protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState
         setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
         return Flow.HAS_MORE_STATE;
       case REOPEN_TABLE_REGIONS_REOPEN_REGIONS:
-        for (HRegionLocation loc : regions) {
+        currentRegionBatch = regions.stream().limit(reopenBatchSize).collect(Collectors.toList());
+        for (HRegionLocation loc : currentRegionBatch) {
           RegionStateNode regionNode =
             env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion());
           // this possible, maybe the region has already been merged or split, see HBASE-20921
@@ -139,33 +170,57 @@ protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState
       case REOPEN_TABLE_REGIONS_CONFIRM_REOPENED:
         regions = regions.stream().map(env.getAssignmentManager().getRegionStates()::checkReopened)
           .filter(l -> l != null).collect(Collectors.toList());
-        if (regions.isEmpty()) {
-          return Flow.NO_MORE_STATE;
+        // Narrow the current batch to the regions that still need reopening. Match on the encoded
+        // region name (a String): HRegionLocation hashes only on server name, and byte[] region
+        // names use identity equals/hashCode, so neither is safe as a hash set element.
+        Set<String> currentRegionBatchNames = currentRegionBatch.stream()
+          .map(r -> r.getRegion().getEncodedName()).collect(Collectors.toSet());
+        currentRegionBatch = regions.stream()
+          .filter(r -> currentRegionBatchNames.contains(r.getRegion().getEncodedName()))
+          .collect(Collectors.toList());
+        if (currentRegionBatch.isEmpty()) {
+          if (regions.isEmpty()) {
+            return Flow.NO_MORE_STATE;
+          } else {
+            setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
+            if (reopenBatchBackoffMillis > 0) {
+              backoff(reopenBatchBackoffMillis);
+            }
+            return Flow.HAS_MORE_STATE;
+          }
         }
-        if (regions.stream().anyMatch(loc -> canSchedule(env, loc))) {
+        if (currentRegionBatch.stream().anyMatch(loc -> canSchedule(env, loc))) {
           retryCounter = null;
           setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
+          if (reopenBatchBackoffMillis > 0) {
+            backoff(reopenBatchBackoffMillis);
+          }
           return Flow.HAS_MORE_STATE;
         }
         // We can not schedule TRSP for all the regions need to reopen, wait for a while and retry
         // again.
         if (retryCounter == null) {
           retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
         }
-        long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
+        long backoffMillis = retryCounter.getBackoffTimeAndIncrementAttempts();
         LOG.info(
-          "There are still {} region(s) which need to be reopened for table {} are in "
+          "There are still {} region(s) which need to be reopened for table {}. {} are in "
             + "OPENING state, suspend {}secs and try again later",
-          regions.size(), tableName, backoff / 1000);
-        setTimeout(Math.toIntExact(backoff));
-        setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
-        skipPersistence();
+          regions.size(), tableName, currentRegionBatch.size(), backoffMillis / 1000);
+        backoff(backoffMillis);
         throw new ProcedureSuspendedException();
       default:
         throw new UnsupportedOperationException("unhandled state=" + state);
     }
   }
 
+  private void backoff(long millis) throws ProcedureSuspendedException {
+    setTimeout(Math.toIntExact(millis));
+    setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
+    skipPersistence();
+    throw new ProcedureSuspendedException();
+  }
+
   private List<HRegionLocation>
     getRegionLocationsForReopen(List<HRegionLocation> tableRegionsForReopen) {
 

diff --git a/...org/apache/hadoop/hbase/master/procedure/TestReopenTableRegionsProcedureBatchBackoff.java b/...org/apache/hadoop/hbase/master/procedure/TestReopenTableRegionsProcedureBatchBackoff.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.List;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.ServerManager;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Confirm that we will rate limit reopen batches when reopening all table regions. This can avoid
+ * the pain associated with reopening too many regions at once.
+ */
+@Category({ MasterTests.class, MediumTests.class })
+public class TestReopenTableRegionsProcedureBatchBackoff {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestReopenTableRegionsProcedureBatchBackoff.class);
+
+  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
+
+  private static final TableName TABLE_NAME = TableName.valueOf("BatchBackoff");
+  private static final int BACKOFF_MILLIS_PER_RS = 3_000;
+  private static final int REOPEN_BATCH_SIZE = 1;
+
+  private static final byte[] CF = Bytes.toBytes("cf");
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    Configuration conf = UTIL.getConfiguration();
+    conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
+    UTIL.startMiniCluster(1);
+    UTIL.createMultiRegionTable(TABLE_NAME, CF, 10);
+  }
+
+  @AfterClass
+  public static void tearDown() throws Exception {
+    UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void testRegionBatchBackoff() throws IOException {
+    ProcedureExecutor<MasterProcedureEnv> procExec =
+      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
+    List<RegionInfo> regions = UTIL.getAdmin().getRegions(TABLE_NAME);
+    assertTrue(10 <= regions.size());
+    ReopenTableRegionsProcedure proc =
+      new ReopenTableRegionsProcedure(TABLE_NAME, BACKOFF_MILLIS_PER_RS, REOPEN_BATCH_SIZE);
+    Instant startedAt = Instant.now();
+    procExec.submitProcedure(proc);
+    ProcedureSyncWait.waitForProcedureToComplete(procExec, proc, 60_000);
+    // No backoff follows the final batch, so n single-region batches guarantee only n - 1 waits.
+    assertTrue(Duration.between(startedAt, Instant.now()).toMillis()
+        >= (long) (regions.size() - 1) * BACKOFF_MILLIS_PER_RS);
+  }
+
+  @Test
+  public void testRegionBatchNoBackoff() throws IOException {
+    ProcedureExecutor<MasterProcedureEnv> procExec =
+      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
+    List<RegionInfo> regions = UTIL.getAdmin().getRegions(TABLE_NAME);
+    assertTrue(10 <= regions.size());
+    int noBackoffMillis = 0;
+    ReopenTableRegionsProcedure proc =
+      new ReopenTableRegionsProcedure(TABLE_NAME, noBackoffMillis, REOPEN_BATCH_SIZE);
+    procExec.submitProcedure(proc);
+    ProcedureSyncWait.waitForProcedureToComplete(procExec, proc,
+      (long) regions.size() * BACKOFF_MILLIS_PER_RS);
+  }
+}