From 55fdff50ca8709576dc9bfeaf91c987f3cfd2f0c Mon Sep 17 00:00:00 2001
From: Wellington Ramos Chevreuil <wchevreuil@apache.org>
Date: Tue, 14 Dec 2021 21:22:28 +0000
Subject: [PATCH] HBASE-26556 IT and Chaos Monkey improvements (#3932)

Signed-off-by: Josh Elser <elserj@apache.org>
Reviewed-by: Tak Lon (Stephen) Wu <taklwu@apache.org>
(cherry picked from commit a36d41af739159073a6f4e6143fe26d77760535b)
---
 .../hadoop/hbase/HBaseClusterManager.java     |   8 +-
 ...gurableSlowDeterministicMonkeyFactory.java | 100 ++++++++++++++++++
 .../hbase/chaos/factories/MonkeyFactory.java  |   2 +
 .../SlowDeterministicMonkeyFactory.java       |  82 ++++++++------
 4 files changed, 155 insertions(+), 37 deletions(-)
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ConfigurableSlowDeterministicMonkeyFactory.java

diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
index 122fad5a0a90..f8df7a143256 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
@@ -27,6 +27,7 @@
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.hbase.HBaseClusterManager.CommandProvider.Operation;
 import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.ReflectionUtils;
 import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.RetryCounter.RetryConfig;
 import org.apache.hadoop.hbase.util.RetryCounterFactory;
@@ -216,7 +217,7 @@ protected String findPidCommand(ServiceType service) {
     }
 
     public String signalCommand(ServiceType service, String signal) {
-      return String.format("%s | xargs kill -s %s", findPidCommand(service), signal);
+      return String.format("%s | xargs sudo kill -s %s", findPidCommand(service), signal);
     }
   }
 
@@ -322,7 +323,10 @@ protected CommandProvider getCommandProvider(ServiceType service) throws IOExcep
       case ZOOKEEPER_SERVER:
         return new ZookeeperShellCommandProvider(getConf());
       default:
-        return new HBaseShellCommandProvider(getConf());
+        Class<? extends CommandProvider> provider = getConf()
+          .getClass("hbase.it.clustermanager.hbase.command.provider",
+            HBaseShellCommandProvider.class, CommandProvider.class);
+        return ReflectionUtils.newInstance(provider, getConf());
     }
   }
 
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ConfigurableSlowDeterministicMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ConfigurableSlowDeterministicMonkeyFactory.java
new file mode 100644
index 000000000000..c8ee40c35ef6
--- /dev/null
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ConfigurableSlowDeterministicMonkeyFactory.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.chaos.factories;
+
+import java.lang.reflect.Constructor;
+import java.util.Locale;
+import java.util.function.Function;
+
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.chaos.actions.Action;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * {@link SlowDeterministicMonkeyFactory} whose heavy weighted actions can be replaced through
+ * the {@code heavy.actions} property.
+ * <p>
+ * The property holds {@code ;}-separated entries such as {@code SomeAction(1000,$table_name)}.
+ * Class names are resolved inside {@code org.apache.hadoop.hbase.chaos.actions} and
+ * {@code $table_name} is replaced by the name of the table this factory works on.
+ */
+public class ConfigurableSlowDeterministicMonkeyFactory extends SlowDeterministicMonkeyFactory {
+
+  private static final Logger LOG =
+    LoggerFactory.getLogger(ConfigurableSlowDeterministicMonkeyFactory.class);
+
+  /** Name of the property that lists the heavy actions to instantiate. */
+  final static String HEAVY_ACTIONS = "heavy.actions";
+  /** Regular expression matching the table name placeholder inside an action entry. */
+  final static String TABLE_PARAM = "\\$table_name";
+
+  /**
+   * Constructor parameter types that can be parsed from text. Constants are looked up by the
+   * upper-cased simple name of the parameter type, so the names must not be changed.
+   */
+  public enum SupportedTypes {
+    FLOAT(Float::parseFloat),
+    LONG(Long::parseLong),
+    INT(Integer::parseInt),
+    TABLENAME(TableName::valueOf);
+
+    final Function<String,Object> converter;
+
+    SupportedTypes(Function<String,Object> converter){
+      this.converter = converter;
+    }
+
+    Object convert(String param){
+      return converter.apply(param);
+    }
+  }
+
+  /**
+   * Builds the heavy weighted actions from the {@code heavy.actions} property, falling back to
+   * the superclass defaults when the property is missing or blank.
+   * @return the configured actions, never {@code null}
+   * @throws IllegalStateException if any configured entry cannot be instantiated
+   */
+  @Override
+  protected Action[] getHeavyWeightedActions() {
+    String actions = this.properties.getProperty(HEAVY_ACTIONS);
+    if (actions == null || actions.trim().isEmpty()) {
+      return super.getHeavyWeightedActions();
+    }
+    try {
+      String[] actionClasses = actions.split(";");
+      Action[] heavyActions = new Action[actionClasses.length];
+      for (int i = 0; i < actionClasses.length; i++) {
+        heavyActions[i] = instantiateAction(actionClasses[i]);
+      }
+      // The cast keeps the array from being spread over the logger's varargs parameter.
+      LOG.info("Created actions {}", (Object) heavyActions);
+      return heavyActions;
+    } catch (Exception e) {
+      // Fail with the cause attached instead of handing the caller a null array.
+      throw new IllegalStateException("Error trying to instantiate heavy actions: " + actions, e);
+    }
+  }
+
+  /**
+   * Instantiates a single {@code ActionClass(arg1,arg2,...)} entry using the first declared
+   * constructor whose parameter count equals the number of arguments.
+   * @param actionString one entry of the {@code heavy.actions} property
+   * @return the new action
+   * @throws IllegalArgumentException if no constructor matches or a parameter type is not one
+   *   of {@link SupportedTypes}
+   * @throws Exception if the class cannot be loaded or its constructor fails
+   */
+  private Action instantiateAction(String actionString) throws Exception {
+    final String packageName = "org.apache.hadoop.hbase.chaos.actions";
+    String[] classAndParams = actionString.split("\\)")[0].split("\\(");
+    String className = packageName + "." + classAndParams[0].trim();
+    // "Foo()" carries no argument text; splitting "" on ',' would yield one bogus argument.
+    String paramList = classAndParams.length > 1 ? classAndParams[1].trim() : "";
+    String[] params = paramList.isEmpty() ? new String[0]
+      : paramList.replaceAll(TABLE_PARAM, tableName.getNameAsString()).split(",");
+    LOG.info("About to instantiate action class: {}; With constructor params: {}",
+      className, params);
+    // asSubclass/cast validate the type at runtime, so no unchecked casts are needed.
+    Class<? extends Action> actionClass = Class.forName(className).asSubclass(Action.class);
+    for (Constructor<?> c : actionClass.getDeclaredConstructors()) {
+      Class<?>[] paramTypes = c.getParameterTypes();
+      if (paramTypes.length != params.length) {
+        continue;
+      }
+      Object[] constructorParams = new Object[paramTypes.length];
+      for (int i = 0; i < paramTypes.length; i++) {
+        // Locale.ROOT keeps the enum lookup independent of the JVM's default locale.
+        String typeName = paramTypes[i].getSimpleName().toUpperCase(Locale.ROOT);
+        constructorParams[i] = SupportedTypes.valueOf(typeName).convert(params[i].trim());
+      }
+      return actionClass.cast(c.newInstance(constructorParams));
+    }
+    throw new IllegalArgumentException("Couldn't find any matching constructor for: " +
+      actionString);
+  }
+}
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
index 6f93715ea757..de7cfad11a2c 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
@@ -79,6 +79,7 @@ public MonkeyFactory setProperties(Properties props) {
   public static final String SERVER_AND_DEPENDENCIES_KILLING = "serverAndDependenciesKilling";
   public static final String DISTRIBUTED_ISSUES = "distributedIssues";
   public static final String DATA_ISSUES = "dataIssues";
+  public static final String CONFIGURABLE_SLOW_DETERMINISTIC = "configurableSlowDeterministic";
 
   public static Map<String, MonkeyFactory> FACTORIES = ImmutableMap.<String,MonkeyFactory>builder()
     .put(CALM, new CalmMonkeyFactory())
@@ -93,6 +94,7 @@ public MonkeyFactory setProperties(Properties props) {
     .put(SERVER_AND_DEPENDENCIES_KILLING, new ServerAndDependenciesKillingMonkeyFactory())
     .put(DISTRIBUTED_ISSUES, new DistributedIssuesMonkeyFactory())
     .put(DATA_ISSUES, new DataIssuesMonkeyFactory())
+    .put(CONFIGURABLE_SLOW_DETERMINISTIC, new ConfigurableSlowDeterministicMonkeyFactory())
     .build();
 
   public static MonkeyFactory getFactory(String factoryName) {
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java
index 3bba74de488f..5eaa4d87a1a5 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java
@@ -74,6 +74,64 @@ public class SlowDeterministicMonkeyFactory extends MonkeyFactory {
   private long rollingBatchSuspendRSSleepTime;
   private float rollingBatchSuspendtRSRatio;
 
+  /**
+   * Builds the least disruptive actions: compacting or flushing a table or region and moving
+   * a single region. {@link #build()} requests them through this overridable hook.
+   */
+  protected Action[] getLightWeightedActions() {
+    final Action[] lightActions = {
+      new CompactTableAction(tableName, compactTableRatio),
+      new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio),
+      new FlushTableAction(tableName),
+      new FlushRandomRegionOfTableAction(tableName),
+      new MoveRandomRegionOfTableAction(tableName)
+    };
+    return lightActions;
+  }
+
+  /**
+   * Builds the mid-weight actions (split, merge, snapshot and similar table-level changes).
+   * {@link #build()} requests them through this overridable hook.
+   */
+  protected Action[] getMidWeightedActions() {
+    final Action[] midActions = {
+      new SplitRandomRegionOfTableAction(tableName),
+      new MergeRandomAdjacentRegionsOfTableAction(tableName),
+      new SnapshotTableAction(tableName),
+      new AddColumnAction(tableName),
+      new RemoveColumnAction(tableName, columnFamilies),
+      new ChangeEncodingAction(tableName),
+      new ChangeCompressionAction(tableName),
+      new ChangeBloomFilterAction(tableName),
+      new ChangeVersionsAction(tableName),
+      new ChangeSplitPolicyAction(tableName)
+    };
+    return midActions;
+  }
+
+  /**
+   * Builds the destructive actions, such as moving regions and restarting servers.
+   * {@link #build()} requests them through this overridable hook.
+   */
+  protected Action[] getHeavyWeightedActions() {
+    final Action[] heavyActions = {
+      new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime, tableName),
+      new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName),
+      new RestartRandomRsAction(restartRandomRSSleepTime),
+      new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio),
+      new RestartActiveMasterAction(restartActiveMasterSleepTime),
+      new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime,
+        rollingBatchRestartRSRatio),
+      new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime),
+      new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName),
+      new SplitAllRegionOfTableAction(tableName),
+      new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
+      new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
+        rollingBatchSuspendtRSRatio)
+    };
+    return heavyActions;
+  }
+
   @Override
   public ChaosMonkey build() {
 
@@ -81,47 +125,15 @@ public ChaosMonkey build() {
     // Actions such as compact/flush a table/region,
     // move one region around. They are not so destructive,
     // can be executed more frequently.
-    Action[] actions1 = new Action[] {
-        new CompactTableAction(tableName, compactTableRatio),
-        new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio),
-        new FlushTableAction(tableName),
-        new FlushRandomRegionOfTableAction(tableName),
-        new MoveRandomRegionOfTableAction(tableName)
-    };
+    Action[] actions1 = getLightWeightedActions();
 
     // Actions such as split/merge/snapshot.
     // They should not cause data loss, or unreliability
     // such as region stuck in transition.
-    Action[] actions2 = new Action[] {
-        new SplitRandomRegionOfTableAction(tableName),
-        new MergeRandomAdjacentRegionsOfTableAction(tableName),
-        new SnapshotTableAction(tableName),
-        new AddColumnAction(tableName),
-        new RemoveColumnAction(tableName, columnFamilies),
-        new ChangeEncodingAction(tableName),
-        new ChangeCompressionAction(tableName),
-        new ChangeBloomFilterAction(tableName),
-        new ChangeVersionsAction(tableName),
-        new ChangeSplitPolicyAction(tableName),
-    };
+    Action[] actions2 = getMidWeightedActions();
 
     // Destructive actions to mess things around.
-    Action[] actions3 = new Action[] {
-        new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime,
-            tableName),
-        new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName),
-        new RestartRandomRsAction(restartRandomRSSleepTime),
-        new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio),
-        new RestartActiveMasterAction(restartActiveMasterSleepTime),
-        new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime,
-            rollingBatchRestartRSRatio),
-        new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime),
-        new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName),
-        new SplitAllRegionOfTableAction(tableName),
-      new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
-      new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
-          rollingBatchSuspendtRSRatio)
-    };
+    Action[] actions3 = getHeavyWeightedActions();
 
     // Action to log more info for debugging
     Action[] actions4 = new Action[] {