Skip to content

Commit

Permalink
HBASE-26556 IT and Chaos Monkey improvements (apache#3932)
Browse files Browse the repository at this point in the history
Signed-off-by: Josh Elser <[email protected]>
Reviewed-by: Tak Lon (Stephen) Wu <[email protected]>
  • Loading branch information
wchevreuil authored Dec 14, 2021
1 parent 0f94ab5 commit a36d41a
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseClusterManager.CommandProvider.Operation;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.ReflectionUtils;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.hadoop.hbase.util.RetryCounter.RetryConfig;
import org.apache.hadoop.hbase.util.RetryCounterFactory;
Expand Down Expand Up @@ -216,7 +217,7 @@ protected String findPidCommand(ServiceType service) {
}

/**
 * Builds the shell pipeline used to send a signal to the process of the given service: the
 * output of {@code findPidCommand(service)} is piped into {@code xargs}, which runs
 * {@code kill -s <signal>} on it.
 *
 * @param service the service whose process should be signalled
 * @param signal the signal name handed to {@code kill -s}
 * @return the complete shell command line
 */
public String signalCommand(ServiceType service, String signal) {
// NOTE(review): two return statements appear because this is a diff rendering without +/-
// markers; presumably the plain "kill" line is the removed one and the "sudo kill" line is
// its replacement -- confirm against the committed file.
return String.format("%s | xargs kill -s %s", findPidCommand(service), signal);
return String.format("%s | xargs sudo kill -s %s", findPidCommand(service), signal);
}
}

Expand Down Expand Up @@ -322,7 +323,10 @@ protected CommandProvider getCommandProvider(ServiceType service) throws IOExcep
case ZOOKEEPER_SERVER:
return new ZookeeperShellCommandProvider(getConf());
default:
return new HBaseShellCommandProvider(getConf());
Class<? extends CommandProvider> provider = getConf()
.getClass("hbase.it.clustermanager.hbase.command.provider",
HBaseShellCommandProvider.class, CommandProvider.class);
return ReflectionUtils.newInstance(provider, getConf());
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.chaos.factories;

import java.lang.reflect.Constructor;
import java.util.Arrays;
import java.util.Locale;
import java.util.function.Function;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.chaos.actions.Action;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A {@link SlowDeterministicMonkeyFactory} whose heavy-weighted actions can be replaced through
 * the {@code heavy.actions} monkey property.
 * <p>
 * The property value is a {@code ;}-separated list of action definitions, each written as
 * {@code SimpleClassName(param1,param2,...)}. Class names are resolved inside the
 * {@code org.apache.hadoop.hbase.chaos.actions} package and the literal {@code $table_name} is
 * replaced by the name of the table this factory was configured with. Only constructors whose
 * parameter types are listed in {@link SupportedTypes} can be used.
 * <p>
 * When the property is absent or empty the heavy-weighted actions of the parent class are used.
 */
public class ConfigurableSlowDeterministicMonkeyFactory extends SlowDeterministicMonkeyFactory {

  private static final Logger LOG =
    LoggerFactory.getLogger(ConfigurableSlowDeterministicMonkeyFactory.class);

  /** Name of the property holding the {@code ;}-separated heavy action definitions. */
  final static String HEAVY_ACTIONS = "heavy.actions";
  /** Regular expression matching the literal {@code $table_name} placeholder. */
  final static String TABLE_PARAM = "\\$table_name";

  /** Package in which the configured action class names are looked up. */
  private static final String ACTIONS_PACKAGE = "org.apache.hadoop.hbase.chaos.actions";

  /**
   * Constructor parameter types that can be built from their textual form. A constant is looked
   * up by the upper-cased simple name of the parameter type.
   */
  public enum SupportedTypes {
    FLOAT(Float::parseFloat),
    LONG(Long::parseLong),
    INT(Integer::parseInt),
    TABLENAME(TableName::valueOf);

    final Function<String, Object> converter;

    SupportedTypes(Function<String, Object> converter) {
      this.converter = converter;
    }

    /**
     * Converts the textual parameter into an instance of this type.
     *
     * @param param the textual value taken from the action definition
     * @return the converted value
     */
    Object convert(String param) {
      return converter.apply(param);
    }
  }

  /**
   * Returns the actions described by the {@code heavy.actions} property, or the parent's heavy
   * actions when the property is not set or empty.
   *
   * @return the heavy-weighted actions, or {@code null} if any configured action could not be
   *         instantiated (the failure is logged)
   */
  @Override
  protected Action[] getHeavyWeightedActions() {
    String actions = this.properties.getProperty(HEAVY_ACTIONS);
    if (actions == null || actions.isEmpty()) {
      return super.getHeavyWeightedActions();
    }
    try {
      String[] actionClasses = actions.split(";");
      Action[] heavyActions = new Action[actionClasses.length];
      for (int i = 0; i < actionClasses.length; i++) {
        heavyActions[i] = instantiateAction(actionClasses[i]);
      }
      // Passing the array directly would make SLF4J treat it as the varargs list and log only
      // its first element, so render it explicitly.
      LOG.info("Created actions {}", Arrays.toString(heavyActions));
      return heavyActions;
    } catch (Exception e) {
      LOG.error("Error trying to instantiate heavy actions. Returning null array.", e);
      return null;
    }
  }

  /**
   * Instantiates a single action from a definition such as {@code SomeAction(1000,$table_name)}.
   * Definitions without parameters ({@code SomeAction()} or {@code SomeAction}) select a
   * no-argument constructor. The first declared constructor with a matching parameter count
   * whose parameters can all be converted is used.
   *
   * @param actionString the action definition
   * @return the new action instance
   * @throws IllegalArgumentException if the definition has no class name or no usable
   *         constructor exists
   * @throws Exception if the class cannot be loaded or the constructor invocation fails
   */
  private Action instantiateAction(String actionString) throws Exception {
    String spec = actionString.trim();
    // Anything from the first ')' on is ignored.
    int close = spec.indexOf(')');
    String declaration = close < 0 ? spec : spec.substring(0, close);
    int open = declaration.indexOf('(');
    String simpleName = (open < 0 ? declaration : declaration.substring(0, open)).trim();
    if (simpleName.isEmpty()) {
      throw new IllegalArgumentException("Missing action class name in: " + actionString);
    }
    String className = ACTIONS_PACKAGE + "." + simpleName;
    String[] params = parseParams(open < 0 ? "" : declaration.substring(open + 1));
    LOG.info("About to instantiate action class: {}; With constructor params: {}",
      className, params);
    Class<? extends Action> actionClass = Class.forName(className).asSubclass(Action.class);
    IllegalArgumentException lastFailure = null;
    for (Constructor<?> c : actionClass.getDeclaredConstructors()) {
      if (c.getParameterCount() != params.length) {
        continue;
      }
      Object[] constructorParams;
      try {
        constructorParams = convertParams(c.getParameterTypes(), params);
      } catch (IllegalArgumentException e) {
        // Unsupported parameter type or unparsable value: another constructor may still fit.
        LOG.debug("Skipping constructor {} for {}", c, actionString, e);
        lastFailure = e;
        continue;
      }
      return actionClass.cast(c.newInstance(constructorParams));
    }
    throw new IllegalArgumentException("Couldn't find any matching constructor for: " +
      actionString, lastFailure);
  }

  /**
   * Splits the raw, comma separated parameter text, substituting the table name placeholder and
   * trimming every value.
   *
   * @param rawParams the text found between the parentheses of an action definition
   * @return the individual parameter values; empty when there are no parameters
   */
  private String[] parseParams(String rawParams) {
    String trimmed = rawParams.trim();
    if (trimmed.isEmpty()) {
      return new String[0];
    }
    String[] params = trimmed.replaceAll(TABLE_PARAM, tableName.getNameAsString()).split(",");
    for (int i = 0; i < params.length; i++) {
      params[i] = params[i].trim();
    }
    return params;
  }

  /**
   * Converts every textual parameter to the matching constructor parameter type.
   *
   * @param paramTypes the constructor parameter types
   * @param params the textual values, one per parameter type
   * @return the converted constructor arguments
   * @throws IllegalArgumentException if a type is not listed in {@link SupportedTypes} or a
   *         value cannot be parsed as a number
   */
  private static Object[] convertParams(Class<?>[] paramTypes, String[] params) {
    Object[] converted = new Object[paramTypes.length];
    for (int i = 0; i < paramTypes.length; i++) {
      // Locale.ROOT keeps the enum lookup independent of the default locale.
      String typeName = paramTypes[i].getSimpleName().toUpperCase(Locale.ROOT);
      converted[i] = SupportedTypes.valueOf(typeName).convert(params[i]);
    }
    return converted;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ public MonkeyFactory setProperties(Properties props) {
public static final String SERVER_AND_DEPENDENCIES_KILLING = "serverAndDependenciesKilling";
public static final String DISTRIBUTED_ISSUES = "distributedIssues";
public static final String DATA_ISSUES = "dataIssues";
public static final String CONFIGURABLE_SLOW_DETERMINISTIC = "configurableSlowDeterministic";

public static Map<String, MonkeyFactory> FACTORIES = ImmutableMap.<String,MonkeyFactory>builder()
.put(CALM, new CalmMonkeyFactory())
Expand All @@ -93,6 +94,7 @@ public MonkeyFactory setProperties(Properties props) {
.put(SERVER_AND_DEPENDENCIES_KILLING, new ServerAndDependenciesKillingMonkeyFactory())
.put(DISTRIBUTED_ISSUES, new DistributedIssuesMonkeyFactory())
.put(DATA_ISSUES, new DataIssuesMonkeyFactory())
.put(CONFIGURABLE_SLOW_DETERMINISTIC, new ConfigurableSlowDeterministicMonkeyFactory())
.build();

public static MonkeyFactory getFactory(String factoryName) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,53 +72,65 @@ public class SlowDeterministicMonkeyFactory extends MonkeyFactory {
private long rollingBatchSuspendRSSleepTime;
private float rollingBatchSuspendtRSRatio;

/**
 * Supplies the light-weighted actions: compacting or flushing the table or one of its random
 * regions, and moving a single random region.
 *
 * @return a newly created array with the five light-weighted actions
 */
protected Action[] getLightWeightedActions(){
  final Action compactTable = new CompactTableAction(tableName, compactTableRatio);
  final Action compactRegion =
    new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio);
  final Action flushTable = new FlushTableAction(tableName);
  final Action flushRegion = new FlushRandomRegionOfTableAction(tableName);
  final Action moveRegion = new MoveRandomRegionOfTableAction(tableName);
  return new Action[] { compactTable, compactRegion, flushTable, flushRegion, moveRegion };
}

/**
 * Supplies the mid-weighted actions: split, merge and snapshot operations plus column family
 * and table schema changes on the configured table.
 *
 * @return a newly created array with the ten mid-weighted actions
 */
protected Action[] getMidWeightedActions(){
  final Action[] midActions = new Action[10];
  int slot = 0;
  midActions[slot++] = new SplitRandomRegionOfTableAction(tableName);
  midActions[slot++] = new MergeRandomAdjacentRegionsOfTableAction(tableName);
  midActions[slot++] = new SnapshotTableAction(tableName);
  midActions[slot++] = new AddColumnAction(tableName);
  midActions[slot++] = new RemoveColumnAction(tableName, columnFamilies);
  midActions[slot++] = new ChangeEncodingAction(tableName);
  midActions[slot++] = new ChangeCompressionAction(tableName);
  midActions[slot++] = new ChangeBloomFilterAction(tableName);
  midActions[slot++] = new ChangeVersionsAction(tableName);
  midActions[slot] = new ChangeSplitPolicyAction(tableName);
  return midActions;
}

/**
 * Supplies the heavy-weighted actions: region moves, region server and active master restarts,
 * max HFile size decrease, splitting all regions and region server suspend/resume.
 *
 * @return a newly created array with the eleven heavy-weighted actions
 */
protected Action[] getHeavyWeightedActions() {
  final Action moveAllRegions =
    new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime, tableName);
  final Action moveOneRegion =
    new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName);
  final Action restartRandomRs = new RestartRandomRsAction(restartRandomRSSleepTime);
  final Action batchRestartRs =
    new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio);
  final Action restartMaster = new RestartActiveMasterAction(restartActiveMasterSleepTime);
  final Action rollingBatchRestartRs =
    new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, rollingBatchRestartRSRatio);
  final Action restartMetaRs = new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime);
  final Action shrinkHFiles =
    new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName);
  final Action splitAllRegions = new SplitAllRegionOfTableAction(tableName);
  final Action gracefulRollingRestart =
    new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime);
  final Action rollingSuspendResume = new RollingBatchSuspendResumeRsAction(
    rollingBatchSuspendRSSleepTime, rollingBatchSuspendtRSRatio);
  return new Action[] {
    moveAllRegions,
    moveOneRegion,
    restartRandomRs,
    batchRestartRs,
    restartMaster,
    rollingBatchRestartRs,
    restartMetaRs,
    shrinkHFiles,
    splitAllRegions,
    gracefulRollingRestart,
    rollingSuspendResume
  };
}

@Override
public ChaosMonkey build() {
loadProperties();
// Actions such as compact/flush a table/region,
// move one region around. They are not so destructive,
// can be executed more frequently.
Action[] actions1 = new Action[] {
new CompactTableAction(tableName, compactTableRatio),
new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio),
new FlushTableAction(tableName),
new FlushRandomRegionOfTableAction(tableName),
new MoveRandomRegionOfTableAction(tableName)
};
Action[] actions1 = getLightWeightedActions();

// Actions such as split/merge/snapshot.
// They should not cause data loss, or unreliability
// such as region stuck in transition.
Action[] actions2 = new Action[] {
new SplitRandomRegionOfTableAction(tableName),
new MergeRandomAdjacentRegionsOfTableAction(tableName),
new SnapshotTableAction(tableName),
new AddColumnAction(tableName),
new RemoveColumnAction(tableName, columnFamilies),
new ChangeEncodingAction(tableName),
new ChangeCompressionAction(tableName),
new ChangeBloomFilterAction(tableName),
new ChangeVersionsAction(tableName),
new ChangeSplitPolicyAction(tableName),
};
Action[] actions2 = getMidWeightedActions();

// Destructive actions to mess things around.
Action[] actions3 = new Action[] {
new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime,
tableName),
new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName),
new RestartRandomRsAction(restartRandomRSSleepTime),
new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio),
new RestartActiveMasterAction(restartActiveMasterSleepTime),
new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime,
rollingBatchRestartRSRatio),
new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime),
new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName),
new SplitAllRegionOfTableAction(tableName),
new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
rollingBatchSuspendtRSRatio)
};
Action[] actions3 = getHeavyWeightedActions();

// Action to log more info for debugging
Action[] actions4 = new Action[] {
Expand Down

0 comments on commit a36d41a

Please sign in to comment.