Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HBASE-26556 IT and Chaos Monkey improvements #3932

Merged
merged 2 commits into from
Dec 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseClusterManager.CommandProvider.Operation;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.ReflectionUtils;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.hadoop.hbase.util.RetryCounter.RetryConfig;
import org.apache.hadoop.hbase.util.RetryCounterFactory;
Expand Down Expand Up @@ -216,7 +217,7 @@ protected String findPidCommand(ServiceType service) {
}

/**
 * Builds a shell pipeline that sends {@code signal} to the process of the given service: the
 * output of {@link #findPidCommand(ServiceType)} is piped into {@code xargs}, which runs
 * {@code kill -s <signal>} on it.
 * <p>
 * NOTE(review): this hunk lists two {@code return} lines back to back (plain {@code kill} and
 * {@code sudo kill}); presumably they are the removed and the added line of the diff, so only
 * one of them belongs in the final method -- confirm against the merged file.
 * @param service the service whose process should be signalled
 * @param signal the signal name handed to {@code kill -s}
 * @return the command line to execute
 */
public String signalCommand(ServiceType service, String signal) {
return String.format("%s | xargs kill -s %s", findPidCommand(service), signal);
return String.format("%s | xargs sudo kill -s %s", findPidCommand(service), signal);
joshelser marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down Expand Up @@ -322,7 +323,10 @@ protected CommandProvider getCommandProvider(ServiceType service) throws IOExcep
case ZOOKEEPER_SERVER:
return new ZookeeperShellCommandProvider(getConf());
default:
return new HBaseShellCommandProvider(getConf());
Class<? extends CommandProvider> provider = getConf()
.getClass("hbase.it.clustermanager.hbase.command.provider",
joshelser marked this conversation as resolved.
Show resolved Hide resolved
HBaseShellCommandProvider.class, CommandProvider.class);
return ReflectionUtils.newInstance(provider, getConf());
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.chaos.factories;

import java.lang.reflect.Constructor;
import java.util.Arrays;
import java.util.Locale;
import java.util.function.Function;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.chaos.actions.Action;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ConfigurableSlowDeterministicMonkeyFactory extends SlowDeterministicMonkeyFactory {

private static final Logger LOG =
LoggerFactory.getLogger(ConfigurableSlowDeterministicMonkeyFactory.class);

final static String HEAVY_ACTIONS = "heavy.actions";
final static String TABLE_PARAM = "\\$table_name";

public enum SupportedTypes {
FLOAT(p->Float.parseFloat(p)),
LONG(p-> Long.parseLong(p)),
INT(p-> Integer.parseInt(p)),
TABLENAME(p-> TableName.valueOf(p));

final Function<String,Object> converter;

SupportedTypes(Function<String,Object> converter){
this.converter = converter;
}

Object convert(String param){
return converter.apply(param);
}
}

@Override
protected Action[] getHeavyWeightedActions() {
String actions = this.properties.getProperty(HEAVY_ACTIONS);
if(actions==null || actions.isEmpty()){
return super.getHeavyWeightedActions();
} else {
try {
String[] actionClasses = actions.split(";");
Action[] heavyActions = new Action[actionClasses.length];
for (int i = 0; i < actionClasses.length; i++) {
heavyActions[i] = instantiateAction(actionClasses[i]);
}
LOG.info("Created actions {}", heavyActions);
return heavyActions;
} catch(Exception e) {
LOG.error("Error trying to instantiate heavy actions. Returning null array.", e);
}
return null;
}
}

private Action instantiateAction(String actionString) throws Exception {
final String packageName = "org.apache.hadoop.hbase.chaos.actions";
String[] classAndParams = actionString.split("\\)")[0].split("\\(");
String className = packageName + "." + classAndParams[0];
String[] params = classAndParams[1].replaceAll(TABLE_PARAM,
tableName.getNameAsString()).split(",");
LOG.info("About to instantiate action class: {}; With constructor params: {}",
className, params);
Class<? extends Action> actionClass = (Class<? extends Action>)Class.forName(className);
joshelser marked this conversation as resolved.
Show resolved Hide resolved
Constructor<? extends Action>[] constructors =
(Constructor<? extends Action>[]) actionClass.getDeclaredConstructors();
for(Constructor<? extends Action> c : constructors){
if (c.getParameterCount() != params.length){
continue;
}
Class[] paramTypes = c.getParameterTypes();
Object[] constructorParams = new Object[paramTypes.length];
for(int i=0; i<paramTypes.length; i++){
constructorParams[i] = SupportedTypes.valueOf(paramTypes[i].getSimpleName().toUpperCase())
.convert(params[i]);
}
return c.newInstance(constructorParams);
}
throw new IllegalArgumentException("Couldn't find any matching constructor for: " +
actionString);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ public MonkeyFactory setProperties(Properties props) {
public static final String SERVER_AND_DEPENDENCIES_KILLING = "serverAndDependenciesKilling";
public static final String DISTRIBUTED_ISSUES = "distributedIssues";
public static final String DATA_ISSUES = "dataIssues";
public static final String CONFIGURABLE_SLOW_DETERMINISTIC = "configurableSlowDeterministic";

public static Map<String, MonkeyFactory> FACTORIES = ImmutableMap.<String,MonkeyFactory>builder()
.put(CALM, new CalmMonkeyFactory())
Expand All @@ -93,6 +94,7 @@ public MonkeyFactory setProperties(Properties props) {
.put(SERVER_AND_DEPENDENCIES_KILLING, new ServerAndDependenciesKillingMonkeyFactory())
.put(DISTRIBUTED_ISSUES, new DistributedIssuesMonkeyFactory())
.put(DATA_ISSUES, new DataIssuesMonkeyFactory())
.put(CONFIGURABLE_SLOW_DETERMINISTIC, new ConfigurableSlowDeterministicMonkeyFactory())
.build();

public static MonkeyFactory getFactory(String factoryName) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,53 +72,65 @@ public class SlowDeterministicMonkeyFactory extends MonkeyFactory {
private long rollingBatchSuspendRSSleepTime;
private float rollingBatchSuspendtRSRatio;

/**
 * Creates the light-weighted actions: compacting or flushing the table or one of its regions
 * and moving a single region. Subclasses may override this to supply a different set.
 * @return a freshly created array of actions
 */
protected Action[] getLightWeightedActions() {
  final Action[] lightActions = {
    new CompactTableAction(tableName, compactTableRatio),
    new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio),
    new FlushTableAction(tableName),
    new FlushRandomRegionOfTableAction(tableName),
    new MoveRandomRegionOfTableAction(tableName),
  };
  return lightActions;
}

/**
 * Creates the mid-weighted actions: split, merge and snapshot operations plus schema changes
 * (column families, encoding, compression, bloom filter, versions, split policy) on the table.
 * Subclasses may override this to supply a different set.
 * @return a freshly created array of actions
 */
protected Action[] getMidWeightedActions() {
  final Action[] midActions = {
    new SplitRandomRegionOfTableAction(tableName),
    new MergeRandomAdjacentRegionsOfTableAction(tableName),
    new SnapshotTableAction(tableName),
    new AddColumnAction(tableName),
    new RemoveColumnAction(tableName, columnFamilies),
    new ChangeEncodingAction(tableName),
    new ChangeCompressionAction(tableName),
    new ChangeBloomFilterAction(tableName),
    new ChangeVersionsAction(tableName),
    new ChangeSplitPolicyAction(tableName),
  };
  return midActions;
}

/**
 * Creates the heavy-weighted actions: bulk region moves, region server and master restarts
 * (single, batch, rolling, graceful, suspend/resume), shrinking the max HFile size and
 * splitting all regions. Subclasses may override this to supply a different set.
 * @return a freshly created array of actions
 */
protected Action[] getHeavyWeightedActions() {
  final Action[] heavyActions = {
    new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime, tableName),
    new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName),
    new RestartRandomRsAction(restartRandomRSSleepTime),
    new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio),
    new RestartActiveMasterAction(restartActiveMasterSleepTime),
    new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, rollingBatchRestartRSRatio),
    new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime),
    new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName),
    new SplitAllRegionOfTableAction(tableName),
    new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
    new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
      rollingBatchSuspendtRSRatio),
  };
  return heavyActions;
}

@Override
public ChaosMonkey build() {
loadProperties();
// Actions such as compact/flush a table/region,
// move one region around. They are not so destructive,
// can be executed more frequently.
Action[] actions1 = new Action[] {
new CompactTableAction(tableName, compactTableRatio),
new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio),
new FlushTableAction(tableName),
new FlushRandomRegionOfTableAction(tableName),
new MoveRandomRegionOfTableAction(tableName)
};
Action[] actions1 = getLightWeightedActions();

// Actions such as split/merge/snapshot.
// They should not cause data loss, or unreliability
// such as region stuck in transition.
Action[] actions2 = new Action[] {
new SplitRandomRegionOfTableAction(tableName),
new MergeRandomAdjacentRegionsOfTableAction(tableName),
new SnapshotTableAction(tableName),
new AddColumnAction(tableName),
new RemoveColumnAction(tableName, columnFamilies),
new ChangeEncodingAction(tableName),
new ChangeCompressionAction(tableName),
new ChangeBloomFilterAction(tableName),
new ChangeVersionsAction(tableName),
new ChangeSplitPolicyAction(tableName),
};
Action[] actions2 = getMidWeightedActions();

// Destructive actions to mess things around.
Action[] actions3 = new Action[] {
new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime,
tableName),
new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName),
new RestartRandomRsAction(restartRandomRSSleepTime),
new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio),
new RestartActiveMasterAction(restartActiveMasterSleepTime),
new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime,
rollingBatchRestartRSRatio),
new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime),
new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName),
new SplitAllRegionOfTableAction(tableName),
new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
rollingBatchSuspendtRSRatio)
};
Action[] actions3 = getHeavyWeightedActions();

// Action to log more info for debugging
Action[] actions4 = new Action[] {
Expand Down