Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Commit

Permalink
Add integ test for queue rejection cluster RCA (#370)
Browse files Browse the repository at this point in the history
  • Loading branch information
rguo-aws authored Aug 14, 2020
1 parent e121192 commit 3f23b25
Show file tree
Hide file tree
Showing 7 changed files with 337 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.Metric;

public class ThreadPool_QueueCapacity extends Metric {
public static final String NAME = AllMetrics.ThreadPoolValue.THREADPOOL_QUEUE_CAPACITY.toString();

public ThreadPool_QueueCapacity() {
super(AllMetrics.ThreadPoolValue.THREADPOOL_QUEUE_CAPACITY.toString(), 5);
super(NAME, 5);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.Metric;

public class ThreadPool_RejectedReqs extends Metric {
public static final String NAME = AllMetrics.ThreadPoolValue.THREADPOOL_REJECTED_REQS.toString();

public ThreadPool_RejectedReqs(long evaluationIntervalSeconds) {
super(AllMetrics.ThreadPoolValue.THREADPOOL_REJECTED_REQS.toString(), evaluationIntervalSeconds);
super(NAME, evaluationIntervalSeconds);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ public class Consts {
public static final String RCAIT_DEFAULT_RCA_CONF_STANDBY_MASTER_NODE = TEST_RESOURCES_DIR + "rca_master.conf";
public static final String RCAIT_DEFAULT_RCA_CONF_DATA_NODE = TEST_RESOURCES_DIR + "rca.conf";

public static final String INTEG_TESTS_SRC_DIR =
"./src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/";

public static final String HOST_ID_KEY = "hostId";
public static final String HOST_ROLE_KEY = "hostRole";
public static final String DATA_KEY = "data";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.tests.queue_tuning;

import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.ThreadPoolDimension;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.ThreadPoolType;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.metrics.ThreadPool_QueueCapacity;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.metrics.ThreadPool_RejectedReqs;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.RcaItMarker;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.AClusterType;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.AErrorPatternIgnored;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.AExpect;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.AMetric;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.ARcaConf;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.ARcaGraph;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.ATable;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.ATuple;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.configs.ClusterType;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.configs.Consts;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.configs.HostTag;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.runners.RcaItNotEncryptedRunner;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.tests.queue_tuning.validator.QueueRejectionValidator;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.ElasticSearchAnalysisGraph;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.cluster.QueueRejectionClusterRca;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;

@RunWith(RcaItNotEncryptedRunner.class)

@Category(RcaItMarker.class)
@AClusterType(ClusterType.MULTI_NODE_CO_LOCATED_MASTER)
@ARcaGraph(ElasticSearchAnalysisGraph.class)
//specify a custom rca.conf to set the rejection-time-period-in-seconds to 5s to reduce runtime
@ARcaConf(dataNode = RcaItQueueTuning.QUEUE_TUNING_RESOURCES_DIR + "rca.conf")
@AMetric(name = ThreadPool_RejectedReqs.class,
dimensionNames = {ThreadPoolDimension.Constants.TYPE_VALUE},
tables = {
@ATable(hostTag = HostTag.DATA_0,
tuple = {
@ATuple(dimensionValues = {ThreadPoolType.Constants.WRITE_NAME},
sum = 1.0, avg = 1.0, min = 1.0, max = 1.0),
@ATuple(dimensionValues = {ThreadPoolType.Constants.SEARCH_NAME},
sum = 0.0, avg = 0.0, min = 0.0, max = 0.0)
}
),
@ATable(hostTag = {HostTag.ELECTED_MASTER},
tuple = {
@ATuple(dimensionValues = {ThreadPoolType.Constants.WRITE_NAME},
sum = 0.0, avg = 0.0, min = 0.0, max = 0.0),
@ATuple(dimensionValues = {ThreadPoolType.Constants.SEARCH_NAME},
sum = 0.0, avg = 0.0, min = 0.0, max = 0.0)
}
)
}
)

@AMetric(name = ThreadPool_QueueCapacity.class,
dimensionNames = {ThreadPoolDimension.Constants.TYPE_VALUE},
tables = {
@ATable(hostTag = HostTag.DATA_0,
tuple = {
@ATuple(dimensionValues = {ThreadPoolType.Constants.WRITE_NAME},
sum = 500, avg = 500, min = 500, max = 500),
@ATuple(dimensionValues = {ThreadPoolType.Constants.SEARCH_NAME},
sum = 1500, avg = 1500, min = 1500, max = 1500)
}
),
@ATable(hostTag = {HostTag.ELECTED_MASTER},
tuple = {
@ATuple(dimensionValues = {ThreadPoolType.Constants.WRITE_NAME},
sum = 0.0, avg = 0.0, min = 0.0, max = 0.0),
@ATuple(dimensionValues = {ThreadPoolType.Constants.SEARCH_NAME},
sum = 1500, avg = 1500, min = 1500, max = 1500)
}
)
}
)
public class RcaItQueueTuning {
public static final String QUEUE_TUNING_RESOURCES_DIR = Consts.INTEG_TESTS_SRC_DIR + "./tests/queue_tuning/resource/";

// This integ test is built to test queue rejection RCA + queue rejection cluster RCA
// This test injects queue rejection metrics on one of the data node and queries the
// rest API on master to check whether queue rejection cluster RCA becomes unhealthy
//TODO : extend this integ test to cover Decision Maker framework and queue remediation actions
@Test
@AExpect(
what = AExpect.Type.REST_API,
on = HostTag.ELECTED_MASTER,
validator = QueueRejectionValidator.class,
forRca = QueueRejectionClusterRca.class,
timeoutSeconds = 500)
@AErrorPatternIgnored(pattern = "CacheUtil:getCacheMaxSize()",
reason = "Cache related configs are expected to be missing in this integ test")
@AErrorPatternIgnored(pattern = "AggregateMetric:gather()",
reason = "Cache metrics are expected to be missing in this integ test")
public void testQueueRejectionRca() {
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
{
"analysis-graph-implementor":
"com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.AnalysisGraphTest",
// it can be file:// or s3://
"rca-store-location": "s3://sifi-store/rcas/",

//it can be file:// or s3://
"threshold-store-location": "s3://sifi-store/thresholds/",

"new-rca-check-minutes": 60,

"new-threshold-check-minutes": 30,

// The size of the task queue for all networking operations.
// Small size queues may result in dropping of flow units, while large size queues can lead to a
// bigger backlog of tasks resulting in delays in sending and receiving.
"network-queue-length": 200,

// The size of the per-vertex buffer for flow units received from remote nodes.
// Small buffer sizes may result in dropping of flow units, while large buffer size can lead to
// high memory consumptions depending on how the analysis graph is configured.
"max-flow-units-per-vertex-buffer": 200,

"tags": {
"locus": "data-node",
"disk": "ssd",
"region": "use1",
"instance-type": "i3.8xl",
"domain": "rca-test-cluster"
},

"remote-peers": ["ip1", "ip2", "ip3"],

// Tells the runtime where the RCAs will be stored.
"datastore": {
// accepted types are sqlite, in-memory.
"type": "sqlite",
"location-dir": "/tmp",
"filename": "rca.sqlite",
"storage-file-retention-count": 5,
// How often the sqlite file be repeated in seconds. This file contains RCAs and therefore rotating it too frequently
// might not be as fruitful as there might not be any data.
"rotation-period-seconds": 21600
},

// Add config settings for different RCAs
"rca-config-settings": {
// old gen rca
"high-heap-usage-old-gen-rca": {
"top-k" : 3
},
//young gen rca
"high-heap-usage-young-gen-rca": {
"promotion-rate-mb-per-second" : 500,
"young-gen-gc-time-ms-per-second" : 400
},
"queue-rejection-rca": {
"rejection-time-period-in-seconds" : 5
},
//hot shard rca
"hot-shard-rca": {
"cpu-utilization" : 0.01,
"io-total-throughput-in-bytes" : 250000.0,
"io-total-syscallrate-per-second" : 0.1
}
},

"muted-rcas": [],
"muted-deciders": [],
"muted-actions": [],

"decider-config-settings": {
"cache-decider-config": {
"field-data-cache-upper-bound" : 0.4,
"shard-request-cache-upper-bound" : 0.05
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.tests.queue_tuning.validator;

import static com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotNodeSummary.SQL_SCHEMA_CONSTANTS.HOST_IP_ADDRESS_COL_NAME;
import static com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotNodeSummary.SQL_SCHEMA_CONSTANTS.NODE_ID_COL_NAME;
import static com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotResourceSummary.SQL_SCHEMA_CONSTANTS.RESOURCE_TYPE_COL_NAME;

import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotClusterSummary;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotNodeSummary;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotResourceSummary;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.api.IValidator;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.tests.util.JsonParserUtil;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.cluster.QueueRejectionClusterRca;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import org.junit.Assert;

public class QueueRejectionValidator implements IValidator {
long startTime;

public QueueRejectionValidator() {
startTime = System.currentTimeMillis();
}

/**
* {"rca_name":"QueueRejectionClusterRca",
* "timestamp":1596557050522,
* "state":"unhealthy",
* "HotClusterSummary":[
* {"number_of_nodes":1,"number_of_unhealthy_nodes":1}
* ]}
*/
@Override
public boolean check(JsonElement response) {
JsonArray array = response.getAsJsonObject().get("data").getAsJsonArray();
if (array.size() == 0) {
return false;
}

for (int i = 0; i < array.size(); i++) {
JsonObject object = array.get(i).getAsJsonObject();
if (object.get("rca_name").getAsString().equals(QueueRejectionClusterRca.RCA_TABLE_NAME)) {
return checkClusterRca(object);
}
}
return false;
}

/**
* {"rca_name":"QueueRejectionClusterRca",
* "timestamp":1597167456322,
* "state":"unhealthy",
* "HotClusterSummary":[{"number_of_nodes":1,"number_of_unhealthy_nodes":1}]
* }
*/
boolean checkClusterRca(JsonObject rcaObject) {
if (!"unhealthy".equals(rcaObject.get("state").getAsString())) {
return false;
}
Assert.assertEquals(1,
JsonParserUtil.getSummaryJsonSize(rcaObject, HotClusterSummary.HOT_CLUSTER_SUMMARY_TABLE));
JsonObject clusterSummaryJson =
JsonParserUtil.getSummaryJson(rcaObject, HotClusterSummary.HOT_CLUSTER_SUMMARY_TABLE, 0);
Assert.assertNotNull(clusterSummaryJson);
Assert.assertEquals(1, clusterSummaryJson.get("number_of_unhealthy_nodes").getAsInt());

Assert.assertEquals(1,
JsonParserUtil.getSummaryJsonSize(clusterSummaryJson, HotNodeSummary.HOT_NODE_SUMMARY_TABLE));
JsonObject nodeSummaryJson =
JsonParserUtil.getSummaryJson(clusterSummaryJson, HotNodeSummary.HOT_NODE_SUMMARY_TABLE, 0);
Assert.assertNotNull(nodeSummaryJson);
Assert.assertEquals("DATA_0", nodeSummaryJson.get(NODE_ID_COL_NAME).getAsString());
Assert.assertEquals("127.0.0.1", nodeSummaryJson.get(HOST_IP_ADDRESS_COL_NAME).getAsString());

Assert.assertEquals(1,
JsonParserUtil.getSummaryJsonSize(nodeSummaryJson, HotResourceSummary.HOT_RESOURCE_SUMMARY_TABLE));
JsonObject resourceSummaryJson =
JsonParserUtil.getSummaryJson(nodeSummaryJson, HotResourceSummary.HOT_RESOURCE_SUMMARY_TABLE, 0);
Assert.assertNotNull(resourceSummaryJson);
Assert.assertEquals("write threadpool", resourceSummaryJson.get(RESOURCE_TYPE_COL_NAME).getAsString());
return true;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.tests.util;

import com.google.gson.JsonArray;
import com.google.gson.JsonObject;

public class JsonParserUtil {
public static int getSummaryJsonSize(JsonObject jsonObject, String summaryName) {
JsonArray array = jsonObject.get(summaryName).getAsJsonArray();
if (array == null) {
return 0;
}
return array.size();
}

public static JsonObject getSummaryJson(JsonObject jsonObject, String summaryName, int idx) {
JsonArray array = jsonObject.get(summaryName).getAsJsonArray();
if (array == null) {
return null;
}
if (idx >= array.size()) {
return null;
}
return array.get(idx).getAsJsonObject();
}
}

0 comments on commit 3f23b25

Please sign in to comment.