From 3f23b251c84d359c788cecf7f1e5bcfff8e0d83b Mon Sep 17 00:00:00 2001 From: Ruizhen Guo <55893852+rguo-aws@users.noreply.github.com> Date: Fri, 14 Aug 2020 14:25:46 -0700 Subject: [PATCH] Add integ test for queue rejection cluster RCA (#370) --- .../api/metrics/ThreadPool_QueueCapacity.java | 4 +- .../api/metrics/ThreadPool_RejectedReqs.java | 4 +- .../integTests/framework/configs/Consts.java | 3 + .../tests/queue_tuning/RcaItQueueTuning.java | 112 ++++++++++++++++++ .../tests/queue_tuning/resource/rca.conf | 78 ++++++++++++ .../validator/QueueRejectionValidator.java | 98 +++++++++++++++ .../integTests/tests/util/JsonParserUtil.java | 40 +++++++ 7 files changed, 337 insertions(+), 2 deletions(-) create mode 100644 src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/tests/queue_tuning/RcaItQueueTuning.java create mode 100644 src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/tests/queue_tuning/resource/rca.conf create mode 100644 src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/tests/queue_tuning/validator/QueueRejectionValidator.java create mode 100644 src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/tests/util/JsonParserUtil.java diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/api/metrics/ThreadPool_QueueCapacity.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/api/metrics/ThreadPool_QueueCapacity.java index d4781e3eb..4d0daf727 100644 --- a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/api/metrics/ThreadPool_QueueCapacity.java +++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/api/metrics/ThreadPool_QueueCapacity.java @@ -19,7 +19,9 @@ import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.Metric; public class ThreadPool_QueueCapacity extends Metric { + public static final String NAME = AllMetrics.ThreadPoolValue.THREADPOOL_QUEUE_CAPACITY.toString(); + public ThreadPool_QueueCapacity() { - super(AllMetrics.ThreadPoolValue.THREADPOOL_QUEUE_CAPACITY.toString(), 5); + super(NAME, 5); } } diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/api/metrics/ThreadPool_RejectedReqs.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/api/metrics/ThreadPool_RejectedReqs.java index 545ed9e67..0ba6133f0 100644 --- a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/api/metrics/ThreadPool_RejectedReqs.java +++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/api/metrics/ThreadPool_RejectedReqs.java @@ -19,7 +19,9 @@ import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.Metric; public class ThreadPool_RejectedReqs extends Metric { + public static final String NAME = AllMetrics.ThreadPoolValue.THREADPOOL_REJECTED_REQS.toString(); + public ThreadPool_RejectedReqs(long evaluationIntervalSeconds) { - super(AllMetrics.ThreadPoolValue.THREADPOOL_REJECTED_REQS.toString(), evaluationIntervalSeconds); + super(NAME, evaluationIntervalSeconds); } } diff --git a/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/framework/configs/Consts.java b/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/framework/configs/Consts.java index 5f60ae4ad..872de8978 100644 --- a/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/framework/configs/Consts.java +++ b/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/framework/configs/Consts.java @@ -10,6 +10,9 @@ public class Consts { public static final String RCAIT_DEFAULT_RCA_CONF_STANDBY_MASTER_NODE = TEST_RESOURCES_DIR + "rca_master.conf"; public static final String RCAIT_DEFAULT_RCA_CONF_DATA_NODE = TEST_RESOURCES_DIR + "rca.conf"; + public static final String INTEG_TESTS_SRC_DIR = + "./src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/"; + public static final String HOST_ID_KEY = "hostId"; public static final String HOST_ROLE_KEY = "hostRole"; public static final String DATA_KEY = "data"; diff --git a/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/tests/queue_tuning/RcaItQueueTuning.java b/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/tests/queue_tuning/RcaItQueueTuning.java new file mode 100644 index 000000000..8f079f065 --- /dev/null +++ b/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/tests/queue_tuning/RcaItQueueTuning.java @@ -0,0 +1,112 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.tests.queue_tuning; + +import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.ThreadPoolDimension; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.ThreadPoolType; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.metrics.ThreadPool_QueueCapacity; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.metrics.ThreadPool_RejectedReqs; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.RcaItMarker; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.AClusterType; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.AErrorPatternIgnored; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.AExpect; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.AMetric; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.ARcaConf; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.ARcaGraph; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.ATable; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.ATuple; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.configs.ClusterType; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.configs.Consts; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.configs.HostTag; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.runners.RcaItNotEncryptedRunner; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.tests.queue_tuning.validator.QueueRejectionValidator; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.ElasticSearchAnalysisGraph; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.cluster.QueueRejectionClusterRca; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; + +@RunWith(RcaItNotEncryptedRunner.class) + +@Category(RcaItMarker.class) +@AClusterType(ClusterType.MULTI_NODE_CO_LOCATED_MASTER) +@ARcaGraph(ElasticSearchAnalysisGraph.class) +//specify a custom rca.conf to set the rejection-time-period-in-seconds to 5s to reduce runtime +@ARcaConf(dataNode = RcaItQueueTuning.QUEUE_TUNING_RESOURCES_DIR + "rca.conf") +@AMetric(name = ThreadPool_RejectedReqs.class, + dimensionNames = {ThreadPoolDimension.Constants.TYPE_VALUE}, + tables = { + @ATable(hostTag = HostTag.DATA_0, + tuple = { + @ATuple(dimensionValues = {ThreadPoolType.Constants.WRITE_NAME}, + sum = 1.0, avg = 1.0, min = 1.0, max = 1.0), + @ATuple(dimensionValues = {ThreadPoolType.Constants.SEARCH_NAME}, + sum = 0.0, avg = 0.0, min = 0.0, max = 0.0) + } + ), + @ATable(hostTag = {HostTag.ELECTED_MASTER}, + tuple = { + @ATuple(dimensionValues = {ThreadPoolType.Constants.WRITE_NAME}, + sum = 0.0, avg = 0.0, min = 0.0, max = 0.0), + @ATuple(dimensionValues = {ThreadPoolType.Constants.SEARCH_NAME}, + sum = 0.0, avg = 0.0, min = 0.0, max = 0.0) + } + ) + } +) + +@AMetric(name = ThreadPool_QueueCapacity.class, + dimensionNames = {ThreadPoolDimension.Constants.TYPE_VALUE}, + tables = { + @ATable(hostTag = HostTag.DATA_0, + tuple = { + @ATuple(dimensionValues = {ThreadPoolType.Constants.WRITE_NAME}, + sum = 500, avg = 500, min = 500, max = 500), + @ATuple(dimensionValues = {ThreadPoolType.Constants.SEARCH_NAME}, + sum = 1500, avg = 1500, min = 1500, max = 1500) + } + ), + @ATable(hostTag = {HostTag.ELECTED_MASTER}, + tuple = { + @ATuple(dimensionValues = {ThreadPoolType.Constants.WRITE_NAME}, + sum = 0.0, avg = 0.0, min = 0.0, max = 0.0), + @ATuple(dimensionValues = {ThreadPoolType.Constants.SEARCH_NAME}, + sum = 1500, avg = 1500, min = 1500, max = 1500) + } + ) + } +) +public class RcaItQueueTuning { + public static final String QUEUE_TUNING_RESOURCES_DIR = Consts.INTEG_TESTS_SRC_DIR + "./tests/queue_tuning/resource/"; + + // This integ test is built to test queue rejection RCA + queue rejection cluster RCA + // This test injects queue rejection metrics on one of the data node and queries the + // rest API on master to check whether queue rejection cluster RCA becomes unhealthy + //TODO : extend this integ test to cover Decision Maker framework and queue remediation actions + @Test + @AExpect( + what = AExpect.Type.REST_API, + on = HostTag.ELECTED_MASTER, + validator = QueueRejectionValidator.class, + forRca = QueueRejectionClusterRca.class, + timeoutSeconds = 500) + @AErrorPatternIgnored(pattern = "CacheUtil:getCacheMaxSize()", + reason = "Cache related configs are expected to be missing in this integ test") + @AErrorPatternIgnored(pattern = "AggregateMetric:gather()", + reason = "Cache metrics are expected to be missing in this integ test") + public void testQueueRejectionRca() { + } +} diff --git a/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/tests/queue_tuning/resource/rca.conf b/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/tests/queue_tuning/resource/rca.conf new file mode 100644 index 000000000..00a05f29e --- /dev/null +++ b/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/tests/queue_tuning/resource/rca.conf @@ -0,0 +1,78 @@ +{ + "analysis-graph-implementor": + "com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.AnalysisGraphTest", + // it can be file:// or s3:// + "rca-store-location": "s3://sifi-store/rcas/", + + //it can be file:// or s3:// + "threshold-store-location": "s3://sifi-store/thresholds/", + + "new-rca-check-minutes": 60, + + "new-threshold-check-minutes": 30, + + // The size of the task queue for all networking operations. + // Small size queues may result in dropping of flow units, while large size queues can lead to a + // bigger backlog of tasks resulting in delays in sending and receiving. + "network-queue-length": 200, + + // The size of the per-vertex buffer for flow units received from remote nodes. + // Small buffer sizes may result in dropping of flow units, while large buffer size can lead to + // high memory consumptions depending on how the analysis graph is configured. + "max-flow-units-per-vertex-buffer": 200, + + "tags": { + "locus": "data-node", + "disk": "ssd", + "region": "use1", + "instance-type": "i3.8xl", + "domain": "rca-test-cluster" + }, + + "remote-peers": ["ip1", "ip2", "ip3"], + + // Tells the runtime where the RCAs will be stored. + "datastore": { + // accepted types are sqlite, in-memory. + "type": "sqlite", + "location-dir": "/tmp", + "filename": "rca.sqlite", + "storage-file-retention-count": 5, + // How often the sqlite file be repeated in seconds. This file contains RCAs and therefore rotating it too frequently + // might not be as fruitful as there might not be any data. + "rotation-period-seconds": 21600 + }, + + // Add config settings for different RCAs + "rca-config-settings": { + // old gen rca + "high-heap-usage-old-gen-rca": { + "top-k" : 3 + }, + //young gen rca + "high-heap-usage-young-gen-rca": { + "promotion-rate-mb-per-second" : 500, + "young-gen-gc-time-ms-per-second" : 400 + }, + "queue-rejection-rca": { + "rejection-time-period-in-seconds" : 5 + }, + //hot shard rca + "hot-shard-rca": { + "cpu-utilization" : 0.01, + "io-total-throughput-in-bytes" : 250000.0, + "io-total-syscallrate-per-second" : 0.1 + } + }, + + "muted-rcas": [], + "muted-deciders": [], + "muted-actions": [], + + "decider-config-settings": { + "cache-decider-config": { + "field-data-cache-upper-bound" : 0.4, + "shard-request-cache-upper-bound" : 0.05 + } + } +} \ No newline at end of file diff --git a/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/tests/queue_tuning/validator/QueueRejectionValidator.java b/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/tests/queue_tuning/validator/QueueRejectionValidator.java new file mode 100644 index 000000000..e92c26939 --- /dev/null +++ b/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/tests/queue_tuning/validator/QueueRejectionValidator.java @@ -0,0 +1,98 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.tests.queue_tuning.validator; + +import static com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotNodeSummary.SQL_SCHEMA_CONSTANTS.HOST_IP_ADDRESS_COL_NAME; +import static com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotNodeSummary.SQL_SCHEMA_CONSTANTS.NODE_ID_COL_NAME; +import static com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotResourceSummary.SQL_SCHEMA_CONSTANTS.RESOURCE_TYPE_COL_NAME; + +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotClusterSummary; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotNodeSummary; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotResourceSummary; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.api.IValidator; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.tests.util.JsonParserUtil; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.cluster.QueueRejectionClusterRca; +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import org.junit.Assert; + +public class QueueRejectionValidator implements IValidator { + long startTime; + + public QueueRejectionValidator() { + startTime = System.currentTimeMillis(); + } + + /** + * {"rca_name":"QueueRejectionClusterRca", + * "timestamp":1596557050522, + * "state":"unhealthy", + * "HotClusterSummary":[ + * {"number_of_nodes":1,"number_of_unhealthy_nodes":1} + * ]} + */ + @Override + public boolean check(JsonElement response) { + JsonArray array = response.getAsJsonObject().get("data").getAsJsonArray(); + if (array.size() == 0) { + return false; + } + + for (int i = 0; i < array.size(); i++) { + JsonObject object = array.get(i).getAsJsonObject(); + if (object.get("rca_name").getAsString().equals(QueueRejectionClusterRca.RCA_TABLE_NAME)) { + return checkClusterRca(object); + } + } + return false; + } + + /** + * {"rca_name":"QueueRejectionClusterRca", + * "timestamp":1597167456322, + * "state":"unhealthy", + * "HotClusterSummary":[{"number_of_nodes":1,"number_of_unhealthy_nodes":1}] + * } + */ + boolean checkClusterRca(JsonObject rcaObject) { + if (!"unhealthy".equals(rcaObject.get("state").getAsString())) { + return false; + } + Assert.assertEquals(1, + JsonParserUtil.getSummaryJsonSize(rcaObject, HotClusterSummary.HOT_CLUSTER_SUMMARY_TABLE)); + JsonObject clusterSummaryJson = + JsonParserUtil.getSummaryJson(rcaObject, HotClusterSummary.HOT_CLUSTER_SUMMARY_TABLE, 0); + Assert.assertNotNull(clusterSummaryJson); + Assert.assertEquals(1, clusterSummaryJson.get("number_of_unhealthy_nodes").getAsInt()); + + Assert.assertEquals(1, + JsonParserUtil.getSummaryJsonSize(clusterSummaryJson, HotNodeSummary.HOT_NODE_SUMMARY_TABLE)); + JsonObject nodeSummaryJson = + JsonParserUtil.getSummaryJson(clusterSummaryJson, HotNodeSummary.HOT_NODE_SUMMARY_TABLE, 0); + Assert.assertNotNull(nodeSummaryJson); + Assert.assertEquals("DATA_0", nodeSummaryJson.get(NODE_ID_COL_NAME).getAsString()); + Assert.assertEquals("127.0.0.1", nodeSummaryJson.get(HOST_IP_ADDRESS_COL_NAME).getAsString()); + + Assert.assertEquals(1, + JsonParserUtil.getSummaryJsonSize(nodeSummaryJson, HotResourceSummary.HOT_RESOURCE_SUMMARY_TABLE)); + JsonObject resourceSummaryJson = + JsonParserUtil.getSummaryJson(nodeSummaryJson, HotResourceSummary.HOT_RESOURCE_SUMMARY_TABLE, 0); + Assert.assertNotNull(resourceSummaryJson); + Assert.assertEquals("write threadpool", resourceSummaryJson.get(RESOURCE_TYPE_COL_NAME).getAsString()); + return true; + } +} diff --git a/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/tests/util/JsonParserUtil.java b/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/tests/util/JsonParserUtil.java new file mode 100644 index 000000000..607061cf6 --- /dev/null +++ b/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/integTests/tests/util/JsonParserUtil.java @@ -0,0 +1,40 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.tests.util; + +import com.google.gson.JsonArray; +import com.google.gson.JsonObject; + +public class JsonParserUtil { + public static int getSummaryJsonSize(JsonObject jsonObject, String summaryName) { + JsonArray array = jsonObject.get(summaryName).getAsJsonArray(); + if (array == null) { + return 0; + } + return array.size(); + } + + public static JsonObject getSummaryJson(JsonObject jsonObject, String summaryName, int idx) { + JsonArray array = jsonObject.get(summaryName).getAsJsonArray(); + if (array == null) { + return null; + } + if (idx >= array.size()) { + return null; + } + return array.get(idx).getAsJsonObject(); + } +}