This repository has been archived by the owner on Aug 2, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add integ test for queue rejection cluster RCA (#370)
- Loading branch information
Showing
7 changed files
with
337 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
112 changes: 112 additions & 0 deletions
112
...elasticsearch/performanceanalyzer/rca/integTests/tests/queue_tuning/RcaItQueueTuning.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
/* | ||
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"). | ||
* You may not use this file except in compliance with the License. | ||
* A copy of the License is located at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* or in the "license" file accompanying this file. This file is distributed | ||
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either | ||
* express or implied. See the License for the specific language governing | ||
* permissions and limitations under the License. | ||
*/ | ||
|
||
package com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.tests.queue_tuning; | ||
|
||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.ThreadPoolDimension; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.ThreadPoolType; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.metrics.ThreadPool_QueueCapacity; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.metrics.ThreadPool_RejectedReqs; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.RcaItMarker; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.AClusterType; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.AErrorPatternIgnored; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.AExpect; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.AMetric; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.ARcaConf; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.ARcaGraph; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.ATable; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.annotations.ATuple; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.configs.ClusterType; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.configs.Consts; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.configs.HostTag; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.runners.RcaItNotEncryptedRunner; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.tests.queue_tuning.validator.QueueRejectionValidator; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.ElasticSearchAnalysisGraph; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.cluster.QueueRejectionClusterRca; | ||
import org.junit.Test; | ||
import org.junit.experimental.categories.Category; | ||
import org.junit.runner.RunWith; | ||
|
||
@RunWith(RcaItNotEncryptedRunner.class) | ||
|
||
@Category(RcaItMarker.class) | ||
@AClusterType(ClusterType.MULTI_NODE_CO_LOCATED_MASTER) | ||
@ARcaGraph(ElasticSearchAnalysisGraph.class) | ||
//specify a custom rca.conf to set the rejection-time-period-in-seconds to 5s to reduce runtime | ||
@ARcaConf(dataNode = RcaItQueueTuning.QUEUE_TUNING_RESOURCES_DIR + "rca.conf") | ||
@AMetric(name = ThreadPool_RejectedReqs.class, | ||
dimensionNames = {ThreadPoolDimension.Constants.TYPE_VALUE}, | ||
tables = { | ||
@ATable(hostTag = HostTag.DATA_0, | ||
tuple = { | ||
@ATuple(dimensionValues = {ThreadPoolType.Constants.WRITE_NAME}, | ||
sum = 1.0, avg = 1.0, min = 1.0, max = 1.0), | ||
@ATuple(dimensionValues = {ThreadPoolType.Constants.SEARCH_NAME}, | ||
sum = 0.0, avg = 0.0, min = 0.0, max = 0.0) | ||
} | ||
), | ||
@ATable(hostTag = {HostTag.ELECTED_MASTER}, | ||
tuple = { | ||
@ATuple(dimensionValues = {ThreadPoolType.Constants.WRITE_NAME}, | ||
sum = 0.0, avg = 0.0, min = 0.0, max = 0.0), | ||
@ATuple(dimensionValues = {ThreadPoolType.Constants.SEARCH_NAME}, | ||
sum = 0.0, avg = 0.0, min = 0.0, max = 0.0) | ||
} | ||
) | ||
} | ||
) | ||
|
||
@AMetric(name = ThreadPool_QueueCapacity.class, | ||
dimensionNames = {ThreadPoolDimension.Constants.TYPE_VALUE}, | ||
tables = { | ||
@ATable(hostTag = HostTag.DATA_0, | ||
tuple = { | ||
@ATuple(dimensionValues = {ThreadPoolType.Constants.WRITE_NAME}, | ||
sum = 500, avg = 500, min = 500, max = 500), | ||
@ATuple(dimensionValues = {ThreadPoolType.Constants.SEARCH_NAME}, | ||
sum = 1500, avg = 1500, min = 1500, max = 1500) | ||
} | ||
), | ||
@ATable(hostTag = {HostTag.ELECTED_MASTER}, | ||
tuple = { | ||
@ATuple(dimensionValues = {ThreadPoolType.Constants.WRITE_NAME}, | ||
sum = 0.0, avg = 0.0, min = 0.0, max = 0.0), | ||
@ATuple(dimensionValues = {ThreadPoolType.Constants.SEARCH_NAME}, | ||
sum = 1500, avg = 1500, min = 1500, max = 1500) | ||
} | ||
) | ||
} | ||
) | ||
public class RcaItQueueTuning { | ||
public static final String QUEUE_TUNING_RESOURCES_DIR = Consts.INTEG_TESTS_SRC_DIR + "./tests/queue_tuning/resource/"; | ||
|
||
// This integ test is built to test queue rejection RCA + queue rejection cluster RCA | ||
// This test injects queue rejection metrics on one of the data node and queries the | ||
// rest API on master to check whether queue rejection cluster RCA becomes unhealthy | ||
//TODO : extend this integ test to cover Decision Maker framework and queue remediation actions | ||
@Test | ||
@AExpect( | ||
what = AExpect.Type.REST_API, | ||
on = HostTag.ELECTED_MASTER, | ||
validator = QueueRejectionValidator.class, | ||
forRca = QueueRejectionClusterRca.class, | ||
timeoutSeconds = 500) | ||
@AErrorPatternIgnored(pattern = "CacheUtil:getCacheMaxSize()", | ||
reason = "Cache related configs are expected to be missing in this integ test") | ||
@AErrorPatternIgnored(pattern = "AggregateMetric:gather()", | ||
reason = "Cache metrics are expected to be missing in this integ test") | ||
public void testQueueRejectionRca() { | ||
} | ||
} |
78 changes: 78 additions & 0 deletions
78
...tro/elasticsearch/performanceanalyzer/rca/integTests/tests/queue_tuning/resource/rca.conf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
{ | ||
"analysis-graph-implementor": | ||
"com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.AnalysisGraphTest", | ||
// it can be file:// or s3:// | ||
"rca-store-location": "s3://sifi-store/rcas/", | ||
|
||
//it can be file:// or s3:// | ||
"threshold-store-location": "s3://sifi-store/thresholds/", | ||
|
||
"new-rca-check-minutes": 60, | ||
|
||
"new-threshold-check-minutes": 30, | ||
|
||
// The size of the task queue for all networking operations. | ||
// Small size queues may result in dropping of flow units, while large size queues can lead to a | ||
// bigger backlog of tasks resulting in delays in sending and receiving. | ||
"network-queue-length": 200, | ||
|
||
// The size of the per-vertex buffer for flow units received from remote nodes. | ||
// Small buffer sizes may result in dropping of flow units, while large buffer size can lead to | ||
// high memory consumptions depending on how the analysis graph is configured. | ||
"max-flow-units-per-vertex-buffer": 200, | ||
|
||
"tags": { | ||
"locus": "data-node", | ||
"disk": "ssd", | ||
"region": "use1", | ||
"instance-type": "i3.8xl", | ||
"domain": "rca-test-cluster" | ||
}, | ||
|
||
"remote-peers": ["ip1", "ip2", "ip3"], | ||
|
||
// Tells the runtime where the RCAs will be stored. | ||
"datastore": { | ||
// accepted types are sqlite, in-memory. | ||
"type": "sqlite", | ||
"location-dir": "/tmp", | ||
"filename": "rca.sqlite", | ||
"storage-file-retention-count": 5, | ||
// How often the sqlite file be repeated in seconds. This file contains RCAs and therefore rotating it too frequently | ||
// might not be as fruitful as there might not be any data. | ||
"rotation-period-seconds": 21600 | ||
}, | ||
|
||
// Add config settings for different RCAs | ||
"rca-config-settings": { | ||
// old gen rca | ||
"high-heap-usage-old-gen-rca": { | ||
"top-k" : 3 | ||
}, | ||
//young gen rca | ||
"high-heap-usage-young-gen-rca": { | ||
"promotion-rate-mb-per-second" : 500, | ||
"young-gen-gc-time-ms-per-second" : 400 | ||
}, | ||
"queue-rejection-rca": { | ||
"rejection-time-period-in-seconds" : 5 | ||
}, | ||
//hot shard rca | ||
"hot-shard-rca": { | ||
"cpu-utilization" : 0.01, | ||
"io-total-throughput-in-bytes" : 250000.0, | ||
"io-total-syscallrate-per-second" : 0.1 | ||
} | ||
}, | ||
|
||
"muted-rcas": [], | ||
"muted-deciders": [], | ||
"muted-actions": [], | ||
|
||
"decider-config-settings": { | ||
"cache-decider-config": { | ||
"field-data-cache-upper-bound" : 0.4, | ||
"shard-request-cache-upper-bound" : 0.05 | ||
} | ||
} | ||
} |
98 changes: 98 additions & 0 deletions
98
...formanceanalyzer/rca/integTests/tests/queue_tuning/validator/QueueRejectionValidator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
/* | ||
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"). | ||
* You may not use this file except in compliance with the License. | ||
* A copy of the License is located at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* or in the "license" file accompanying this file. This file is distributed | ||
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either | ||
* express or implied. See the License for the specific language governing | ||
* permissions and limitations under the License. | ||
*/ | ||
|
||
package com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.tests.queue_tuning.validator; | ||
|
||
import static com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotNodeSummary.SQL_SCHEMA_CONSTANTS.HOST_IP_ADDRESS_COL_NAME; | ||
import static com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotNodeSummary.SQL_SCHEMA_CONSTANTS.NODE_ID_COL_NAME; | ||
import static com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotResourceSummary.SQL_SCHEMA_CONSTANTS.RESOURCE_TYPE_COL_NAME; | ||
|
||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotClusterSummary; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotNodeSummary; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.summaries.HotResourceSummary; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.framework.api.IValidator; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.tests.util.JsonParserUtil; | ||
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.cluster.QueueRejectionClusterRca; | ||
import com.google.gson.JsonArray; | ||
import com.google.gson.JsonElement; | ||
import com.google.gson.JsonObject; | ||
import org.junit.Assert; | ||
|
||
public class QueueRejectionValidator implements IValidator { | ||
long startTime; | ||
|
||
public QueueRejectionValidator() { | ||
startTime = System.currentTimeMillis(); | ||
} | ||
|
||
/** | ||
* {"rca_name":"QueueRejectionClusterRca", | ||
* "timestamp":1596557050522, | ||
* "state":"unhealthy", | ||
* "HotClusterSummary":[ | ||
* {"number_of_nodes":1,"number_of_unhealthy_nodes":1} | ||
* ]} | ||
*/ | ||
@Override | ||
public boolean check(JsonElement response) { | ||
JsonArray array = response.getAsJsonObject().get("data").getAsJsonArray(); | ||
if (array.size() == 0) { | ||
return false; | ||
} | ||
|
||
for (int i = 0; i < array.size(); i++) { | ||
JsonObject object = array.get(i).getAsJsonObject(); | ||
if (object.get("rca_name").getAsString().equals(QueueRejectionClusterRca.RCA_TABLE_NAME)) { | ||
return checkClusterRca(object); | ||
} | ||
} | ||
return false; | ||
} | ||
|
||
/** | ||
* {"rca_name":"QueueRejectionClusterRca", | ||
* "timestamp":1597167456322, | ||
* "state":"unhealthy", | ||
* "HotClusterSummary":[{"number_of_nodes":1,"number_of_unhealthy_nodes":1}] | ||
* } | ||
*/ | ||
boolean checkClusterRca(JsonObject rcaObject) { | ||
if (!"unhealthy".equals(rcaObject.get("state").getAsString())) { | ||
return false; | ||
} | ||
Assert.assertEquals(1, | ||
JsonParserUtil.getSummaryJsonSize(rcaObject, HotClusterSummary.HOT_CLUSTER_SUMMARY_TABLE)); | ||
JsonObject clusterSummaryJson = | ||
JsonParserUtil.getSummaryJson(rcaObject, HotClusterSummary.HOT_CLUSTER_SUMMARY_TABLE, 0); | ||
Assert.assertNotNull(clusterSummaryJson); | ||
Assert.assertEquals(1, clusterSummaryJson.get("number_of_unhealthy_nodes").getAsInt()); | ||
|
||
Assert.assertEquals(1, | ||
JsonParserUtil.getSummaryJsonSize(clusterSummaryJson, HotNodeSummary.HOT_NODE_SUMMARY_TABLE)); | ||
JsonObject nodeSummaryJson = | ||
JsonParserUtil.getSummaryJson(clusterSummaryJson, HotNodeSummary.HOT_NODE_SUMMARY_TABLE, 0); | ||
Assert.assertNotNull(nodeSummaryJson); | ||
Assert.assertEquals("DATA_0", nodeSummaryJson.get(NODE_ID_COL_NAME).getAsString()); | ||
Assert.assertEquals("127.0.0.1", nodeSummaryJson.get(HOST_IP_ADDRESS_COL_NAME).getAsString()); | ||
|
||
Assert.assertEquals(1, | ||
JsonParserUtil.getSummaryJsonSize(nodeSummaryJson, HotResourceSummary.HOT_RESOURCE_SUMMARY_TABLE)); | ||
JsonObject resourceSummaryJson = | ||
JsonParserUtil.getSummaryJson(nodeSummaryJson, HotResourceSummary.HOT_RESOURCE_SUMMARY_TABLE, 0); | ||
Assert.assertNotNull(resourceSummaryJson); | ||
Assert.assertEquals("write threadpool", resourceSummaryJson.get(RESOURCE_TYPE_COL_NAME).getAsString()); | ||
return true; | ||
} | ||
} |
40 changes: 40 additions & 0 deletions
40
...pendistro/elasticsearch/performanceanalyzer/rca/integTests/tests/util/JsonParserUtil.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
/* | ||
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"). | ||
* You may not use this file except in compliance with the License. | ||
* A copy of the License is located at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* or in the "license" file accompanying this file. This file is distributed | ||
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either | ||
* express or implied. See the License for the specific language governing | ||
* permissions and limitations under the License. | ||
*/ | ||
|
||
package com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.integTests.tests.util; | ||
|
||
import com.google.gson.JsonArray; | ||
import com.google.gson.JsonObject; | ||
|
||
public class JsonParserUtil { | ||
public static int getSummaryJsonSize(JsonObject jsonObject, String summaryName) { | ||
JsonArray array = jsonObject.get(summaryName).getAsJsonArray(); | ||
if (array == null) { | ||
return 0; | ||
} | ||
return array.size(); | ||
} | ||
|
||
public static JsonObject getSummaryJson(JsonObject jsonObject, String summaryName, int idx) { | ||
JsonArray array = jsonObject.get(summaryName).getAsJsonArray(); | ||
if (array == null) { | ||
return null; | ||
} | ||
if (idx >= array.size()) { | ||
return null; | ||
} | ||
return array.get(idx).getAsJsonObject(); | ||
} | ||
} |