Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Add integ tests for OS metrics(cpu, page fault) #252

Merged
merged 2 commits into from
Jan 12, 2021
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Copyright <2020> Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistro.elasticsearch.performanceanalyzer.integ_test;

import com.amazon.opendistro.elasticsearch.performanceanalyzer.integ_test.json.JsonResponseData;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.integ_test.json.JsonResponseField.Type.Constants;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.integ_test.json.JsonResponseNode;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.OSMetrics;
import java.util.List;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

public class CpuMetricsIT extends MetricCollectorIntegTestBase {
private List<String> nodeIDs;

/**
 * Runs before every test and stores the list returned by {@code getNodeID()} so the
 * test can compare its size with the number of node entries in a metrics response.
 */
@Before
public void init() throws Exception {
    this.nodeIDs = getNodeID();
}

@Test
public void checkCPUUtilization() throws Exception {
//read metric from local node
List<JsonResponseNode> responseNodeList =
readMetric(PERFORMANCE_ANALYZER_BASE_ENDPOINT + "/metrics/?metrics=CPU_Utilization&agg=sum");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can we create path as constant variable?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This endpoint is only referenced once, here, and I have already defined a constant for the common path that can be used by other endpoints. Do we still need to define a separate static variable here?

Assert.assertEquals(1, responseNodeList.size());
validatePerNodeCPUMetric(responseNodeList.get(0));

//read metric from all nodes in cluster
responseNodeList =
readMetric(PERFORMANCE_ANALYZER_BASE_ENDPOINT + "/metrics/?metrics=CPU_Utilization&agg=sum&nodes=all");
int nodeNum = nodeIDs.size();
Assert.assertEquals(nodeNum, responseNodeList.size());
for (int i = 0; i < nodeNum; i++) {
validatePerNodeCPUMetric(responseNodeList.get(i));
}
}

/**
* check if cpu usage is non zero
* {
* "JtlEoRowSI6iNpzpjlbp_Q": {
* "data": {
* "fields": [
* {
* "name": "CPU_Utilization",
* "type": "DOUBLE"
* }
* ],
* "records": [
* [
* 0.005275218803760752
* ]
* ]
* },
* "timestamp": 1606861740000
* }
* }
*/
private void validatePerNodeCPUMetric(JsonResponseNode responseNode) throws Exception {
Assert.assertTrue(responseNode.getTimestamp() > 0);
JsonResponseData responseData = responseNode.getData();
Assert.assertEquals(1, responseData.getFieldDimensionSize());
Assert.assertEquals(OSMetrics.CPU_UTILIZATION.toString(), responseData.getField(0).getName());
Assert.assertEquals(Constants.DOUBLE, responseData.getField(0).getType());
Assert.assertEquals(1, responseData.getRecordSize());
Assert.assertTrue(responseData.getRecordAsDouble(0, OSMetrics.CPU_UTILIZATION.toString()) > 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there any upper bound we need to check for CPU usage as well other than greater than 0?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. Yes, I think we can check an upper bound for CPU usage here. Will add it.

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright <2020> Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistro.elasticsearch.performanceanalyzer.integ_test;

import com.amazon.opendistro.elasticsearch.performanceanalyzer.PerformanceAnalyzerIntegTestBase;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.integ_test.json.JsonResponseNode;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParseException;
import com.google.gson.JsonParser;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import org.apache.http.HttpStatus;
import org.apache.http.util.EntityUtils;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.junit.Assert;

/**
 * Base class for metric integration tests. It provides helpers to query a REST endpoint
 * through {@code paClient} and to list the node IDs reported by the cluster through
 * {@code adminClient()}.
 */
public class MetricCollectorIntegTestBase extends PerformanceAnalyzerIntegTestBase {

    /** Shared JSON mapper, created once instead of once per parsed response entry. */
    private static final Gson GSON = new Gson();

    /**
     * Sends a GET request to {@code endpoint} via {@code paClient}, asserts that the response
     * status is HTTP 200 and converts every top-level entry of the JSON body into a
     * {@link JsonResponseNode}.
     *
     * @param endpoint request path, including the query string
     * @return one element per top-level entry of the response object
     * @throws Exception if the request fails or the body cannot be parsed as a JSON object
     */
    protected List<JsonResponseNode> readMetric(String endpoint) throws Exception {
        Request request = new Request("GET", endpoint);
        Response resp = paClient.performRequest(request);
        Assert.assertEquals(HttpStatus.SC_OK, resp.getStatusLine().getStatusCode());
        // Decode as UTF-8 when the response does not declare a charset, rather than relying
        // on EntityUtils' built-in default.
        String jsonString = EntityUtils.toString(resp.getEntity(), StandardCharsets.UTF_8);
        JsonObject jsonObject = new JsonParser().parse(jsonString).getAsJsonObject();
        return parseJsonResponse(jsonObject);
    }

    /**
     * Requests {@code /_cat/nodes?full_id&h=id} via {@code adminClient()} and returns the
     * response body split into lines.
     *
     * @return every line of the response body, in response order
     * @throws Exception if the request fails or the body cannot be read
     */
    protected List<String> getNodeID() throws Exception {
        final Request request = new Request("GET", "/_cat/nodes?full_id&h=id");
        final Response response = adminClient().performRequest(request);
        // Fail here, like readMetric does, instead of silently returning an empty list that
        // would only surface later as a confusing node-count mismatch.
        Assert.assertEquals(HttpStatus.SC_OK, response.getStatusLine().getStatusCode());
        List<String> nodeIDs = new ArrayList<>();
        try (BufferedReader responseReader = new BufferedReader(
                new InputStreamReader(response.getEntity().getContent(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = responseReader.readLine()) != null) {
                nodeIDs.add(line);
            }
        }
        return nodeIDs;
    }

    /**
     * Deserializes the value of every top-level entry of {@code jsonObject} into a
     * {@link JsonResponseNode}; the entry keys are ignored.
     *
     * @param jsonObject parsed response body
     * @return the deserialized values, in the iteration order of {@code jsonObject.entrySet()}
     * @throws JsonParseException if a value cannot be mapped to {@link JsonResponseNode}
     */
    private List<JsonResponseNode> parseJsonResponse(JsonObject jsonObject) throws JsonParseException {
        List<JsonResponseNode> responseNodeList = new ArrayList<>();
        jsonObject.entrySet().forEach(
                entry -> responseNodeList.add(GSON.fromJson(entry.getValue(), JsonResponseNode.class)));
        return responseNodeList;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
/*
* Copyright <2020> Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistro.elasticsearch.performanceanalyzer.integ_test;

import com.amazon.opendistro.elasticsearch.performanceanalyzer.integ_test.json.JsonResponseData;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.integ_test.json.JsonResponseField.Type.Constants;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.integ_test.json.JsonResponseNode;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.OSMetrics;
import java.util.List;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

public class PageFaultMetricsIT extends MetricCollectorIntegTestBase {
private List<String> nodeIDs;

/**
 * Runs before every test and stores the list returned by {@code getNodeID()} so each
 * test can compare its size with the number of node entries in a metrics response.
 */
@Before
public void init() throws Exception {
    this.nodeIDs = getNodeID();
}

/**
 * Reads Paging_MajfltRate (agg=max) first without and then with {@code nodes=all}, and
 * validates every node entry that comes back.
 */
@Test
public void checkPaging_MajfltRate() throws Exception {
    // Without the nodes parameter exactly one node entry is expected.
    List<JsonResponseNode> localResult =
        readMetric(PERFORMANCE_ANALYZER_BASE_ENDPOINT + "/metrics/?metrics=Paging_MajfltRate&agg=max");
    Assert.assertEquals(1, localResult.size());
    validateMajorPageFaultMetric(localResult.get(0));

    // With nodes=all the response must hold one entry per ID collected in init().
    List<JsonResponseNode> clusterResult =
        readMetric(PERFORMANCE_ANALYZER_BASE_ENDPOINT + "/metrics/?metrics=Paging_MajfltRate&agg=max&nodes=all");
    Assert.assertEquals(nodeIDs.size(), clusterResult.size());
    for (JsonResponseNode node : clusterResult) {
        validateMajorPageFaultMetric(node);
    }
}

/**
 * Reads Paging_MinfltRate (agg=max) first without and then with {@code nodes=all}, and
 * validates every node entry that comes back.
 */
@Test
public void checkPaging_MinfltRate() throws Exception {
    // Without the nodes parameter exactly one node entry is expected.
    List<JsonResponseNode> localResult =
        readMetric(PERFORMANCE_ANALYZER_BASE_ENDPOINT + "/metrics/?metrics=Paging_MinfltRate&agg=max");
    Assert.assertEquals(1, localResult.size());
    validateMinorPageFaultMetric(localResult.get(0));

    // With nodes=all the response must hold one entry per ID collected in init().
    List<JsonResponseNode> clusterResult =
        readMetric(PERFORMANCE_ANALYZER_BASE_ENDPOINT + "/metrics/?metrics=Paging_MinfltRate&agg=max&nodes=all");
    Assert.assertEquals(nodeIDs.size(), clusterResult.size());
    for (JsonResponseNode node : clusterResult) {
        validateMinorPageFaultMetric(node);
    }
}

/**
 * Reads Paging_RSS (agg=max) first without and then with {@code nodes=all}, and
 * validates every node entry that comes back.
 */
@Test
public void checkPaging_RSS() throws Exception {
    // Without the nodes parameter exactly one node entry is expected.
    List<JsonResponseNode> localResult =
        readMetric(PERFORMANCE_ANALYZER_BASE_ENDPOINT + "/metrics/?metrics=Paging_RSS&agg=max");
    Assert.assertEquals(1, localResult.size());
    validatePagingRSSMetric(localResult.get(0));

    // With nodes=all the response must hold one entry per ID collected in init().
    List<JsonResponseNode> clusterResult =
        readMetric(PERFORMANCE_ANALYZER_BASE_ENDPOINT + "/metrics/?metrics=Paging_RSS&agg=max&nodes=all");
    Assert.assertEquals(nodeIDs.size(), clusterResult.size());
    for (JsonResponseNode node : clusterResult) {
        validatePagingRSSMetric(node);
    }
}

/**
* Checks that the major page fault rate is greater than or equal to 0. Major page faults depend heavily on
* the OS workload: if the docker image runs on an OS without a heavy workload, we are unlikely to observe
* any major page fault during the 5s interval.
* We might want to revisit this and see if we can run some workload to trigger page faults.
* {
* "JtlEoRowSI6iNpzpjlbp_Q": {
* "data": {
* "fields": [
* {
* "name": "Paging_MajfltRate",
* "type": "DOUBLE"
* }
* ],
* "records": [
* [
* 0.0
* ]
* ]
* },
* "timestamp": 1606861150000
* }
* }
*/
private void validateMajorPageFaultMetric(JsonResponseNode responseNode) throws Exception {
Assert.assertTrue(responseNode.getTimestamp() > 0);
JsonResponseData responseData = responseNode.getData();
Assert.assertEquals(1, responseData.getFieldDimensionSize());
Assert.assertEquals(OSMetrics.PAGING_MAJ_FLT_RATE.toString(), responseData.getField(0).getName());
Assert.assertEquals(Constants.DOUBLE, responseData.getField(0).getType());
Assert.assertEquals(1, responseData.getRecordSize());
Assert.assertTrue(responseData.getRecordAsDouble(0, OSMetrics.PAGING_MAJ_FLT_RATE.toString()) >= 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

won't the value of PAGING_MAJ_FLT_RATE always be >=0?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it will always be >= 0 if the page metrics are read correctly. Unfortunately, this is the only sanity check we can do for this metric, because the occurrence of page faults is out of our control and it would be hard to write an IT that forces the OS to trigger a page fault.

}

/**
* {
* "JtlEoRowSI6iNpzpjlbp_Q": {
* "data": {
* "fields": [
* {
* "name": "Paging_MinfltRate",
* "type": "DOUBLE"
* }
* ],
* "records": [
* [
* 0.28116752649470106
* ]
* ]
* },
* "timestamp": 1606861625000
* }
* }
*/
private void validateMinorPageFaultMetric(JsonResponseNode responseNode) throws Exception {
Assert.assertTrue(responseNode.getTimestamp() > 0);
JsonResponseData responseData = responseNode.getData();
Assert.assertEquals(1, responseData.getFieldDimensionSize());
Assert.assertEquals(OSMetrics.PAGING_MIN_FLT_RATE.toString(), responseData.getField(0).getName());
Assert.assertEquals(Constants.DOUBLE, responseData.getField(0).getType());
Assert.assertEquals(1, responseData.getRecordSize());
Assert.assertTrue(responseData.getRecordAsDouble(0, OSMetrics.PAGING_MIN_FLT_RATE.toString()) >= 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

won't the value of PAGING_MIN_FLT_RATE always be >=0?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same as above

}

/**
 * Validates a single node entry of a Paging_RSS response. The entry must carry a positive
 * timestamp, exactly one field whose name is {@code OSMetrics.PAGING_RSS} and whose type is
 * DOUBLE, and exactly one record whose value is strictly greater than zero.
 *
 * <p>Sample response:
 * <pre>
 * {
 *   "JtlEoRowSI6iNpzpjlbp_Q": {
 *     "data": {
 *       "fields": [
 *         {
 *           "name": "Paging_RSS",
 *           "type": "DOUBLE"
 *         }
 *       ],
 *       "records": [
 *         [
 *           666034.0
 *         ]
 *       ]
 *     },
 *     "timestamp": 1606866110000
 *   }
 * }
 * </pre>
 */
private void validatePagingRSSMetric(JsonResponseNode responseNode) throws Exception {
    final String metricName = OSMetrics.PAGING_RSS.toString();
    Assert.assertTrue(responseNode.getTimestamp() > 0);

    final JsonResponseData data = responseNode.getData();
    // Schema: a single DOUBLE column named after the metric.
    Assert.assertEquals(1, data.getFieldDimensionSize());
    Assert.assertEquals(metricName, data.getField(0).getName());
    Assert.assertEquals(Constants.DOUBLE, data.getField(0).getType());

    // Payload: a single record holding a positive value.
    Assert.assertEquals(1, data.getRecordSize());
    Assert.assertTrue(data.getRecordAsDouble(0, metricName) > 0);
}
}
Loading