[Collector] Adding Latency and Error service metrics (#442)

* [Collector] Adding Latency and Error service metrics

Signed-off-by: Khushboo Rajput <[email protected]>

* Removing fastdoubleparser-LICENSE.txt

* Removing fastdoubleparser-NOTICE.txt

Signed-off-by: Khushboo Rajput <[email protected]>

---------

Signed-off-by: Khushboo Rajput <[email protected]>
khushbr committed May 17, 2023
1 parent 27ca714 commit 714c59f
Showing 26 changed files with 125 additions and 79 deletions.
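The change applies one pattern to every collector touched below: time each collectMetrics(long startTime) run and report it as a latency stat on success, and report a count-of-one error stat (rather than an elapsed time) on failure. The following is a minimal illustrative sketch of that pattern, not part of the diff; the ExampleCollector class and the elided collection body are placeholders, while the aggregator calls and the AdmissionControl metric names are taken from the hunks below.

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.opensearch.performanceanalyzer.PerformanceAnalyzerApp;
import org.opensearch.performanceanalyzer.rca.framework.metrics.ExceptionsAndErrors;
import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics;

// Sketch only: the latency/error pattern this commit applies to each collector.
public class ExampleCollector {
    private static final Logger LOG = LogManager.getLogger(ExampleCollector.class);

    public void collectMetrics(long startTime) {
        long mCurrT = System.currentTimeMillis(); // wall-clock start of this collection run
        try {
            // ... gather and save this collector's metrics here ...

            // Success path: emit the collector's execution time (latency) stat.
            PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat(
                    WriterMetrics.ADMISSION_CONTROL_COLLECTOR_EXECUTION_TIME,
                    "",
                    System.currentTimeMillis() - mCurrT);
        } catch (Exception ex) {
            LOG.debug(
                    "Exception in collecting metrics: {} for startTime {}",
                    ex::toString,
                    () -> startTime);
            // Failure path: emit an error count of 1, not an elapsed time.
            PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
                    ExceptionsAndErrors.ADMISSION_CONTROL_COLLECTOR_ERROR, "", 1);
        }
    }
}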
1 change: 1 addition & 0 deletions licenses/fastdoubleparser-0.8.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
85c25540369921659556ead85e02c99ef0d24280
21 changes: 0 additions & 21 deletions licenses/fastdoubleparser-LICENSE.txt

This file was deleted.

Empty file.
1 change: 1 addition & 0 deletions licenses/jackson-annotations-2.15.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
89b0fd554928425a776a6e97ed010034312af21d
1 change: 1 addition & 0 deletions licenses/jackson-databind-2.15.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0d41caa3a4e9f85382702a059a65c512f85ac230
1 change: 1 addition & 0 deletions licenses/jackson-module-paranamer-2.15.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1d92b098577555a9a9e6c8783ce03ae24d49c9d2
1 change: 0 additions & 1 deletion licenses/protobuf-java-3.21.12.jar.sha1

This file was deleted.

Original file line number Diff line number Diff line change
@@ -60,7 +60,7 @@ public void collectMetrics(long startTime) {
return;
}

long startTimeMillis = System.currentTimeMillis();
long mCurrT = System.currentTimeMillis();
try {

Method getAdmissionController =
@@ -110,17 +110,15 @@ public void collectMetrics(long startTime) {
PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat(
WriterMetrics.ADMISSION_CONTROL_COLLECTOR_EXECUTION_TIME,
"",
System.currentTimeMillis() - startTimeMillis);
System.currentTimeMillis() - mCurrT);

} catch (Exception ex) {
PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
ExceptionsAndErrors.ADMISSION_CONTROL_COLLECTOR_ERROR,
getCollectorName(),
System.currentTimeMillis() - startTimeMillis);
LOG.debug(
"Exception in collecting AdmissionControl Metrics: {} for startTime {}",
ex::toString,
() -> startTime);
PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
ExceptionsAndErrors.ADMISSION_CONTROL_COLLECTOR_ERROR, "", 1);
}
}

@@ -183,6 +181,8 @@ private boolean canLoadAdmissionControllerClasses() {
Class.forName(ADMISSION_CONTROL_SERVICE, false, admissionControlClassLoader);
} catch (Exception e) {
LOG.debug("Failed to load AdmissionControllerService classes : {}", e::toString);
PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
ExceptionsAndErrors.ADMISSION_CONTROL_COLLECTOR_ERROR, "", 1);
return false;
}
return true;
Original file line number Diff line number Diff line change
@@ -18,11 +18,13 @@
import org.opensearch.common.cache.Cache;
import org.opensearch.indices.IndicesService;
import org.opensearch.performanceanalyzer.OpenSearchResources;
import org.opensearch.performanceanalyzer.PerformanceAnalyzerApp;
import org.opensearch.performanceanalyzer.metrics.AllMetrics.CacheConfigDimension;
import org.opensearch.performanceanalyzer.metrics.AllMetrics.CacheConfigValue;
import org.opensearch.performanceanalyzer.metrics.MetricsConfiguration;
import org.opensearch.performanceanalyzer.metrics.MetricsProcessor;
import org.opensearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics;
import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics;

/*
* Unlike Cache Hit, Miss, Eviction Count and Size, which is tracked on a per shard basis,
@@ -54,6 +56,7 @@ public void collectMetrics(long startTime) {
return;
}

long mCurrT = System.currentTimeMillis();
value.setLength(0);
value.append(PerformanceAnalyzerMetrics.getJsonCurrentMilliSeconds());
// This is for backward compatibility. Core OpenSearch may or may not emit maxWeight metric.
@@ -117,6 +120,10 @@ public void collectMetrics(long startTime) {
value.append(PerformanceAnalyzerMetrics.sMetricNewLineDelimitor)
.append(shardRequestCacheMaxSizeStatus.serialize());
saveMetricValues(value.toString(), startTime);
PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat(
WriterMetrics.CACHE_CONFIG_METRICS_COLLECTOR_EXECUTION_TIME,
"",
System.currentTimeMillis() - mCurrT);
}

@Override
Original file line number Diff line number Diff line change
@@ -68,12 +68,12 @@ public void collectMetrics(long startTime) {
System.currentTimeMillis() - mCurrT);

} catch (Exception ex) {
PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
ExceptionsAndErrors.CIRCUIT_BREAKER_COLLECTOR_ERROR, "", 1);
LOG.debug(
"Exception in Collecting CircuitBreaker Metrics: {} for startTime {}",
() -> ex.toString(),
() -> startTime);
PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
ExceptionsAndErrors.CIRCUIT_BREAKER_COLLECTOR_ERROR, "", 1);
}
}

Original file line number Diff line number Diff line change
@@ -104,12 +104,12 @@ public void collectMetrics(long startTime) {
"",
System.currentTimeMillis() - mCurrT);
} catch (Exception ex) {
PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
ExceptionsAndErrors.CLUSTER_APPLIER_SERVICE_STATS_COLLECTOR_ERROR, "", 1);
LOG.debug(
"Exception in Collecting Cluster Applier Service Metrics: {} for startTime {}",
() -> ex.toString(),
() -> startTime);
PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
ExceptionsAndErrors.CLUSTER_APPLIER_SERVICE_STATS_COLLECTOR_ERROR, "", 1);
}
}

Original file line number Diff line number Diff line change
@@ -27,6 +27,7 @@
import org.opensearch.performanceanalyzer.metrics.MetricsProcessor;
import org.opensearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics;
import org.opensearch.performanceanalyzer.metrics.ThreadIDUtil;
import org.opensearch.performanceanalyzer.rca.framework.metrics.ExceptionsAndErrors;
import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics;

@SuppressWarnings("unchecked")
@@ -83,6 +84,8 @@ public void collectMetrics(long startTime) {
return;
}

long mCurrT = System.currentTimeMillis();

value.setLength(0);
Queue<Runnable> current = getClusterManagerServiceCurrentQueue();

@@ -145,19 +148,24 @@ public void collectMetrics(long startTime) {
PerformanceAnalyzerMetrics.START_FILE_NAME);

value.setLength(0);
PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat(
WriterMetrics
.CLUSTER_MANAGER_SERVICE_EVENTS_METRICS_COLLECTOR_EXECUTION_TIME,
"",
System.currentTimeMillis() - mCurrT);
}
} else {
generateFinishMetrics(startTime);
}
LOG.debug(() -> "Successfully collected ClusterManager Event Metrics.");
} catch (Exception ex) {
PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat(
WriterMetrics.CLUSTER_MANAGER_METRICS_ERROR, "", 1);
LOG.debug(
"Exception in Collecting ClusterManager Metrics: {} for startTime {} with ExceptionCode: {}",
() -> ex.toString(),
() -> startTime,
() -> StatExceptionCode.CLUSTER_MANAGER_METRICS_ERROR.toString());
PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
ExceptionsAndErrors.CLUSTER_MANAGER_METRICS_ERROR, "", 1);
}
}

@@ -235,8 +243,8 @@ Queue<Runnable> getClusterManagerServiceCurrentQueue() throws Exception {
getPrioritizedTPExecutorCurrentField()
.get(prioritizedOpenSearchThreadPoolExecutor);
} else {
PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat(
WriterMetrics.CLUSTER_MANAGER_NODE_NOT_UP, "", 1);
PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
ExceptionsAndErrors.CLUSTER_MANAGER_NODE_NOT_UP, "", 1);
}
}
}
Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@
import org.opensearch.performanceanalyzer.metrics.MetricsConfiguration;
import org.opensearch.performanceanalyzer.metrics.MetricsProcessor;
import org.opensearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics;
import org.opensearch.performanceanalyzer.rca.framework.metrics.ExceptionsAndErrors;
import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics;

@SuppressWarnings("unchecked")
@@ -57,6 +58,7 @@ public void collectMetrics(long startTime) {
return;
}

long mCurrT = System.currentTimeMillis();
/*
pendingTasks API returns object of PendingClusterTask which contains insertOrder, priority, source, timeInQueue.
Example :
@@ -98,13 +100,17 @@ public void collectMetrics(long startTime) {
startTime,
PerformanceAnalyzerMetrics.CLUSTER_MANAGER_CURRENT,
PerformanceAnalyzerMetrics.CLUSTER_MANAGER_META_DATA);
} catch (Exception ex) {
PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat(
WriterMetrics.CLUSTER_MANAGER_METRICS_ERROR, "", 1);
WriterMetrics.CLUSTER_MANAGER_SERVICE_METRICS_COLLECTOR_EXECUTION_TIME,
"",
System.currentTimeMillis() - mCurrT);
} catch (Exception ex) {
LOG.debug(
"Exception in Collecting ClusterManager Metrics: {} for startTime {}",
() -> ex.toString(),
() -> startTime);
PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
ExceptionsAndErrors.CLUSTER_MANAGER_METRICS_ERROR, "", 1);
}
}

Original file line number Diff line number Diff line change
@@ -85,12 +85,12 @@ public void collectMetrics(long startTime) {
System.currentTimeMillis() - mCurrT);

} catch (Exception ex) {
PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
ExceptionsAndErrors.CLUSTER_MANAGER_THROTTLING_COLLECTOR_ERROR, "", 1);
LOG.debug(
"Exception in Collecting ClusterManager Throttling Metrics: {} for startTime {}",
() -> ex.toString(),
() -> startTime);
PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
ExceptionsAndErrors.CLUSTER_MANAGER_THROTTLING_COLLECTOR_ERROR, "", 1);
}
}

Original file line number Diff line number Diff line change
@@ -80,14 +80,12 @@ public void collectMetrics(long startTime) {
System.currentTimeMillis() - mCurrT);

} catch (Exception ex) {
PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
ExceptionsAndErrors.ELECTION_TERM_COLLECTOR_ERROR,
"",
System.currentTimeMillis() - mCurrT);
LOG.debug(
"Exception in Collecting Election term Metrics: {} for startTime {}",
() -> ex.toString(),
() -> startTime);
PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
ExceptionsAndErrors.ELECTION_TERM_COLLECTOR_ERROR, "", 1);
}
}

Original file line number Diff line number Diff line change
@@ -123,14 +123,12 @@ public void collectMetrics(long startTime) {
"",
System.currentTimeMillis() - mCurrT);
} catch (Exception ex) {
PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
ExceptionsAndErrors.FAULT_DETECTION_COLLECTOR_ERROR,
"",
System.currentTimeMillis() - mCurrT);
LOG.debug(
"Exception in Collecting FaultDetection Metrics: {} for startTime {}",
() -> ex.toString(),
() -> startTime);
PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
ExceptionsAndErrors.FAULT_DETECTION_COLLECTOR_ERROR, "", 1);
}
}

Original file line number Diff line number Diff line change
@@ -14,13 +14,16 @@
import org.opensearch.cluster.node.DiscoveryNode;
import org.opensearch.cluster.node.DiscoveryNodes;
import org.opensearch.performanceanalyzer.OpenSearchResources;
import org.opensearch.performanceanalyzer.PerformanceAnalyzerApp;
import org.opensearch.performanceanalyzer.config.overrides.ConfigOverridesHelper;
import org.opensearch.performanceanalyzer.config.overrides.ConfigOverridesWrapper;
import org.opensearch.performanceanalyzer.metrics.AllMetrics.NodeDetailColumns;
import org.opensearch.performanceanalyzer.metrics.AllMetrics.NodeRole;
import org.opensearch.performanceanalyzer.metrics.MetricsConfiguration;
import org.opensearch.performanceanalyzer.metrics.MetricsProcessor;
import org.opensearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics;
import org.opensearch.performanceanalyzer.rca.framework.metrics.ExceptionsAndErrors;
import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics;

public class NodeDetailsCollector extends PerformanceAnalyzerMetricsCollector
implements MetricsProcessor {
@@ -43,6 +46,8 @@ public void collectMetrics(long startTime) {
return;
}

long mCurrT = System.currentTimeMillis();

StringBuilder value = new StringBuilder();
value.append(PerformanceAnalyzerMetrics.getJsonCurrentMilliSeconds())
.append(PerformanceAnalyzerMetrics.sMetricNewLineDelimitor);
@@ -62,6 +67,8 @@ public void collectMetrics(long startTime) {
}
} catch (IOException ioe) {
LOG.error("Unable to serialize rca config overrides.", ioe);
PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat(
ExceptionsAndErrors.CONFIG_OVERRIDES_SER_FAILED, "", 1);
}
value.append(PerformanceAnalyzerMetrics.sMetricNewLineDelimitor);

@@ -87,6 +94,10 @@ public void collectMetrics(long startTime) {
discoveryNodeIterator.next(), value, localNodeID, clusterManagerNode);
}
saveMetricValues(value.toString(), startTime);
PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat(
WriterMetrics.NODE_DETAILS_COLLECTOR_EXECUTION_TIME,
"",
System.currentTimeMillis() - mCurrT);
}

private void addMetricsToStringBuilder(
Original file line number Diff line number Diff line change
@@ -29,6 +29,7 @@
import org.opensearch.performanceanalyzer.metrics.MetricsProcessor;
import org.opensearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics;
import org.opensearch.performanceanalyzer.rca.framework.metrics.ExceptionsAndErrors;
import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics;
import org.opensearch.performanceanalyzer.util.Utils;

/**
@@ -146,6 +147,8 @@ public void collectMetrics(long startTime) {
return;
}

long mCurrT = System.currentTimeMillis();

try {
populateCurrentShards();
populatePerShardStats(indicesService);
@@ -173,6 +176,11 @@ public void collectMetrics(long startTime) {
new NodeStatsMetricsAllShardsPerCollectionStatus(currentShardStats);
populateDiffMetricValue(
prevValue, currValue, startTime, shardId.getIndexName(), shardId.id());

PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat(
WriterMetrics.NODE_STATS_ALL_SHARDS_METRICS_COLLECTOR_EXECUTION_TIME,
"",
System.currentTimeMillis() - mCurrT);
}
} catch (Exception ex) {
LOG.debug(
Original file line number Diff line number Diff line change
@@ -29,13 +29,14 @@
import org.opensearch.performanceanalyzer.metrics.MetricsProcessor;
import org.opensearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics;
import org.opensearch.performanceanalyzer.rca.framework.metrics.ExceptionsAndErrors;
import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics;
import org.opensearch.performanceanalyzer.util.Utils;

/**
* This collector collects metrics for fixed number of shards on a node in a single run. These
* metrics are heavy weight metrics which have performance impacts on the performance of the node.
* The number of shards is set via a cluster settings api. The parameter to set is
* shardsPerCollection. The metrics will be populated for these many shards in a single run.
* metrics are heavy which have performance impacts on the performance of the node. The number of
* shards is set via a cluster settings api. The parameter to set is shardsPerCollection. The
* metrics will be populated for these many shards in a single run.
*/
@SuppressWarnings("unchecked")
public class NodeStatsFixedShardsMetricsCollector extends PerformanceAnalyzerMetricsCollector
@@ -166,6 +167,8 @@ public void collectMetrics(long startTime) {
return;
}

long mCurrT = System.currentTimeMillis();

try {
// reach the end of current shardId list. retrieve new shard list from IndexService
if (!currentShardsIter.hasNext()) {
@@ -203,6 +206,11 @@ public void collectMetrics(long startTime) {
startTime,
currentIndexShardStats.getShardId().getIndexName(),
String.valueOf(currentIndexShardStats.getShardId().id()));

PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat(
WriterMetrics.NODE_STATS_FIXED_SHARDS_METRICS_COLLECTOR_EXECUTION_TIME,
"",
System.currentTimeMillis() - mCurrT);
}
}
} catch (Exception ex) {
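For context on the NodeStatsFixedShardsMetricsCollector hunk above: its Javadoc says the heavy per-shard stats are collected for only a fixed number of shards per run, with the count set through the cluster settings API parameter shardsPerCollection, and the diff keeps a currentShardsIter that is refreshed once exhausted. Below is a rough sketch of that rotation, not taken from the diff; the class name, the shardsPerCollection field, refreshShardList(), collectShardStats(), and the ShardId import path are assumptions, while the execution-time metric name comes from the hunk above.

import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.opensearch.index.shard.ShardId; // import path assumed for this OpenSearch version
import org.opensearch.performanceanalyzer.PerformanceAnalyzerApp;
import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics;

// Sketch only: visit at most shardsPerCollection shards per run, resuming where
// the previous run stopped and refreshing the shard list once it is exhausted.
public class FixedShardsRotationSketch {
    private Iterator<ShardId> currentShardsIter = Collections.emptyIterator();
    private int shardsPerCollection; // value supplied via the cluster settings API per the Javadoc

    public void collectMetrics(long startTime) {
        long mCurrT = System.currentTimeMillis();
        if (!currentShardsIter.hasNext()) {
            // End of the current shard list reached; fetch a fresh list (hypothetical helper).
            currentShardsIter = refreshShardList().iterator();
        }
        int visited = 0;
        while (currentShardsIter.hasNext() && visited < shardsPerCollection) {
            collectShardStats(currentShardsIter.next(), startTime); // hypothetical per-shard collection
            visited++;
        }
        PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat(
                WriterMetrics.NODE_STATS_FIXED_SHARDS_METRICS_COLLECTOR_EXECUTION_TIME,
                "",
                System.currentTimeMillis() - mCurrT);
    }

    private List<ShardId> refreshShardList() {
        return Collections.emptyList(); // stand-in; the real collector reads shards from IndicesService
    }

    private void collectShardStats(ShardId shardId, long startTime) {
        // stand-in for the heavy per-shard stats collection and save
    }
}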