From 714c59f10e3d90672784052f30232641afb3df42 Mon Sep 17 00:00:00 2001 From: Khushboo Rajput <59671881+khushbr@users.noreply.github.com> Date: Wed, 17 May 2023 14:22:44 -0700 Subject: [PATCH] [Collector] Adding Latency and Error service metrics (#442) * [Collector] Adding Latency and Error service metrics Signed-off-by: Khushboo Rajput * Removing fastdoubleparser-LICENSE.tx * Removing fastdoubleparser-NOTICE.txt Signed-off-by: Khushboo Rajput --------- Signed-off-by: Khushboo Rajput --- licenses/fastdoubleparser-0.8.0.jar.sha1 | 1 + licenses/fastdoubleparser-LICENSE.txt | 21 --------------- licenses/fastdoubleparser-NOTICE.txt | 0 licenses/jackson-annotations-2.15.0.jar.sha1 | 1 + licenses/jackson-databind-2.15.0.jar.sha1 | 1 + .../jackson-module-paranamer-2.15.0.jar.sha1 | 1 + licenses/protobuf-java-3.21.12.jar.sha1 | 1 - .../AdmissionControlMetricsCollector.java | 12 ++++----- .../CacheConfigMetricsCollector.java | 7 +++++ .../collectors/CircuitBreakerCollector.java | 4 +-- .../ClusterApplierServiceStatsCollector.java | 4 +-- .../ClusterManagerServiceEventMetrics.java | 16 +++++++++--- .../ClusterManagerServiceMetrics.java | 10 +++++-- ...sterManagerThrottlingMetricsCollector.java | 4 +-- .../collectors/ElectionTermCollector.java | 6 ++--- .../FaultDetectionMetricsCollector.java | 6 ++--- .../collectors/NodeDetailsCollector.java | 11 ++++++++ .../NodeStatsAllShardsMetricsCollector.java | 8 ++++++ .../NodeStatsFixedShardsMetricsCollector.java | 14 +++++++--- ...ShardIndexingPressureMetricsCollector.java | 9 +++++-- .../collectors/ShardStateCollector.java | 4 +-- .../ThreadPoolMetricsCollector.java | 15 +++++++++++ .../setting/ClusterSettingsManager.java | 10 +++---- .../PerformanceAnalyzerSearchListener.java | 26 +++++++++---------- ...rmanceAnalyzerTransportRequestHandler.java | 6 ++--- .../writer/EventLogQueueProcessor.java | 6 ++--- 26 files changed, 125 insertions(+), 79 deletions(-) create mode 100644 licenses/fastdoubleparser-0.8.0.jar.sha1 delete mode 100644 licenses/fastdoubleparser-LICENSE.txt delete mode 100644 licenses/fastdoubleparser-NOTICE.txt create mode 100644 licenses/jackson-annotations-2.15.0.jar.sha1 create mode 100644 licenses/jackson-databind-2.15.0.jar.sha1 create mode 100644 licenses/jackson-module-paranamer-2.15.0.jar.sha1 delete mode 100644 licenses/protobuf-java-3.21.12.jar.sha1 diff --git a/licenses/fastdoubleparser-0.8.0.jar.sha1 b/licenses/fastdoubleparser-0.8.0.jar.sha1 new file mode 100644 index 00000000..2139d05d --- /dev/null +++ b/licenses/fastdoubleparser-0.8.0.jar.sha1 @@ -0,0 +1 @@ +85c25540369921659556ead85e02c99ef0d24280 \ No newline at end of file diff --git a/licenses/fastdoubleparser-LICENSE.txt b/licenses/fastdoubleparser-LICENSE.txt deleted file mode 100644 index 4949e563..00000000 --- a/licenses/fastdoubleparser-LICENSE.txt +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2021 Werner Randelshofer, Switzerland. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file diff --git a/licenses/fastdoubleparser-NOTICE.txt b/licenses/fastdoubleparser-NOTICE.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/licenses/jackson-annotations-2.15.0.jar.sha1 b/licenses/jackson-annotations-2.15.0.jar.sha1 new file mode 100644 index 00000000..695645e2 --- /dev/null +++ b/licenses/jackson-annotations-2.15.0.jar.sha1 @@ -0,0 +1 @@ +89b0fd554928425a776a6e97ed010034312af21d \ No newline at end of file diff --git a/licenses/jackson-databind-2.15.0.jar.sha1 b/licenses/jackson-databind-2.15.0.jar.sha1 new file mode 100644 index 00000000..059bbf57 --- /dev/null +++ b/licenses/jackson-databind-2.15.0.jar.sha1 @@ -0,0 +1 @@ +0d41caa3a4e9f85382702a059a65c512f85ac230 \ No newline at end of file diff --git a/licenses/jackson-module-paranamer-2.15.0.jar.sha1 b/licenses/jackson-module-paranamer-2.15.0.jar.sha1 new file mode 100644 index 00000000..4ad294ee --- /dev/null +++ b/licenses/jackson-module-paranamer-2.15.0.jar.sha1 @@ -0,0 +1 @@ +1d92b098577555a9a9e6c8783ce03ae24d49c9d2 \ No newline at end of file diff --git a/licenses/protobuf-java-3.21.12.jar.sha1 b/licenses/protobuf-java-3.21.12.jar.sha1 deleted file mode 100644 index e86ed957..00000000 --- a/licenses/protobuf-java-3.21.12.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -5589e79a33cb6509f7e681d7cf4fc59d47c51c71 \ No newline at end of file diff --git a/src/main/java/org/opensearch/performanceanalyzer/collectors/AdmissionControlMetricsCollector.java b/src/main/java/org/opensearch/performanceanalyzer/collectors/AdmissionControlMetricsCollector.java index 5b3b17b7..57c788ec 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/collectors/AdmissionControlMetricsCollector.java +++ b/src/main/java/org/opensearch/performanceanalyzer/collectors/AdmissionControlMetricsCollector.java @@ -60,7 +60,7 @@ public void collectMetrics(long startTime) { return; } - long startTimeMillis = System.currentTimeMillis(); + long mCurrT = System.currentTimeMillis(); try { Method getAdmissionController = @@ -110,17 +110,15 @@ public void collectMetrics(long startTime) { PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( WriterMetrics.ADMISSION_CONTROL_COLLECTOR_EXECUTION_TIME, "", - System.currentTimeMillis() - startTimeMillis); + System.currentTimeMillis() - mCurrT); } catch (Exception ex) { - PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( - ExceptionsAndErrors.ADMISSION_CONTROL_COLLECTOR_ERROR, - getCollectorName(), - System.currentTimeMillis() - startTimeMillis); LOG.debug( "Exception in collecting AdmissionControl Metrics: {} for startTime {}", ex::toString, () -> startTime); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.ADMISSION_CONTROL_COLLECTOR_ERROR, "", 1); } } @@ -183,6 +181,8 @@ private boolean canLoadAdmissionControllerClasses() { Class.forName(ADMISSION_CONTROL_SERVICE, false, admissionControlClassLoader); } catch (Exception e) { LOG.debug("Failed to load AdmissionControllerService classes : {}", e::toString); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.ADMISSION_CONTROL_COLLECTOR_ERROR, "", 1); return false; } return true; diff --git a/src/main/java/org/opensearch/performanceanalyzer/collectors/CacheConfigMetricsCollector.java b/src/main/java/org/opensearch/performanceanalyzer/collectors/CacheConfigMetricsCollector.java index 5f01c2dc..868b0c48 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/collectors/CacheConfigMetricsCollector.java +++ b/src/main/java/org/opensearch/performanceanalyzer/collectors/CacheConfigMetricsCollector.java @@ -18,11 +18,13 @@ import org.opensearch.common.cache.Cache; import org.opensearch.indices.IndicesService; import org.opensearch.performanceanalyzer.OpenSearchResources; +import org.opensearch.performanceanalyzer.PerformanceAnalyzerApp; import org.opensearch.performanceanalyzer.metrics.AllMetrics.CacheConfigDimension; import org.opensearch.performanceanalyzer.metrics.AllMetrics.CacheConfigValue; import org.opensearch.performanceanalyzer.metrics.MetricsConfiguration; import org.opensearch.performanceanalyzer.metrics.MetricsProcessor; import org.opensearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics; +import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics; /* * Unlike Cache Hit, Miss, Eviction Count and Size, which is tracked on a per shard basis, @@ -54,6 +56,7 @@ public void collectMetrics(long startTime) { return; } + long mCurrT = System.currentTimeMillis(); value.setLength(0); value.append(PerformanceAnalyzerMetrics.getJsonCurrentMilliSeconds()); // This is for backward compatibility. Core OpenSearch may or may not emit maxWeight metric. @@ -117,6 +120,10 @@ public void collectMetrics(long startTime) { value.append(PerformanceAnalyzerMetrics.sMetricNewLineDelimitor) .append(shardRequestCacheMaxSizeStatus.serialize()); saveMetricValues(value.toString(), startTime); + PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( + WriterMetrics.CACHE_CONFIG_METRICS_COLLECTOR_EXECUTION_TIME, + "", + System.currentTimeMillis() - mCurrT); } @Override diff --git a/src/main/java/org/opensearch/performanceanalyzer/collectors/CircuitBreakerCollector.java b/src/main/java/org/opensearch/performanceanalyzer/collectors/CircuitBreakerCollector.java index ca980904..200927ab 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/collectors/CircuitBreakerCollector.java +++ b/src/main/java/org/opensearch/performanceanalyzer/collectors/CircuitBreakerCollector.java @@ -68,12 +68,12 @@ public void collectMetrics(long startTime) { System.currentTimeMillis() - mCurrT); } catch (Exception ex) { - PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( - ExceptionsAndErrors.CIRCUIT_BREAKER_COLLECTOR_ERROR, "", 1); LOG.debug( "Exception in Collecting CircuitBreaker Metrics: {} for startTime {}", () -> ex.toString(), () -> startTime); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.CIRCUIT_BREAKER_COLLECTOR_ERROR, "", 1); } } diff --git a/src/main/java/org/opensearch/performanceanalyzer/collectors/ClusterApplierServiceStatsCollector.java b/src/main/java/org/opensearch/performanceanalyzer/collectors/ClusterApplierServiceStatsCollector.java index bc9f90a9..9e2212c6 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/collectors/ClusterApplierServiceStatsCollector.java +++ b/src/main/java/org/opensearch/performanceanalyzer/collectors/ClusterApplierServiceStatsCollector.java @@ -104,12 +104,12 @@ public void collectMetrics(long startTime) { "", System.currentTimeMillis() - mCurrT); } catch (Exception ex) { - PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( - ExceptionsAndErrors.CLUSTER_APPLIER_SERVICE_STATS_COLLECTOR_ERROR, "", 1); LOG.debug( "Exception in Collecting Cluster Applier Service Metrics: {} for startTime {}", () -> ex.toString(), () -> startTime); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.CLUSTER_APPLIER_SERVICE_STATS_COLLECTOR_ERROR, "", 1); } } diff --git a/src/main/java/org/opensearch/performanceanalyzer/collectors/ClusterManagerServiceEventMetrics.java b/src/main/java/org/opensearch/performanceanalyzer/collectors/ClusterManagerServiceEventMetrics.java index 0f68243d..cf26b6d1 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/collectors/ClusterManagerServiceEventMetrics.java +++ b/src/main/java/org/opensearch/performanceanalyzer/collectors/ClusterManagerServiceEventMetrics.java @@ -27,6 +27,7 @@ import org.opensearch.performanceanalyzer.metrics.MetricsProcessor; import org.opensearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics; import org.opensearch.performanceanalyzer.metrics.ThreadIDUtil; +import org.opensearch.performanceanalyzer.rca.framework.metrics.ExceptionsAndErrors; import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics; @SuppressWarnings("unchecked") @@ -83,6 +84,8 @@ public void collectMetrics(long startTime) { return; } + long mCurrT = System.currentTimeMillis(); + value.setLength(0); Queue current = getClusterManagerServiceCurrentQueue(); @@ -145,19 +148,24 @@ public void collectMetrics(long startTime) { PerformanceAnalyzerMetrics.START_FILE_NAME); value.setLength(0); + PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( + WriterMetrics + .CLUSTER_MANAGER_SERVICE_EVENTS_METRICS_COLLECTOR_EXECUTION_TIME, + "", + System.currentTimeMillis() - mCurrT); } } else { generateFinishMetrics(startTime); } LOG.debug(() -> "Successfully collected ClusterManager Event Metrics."); } catch (Exception ex) { - PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( - WriterMetrics.CLUSTER_MANAGER_METRICS_ERROR, "", 1); LOG.debug( "Exception in Collecting ClusterManager Metrics: {} for startTime {} with ExceptionCode: {}", () -> ex.toString(), () -> startTime, () -> StatExceptionCode.CLUSTER_MANAGER_METRICS_ERROR.toString()); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.CLUSTER_MANAGER_METRICS_ERROR, "", 1); } } @@ -235,8 +243,8 @@ Queue getClusterManagerServiceCurrentQueue() throws Exception { getPrioritizedTPExecutorCurrentField() .get(prioritizedOpenSearchThreadPoolExecutor); } else { - PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( - WriterMetrics.CLUSTER_MANAGER_NODE_NOT_UP, "", 1); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.CLUSTER_MANAGER_NODE_NOT_UP, "", 1); } } } diff --git a/src/main/java/org/opensearch/performanceanalyzer/collectors/ClusterManagerServiceMetrics.java b/src/main/java/org/opensearch/performanceanalyzer/collectors/ClusterManagerServiceMetrics.java index 0af0206b..8df12616 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/collectors/ClusterManagerServiceMetrics.java +++ b/src/main/java/org/opensearch/performanceanalyzer/collectors/ClusterManagerServiceMetrics.java @@ -19,6 +19,7 @@ import org.opensearch.performanceanalyzer.metrics.MetricsConfiguration; import org.opensearch.performanceanalyzer.metrics.MetricsProcessor; import org.opensearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics; +import org.opensearch.performanceanalyzer.rca.framework.metrics.ExceptionsAndErrors; import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics; @SuppressWarnings("unchecked") @@ -57,6 +58,7 @@ public void collectMetrics(long startTime) { return; } + long mCurrT = System.currentTimeMillis(); /* pendingTasks API returns object of PendingClusterTask which contains insertOrder, priority, source, timeInQueue. Example : @@ -98,13 +100,17 @@ public void collectMetrics(long startTime) { startTime, PerformanceAnalyzerMetrics.CLUSTER_MANAGER_CURRENT, PerformanceAnalyzerMetrics.CLUSTER_MANAGER_META_DATA); - } catch (Exception ex) { PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( - WriterMetrics.CLUSTER_MANAGER_METRICS_ERROR, "", 1); + WriterMetrics.CLUSTER_MANAGER_SERVICE_METRICS_COLLECTOR_EXECUTION_TIME, + "", + System.currentTimeMillis() - mCurrT); + } catch (Exception ex) { LOG.debug( "Exception in Collecting ClusterManager Metrics: {} for startTime {}", () -> ex.toString(), () -> startTime); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.CLUSTER_MANAGER_METRICS_ERROR, "", 1); } } diff --git a/src/main/java/org/opensearch/performanceanalyzer/collectors/ClusterManagerThrottlingMetricsCollector.java b/src/main/java/org/opensearch/performanceanalyzer/collectors/ClusterManagerThrottlingMetricsCollector.java index ee60f2ea..4a649b01 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/collectors/ClusterManagerThrottlingMetricsCollector.java +++ b/src/main/java/org/opensearch/performanceanalyzer/collectors/ClusterManagerThrottlingMetricsCollector.java @@ -85,12 +85,12 @@ public void collectMetrics(long startTime) { System.currentTimeMillis() - mCurrT); } catch (Exception ex) { - PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( - ExceptionsAndErrors.CLUSTER_MANAGER_THROTTLING_COLLECTOR_ERROR, "", 1); LOG.debug( "Exception in Collecting ClusterManager Throttling Metrics: {} for startTime {}", () -> ex.toString(), () -> startTime); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.CLUSTER_MANAGER_THROTTLING_COLLECTOR_ERROR, "", 1); } } diff --git a/src/main/java/org/opensearch/performanceanalyzer/collectors/ElectionTermCollector.java b/src/main/java/org/opensearch/performanceanalyzer/collectors/ElectionTermCollector.java index b144d920..a911e079 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/collectors/ElectionTermCollector.java +++ b/src/main/java/org/opensearch/performanceanalyzer/collectors/ElectionTermCollector.java @@ -80,14 +80,12 @@ public void collectMetrics(long startTime) { System.currentTimeMillis() - mCurrT); } catch (Exception ex) { - PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( - ExceptionsAndErrors.ELECTION_TERM_COLLECTOR_ERROR, - "", - System.currentTimeMillis() - mCurrT); LOG.debug( "Exception in Collecting Election term Metrics: {} for startTime {}", () -> ex.toString(), () -> startTime); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.ELECTION_TERM_COLLECTOR_ERROR, "", 1); } } diff --git a/src/main/java/org/opensearch/performanceanalyzer/collectors/FaultDetectionMetricsCollector.java b/src/main/java/org/opensearch/performanceanalyzer/collectors/FaultDetectionMetricsCollector.java index 5b9fffe4..4a57a0fd 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/collectors/FaultDetectionMetricsCollector.java +++ b/src/main/java/org/opensearch/performanceanalyzer/collectors/FaultDetectionMetricsCollector.java @@ -123,14 +123,12 @@ public void collectMetrics(long startTime) { "", System.currentTimeMillis() - mCurrT); } catch (Exception ex) { - PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( - ExceptionsAndErrors.FAULT_DETECTION_COLLECTOR_ERROR, - "", - System.currentTimeMillis() - mCurrT); LOG.debug( "Exception in Collecting FaultDetection Metrics: {} for startTime {}", () -> ex.toString(), () -> startTime); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.FAULT_DETECTION_COLLECTOR_ERROR, "", 1); } } diff --git a/src/main/java/org/opensearch/performanceanalyzer/collectors/NodeDetailsCollector.java b/src/main/java/org/opensearch/performanceanalyzer/collectors/NodeDetailsCollector.java index 1b97fcca..885e0ad7 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/collectors/NodeDetailsCollector.java +++ b/src/main/java/org/opensearch/performanceanalyzer/collectors/NodeDetailsCollector.java @@ -14,6 +14,7 @@ import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.performanceanalyzer.OpenSearchResources; +import org.opensearch.performanceanalyzer.PerformanceAnalyzerApp; import org.opensearch.performanceanalyzer.config.overrides.ConfigOverridesHelper; import org.opensearch.performanceanalyzer.config.overrides.ConfigOverridesWrapper; import org.opensearch.performanceanalyzer.metrics.AllMetrics.NodeDetailColumns; @@ -21,6 +22,8 @@ import org.opensearch.performanceanalyzer.metrics.MetricsConfiguration; import org.opensearch.performanceanalyzer.metrics.MetricsProcessor; import org.opensearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics; +import org.opensearch.performanceanalyzer.rca.framework.metrics.ExceptionsAndErrors; +import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics; public class NodeDetailsCollector extends PerformanceAnalyzerMetricsCollector implements MetricsProcessor { @@ -43,6 +46,8 @@ public void collectMetrics(long startTime) { return; } + long mCurrT = System.currentTimeMillis(); + StringBuilder value = new StringBuilder(); value.append(PerformanceAnalyzerMetrics.getJsonCurrentMilliSeconds()) .append(PerformanceAnalyzerMetrics.sMetricNewLineDelimitor); @@ -62,6 +67,8 @@ public void collectMetrics(long startTime) { } } catch (IOException ioe) { LOG.error("Unable to serialize rca config overrides.", ioe); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.CONFIG_OVERRIDES_SER_FAILED, "", 1); } value.append(PerformanceAnalyzerMetrics.sMetricNewLineDelimitor); @@ -87,6 +94,10 @@ public void collectMetrics(long startTime) { discoveryNodeIterator.next(), value, localNodeID, clusterManagerNode); } saveMetricValues(value.toString(), startTime); + PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( + WriterMetrics.NODE_DETAILS_COLLECTOR_EXECUTION_TIME, + "", + System.currentTimeMillis() - mCurrT); } private void addMetricsToStringBuilder( diff --git a/src/main/java/org/opensearch/performanceanalyzer/collectors/NodeStatsAllShardsMetricsCollector.java b/src/main/java/org/opensearch/performanceanalyzer/collectors/NodeStatsAllShardsMetricsCollector.java index 14899de0..8ee760a2 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/collectors/NodeStatsAllShardsMetricsCollector.java +++ b/src/main/java/org/opensearch/performanceanalyzer/collectors/NodeStatsAllShardsMetricsCollector.java @@ -29,6 +29,7 @@ import org.opensearch.performanceanalyzer.metrics.MetricsProcessor; import org.opensearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics; import org.opensearch.performanceanalyzer.rca.framework.metrics.ExceptionsAndErrors; +import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics; import org.opensearch.performanceanalyzer.util.Utils; /** @@ -146,6 +147,8 @@ public void collectMetrics(long startTime) { return; } + long mCurrT = System.currentTimeMillis(); + try { populateCurrentShards(); populatePerShardStats(indicesService); @@ -173,6 +176,11 @@ public void collectMetrics(long startTime) { new NodeStatsMetricsAllShardsPerCollectionStatus(currentShardStats); populateDiffMetricValue( prevValue, currValue, startTime, shardId.getIndexName(), shardId.id()); + + PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( + WriterMetrics.NODE_STATS_ALL_SHARDS_METRICS_COLLECTOR_EXECUTION_TIME, + "", + System.currentTimeMillis() - mCurrT); } } catch (Exception ex) { LOG.debug( diff --git a/src/main/java/org/opensearch/performanceanalyzer/collectors/NodeStatsFixedShardsMetricsCollector.java b/src/main/java/org/opensearch/performanceanalyzer/collectors/NodeStatsFixedShardsMetricsCollector.java index 6187d57a..f6f78c88 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/collectors/NodeStatsFixedShardsMetricsCollector.java +++ b/src/main/java/org/opensearch/performanceanalyzer/collectors/NodeStatsFixedShardsMetricsCollector.java @@ -29,13 +29,14 @@ import org.opensearch.performanceanalyzer.metrics.MetricsProcessor; import org.opensearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics; import org.opensearch.performanceanalyzer.rca.framework.metrics.ExceptionsAndErrors; +import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics; import org.opensearch.performanceanalyzer.util.Utils; /** * This collector collects metrics for fixed number of shards on a node in a single run. These - * metrics are heavy weight metrics which have performance impacts on the performance of the node. - * The number of shards is set via a cluster settings api. The parameter to set is - * shardsPerCollection. The metrics will be populated for these many shards in a single run. + * metrics are heavy which have performance impacts on the performance of the node. The number of + * shards is set via a cluster settings api. The parameter to set is shardsPerCollection. The + * metrics will be populated for these many shards in a single run. */ @SuppressWarnings("unchecked") public class NodeStatsFixedShardsMetricsCollector extends PerformanceAnalyzerMetricsCollector @@ -166,6 +167,8 @@ public void collectMetrics(long startTime) { return; } + long mCurrT = System.currentTimeMillis(); + try { // reach the end of current shardId list. retrieve new shard list from IndexService if (!currentShardsIter.hasNext()) { @@ -203,6 +206,11 @@ public void collectMetrics(long startTime) { startTime, currentIndexShardStats.getShardId().getIndexName(), String.valueOf(currentIndexShardStats.getShardId().id())); + + PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( + WriterMetrics.NODE_STATS_FIXED_SHARDS_METRICS_COLLECTOR_EXECUTION_TIME, + "", + System.currentTimeMillis() - mCurrT); } } } catch (Exception ex) { diff --git a/src/main/java/org/opensearch/performanceanalyzer/collectors/ShardIndexingPressureMetricsCollector.java b/src/main/java/org/opensearch/performanceanalyzer/collectors/ShardIndexingPressureMetricsCollector.java index 897930f8..985a9358 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/collectors/ShardIndexingPressureMetricsCollector.java +++ b/src/main/java/org/opensearch/performanceanalyzer/collectors/ShardIndexingPressureMetricsCollector.java @@ -232,6 +232,11 @@ public void collectMetrics(long startTime) { } catch (JsonProcessingException | ParseException e) { LOG.debug( "Exception raised while parsing string to json object. Skipping IndexingPressureMetricsCollector"); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR + .updateStat( + ExceptionsAndErrors.JSON_PARSER_ERROR, + getCollectorName(), + 1); } }); } @@ -244,12 +249,12 @@ public void collectMetrics(long startTime) { System.currentTimeMillis() - mCurrT); } } catch (Exception ex) { - PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( - ExceptionsAndErrors.SHARD_INDEXING_PRESSURE_COLLECTOR_ERROR, "", 1); LOG.debug( "Exception in Collecting Shard Indexing Pressure Metrics: {} for startTime {}", () -> ex.toString(), () -> startTime); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.SHARD_INDEXING_PRESSURE_COLLECTOR_ERROR, "", 1); } } diff --git a/src/main/java/org/opensearch/performanceanalyzer/collectors/ShardStateCollector.java b/src/main/java/org/opensearch/performanceanalyzer/collectors/ShardStateCollector.java index 9616a63f..594eef73 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/collectors/ShardStateCollector.java +++ b/src/main/java/org/opensearch/performanceanalyzer/collectors/ShardStateCollector.java @@ -99,12 +99,12 @@ public void collectMetrics(long startTime) { "", System.currentTimeMillis() - mCurrT); } catch (Exception ex) { - PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( - ExceptionsAndErrors.SHARD_STATE_COLLECTOR_ERROR, "", 1); LOG.debug( "Exception in Collecting Shard Metrics: {} for startTime {}", () -> ex.toString(), () -> startTime); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.SHARD_STATE_COLLECTOR_ERROR, "", 1); } } diff --git a/src/main/java/org/opensearch/performanceanalyzer/collectors/ThreadPoolMetricsCollector.java b/src/main/java/org/opensearch/performanceanalyzer/collectors/ThreadPoolMetricsCollector.java index c5b2517f..d12a3263 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/collectors/ThreadPoolMetricsCollector.java +++ b/src/main/java/org/opensearch/performanceanalyzer/collectors/ThreadPoolMetricsCollector.java @@ -18,11 +18,14 @@ import org.apache.logging.log4j.Logger; import org.opensearch.common.util.concurrent.SizeBlockingQueue; import org.opensearch.performanceanalyzer.OpenSearchResources; +import org.opensearch.performanceanalyzer.PerformanceAnalyzerApp; import org.opensearch.performanceanalyzer.metrics.AllMetrics.ThreadPoolDimension; import org.opensearch.performanceanalyzer.metrics.AllMetrics.ThreadPoolValue; import org.opensearch.performanceanalyzer.metrics.MetricsConfiguration; import org.opensearch.performanceanalyzer.metrics.MetricsProcessor; import org.opensearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics; +import org.opensearch.performanceanalyzer.rca.framework.metrics.ExceptionsAndErrors; +import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPoolStats.Stats; @@ -47,6 +50,8 @@ public void collectMetrics(long startTime) { return; } + long mCurrT = System.currentTimeMillis(); + Iterator statsIterator = OpenSearchResources.INSTANCE.getThreadPool().stats().iterator(); value.setLength(0); @@ -102,6 +107,12 @@ public void collectMetrics(long startTime) { } } catch (Exception e) { LOG.warn("Fail to read queue capacity via reflection"); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR + .updateStat( + ExceptionsAndErrors + .THREADPOOL_METRICS_COLLECTOR_ERROR, + "", + 1); } return -1; }); @@ -118,6 +129,10 @@ public void collectMetrics(long startTime) { .append(threadPoolStatus.serialize()); } saveMetricValues(value.toString(), startTime); + PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( + WriterMetrics.THREADPOOL_METRICS_COLLECTOR_EXECUTION_TIME, + "", + System.currentTimeMillis() - mCurrT); } @Override diff --git a/src/main/java/org/opensearch/performanceanalyzer/config/setting/ClusterSettingsManager.java b/src/main/java/org/opensearch/performanceanalyzer/config/setting/ClusterSettingsManager.java index 6fedabf1..70279b76 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/config/setting/ClusterSettingsManager.java +++ b/src/main/java/org/opensearch/performanceanalyzer/config/setting/ClusterSettingsManager.java @@ -24,7 +24,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.performanceanalyzer.OpenSearchResources; import org.opensearch.performanceanalyzer.PerformanceAnalyzerApp; -import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics; +import org.opensearch.performanceanalyzer.rca.framework.metrics.ExceptionsAndErrors; /** * Class that handles updating cluster settings, and notifying the listeners when cluster settings @@ -224,8 +224,8 @@ private void callIntSettingListeners(final Setting setting, int setting } } catch (Exception ex) { LOG.error(ex); - PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( - WriterMetrics.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); } } @@ -246,8 +246,8 @@ private void callStringSettingListeners(final Setting setting, String se } } catch (Exception ex) { LOG.error(ex); - PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( - WriterMetrics.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); } } /** Class that handles response to GET /_cluster/settings */ diff --git a/src/main/java/org/opensearch/performanceanalyzer/listener/PerformanceAnalyzerSearchListener.java b/src/main/java/org/opensearch/performanceanalyzer/listener/PerformanceAnalyzerSearchListener.java index 93c702b4..8af60fa1 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/listener/PerformanceAnalyzerSearchListener.java +++ b/src/main/java/org/opensearch/performanceanalyzer/listener/PerformanceAnalyzerSearchListener.java @@ -16,7 +16,7 @@ import org.opensearch.performanceanalyzer.metrics.MetricsProcessor; import org.opensearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics; import org.opensearch.performanceanalyzer.metrics.ThreadIDUtil; -import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics; +import org.opensearch.performanceanalyzer.rca.framework.metrics.ExceptionsAndErrors; import org.opensearch.search.internal.SearchContext; public class PerformanceAnalyzerSearchListener @@ -47,8 +47,8 @@ public void onPreQueryPhase(SearchContext searchContext) { getSearchListener().preQueryPhase(searchContext); } catch (Exception ex) { LOG.error(ex); - PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( - WriterMetrics.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); } } @@ -58,8 +58,8 @@ public void onQueryPhase(SearchContext searchContext, long tookInNanos) { getSearchListener().queryPhase(searchContext, tookInNanos); } catch (Exception ex) { LOG.error(ex); - PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( - WriterMetrics.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); } } @@ -69,8 +69,8 @@ public void onFailedQueryPhase(SearchContext searchContext) { getSearchListener().failedQueryPhase(searchContext); } catch (Exception ex) { LOG.error(ex); - PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( - WriterMetrics.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); } } @@ -80,8 +80,8 @@ public void onPreFetchPhase(SearchContext searchContext) { getSearchListener().preFetchPhase(searchContext); } catch (Exception ex) { LOG.error(ex); - PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( - WriterMetrics.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); } } @@ -91,8 +91,8 @@ public void onFetchPhase(SearchContext searchContext, long tookInNanos) { getSearchListener().fetchPhase(searchContext, tookInNanos); } catch (Exception ex) { LOG.error(ex); - PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( - WriterMetrics.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); } } @@ -102,8 +102,8 @@ public void onFailedFetchPhase(SearchContext searchContext) { getSearchListener().failedFetchPhase(searchContext); } catch (Exception ex) { LOG.error(ex); - PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( - WriterMetrics.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); } } diff --git a/src/main/java/org/opensearch/performanceanalyzer/transport/PerformanceAnalyzerTransportRequestHandler.java b/src/main/java/org/opensearch/performanceanalyzer/transport/PerformanceAnalyzerTransportRequestHandler.java index abf2dfff..b9c8db7a 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/transport/PerformanceAnalyzerTransportRequestHandler.java +++ b/src/main/java/org/opensearch/performanceanalyzer/transport/PerformanceAnalyzerTransportRequestHandler.java @@ -13,7 +13,7 @@ import org.opensearch.action.support.replication.TransportReplicationAction.ConcreteShardRequest; import org.opensearch.performanceanalyzer.PerformanceAnalyzerApp; import org.opensearch.performanceanalyzer.config.PerformanceAnalyzerController; -import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics; +import org.opensearch.performanceanalyzer.rca.framework.metrics.ExceptionsAndErrors; import org.opensearch.tasks.Task; import org.opensearch.transport.TransportChannel; import org.opensearch.transport.TransportRequest; @@ -93,8 +93,8 @@ private TransportChannel getShardBulkChannel(T request, TransportChannel channel LOG.error(ex); logOnce = true; } - PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( - WriterMetrics.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.OPENSEARCH_REQUEST_INTERCEPTOR_ERROR, "", 1); } return performanceanalyzerChannel; diff --git a/src/main/java/org/opensearch/performanceanalyzer/writer/EventLogQueueProcessor.java b/src/main/java/org/opensearch/performanceanalyzer/writer/EventLogQueueProcessor.java index f4132043..25a5bc28 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/writer/EventLogQueueProcessor.java +++ b/src/main/java/org/opensearch/performanceanalyzer/writer/EventLogQueueProcessor.java @@ -25,7 +25,7 @@ import org.opensearch.performanceanalyzer.http_action.config.PerformanceAnalyzerConfigAction; import org.opensearch.performanceanalyzer.metrics.MetricsConfiguration; import org.opensearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics; -import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics; +import org.opensearch.performanceanalyzer.rca.framework.metrics.ExceptionsAndErrors; import org.opensearch.performanceanalyzer.reader_writer_shared.Event; import org.opensearch.performanceanalyzer.reader_writer_shared.EventLogFileHandler; @@ -144,8 +144,8 @@ public void purgeQueueAndPersist() { } else { // increment stale_metrics count when metrics to be collected is falling behind the // current bucket - PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat( - WriterMetrics.STALE_METRICS, "", 1); + PerformanceAnalyzerApp.ERRORS_AND_EXCEPTIONS_AGGREGATOR.updateStat( + ExceptionsAndErrors.STALE_METRICS, "", 1); } }