diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/metrics/AllMetrics.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/metrics/AllMetrics.java
index 6e479f4e9..aad4d465d 100644
--- a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/metrics/AllMetrics.java
+++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/metrics/AllMetrics.java
@@ -1056,6 +1056,68 @@ public static class Constants {
             public static final String SHARD_OP_COUNT_VALUE = "ShardEvents";
         }
     }
+    /*
+     * column names of FollowerCheck_Latency table
+     * SourceNodeID | TargetNodeID | sum | avg | min | max
+     *
+     * column names of LeaderCheck_Latency table
+     * SourceNodeID | TargetNodeID | sum | avg | min | max
+     *
+     * column names of FollowerCheck_Failure table
+     * SourceNodeID | TargetNodeID | sum | avg | min | max
+     *
+     * column names of LeaderCheck_Failure table
+     * SourceNodeID | TargetNodeID | sum | avg | min | max
+     *
+     * Example:
+     * chMe07whRwGrOAqyLTP9vw|hgi7an4RwGrOAqyLTP9vw|1.0|0.2|0.0|1.0
+     */
+
+    /**
+     * Names of the fault detection metrics: one latency metric and one failure metric for each
+     * of the follower check and the leader check. {@link #toString()} returns the metric name
+     * exactly as declared in {@link Constants}.
+     */
+    public enum FaultDetectionMetric implements MetricValue {
+        FOLLOWER_CHECK_LATENCY(Constants.FOLLOWER_CHECK_LATENCY),
+        LEADER_CHECK_LATENCY(Constants.LEADER_CHECK_LATENCY),
+        FOLLOWER_CHECK_FAILURE(Constants.FOLLOWER_CHECK_FAILURE),
+        LEADER_CHECK_FAILURE(Constants.LEADER_CHECK_FAILURE);
+
+        private final String value;
+
+        FaultDetectionMetric(String value) {
+            this.value = value;
+        }
+
+        /** Returns the metric name string, not the enum constant name. */
+        @Override
+        public String toString() {
+            return value;
+        }
+
+        /** String names of the fault detection metrics. */
+        public static class Constants {
+            public static final String FOLLOWER_CHECK_LATENCY = "FollowerCheck_Latency";
+            public static final String LEADER_CHECK_LATENCY = "LeaderCheck_Latency";
+            public static final String FOLLOWER_CHECK_FAILURE = "FollowerCheck_Failure";
+            public static final String LEADER_CHECK_FAILURE = "LeaderCheck_Failure";
+        }
+    }
+
+    /**
+     * Dimensions attached to every fault detection metric: the node that issued the check and
+     * the node that was checked. {@link #toString()} returns the dimension (column) name.
+     */
+    public enum FaultDetectionDimension implements MetricDimension {
+        SOURCE_NODE_ID(Constants.SOURCE_NODE_ID),
+        TARGET_NODE_ID(Constants.TARGET_NODE_ID);
+
+        private final String value;
+
+        FaultDetectionDimension(String value) {
+            this.value = value;
+        }
+
+        /** Returns the dimension name string, not the enum constant name. */
+        @Override
+        public String toString() {
+            return value;
+        }
+
+        /** String names of the fault detection dimensions. */
+        public static class Constants {
+            public static final String SOURCE_NODE_ID = "SourceNodeID";
+            public static final String TARGET_NODE_ID = "TargetNodeID";
+        }
+    }
 
     public enum CommonDimension implements MetricDimension {
         INDEX_NAME(Constants.INDEX_NAME_VALUE),
diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/metrics/PerformanceAnalyzerMetrics.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/metrics/PerformanceAnalyzerMetrics.java
index 37f20f17c..3feb2cd2d 100644
--- a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/metrics/PerformanceAnalyzerMetrics.java
+++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/metrics/PerformanceAnalyzerMetrics.java
@@ -43,6 +43,7 @@ public
class PerformanceAnalyzerMetrics { public static final String sShardFetchPath = "shardfetch"; public static final String sShardQueryPath = "shardquery"; public static final String sMasterTaskPath = "master_task"; + public static final String sFaultDetection = "fault_detection"; public static final String sHttpPath = "http"; public static final String sOSPath = "os_metrics"; public static final String sHeapPath = "heap_metrics"; @@ -62,6 +63,9 @@ public class PerformanceAnalyzerMetrics { public static final String MASTER_CURRENT = "current"; public static final String MASTER_META_DATA = "metadata"; public static final String METRIC_CURRENT_TIME = "current_time"; + public static final String FAULT_DETECTION_FOLLOWER_CHECK = "follower_check"; + public static final String FAULT_DETECTION_LEADER_CHECK = "leader_check"; + public static final String FAULT = "fault"; public static final int QUEUE_SIZE = PluginSettings.instance().getWriterQueueSize(); // TODO: Comeup with a more sensible number. 
diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/model/MetricsModel.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/model/MetricsModel.java
index ea86ef15e..f899dd81d 100644
--- a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/model/MetricsModel.java
+++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/model/MetricsModel.java
@@ -340,7 +340,26 @@ public class MetricsModel {
                 new MetricAttributes(
                         MetricUnits.MILLISECOND.toString(), AllMetrics.MasterMetricDimensions.values()));
 
+        // Fault Detection Metrics: latency is registered in milliseconds, failure as a count.
+        allMetricsInitializer.put(
+                AllMetrics.FaultDetectionMetric.FOLLOWER_CHECK_LATENCY.toString(),
+                new MetricAttributes(
+                        MetricUnits.MILLISECOND.toString(), AllMetrics.FaultDetectionDimension.values()));
+
+        allMetricsInitializer.put(
+                AllMetrics.FaultDetectionMetric.LEADER_CHECK_LATENCY.toString(),
+                new MetricAttributes(
+                        MetricUnits.MILLISECOND.toString(), AllMetrics.FaultDetectionDimension.values()));
+
+        allMetricsInitializer.put(
+                AllMetrics.FaultDetectionMetric.FOLLOWER_CHECK_FAILURE.toString(),
+                new MetricAttributes(
+                        MetricUnits.COUNT.toString(), AllMetrics.FaultDetectionDimension.values()));
+
+        allMetricsInitializer.put(
+                AllMetrics.FaultDetectionMetric.LEADER_CHECK_FAILURE.toString(),
+                new MetricAttributes(
+                        MetricUnits.COUNT.toString(), AllMetrics.FaultDetectionDimension.values()));
+
         // Master Throttling Metrics
         allMetricsInitializer.put(
                 AllMetrics.MasterThrottlingValue.MASTER_THROTTLED_PENDING_TASK_COUNT.toString(),
                 new MetricAttributes(MetricUnits.COUNT.toString(), EmptyDimension.values()));
diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/metrics/ExceptionsAndErrors.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/metrics/ExceptionsAndErrors.java
index aa27bf81b..55100a0ae 100644
---
a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/metrics/ExceptionsAndErrors.java +++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/metrics/ExceptionsAndErrors.java @@ -51,7 +51,9 @@ public enum ExceptionsAndErrors implements MeasurementSet { SHARD_STATE_COLLECTOR_ERROR("ShardStateCollectorError"), - MASTER_THROTTLING_COLLECTOR_ERROR("MasterThrottlingMetricsCollector"); + MASTER_THROTTLING_COLLECTOR_ERROR("MasterThrottlingMetricsCollector"), + + FAULT_DETECTION_COLLECTOR_ERROR("FaultDetectionMetricsCollector"); /** What we want to appear as the metric name. */ private String name; diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/metrics/ReaderMetrics.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/metrics/ReaderMetrics.java index 0cff09434..056bd3a61 100644 --- a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/metrics/ReaderMetrics.java +++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/metrics/ReaderMetrics.java @@ -67,8 +67,10 @@ public enum ReaderMetrics implements MeasurementSet { * Amount of time taken to emit Master throttling metrics. */ MASTER_THROTTLING_EMITTER_EXECUTION_TIME("MasterThrottlingEmitterExecutionTime", "millis", - Arrays.asList(Statistics.MAX, Statistics.MIN, Statistics.MEAN, Statistics.COUNT, Statistics.SUM)); + Arrays.asList(Statistics.MAX, Statistics.MIN, Statistics.MEAN, Statistics.COUNT, Statistics.SUM)), + FAULT_DETECTION_METRICS_EMITTER_EXECUTION_TIME("FaultDetectionMetricsEmitterExecutionTime", "millis", + Arrays.asList(Statistics.MAX, Statistics.MIN, Statistics.MEAN, Statistics.COUNT, Statistics.SUM)); /** What we want to appear as the metric name. 
*/ private String name; diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/metrics/WriterMetrics.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/metrics/WriterMetrics.java index 11c542bf9..6277c3861 100644 --- a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/metrics/WriterMetrics.java +++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/framework/metrics/WriterMetrics.java @@ -30,7 +30,10 @@ public enum WriterMetrics implements MeasurementSet { Statistics.MAX, Statistics.MIN, Statistics.MEAN, Statistics.COUNT, Statistics.SUM)), MASTER_THROTTLING_COLLECTOR_NOT_AVAILABLE("MasterThrottlingCollectorNotAvailable", "count", Arrays.asList( - Statistics.MAX, Statistics.MIN, Statistics.MEAN, Statistics.COUNT, Statistics.SUM)); + Statistics.MAX, Statistics.MIN, Statistics.MEAN, Statistics.COUNT, Statistics.SUM)), + + FAULT_DETECTION_COLLECTOR_EXECUTION_TIME("FaultDetectionCollectorExecutionTime", "millis", Arrays.asList( + Statistics.MAX, Statistics.MIN, Statistics.MEAN, Statistics.COUNT, Statistics.SUM)); /** What we want to appear as the metric name. */ private String name; diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/FaultDetectionMetricsProcessor.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/FaultDetectionMetricsProcessor.java new file mode 100644 index 000000000..8e1351909 --- /dev/null +++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/FaultDetectionMetricsProcessor.java @@ -0,0 +1,171 @@ +/* + * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * or in the "license" file accompanying this file. This file is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+package com.amazon.opendistro.elasticsearch.performanceanalyzer.reader;
+
+import com.amazon.opendistro.elasticsearch.performanceanalyzer.collectors.StatExceptionCode;
+import com.amazon.opendistro.elasticsearch.performanceanalyzer.collectors.StatsCollector;
+import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.CommonMetric;
+import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.FaultDetectionDimension;
+import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics;
+import com.amazon.opendistro.elasticsearch.performanceanalyzer.reader_writer_shared.Event;
+import java.io.File;
+import java.sql.Connection;
+import java.util.Map;
+import java.util.NavigableMap;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.jooq.BatchBindStep;
+
+/**
+ * Event processor for fault detection events (keys under
+ * {@link PerformanceAnalyzerMetrics#sFaultDetection}). Every start and finish event is bound as
+ * one row of a batch insert into the {@link FaultDetectionMetricsSnapshot} of the current window.
+ */
+public class FaultDetectionMetricsProcessor implements EventProcessor {
+    private static final Logger LOG = LogManager.getLogger(FaultDetectionMetricsProcessor.class);
+    private final FaultDetectionMetricsSnapshot faultDetectionMetricsSnapshot;
+    private long startTime;
+    private long endTime;
+    private BatchBindStep handle;
+
+    /**
+     * @param faultDetectionMetricsSnapshot snapshot that receives the rows produced by this
+     *     processor
+     */
+    public FaultDetectionMetricsProcessor(FaultDetectionMetricsSnapshot faultDetectionMetricsSnapshot) {
+        this.faultDetectionMetricsSnapshot = faultDetectionMetricsSnapshot;
+    }
+
+    /**
+     * Returns a processor bound to the snapshot of the given window. When the map has no
+     * snapshot for that window yet, a new one is created, seeded with the in-flight requests of
+     * the latest snapshot in the map (if any) and registered in the map.
+     *
+     * @param currWindowStartTime start time of the window being processed; used as the map key
+     * @param conn connection the new snapshot table is created on
+     * @param faultDetectionMetricsMap snapshots keyed by window start time
+     * @return a processor writing into the snapshot for {@code currWindowStartTime}
+     */
+    static FaultDetectionMetricsProcessor buildFaultDetectionMetricsProcessor(
+            long currWindowStartTime,
+            Connection conn,
+            NavigableMap<Long, FaultDetectionMetricsSnapshot> faultDetectionMetricsMap) {
+
+        FaultDetectionMetricsSnapshot snapshot = faultDetectionMetricsMap.get(currWindowStartTime);
+        if (snapshot == null) {
+            snapshot = new FaultDetectionMetricsSnapshot(conn, currWindowStartTime);
+            // Read the previous snapshot before the new one is put into the map.
+            Map.Entry<Long, FaultDetectionMetricsSnapshot> entry = faultDetectionMetricsMap.lastEntry();
+            if (entry != null) {
+                snapshot.rolloverInFlightRequests(entry.getValue());
+            }
+            faultDetectionMetricsMap.put(currWindowStartTime, snapshot);
+        }
+        return new FaultDetectionMetricsProcessor(snapshot);
+    }
+
+    /** Records the window bounds and opens a fresh batch insert on the snapshot. */
+    @Override
+    public void initializeProcessing(long startTime, long endTime) {
+        this.startTime = startTime;
+        this.endTime = endTime;
+        this.handle = faultDetectionMetricsSnapshot.startBatchPut();
+    }
+
+    /** Executes whatever is still pending in the batch. */
+    @Override
+    public void finalizeProcessing() {
+        if (handle.size() > 0) {
+            handle.execute();
+        }
+        // fetchAll() runs a SELECT over the whole window table, so only issue it when the
+        // result is actually going to be logged.
+        if (LOG.isDebugEnabled()) {
+            LOG.debug("Final Fault Detection request metrics {}", faultDetectionMetricsSnapshot.fetchAll());
+        }
+    }
+
+    /**
+     * Dispatches a fault detection event to the start or finish handler based on the last
+     * segment of its key. Events whose first key segment is not the fault detection path, or
+     * whose last segment is neither start nor finish, are ignored.
+     *
+     * @param event event read from the metrics location
+     */
+    @Override
+    public void processEvent(Event event) {
+        // On Windows the separator is a backslash, which has to be escaped for the split() regex.
+        String[] keyItems = event.key.split(File.separatorChar == '\\' ? "\\\\" : File.separator);
+        // NOTE(review): with assertions disabled, a key with fewer than four segments reaches
+        // keyItems[3] below and fails with ArrayIndexOutOfBoundsException - confirm whether an
+        // explicit length check is wanted here.
+        assert keyItems.length == 4;
+        if (keyItems[0].equals(PerformanceAnalyzerMetrics.sFaultDetection)) {
+            if (keyItems[3].equals(PerformanceAnalyzerMetrics.START_FILE_NAME)) {
+                emitStartMetric(event, keyItems);
+            } else if (keyItems[3].equals(PerformanceAnalyzerMetrics.FINISH_FILE_NAME)) {
+                emitFinishMetric(event, keyItems);
+            }
+        }
+    }
+
+    /** Accepts every event whose key contains the fault detection path segment. */
+    @Override
+    public boolean shouldProcessEvent(Event event) {
+        return event.key.contains(PerformanceAnalyzerMetrics.sFaultDetection);
+    }
+
+    /** Flushes the batch and starts a new one once it has grown past {@code BATCH_LIMIT}. */
+    @Override
+    public void commitBatchIfRequired() {
+        if (handle.size() > BATCH_LIMIT) {
+            handle.execute();
+            handle = faultDetectionMetricsSnapshot.startBatchPut();
+        }
+    }
+
+    /**
+     * Binds the start row of a fault detection check.
+     *
+     * <p>A keyItem is of the form : [fault_detection, follower_check, 76532, start]
+     * Example value part of the entry is:
+     * current_time:1566413979979
+     * StartTime:1566413987986
+     * SourceNodeID:g52i9a93a762cd59dda8d3379b09a752a
+     * TargetNodeID:b2a5a93a762cd59dda8d3379b09a752a
+     * $
+     *
+     * @param entry fault detection event.
+     * @param keyItems keys extracted from metrics path
+     * @throws NumberFormatException if the StartTime value is missing or not a number
+     */
+    private void emitStartMetric(Event entry, String[] keyItems) {
+        Map<String, String> keyValueMap = ReaderMetricsProcessor.extractEntryData(entry.value);
+
+        String sourceNodeId = keyValueMap.get(FaultDetectionDimension.SOURCE_NODE_ID.toString());
+        String targetNodeId = keyValueMap.get(FaultDetectionDimension.TARGET_NODE_ID.toString());
+        String startTimeVal = keyValueMap.get(CommonMetric.START_TIME.toString());
+
+        try {
+            long st = Long.parseLong(startTimeVal);
+
+            String faultDetectionType = keyItems[1];
+            String rid = keyItems[2];
+            // Bound in table column order: rid, source, target, type, st, et, fault.
+            // A start row has no end time and a fault value of 0.
+            handle.bind(rid, sourceNodeId, targetNodeId, faultDetectionType, st, null, 0);
+        } catch (NumberFormatException e) {
+            LOG.error("Unable to parse string. StartTime:{}", startTimeVal);
+            StatsCollector.instance().logException(StatExceptionCode.READER_PARSER_ERROR);
+            throw e;
+        }
+    }
+
+    /**
+     * Binds the finish row of a fault detection check.
+     *
+     * <p>A keyItem is of the form : [fault_detection, follower_check, 76532, finish]
+     * Example value part of the entry is:
+     * current_time:1566413979979
+     * FinishTime:1566413987986
+     * SourceNodeID:g52i9a93a762cd59dda8d3379b09a752a
+     * TargetNodeID:b2a5a93a762cd59dda8d3379b09a752a
+     * fault:0
+     * $
+     *
+     * @param entry fault detection event.
+     * @param keyItems keys extracted from metrics path
+     * @throws NumberFormatException if the FinishTime or fault value is missing or not a number
+     */
+    private void emitFinishMetric(Event entry, String[] keyItems) {
+        Map<String, String> keyValueMap = ReaderMetricsProcessor.extractEntryData(entry.value);
+
+        String sourceNodeId = keyValueMap.get(FaultDetectionDimension.SOURCE_NODE_ID.toString());
+        String targetNodeId = keyValueMap.get(FaultDetectionDimension.TARGET_NODE_ID.toString());
+        String finishTimeVal = keyValueMap.get(CommonMetric.FINISH_TIME.toString());
+        String faultString = keyValueMap.get(PerformanceAnalyzerMetrics.FAULT);
+
+        try {
+            long et = Long.parseLong(finishTimeVal);
+            int fault = Integer.parseInt(faultString);
+
+            String faultDetectionType = keyItems[1];
+            String rid = keyItems[2];
+            // Bound in table column order: rid, source, target, type, st, et, fault.
+            // A finish row carries no start time.
+            handle.bind(rid, sourceNodeId, targetNodeId, faultDetectionType, null, et, fault);
+        } catch (NumberFormatException e) {
+            LOG.error("Unable to parse string. FinishTime:{}, Fault:{}", finishTimeVal, faultString);
+            StatsCollector.instance().logException(StatExceptionCode.READER_PARSER_ERROR);
+            throw e;
+        }
+    }
+}
diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/FaultDetectionMetricsSnapshot.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/FaultDetectionMetricsSnapshot.java
new file mode 100644
index 000000000..bc5015a81
--- /dev/null
+++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/FaultDetectionMetricsSnapshot.java
@@ -0,0 +1,260 @@
+/*
+ * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * or in the "license" file accompanying this file. This file is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+package com.amazon.opendistro.elasticsearch.performanceanalyzer.reader;
+
+import com.amazon.opendistro.elasticsearch.performanceanalyzer.DBUtils;
+import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.FaultDetectionDimension;
+import com.amazon.opendistro.elasticsearch.performanceanalyzer.metricsdb.MetricsDB;
+import com.google.common.annotations.VisibleForTesting;
+import java.sql.Connection;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.jooq.BatchBindStep;
+import org.jooq.DSLContext;
+import org.jooq.Field;
+import org.jooq.Record;
+import org.jooq.Result;
+import org.jooq.SQLDialect;
+import org.jooq.SelectField;
+import org.jooq.SelectHavingStep;
+import org.jooq.impl.DSL;
+
+/**
+ * SQLite table holding the fault detection events of one window, named
+ * {@code fault_detection_<windowStartTime>}.
+ *
+ * <p>A check is stored as separate rows: a start row (start time set, no end time) and a finish
+ * row (end time and fault set, no start time). {@link #groupByRidAndTypeSelect()} merges the
+ * rows of one check back together; the other queries are built on top of it.
+ */
+public class FaultDetectionMetricsSnapshot implements Removable {
+    private static final Logger LOG = LogManager.getLogger(FaultDetectionMetricsSnapshot.class);
+    private final DSLContext create;
+    private final Long windowStartTime;
+    private final String tableName;
+    // Table columns in storage order. Positional inserts (the batch put and the rollover of
+    // in-flight requests) rely on this order.
+    private ArrayList<Field<?>> columns;
+    // Unfinished checks that started more than this long before the window start are not
+    // rolled over. NOTE(review): presumably milliseconds (10 minutes) - confirm.
+    private static final Long EXPIRE_AFTER = 600000L;
+
+    /** Column names of the snapshot table and of the queries derived from it. */
+    public enum Fields {
+        RID("rid"),
+        FAULT_DETECTION_TYPE("type"),
+        ST("st"),
+        ET("et"),
+        LAT("lat"),
+        FAULT("fault");
+
+        private final String fieldValue;
+
+        Fields(String fieldValue) {
+            this.fieldValue = fieldValue;
+        }
+
+        @Override
+        public String toString() {
+            return fieldValue;
+        }
+
+    }
+
+    /**
+     * Creates the table for the given window.
+     *
+     * @param conn connection the table is created on
+     * @param windowStartTime start time of the window; part of the table name
+     */
+    public FaultDetectionMetricsSnapshot(Connection conn, Long windowStartTime) {
+        this.create = DSL.using(conn, SQLDialect.SQLITE);
+        this.windowStartTime = windowStartTime;
+        this.tableName = "fault_detection_" + windowStartTime;
+
+        this.columns =
+                new ArrayList<Field<?>>() {
+                    {
+                        this.add(DSL.field(DSL.name(Fields.RID.toString()), String.class));
+                        this.add(DSL.field(DSL.name(FaultDetectionDimension.SOURCE_NODE_ID.toString()), String.class));
+                        this.add(DSL.field(DSL.name(FaultDetectionDimension.TARGET_NODE_ID.toString()), String.class));
+                        this.add(DSL.field(DSL.name(Fields.FAULT_DETECTION_TYPE.toString()), String.class));
+                        this.add(DSL.field(DSL.name(Fields.ST.toString()), Long.class));
+                        this.add(DSL.field(DSL.name(Fields.ET.toString()), Long.class));
+                        this.add(DSL.field(DSL.name(Fields.FAULT.toString()), Integer.class));
+                    }
+                };
+        create.createTable(this.tableName).columns(columns).execute();
+    }
+
+    /**
+     * Starts a batch insert into the table. Values must be bound in column order:
+     * rid, SourceNodeID, TargetNodeID, type, st, et, fault.
+     *
+     * @return the batch to bind rows to and execute
+     */
+    public BatchBindStep startBatchPut() {
+
+        // One placeholder per column; the real values are supplied through bind().
+        List<Object> dummyValues = new ArrayList<>();
+        for (int i = 0; i < columns.size(); i++) {
+            dummyValues.add(null);
+        }
+        return create.batch(create.insertInto(DSL.table(this.tableName)).values(dummyValues));
+    }
+
+    /**
+     * Inserts a single start row.
+     *
+     * @param startTime start time of the check
+     * @param dimensions column name to value for the remaining string columns
+     */
+    @VisibleForTesting
+    public void putStartMetric(Long startTime, Map<String, String> dimensions) {
+        Map<Field<?>, String> dimensionMap = new HashMap<>();
+        for (Map.Entry<String, String> dimension : dimensions.entrySet()) {
+            dimensionMap.put(DSL.field(DSL.name(dimension.getKey()), String.class), dimension.getValue());
+        }
+        create
+                .insertInto(DSL.table(this.tableName))
+                .set(DSL.field(DSL.name(Fields.ST.toString()), Long.class), startTime)
+                .set(dimensionMap)
+                .execute();
+    }
+
+    /**
+     * Inserts a single finish row.
+     *
+     * @param endTime end time of the check
+     * @param error value stored in the fault column
+     * @param dimensions column name to value for the remaining string columns
+     */
+    @VisibleForTesting
+    public void putEndMetric(Long endTime, int error, Map<String, String> dimensions) {
+        Map<Field<?>, String> dimensionMap = new HashMap<>();
+        for (Map.Entry<String, String> dimension : dimensions.entrySet()) {
+            dimensionMap.put(DSL.field(DSL.name(dimension.getKey()), String.class), dimension.getValue());
+        }
+        create
+                .insertInto(DSL.table(this.tableName))
+                .set(DSL.field(DSL.name(Fields.ET.toString()), Long.class), endTime)
+                .set(DSL.field(DSL.name(Fields.FAULT.toString()), Integer.class), error)
+                .set(dimensionMap)
+                .execute();
+    }
+
+    /** Returns every row of the table. */
+    public Result<Record> fetchAll() {
+        return create.select().from(DSL.table(this.tableName)).fetch();
+    }
+
+    /** Drops the table. */
+    @Override
+    public void remove() throws Exception {
+        create.dropTable(DSL.table(this.tableName)).execute();
+    }
+
+    /**
+     * Copies the checks that have started but not finished in {@code prevSnap} into this table.
+     *
+     * @param prevSnap snapshot of an earlier window
+     */
+    public void rolloverInFlightRequests(FaultDetectionMetricsSnapshot prevSnap) {
+        // Fetch all entries that have not ended and write to current table.
+        // The insert names no target columns, so values are matched by position:
+        // fetchInFlightRequests() has to project the columns in table order.
+        create
+                .insertInto(DSL.table(this.tableName))
+                .select(create.select().from(prevSnap.fetchInFlightRequests()))
+                .execute();
+    }
+
+    /**
+     * Selects the checks that have a start time but no end time and that started later than
+     * {@code windowStartTime - EXPIRE_AFTER}. The projection is exactly the table's column list,
+     * in table order, so the result can be inserted positionally into another snapshot table.
+     *
+     * @return the (unexecuted) select
+     */
+    public SelectHavingStep<Record> fetchInFlightRequests() {
+        return create
+                .select(columns)
+                .from(groupByRidAndTypeSelect())
+                .where(
+                        DSL.field(Fields.ST.toString())
+                                .isNotNull()
+                                .and(DSL.field(Fields.ET.toString()).isNull())
+                                .and(DSL.field(Fields.ST.toString()).gt(this.windowStartTime - EXPIRE_AFTER)));
+    }
+
+    /**
+     * Merges the rows of each check, grouping by request id and check type and taking the
+     * maximum of st, et and fault within the group.
+     *
+     * @return the (unexecuted) select with columns rid, SourceNodeID, TargetNodeID, type, st,
+     *     et, fault
+     */
+    public SelectHavingStep<Record> groupByRidAndTypeSelect() {
+        // SourceNodeID and TargetNodeID are selected without being grouped or aggregated.
+        // NOTE(review): this relies on SQLite accepting bare columns in an aggregate query and
+        // on all rows of one check carrying the same node ids - confirm.
+        ArrayList<SelectField<?>> fields =
+                new ArrayList<SelectField<?>>() {
+                    {
+                        this.add(DSL.field(DSL.name(Fields.RID.toString()), String.class));
+                        this.add(DSL.field(DSL.name(FaultDetectionDimension.SOURCE_NODE_ID.toString()), String.class));
+                        this.add(DSL.field(DSL.name(FaultDetectionDimension.TARGET_NODE_ID.toString()), String.class));
+                        this.add(DSL.field(DSL.name(Fields.FAULT_DETECTION_TYPE.toString()), String.class));
+                    }
+                };
+        fields.add(
+                DSL.max(DSL.field(Fields.ST.toString(), Long.class)).as(DSL.name(Fields.ST.toString())));
+        fields.add(
+                DSL.max(DSL.field(Fields.ET.toString(), Long.class)).as(DSL.name(Fields.ET.toString())));
+        fields.add(
+                DSL.max(DSL.field(Fields.FAULT.toString(), Integer.class)).as(DSL.name(Fields.FAULT.toString())));
+        return create
+                .select(fields)
+                .from(DSL.table(this.tableName))
+                .groupBy(DSL.field(Fields.RID.toString()), DSL.field(Fields.FAULT_DETECTION_TYPE.toString()));
+    }
+
+    /**
+     * Selects the checks that have both a start and an end time, adding the column
+     * {@code lat = et - st}.
+     *
+     * @return the (unexecuted) select
+     */
+    public SelectHavingStep<Record> fetchLatencyTable() {
+        // Same projection as the table, plus the derived latency column.
+        List<SelectField<?>> fields = new ArrayList<>(columns);
+        fields.add(
+                DSL.field(Fields.ET.toString())
+                        .minus(DSL.field(Fields.ST.toString()))
+                        .as(DSL.name(Fields.LAT.toString())));
+        return create
+                .select(fields)
+                .from(groupByRidAndTypeSelect())
+                .where(
+                        DSL.field(Fields.ET.toString())
+                                .isNotNull()
+                                .and(DSL.field(Fields.ST.toString()).isNotNull()));
+    }
+
+    /**
+     * Aggregates the finished checks per (SourceNodeID, TargetNodeID, type): sum, avg, min and
+     * max of the latency and of the fault value. The aggregate columns are named with
+     * {@link DBUtils#getAggFieldName}.
+     *
+     * @return one record per (source node, target node, check type)
+     */
+    public Result<Record> fetchAggregatedTable() {
+        ArrayList<SelectField<?>> fields =
+                new ArrayList<SelectField<?>>() {
+                    {
+                        this.add(DSL.field(DSL.name(FaultDetectionDimension.SOURCE_NODE_ID.toString()), String.class));
+                        this.add(DSL.field(DSL.name(FaultDetectionDimension.TARGET_NODE_ID.toString()), String.class));
+                        this.add(DSL.field(DSL.name(Fields.FAULT_DETECTION_TYPE.toString()), String.class));
+
+                        this.add(
+                                DSL.sum(DSL.field(DSL.name(Fields.LAT.toString()), Double.class))
+                                        .as(DBUtils.getAggFieldName(Fields.LAT.toString(), MetricsDB.SUM)));
+                        this.add(
+                                DSL.avg(DSL.field(DSL.name(Fields.LAT.toString()), Double.class))
+                                        .as(DBUtils.getAggFieldName(Fields.LAT.toString(), MetricsDB.AVG)));
+                        this.add(
+                                DSL.min(DSL.field(DSL.name(Fields.LAT.toString()), Double.class))
+                                        .as(DBUtils.getAggFieldName(Fields.LAT.toString(), MetricsDB.MIN)));
+                        this.add(
+                                DSL.max(DSL.field(DSL.name(Fields.LAT.toString()), Double.class))
+                                        .as(DBUtils.getAggFieldName(Fields.LAT.toString(), MetricsDB.MAX)));
+
+                        this.add(
+                                DSL.sum(DSL.field(DSL.name(Fields.FAULT.toString()), Double.class))
+                                        .as(DBUtils.getAggFieldName(Fields.FAULT.toString(), MetricsDB.SUM)));
+                        this.add(
+                                DSL.avg(DSL.field(DSL.name(Fields.FAULT.toString()), Double.class))
+                                        .as(DBUtils.getAggFieldName(Fields.FAULT.toString(), MetricsDB.AVG)));
+                        this.add(
+                                DSL.min(DSL.field(DSL.name(Fields.FAULT.toString()), Double.class))
+                                        .as(DBUtils.getAggFieldName(Fields.FAULT.toString(), MetricsDB.MIN)));
+                        this.add(
+                                DSL.max(DSL.field(DSL.name(Fields.FAULT.toString()), Double.class))
+                                        .as(DBUtils.getAggFieldName(Fields.FAULT.toString(), MetricsDB.MAX)));
+                    }
+                };
+        ArrayList<Field<?>> groupByFields =
+                new ArrayList<Field<?>>() {
+                    {
+                        this.add(DSL.field(DSL.name(FaultDetectionDimension.SOURCE_NODE_ID.toString()), String.class));
+                        this.add(DSL.field(DSL.name(FaultDetectionDimension.TARGET_NODE_ID.toString()), String.class));
+                        this.add(DSL.field(DSL.name(Fields.FAULT_DETECTION_TYPE.toString()), String.class));
+                    }
+                };
+
+        return create.select(fields).from(fetchLatencyTable()).groupBy(groupByFields).fetch();
+    }
+}
diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/MetricsEmitter.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/MetricsEmitter.java
index 5b099b2a2..ed9f662d5 100644
--- a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/MetricsEmitter.java
+++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/MetricsEmitter.java
@@ -20,9 +20,12 @@
 import com.amazon.opendistro.elasticsearch.performanceanalyzer.config.TroubleshootingConfig;
 import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics;
 import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.CommonMetric;
+import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.FaultDetectionDimension;
+import
com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.FaultDetectionMetric;
 import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.GCInfoDimension;
 import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.GCInfoValue;
 import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.OSMetrics;
+import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics;
 import com.amazon.opendistro.elasticsearch.performanceanalyzer.metricsdb.Dimensions;
 import com.amazon.opendistro.elasticsearch.performanceanalyzer.metricsdb.Metric;
 import com.amazon.opendistro.elasticsearch.performanceanalyzer.metricsdb.MetricsDB;
@@ -102,6 +105,14 @@ public class MetricsEmitter {
                 }
             };
 
+    /** Dimension columns of the four fault detection metric tables. */
+    private static final List<String> FAULT_DETECTION_TABLE_DIMENSIONS =
+            new ArrayList<String>() {
+                {
+                    this.add(FaultDetectionDimension.SOURCE_NODE_ID.toString());
+                    this.add(FaultDetectionDimension.TARGET_NODE_ID.toString());
+                }
+            };
+
     public static void emitAggregatedOSMetrics(
             final DSLContext create,
             final MetricsDB db,
@@ -863,6 +874,136 @@ public static void emitNodeMetrics(
         }
     }
 
+    /**
+     * Writes the aggregated fault detection statistics of a snapshot to the metrics DB.
+     *
+     * <p>The four metric tables (follower/leader check latency and failure) are always created.
+     * Each aggregated record then contributes one latency row and one failure row to the tables
+     * matching its check type; records of any other type are not written.
+     *
+     * @param db metrics DB to write to
+     * @param faultDetectionSnapshot snapshot whose aggregated table is emitted
+     */
+    public static void emitFaultDetectionMetrics(MetricsDB db, FaultDetectionMetricsSnapshot faultDetectionSnapshot) {
+
+        long mCurrT = System.currentTimeMillis();
+        Dimensions dimensions = new Dimensions();
+        Result<Record> res = faultDetectionSnapshot.fetchAggregatedTable();
+
+        db.createMetric(
+                new Metric(FaultDetectionMetric.FOLLOWER_CHECK_LATENCY.toString(), 0d),
+                FAULT_DETECTION_TABLE_DIMENSIONS);
+
+        db.createMetric(
+                new Metric(FaultDetectionMetric.LEADER_CHECK_LATENCY.toString(), 0d),
+                FAULT_DETECTION_TABLE_DIMENSIONS);
+
+        db.createMetric(
+                new Metric(FaultDetectionMetric.FOLLOWER_CHECK_FAILURE.toString(), 0d),
+                FAULT_DETECTION_TABLE_DIMENSIONS);
+
+        db.createMetric(
+                new Metric(FaultDetectionMetric.LEADER_CHECK_FAILURE.toString(), 0d),
+                FAULT_DETECTION_TABLE_DIMENSIONS);
+
+        final String latencyField = FaultDetectionMetricsSnapshot.Fields.LAT.toString();
+        final String faultField = FaultDetectionMetricsSnapshot.Fields.FAULT.toString();
+
+        for (Record r : res) {
+            // Both keys are overwritten on every iteration, so the instance can be reused.
+            dimensions.put(
+                    FaultDetectionDimension.SOURCE_NODE_ID.toString(),
+                    r.get(FaultDetectionDimension.SOURCE_NODE_ID.toString()).toString());
+            dimensions.put(
+                    FaultDetectionDimension.TARGET_NODE_ID.toString(),
+                    r.get(FaultDetectionDimension.TARGET_NODE_ID.toString()).toString());
+
+            Double sumLatency = faultDetectionAggregate(r, latencyField, MetricsDB.SUM);
+            Double avgLatency = faultDetectionAggregate(r, latencyField, MetricsDB.AVG);
+            Double minLatency = faultDetectionAggregate(r, latencyField, MetricsDB.MIN);
+            Double maxLatency = faultDetectionAggregate(r, latencyField, MetricsDB.MAX);
+
+            Double sumFault = faultDetectionAggregate(r, faultField, MetricsDB.SUM);
+            Double avgFault = faultDetectionAggregate(r, faultField, MetricsDB.AVG);
+            Double minFault = faultDetectionAggregate(r, faultField, MetricsDB.MIN);
+            Double maxFault = faultDetectionAggregate(r, faultField, MetricsDB.MAX);
+
+            String checkType =
+                    r.get(FaultDetectionMetricsSnapshot.Fields.FAULT_DETECTION_TYPE.toString()).toString();
+            if (checkType.equals(PerformanceAnalyzerMetrics.FAULT_DETECTION_FOLLOWER_CHECK)) {
+                db.putMetric(
+                        new Metric(
+                                FaultDetectionMetric.FOLLOWER_CHECK_LATENCY.toString(),
+                                sumLatency,
+                                avgLatency,
+                                minLatency,
+                                maxLatency),
+                        dimensions,
+                        0);
+                db.putMetric(
+                        new Metric(
+                                FaultDetectionMetric.FOLLOWER_CHECK_FAILURE.toString(),
+                                sumFault,
+                                avgFault,
+                                minFault,
+                                maxFault),
+                        dimensions,
+                        0);
+            } else if (checkType.equals(PerformanceAnalyzerMetrics.FAULT_DETECTION_LEADER_CHECK)) {
+                db.putMetric(
+                        new Metric(
+                                FaultDetectionMetric.LEADER_CHECK_LATENCY.toString(),
+                                sumLatency,
+                                avgLatency,
+                                minLatency,
+                                maxLatency),
+                        dimensions,
+                        0);
+                db.putMetric(
+                        new Metric(
+                                FaultDetectionMetric.LEADER_CHECK_FAILURE.toString(),
+                                sumFault,
+                                avgFault,
+                                minFault,
+                                maxFault),
+                        dimensions,
+                        0);
+            }
+        }
+        long mFinalT = System.currentTimeMillis();
+        PerformanceAnalyzerApp.READER_METRICS_AGGREGATOR.updateStat(
+                ReaderMetrics.FAULT_DETECTION_METRICS_EMITTER_EXECUTION_TIME, "", mFinalT - mCurrT);
+        LOG.debug("Total time taken for writing fault detection metrics to metricsdb: {}", mFinalT - mCurrT);
+    }
+
+    /**
+     * Reads one aggregate column of a fault detection record as a double.
+     *
+     * @param r record returned by {@code FaultDetectionMetricsSnapshot.fetchAggregatedTable()}
+     * @param fieldName base column name, e.g. the latency or fault field
+     * @param aggregate aggregate name, one of the {@link MetricsDB} aggregate constants
+     * @return the parsed value of the column named by {@link DBUtils#getAggFieldName}
+     */
+    private static Double faultDetectionAggregate(Record r, String fieldName, String aggregate) {
+        return Double.parseDouble(r.get(DBUtils.getAggFieldName(fieldName, aggregate)).toString());
+    }
+
     public static void emitMasterThrottledTaskMetric(
             MetricsDB metricsDB, MasterThrottlingMetricsSnapshot masterThrottlingMetricsSnapshot) {
         long mCurrT = System.currentTimeMillis();
diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/ReaderMetricsProcessor.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/ReaderMetricsProcessor.java
index fd2d3feca..ca4cc7bc1 100644
--- a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/ReaderMetricsProcessor.java
+++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/ReaderMetricsProcessor.java
@@ -79,14 +79,17 @@ public class ReaderMetricsProcessor implements Runnable {
     private NavigableMap masterEventMetricsMap;
     private NavigableMap gcInfoMap;
     private Map> nodeMetricsMap;
+    private NavigableMap faultDetectionMetricsMap;
     private NavigableMap masterThrottlingMetricsMap;
     private NavigableMap shardStateMetricsMap;
+
     private static final int MAX_DATABASES = 2;
     private static final int OS_SNAPSHOTS = 4;
private static final int SHARD_STATE_SNAPSHOTS = 2; private static final int RQ_SNAPSHOTS = 4; private static final int HTTP_RQ_SNAPSHOTS = 4; private static final int MASTER_EVENT_SNAPSHOTS = 4; + private static final int FAULT_DETECTION_SNAPSHOTS = 2; private static final int GC_INFO_SNAPSHOTS = 4; private static final int MASTER_THROTTLING_SNAPSHOTS = 2; private final String rootLocation; @@ -132,6 +135,7 @@ public ReaderMetricsProcessor(String rootLocation, boolean processNewFormat, fin shardRqMetricsMap = new TreeMap<>(); httpRqMetricsMap = new TreeMap<>(); masterEventMetricsMap = new TreeMap<>(); + faultDetectionMetricsMap = new TreeMap<>(); shardStateMetricsMap = new TreeMap<>(); gcInfoMap = new TreeMap<>(); masterThrottlingMetricsMap = new TreeMap<>(); @@ -273,6 +277,7 @@ public void trimOldSnapshots() throws Exception { trimMap(shardRqMetricsMap, RQ_SNAPSHOTS); trimMap(httpRqMetricsMap, HTTP_RQ_SNAPSHOTS); trimMap(masterEventMetricsMap, MASTER_EVENT_SNAPSHOTS); + trimMap(faultDetectionMetricsMap, FAULT_DETECTION_SNAPSHOTS); trimMap(shardStateMetricsMap, SHARD_STATE_SNAPSHOTS); trimMap(gcInfoMap, GC_INFO_SNAPSHOTS); trimMap(masterThrottlingMetricsMap, MASTER_THROTTLING_SNAPSHOTS); @@ -395,6 +400,7 @@ private void emitMetrics(long currWindowStartTime) throws Exception { emitShardRequestMetrics(prevWindowStartTime, alignedOSSnapHolder, osAlignedSnap, metricsDB); emitHttpRequestMetrics(prevWindowStartTime, metricsDB); emitNodeMetrics(currWindowStartTime, metricsDB); + emitFaultDetectionMetrics(prevWindowStartTime, metricsDB); emitMasterThrottlingMetrics(prevWindowStartTime, metricsDB); emitShardStateMetrics(prevWindowStartTime, metricsDB); @@ -412,6 +418,17 @@ private void emitMetrics(long currWindowStartTime) throws Exception { LOG.debug("Total time taken for emitting Metrics: {}", mFinalT - mCurrT); TIMING_STATS.put("emitMetrics", (double) (mFinalT - mCurrT)); } + + private void emitFaultDetectionMetrics(long prevWindowStartTime, MetricsDB metricsDB) { + if 
(faultDetectionMetricsMap.containsKey(prevWindowStartTime)) { + + FaultDetectionMetricsSnapshot prevFaultDetectionSnap = faultDetectionMetricsMap.get(prevWindowStartTime); + MetricsEmitter.emitFaultDetectionMetrics(metricsDB, prevFaultDetectionSnap); + } else { + LOG.debug( + "Fault Detection snapshot for the previous window does not exist. Not emitting metrics."); + } + } private void emitShardStateMetrics(long prevWindowStartTime, MetricsDB metricsDB) { if (shardStateMetricsMap.containsKey(prevWindowStartTime)) { @@ -582,6 +599,9 @@ is ready so it starts to read that file (go back two windows and EventProcessor httpProcessor = HttpRequestEventProcessor.buildHttpRequestMetricEventsProcessor( currWindowStartTime, currWindowEndTime, conn, httpRqMetricsMap); + EventProcessor faultDetectionProcessor = + FaultDetectionMetricsProcessor.buildFaultDetectionMetricsProcessor( + currWindowStartTime, conn, faultDetectionMetricsMap); EventProcessor masterEventsProcessor = MasterMetricsEventProcessor.buildMasterMetricEventsProcessor( currWindowStartTime, conn, masterEventMetricsMap); @@ -617,6 +637,7 @@ is ready so it starts to read that file (go back two windows and eventDispatcher.registerEventProcessor(masterThrottlingEventsProcessor); eventDispatcher.registerEventProcessor(shardStateMetricsProcessor); eventDispatcher.registerEventProcessor(clusterDetailsEventsProcessor); + eventDispatcher.registerEventProcessor(faultDetectionProcessor); eventDispatcher.registerEventProcessor(garbageCollectorInfoProcessor); eventDispatcher.initializeProcessing( diff --git a/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/FaultDetectionMetricsSnapshotTests.java b/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/FaultDetectionMetricsSnapshotTests.java new file mode 100644 index 000000000..60f7b35a6 --- /dev/null +++ b/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/FaultDetectionMetricsSnapshotTests.java @@ 
-0,0 +1,66 @@ +/* + * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistro.elasticsearch.performanceanalyzer.reader; + +import static org.junit.Assert.assertEquals; + +import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics; +import java.sql.Connection; +import java.sql.DriverManager; +import org.jooq.BatchBindStep; +import org.jooq.Record; +import org.jooq.Result; +import org.junit.Before; +import org.junit.Test; + +public class FaultDetectionMetricsSnapshotTests { + private static final String DB_URL = "jdbc:sqlite:"; + private Connection conn; + + @Before + public void setup() throws Exception { + Class.forName("org.sqlite.JDBC"); + System.setProperty("java.io.tmpdir", "/tmp"); + conn = DriverManager.getConnection(DB_URL); + } + + @Test + public void testPutMetrics() { + FaultDetectionMetricsSnapshot faultDetectionMetricsSnapshot = + new FaultDetectionMetricsSnapshot(conn, 1535065195000L); + BatchBindStep handle = faultDetectionMetricsSnapshot.startBatchPut(); + + handle.bind("1", "sourceNode", "targetNodeId", "follower_check",1535065195000L, null, 0); + handle.bind("1", "sourceNode", "targetNodeId", "follower_check", null, 1535065195050L, 0); + handle.execute(); + Result rt = faultDetectionMetricsSnapshot.fetchAggregatedTable(); + + assertEquals(1, rt.size()); + Double latency = Double.parseDouble(rt.get(0).get("sum_" + 
FaultDetectionMetricsSnapshot.Fields.LAT.toString()).toString()); + assertEquals(50d, latency.doubleValue(), 0); + assertEquals( + "sourceNode", rt.get(0).get(AllMetrics.FaultDetectionDimension.SOURCE_NODE_ID.toString())); + assertEquals( + "targetNodeId", + rt.get(0).get(AllMetrics.FaultDetectionDimension.TARGET_NODE_ID.toString())); + assertEquals( + "follower_check", + rt.get(0).get(FaultDetectionMetricsSnapshot.Fields.FAULT_DETECTION_TYPE.toString())); + assertEquals( + 0, + Integer.parseInt(rt.get(0).get("sum_" + FaultDetectionMetricsSnapshot.Fields.FAULT.toString()).toString())); + } +} diff --git a/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/MetricsEmitterTests.java b/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/MetricsEmitterTests.java index f94b53e8c..1e21b66d0 100644 --- a/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/MetricsEmitterTests.java +++ b/src/test/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/reader/MetricsEmitterTests.java @@ -378,6 +378,41 @@ public void testEmitNodeMetrics() throws Exception { } @Test + public void testFaultDetectionMetricsEmitter() throws Exception { + Connection conn = DriverManager.getConnection(DB_URL); + FaultDetectionMetricsSnapshot faultDetectionMetricsSnapshot = new FaultDetectionMetricsSnapshot(conn, 1L); + Map dimensions = new HashMap<>(); + dimensions.put(AllMetrics.FaultDetectionDimension.SOURCE_NODE_ID.toString(), "sourceNodeId"); + dimensions.put(AllMetrics.FaultDetectionDimension.TARGET_NODE_ID.toString(), "targetNodeId"); + dimensions.put(FaultDetectionMetricsSnapshot.Fields.FAULT_DETECTION_TYPE.toString(), "follower_check"); + dimensions.put(FaultDetectionMetricsSnapshot.Fields.RID.toString(), "1"); + faultDetectionMetricsSnapshot.putStartMetric(12345L, dimensions); + faultDetectionMetricsSnapshot.putEndMetric(33325L, 0, dimensions); + + 
dimensions.put(FaultDetectionMetricsSnapshot.Fields.RID.toString(), "2"); + faultDetectionMetricsSnapshot.putStartMetric(22245L, dimensions); + + dimensions.put(FaultDetectionMetricsSnapshot.Fields.RID.toString(), "3"); + faultDetectionMetricsSnapshot.putStartMetric(10000L, dimensions); + faultDetectionMetricsSnapshot.putEndMetric(30000L, 1, dimensions); + + MetricsDB db = new MetricsDB(1553713438); + MetricsEmitter.emitFaultDetectionMetrics(db, faultDetectionMetricsSnapshot); + Result res = + db.queryMetric( + Arrays.asList( + AllMetrics.FaultDetectionMetric.FOLLOWER_CHECK_LATENCY.toString(), + AllMetrics.FaultDetectionMetric.FOLLOWER_CHECK_FAILURE.toString()), + Arrays.asList("avg", "sum"), + Arrays.asList(AllMetrics.FaultDetectionDimension.SOURCE_NODE_ID.toString())); + + Float latency = Float.parseFloat(res.get(0).get(AllMetrics.FaultDetectionMetric.FOLLOWER_CHECK_LATENCY.toString()) + .toString()); + db.remove(); + assertEquals(20490.0f, latency.floatValue(), 0); + } + + @Test public void testShardStateMetricsEmitter() throws Exception { Connection conn = DriverManager.getConnection(DB_URL); ShardStateMetricsSnapshot shardStateMetricsSnapshot = new ShardStateMetricsSnapshot(conn, 1L); diff --git a/src/test/resources/reader/1566413960000 b/src/test/resources/reader/1566413960000 index 81b70b3ad..75448366c 100644 --- a/src/test/resources/reader/1566413960000 +++ b/src/test/resources/reader/1566413960000 @@ -75,6 +75,28 @@ $ {"MemType":"NonHeap","GC_Collection_Event":-2,"GC_Collection_Time":-2,"Heap_Committed":260165632,"Heap_Init":2555904,"Heap_Max":-1,"Heap_Used":248759720} {"MemType":"Heap","GC_Collection_Event":-2,"GC_Collection_Time":-2,"Heap_Committed":17145004032,"Heap_Init":17179869184,"Heap_Max":17145004032,"Heap_Used":5991469464} $ +^fault_detection/follower_check/538187/start +current_time:1566413936500 +SourceNodeID:g52i9a93a762cd59dda8d3379b09a752a +TargetNodeID:b2a5a93a762cd59dda8d3379b09a752a +StartTime:1566413988986$ 
+^fault_detection/follower_check/538187/finish +current_time:1566413936550 +SourceNodeID:g52i9a93a762cd59dda8d3379b09a752a +TargetNodeID:b2a5a93a762cd59dda8d3379b09a752a +FinishTime:1566413989986 +fault:0$ +^fault_detection/follower_check/727187/start +current_time:1566413936507 +SourceNodeID:g52i9a93a762cd59dda8d3379b09a752a +TargetNodeID:b2a5a93a762cd59dda8d3379b09a752a +StartTime:1566413987786$ +^fault_detection/follower_check/727187/finish +current_time:1566413936500 +SourceNodeID:g52i9a93a762cd59dda8d3379b09a752a +TargetNodeID:b2a5a93a762cd59dda8d3379b09a752a +FinishTime:1566413987986 +fault:0$ ^indices/nyc_taxis_2/27 {"current_time":1566413936500} {"Indexing_ThrottleTime":0,"Cache_Query_Hit":8,"Cache_Query_Miss":0,"Cache_Query_Size":483027,"Cache_FieldData_Eviction":0,"Cache_FieldData_Size":0,"Cache_Request_Hit":0,"Cache_Request_Miss":0,"Cache_Request_Eviction":0,"Cache_Request_Size":0,"Refresh_Event":0,"Refresh_Time":0,"Flush_Event":0,"Flush_Time":0,"Merge_Event":0,"Merge_Time":0,"Merge_CurrentEvent":0,"Indexing_Buffer":0,"Segments_Total":21,"Segments_Memory":1508419,"Terms_Memory":1066993,"StoredFields_Memory":116608,"TermVectors_Memory":1066993,"Norms_Memory":0,"Points_Memory":276918,"DocValues_Memory":47900,"IndexWriter_Memory":0,"VersionMap_Memory":0,"Bitset_Memory":0}$ diff --git a/src/test/resources/reader/1566413965000 b/src/test/resources/reader/1566413965000 index ffa9f9898..6b0fc4050 100644 --- a/src/test/resources/reader/1566413965000 +++ b/src/test/resources/reader/1566413965000 @@ -74,6 +74,28 @@ $ {"MemType":"NonHeap","GC_Collection_Event":-2,"GC_Collection_Time":-2,"Heap_Committed":260427776,"Heap_Init":2555904,"Heap_Max":-1,"Heap_Used":249123672} {"MemType":"Heap","GC_Collection_Event":-2,"GC_Collection_Time":-2,"Heap_Committed":17145004032,"Heap_Init":17179869184,"Heap_Max":17145004032,"Heap_Used":5973041688} $ +^fault_detection/follower_check/538187/start +current_time:1566413966497 +SourceNodeID:g52i9a93a762cd59dda8d3379b09a752a 
+TargetNodeID:b2a5a93a762cd59dda8d3379b09a752a +StartTime:1566413988986$ +^fault_detection/follower_check/538187/finish +current_time:1566413966497 +SourceNodeID:g52i9a93a762cd59dda8d3379b09a752a +TargetNodeID:b2a5a93a762cd59dda8d3379b09a752a +FinishTime:1566413989986 +fault:0$ +^fault_detection/follower_check/852187/start +current_time:1566413966497 +SourceNodeID:g52i9a93a762cd59dda8d3379b09a752a +TargetNodeID:b2a5a93a762cd59dda8d3379b09a752a +StartTime:1566413987786$ +^fault_detection/follower_check/852187/finish +current_time:1566413966497 +SourceNodeID:g52i9a93a762cd59dda8d3379b09a752a +TargetNodeID:b2a5a93a762cd59dda8d3379b09a752a +FinishTime:1566413987986 +fault:0$ ^indices/nyc_taxis/27 {"current_time":1566413966497} {"Indexing_ThrottleTime":0,"Cache_Query_Hit":0,"Cache_Query_Miss":0,"Cache_Query_Size":0,"Cache_FieldData_Eviction":0,"Cache_FieldData_Size":0,"Cache_Request_Hit":0,"Cache_Request_Miss":0,"Cache_Request_Eviction":0,"Cache_Request_Size":0,"Refresh_Event":2,"Refresh_Time":0,"Flush_Event":0,"Flush_Time":0,"Merge_Event":0,"Merge_Time":0,"Merge_CurrentEvent":0,"Indexing_Buffer":0,"Segments_Total":0,"Segments_Memory":0,"Terms_Memory":0,"StoredFields_Memory":0,"TermVectors_Memory":0,"Norms_Memory":0,"Points_Memory":0,"DocValues_Memory":0,"IndexWriter_Memory":0,"VersionMap_Memory":0,"Bitset_Memory":0}$ diff --git a/src/test/resources/reader/1566413970000 b/src/test/resources/reader/1566413970000 index eb8812a94..8bc389e30 100644 --- a/src/test/resources/reader/1566413970000 +++ b/src/test/resources/reader/1566413970000 @@ -75,6 +75,28 @@ $ {"MemType":"NonHeap","GC_Collection_Event":-2,"GC_Collection_Time":-2,"Heap_Committed":260427776,"Heap_Init":2555904,"Heap_Max":-1,"Heap_Used":249158360} {"MemType":"Heap","GC_Collection_Event":-2,"GC_Collection_Time":-2,"Heap_Committed":17145004032,"Heap_Init":17179869184,"Heap_Max":17145004032,"Heap_Used":8569832928} $ +^fault_detection/follower_check/654187/start +current_time:1566413996768 
+SourceNodeID:g52i9a93a762cd59dda8d3379b09a752a +TargetNodeID:b2a5a93a762cd59dda8d3379b09a752a +StartTime:1566413988986$ +^fault_detection/follower_check/654187/finish +current_time:1566413996768 +SourceNodeID:g52i9a93a762cd59dda8d3379b09a752a +TargetNodeID:b2a5a93a762cd59dda8d3379b09a752a +FinishTime:1566413989986 +fault:0$ +^fault_detection/follower_check/852187/start +current_time:1566413996768 +SourceNodeID:g52i9a93a762cd59dda8d3379b09a752a +TargetNodeID:b2a5a93a762cd59dda8d3379b09a752a +StartTime:1566413987786$ +^fault_detection/follower_check/852187/finish +current_time:1566413996768 +SourceNodeID:g52i9a93a762cd59dda8d3379b09a752a +TargetNodeID:b2a5a93a762cd59dda8d3379b09a752a +FinishTime:1566413987986 +fault:0$ ^indices/nyc_taxis_1/27 {"current_time":1566413996768} {"Indexing_ThrottleTime":0,"Cache_Query_Hit":0,"Cache_Query_Miss":0,"Cache_Query_Size":0,"Cache_FieldData_Eviction":0,"Cache_FieldData_Size":0,"Cache_Request_Hit":0,"Cache_Request_Miss":0,"Cache_Request_Eviction":0,"Cache_Request_Size":0,"Refresh_Event":0,"Refresh_Time":0,"Flush_Event":0,"Flush_Time":0,"Merge_Event":0,"Merge_Time":0,"Merge_CurrentEvent":0,"Indexing_Buffer":11114952,"Segments_Total":0,"Segments_Memory":0,"Terms_Memory":0,"StoredFields_Memory":0,"TermVectors_Memory":0,"Norms_Memory":0,"Points_Memory":0,"DocValues_Memory":0,"IndexWriter_Memory":11114952,"VersionMap_Memory":0,"Bitset_Memory":0}$