From dd5ac643a144756e695d6c446b9bc51ca42248c6 Mon Sep 17 00:00:00 2001 From: Kaituo Li Date: Tue, 14 Jul 2020 13:43:22 -0700 Subject: [PATCH] Adds initialization progress to profile API (#164) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Adds initialization progress to profile API This PR adds init_progress to profile API. init_progress helps users track initialization percentage, needed shingles, and estimated time to go if the future data stream is continuous (no missing data). Initialization percentage measures how far we are from the point where RCF models start emitting scores.  The implementation fetches the RCF model's total updates while the AD job is running and materializes the value to the newly added index .opendistro-anomaly-info. Total updates record the number of times this RCF model has been updated.  Initialization percent is computed as x/128:  * if total updates > 128, x = 128. Otherwise, x is the total updates  * 128 is our output-after number in RCF. After observing 128 samples, RCF starts emitting scores. Needed shingles are computed as 128 - x.  Estimated minutes to go is computed as needed shingles * detector interval. This PR also materializes the error message in the most recent run to speed up profile API's error fetching. During each AD execution, we also check if a checkpoint is there (the result is saved and maintained as other AD states); if yes, we cold start immediately. Testing done: 1. adds unit tests 2. run e2e tests to verify init_progress number makes sense. 
--- .../ad/AnomalyDetectorJobRunner.java | 18 +- .../ad/AnomalyDetectorPlugin.java | 68 ++- .../ad/AnomalyDetectorProfileRunner.java | 460 +++++++-------- .../ad/constant/CommonName.java | 1 + .../ad/indices/AnomalyDetectionIndices.java | 36 +- .../ad/ml/ModelManager.java | 50 +- .../ad/model/AnomalyDetectorJob.java | 9 + .../ad/model/DetectorInternalState.java | 160 ++++++ .../ad/model/DetectorProfile.java | 142 ++++- .../ad/model/InitProgressProfile.java | 146 +++++ .../ad/model/ModelProfile.java | 12 + .../ad/model/ProfileName.java | 5 +- .../ad/rest/RestAnomalyDetectorJobAction.java | 9 - .../rest/RestDeleteAnomalyDetectorAction.java | 33 +- .../IndexAnomalyDetectorActionHandler.java | 2 + .../IndexAnomalyDetectorJobActionHandler.java | 9 - .../ad/settings/AnomalyDetectorSettings.java | 1 + .../AnomalyResultTransportAction.java | 67 +-- .../ad/transport/CronTransportAction.java | 4 +- .../transport/DeleteModelTransportAction.java | 6 +- .../ad/transport/RCFPollingAction.java | 28 + .../ad/transport/RCFPollingRequest.java | 72 +++ .../ad/transport/RCFPollingResponse.java | 56 ++ .../transport/RCFPollingTransportAction.java | 144 +++++ .../ad/transport/TransportState.java | 100 ++++ ...anager.java => TransportStateManager.java} | 120 ++-- .../handler/AnomalyIndexHandler.java | 188 +++++++ .../handler/AnomalyResultHandler.java | 204 ------- .../handler/DetectionStateHandler.java | 165 ++++++ .../ad/util/ExceptionUtil.java | 60 ++ .../ad/util/IndexUtils.java | 30 +- .../MultiResponsesDelegateActionListener.java | 4 +- .../ad/util/ThrowingConsumer.java | 27 + .../ad/util/ThrowingConsumerWrapper.java | 41 ++ .../mappings/anomaly-detection-state.json | 18 + .../ad/AbstractADTest.java | 72 ++- .../ad/AnomalyDetectorJobRunnerTests.java | 64 ++- .../ad/AnomalyDetectorProfileRunnerTests.java | 523 +++++++++++------- .../ad/TestHelpers.java | 40 +- .../ad/feature/FeatureManagerTests.java | 4 +- .../ad/feature/SearchFeatureDaoTests.java | 4 +- 
.../ADStatsNodesTransportActionTests.java | 10 +- .../ad/transport/AnomalyResultTests.java | 37 +- .../transport/CronTransportActionTests.java | 2 +- .../DeleteModelTransportActionTests.java | 2 +- .../ad/transport/RCFPollingTests.java | 354 ++++++++++++ ...s.java => TransportStateManagerTests.java} | 167 ++++-- .../ad/transport/TransportStateTests.java | 96 ++++ .../handler/AnomalyResultHandlerTests.java | 121 ++-- .../handler/DetectorStateHandlerTests.java | 164 ++++++ .../ad/util/IndexUtilsTests.java | 14 +- ...iResponsesDelegateActionListenerTests.java | 48 ++ .../ad/util/FakeNode.java | 22 +- 53 files changed, 3264 insertions(+), 975 deletions(-) create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorInternalState.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/model/InitProgressProfile.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingAction.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingRequest.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingResponse.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTransportAction.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportState.java rename src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/{ADStateManager.java => TransportStateManager.java} (60%) create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyIndexHandler.java delete mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandler.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectionStateHandler.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ExceptionUtil.java 
create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumer.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumerWrapper.java create mode 100644 src/main/resources/mappings/anomaly-detection-state.json create mode 100644 src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTests.java rename src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/{ADStateManagerTests.java => TransportStateManagerTests.java} (57%) create mode 100644 src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateTests.java create mode 100644 src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectorStateHandlerTests.java create mode 100644 src/test/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListenerTests.java diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunner.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunner.java index 05099c48..abc22f38 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunner.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunner.java @@ -56,7 +56,8 @@ import com.amazon.opendistroforelasticsearch.ad.transport.AnomalyResultRequest; import com.amazon.opendistroforelasticsearch.ad.transport.AnomalyResultResponse; import com.amazon.opendistroforelasticsearch.ad.transport.AnomalyResultTransportAction; -import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyResultHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyIndexHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.DetectionStateHandler; import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.JobExecutionContext; import 
com.amazon.opendistroforelasticsearch.jobscheduler.spi.LockModel; @@ -77,8 +78,9 @@ public class AnomalyDetectorJobRunner implements ScheduledJobRunner { private Client client; private ClientUtil clientUtil; private ThreadPool threadPool; - private AnomalyResultHandler anomalyResultHandler; + private AnomalyIndexHandler anomalyResultHandler; private ConcurrentHashMap detectorEndRunExceptionCount; + private DetectionStateHandler detectionStateHandler; public static AnomalyDetectorJobRunner getJobRunnerInstance() { if (INSTANCE != null) { @@ -110,7 +112,7 @@ public void setThreadPool(ThreadPool threadPool) { this.threadPool = threadPool; } - public void setAnomalyResultHandler(AnomalyResultHandler anomalyResultHandler) { + public void setAnomalyResultHandler(AnomalyIndexHandler anomalyResultHandler) { this.anomalyResultHandler = anomalyResultHandler; } @@ -119,6 +121,10 @@ public void setSettings(Settings settings) { this.maxRetryForEndRunException = AnomalyDetectorSettings.MAX_RETRY_FOR_END_RUN_EXCEPTION.get(settings); } + public void setDetectionStateHandler(DetectionStateHandler detectionStateHandler) { + this.detectionStateHandler = detectionStateHandler; + } + @Override public void runJob(ScheduledJobParameter jobParameter, JobExecutionContext context) { String detectorId = jobParameter.getName(); @@ -436,7 +442,8 @@ private void indexAnomalyResult( Instant.now(), response.getError() ); - anomalyResultHandler.indexAnomalyResult(anomalyResult); + anomalyResultHandler.index(anomalyResult, detectorId); + detectionStateHandler.saveError(response.getError(), detectorId); } catch (Exception e) { log.error("Failed to index anomaly result for " + detectorId, e); } finally { @@ -490,7 +497,8 @@ private void indexAnomalyResultException( Instant.now(), errorMessage ); - anomalyResultHandler.indexAnomalyResult(anomalyResult); + anomalyResultHandler.index(anomalyResult, detectorId); + detectionStateHandler.saveError(errorMessage, detectorId); } catch (Exception e) { 
log.error("Failed to index anomaly result for " + detectorId, e); } finally { diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorPlugin.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorPlugin.java index 0acff42e..ff1ee2a3 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorPlugin.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorPlugin.java @@ -21,12 +21,10 @@ import java.security.PrivilegedAction; import java.time.Clock; import java.util.Arrays; -import java.util.Calendar; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.TimeZone; import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -83,6 +81,7 @@ import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.amazon.opendistroforelasticsearch.ad.rest.RestAnomalyDetectorJobAction; import com.amazon.opendistroforelasticsearch.ad.rest.RestDeleteAnomalyDetectorAction; import com.amazon.opendistroforelasticsearch.ad.rest.RestExecuteAnomalyDetectorAction; @@ -100,7 +99,6 @@ import com.amazon.opendistroforelasticsearch.ad.stats.suppliers.IndexStatusSupplier; import com.amazon.opendistroforelasticsearch.ad.stats.suppliers.ModelsOnNodeSupplier; import com.amazon.opendistroforelasticsearch.ad.stats.suppliers.SettableSupplier; -import com.amazon.opendistroforelasticsearch.ad.transport.ADStateManager; import com.amazon.opendistroforelasticsearch.ad.transport.ADStatsNodesAction; import com.amazon.opendistroforelasticsearch.ad.transport.ADStatsNodesTransportAction; import com.amazon.opendistroforelasticsearch.ad.transport.AnomalyResultAction; @@ 
-111,18 +109,23 @@ import com.amazon.opendistroforelasticsearch.ad.transport.DeleteModelTransportAction; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileAction; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileTransportAction; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingAction; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingTransportAction; import com.amazon.opendistroforelasticsearch.ad.transport.RCFResultAction; import com.amazon.opendistroforelasticsearch.ad.transport.RCFResultTransportAction; import com.amazon.opendistroforelasticsearch.ad.transport.StopDetectorAction; import com.amazon.opendistroforelasticsearch.ad.transport.StopDetectorTransportAction; import com.amazon.opendistroforelasticsearch.ad.transport.ThresholdResultAction; import com.amazon.opendistroforelasticsearch.ad.transport.ThresholdResultTransportAction; -import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyResultHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.TransportStateManager; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyIndexHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.DetectionStateHandler; import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; import com.amazon.opendistroforelasticsearch.ad.util.ColdStartRunner; import com.amazon.opendistroforelasticsearch.ad.util.DiscoveryNodeFilterer; import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils; import com.amazon.opendistroforelasticsearch.ad.util.Throttler; +import com.amazon.opendistroforelasticsearch.ad.util.ThrowingConsumerWrapper; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.JobSchedulerExtension; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.ScheduledJobParser; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.ScheduledJobRunner; @@ -150,6 +153,8 @@ public class AnomalyDetectorPlugin extends 
Plugin implements ActionPlugin, Scrip private NamedXContentRegistry xContentRegistry; private ClientUtil clientUtil; private DiscoveryNodeFilterer nodeFilter; + private IndexUtils indexUtils; + private DetectionStateHandler detectorStateHandler; static { SpecialPermission.check(); @@ -170,28 +175,34 @@ public List getRestHandlers( IndexNameExpressionResolver indexNameExpressionResolver, Supplier nodesInCluster ) { - AnomalyResultHandler anomalyResultHandler = new AnomalyResultHandler( + + AnomalyIndexHandler anomalyResultHandler; + anomalyResultHandler = new AnomalyIndexHandler( client, settings, - clusterService, - indexNameExpressionResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + this.clientUtil, + this.indexUtils, + clusterService ); + AnomalyDetectorJobRunner jobRunner = AnomalyDetectorJobRunner.getJobRunnerInstance(); jobRunner.setClient(client); jobRunner.setClientUtil(clientUtil); jobRunner.setThreadPool(threadPool); jobRunner.setAnomalyResultHandler(anomalyResultHandler); + jobRunner.setDetectionStateHandler(detectorStateHandler); jobRunner.setSettings(settings); AnomalyDetectorProfileRunner profileRunner = new AnomalyDetectorProfileRunner( client, this.xContentRegistry, this.nodeFilter, - indexNameExpressionResolver, - clusterService, - Calendar.getInstance(TimeZone.getTimeZone("UTC")) + AnomalyDetectorSettings.NUM_MIN_SAMPLES ); RestGetAnomalyDetectorAction restGetAnomalyDetectorAction = new RestGetAnomalyDetectorAction(profileRunner); RestIndexAnomalyDetectorAction restIndexAnomalyDetectorAction = new RestIndexAnomalyDetectorAction( @@ -257,7 +268,7 @@ public Collection createComponents( Clock clock = Clock.systemUTC(); Throttler throttler = new Throttler(clock); this.clientUtil = new ClientUtil(settings, client, throttler, 
threadPool); - IndexUtils indexUtils = new IndexUtils(client, clientUtil, clusterService); + this.indexUtils = new IndexUtils(client, clientUtil, clusterService, indexNameExpressionResolver); anomalyDetectionIndices = new AnomalyDetectionIndices(client, clusterService, threadPool, settings); this.clusterService = clusterService; this.xContentRegistry = xContentRegistry; @@ -301,7 +312,7 @@ public Collection createComponents( ); HashRing hashRing = new HashRing(nodeFilter, clock, settings); - ADStateManager stateManager = new ADStateManager( + TransportStateManager stateManager = new TransportStateManager( client, xContentRegistry, modelManager, @@ -350,6 +361,18 @@ public Collection createComponents( adStats = new ADStats(indexUtils, modelManager, stats); ADCircuitBreakerService adCircuitBreakerService = new ADCircuitBreakerService(jvmService).init(); + this.detectorStateHandler = new DetectionStateHandler( + client, + settings, + threadPool, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initDetectorStateIndex), + anomalyDetectionIndices::doesDetectorStateIndexExist, + this.clientUtil, + this.indexUtils, + clusterService, + xContentRegistry, + stateManager + ); return ImmutableList .of( @@ -370,7 +393,8 @@ public Collection createComponents( adCircuitBreakerService, adStats, new MasterEventListener(clusterService, threadPool, client, clock, clientUtil, nodeFilter), - nodeFilter + nodeFilter, + detectorStateHandler ); } @@ -415,7 +439,13 @@ public List> getSettings() { @Override public List getNamedXContent() { - return ImmutableList.of(AnomalyDetector.XCONTENT_REGISTRY, AnomalyResult.XCONTENT_REGISTRY); + return ImmutableList + .of( + AnomalyDetector.XCONTENT_REGISTRY, + AnomalyResult.XCONTENT_REGISTRY, + DetectorInternalState.XCONTENT_REGISTRY, + AnomalyDetectorJob.XCONTENT_REGISTRY + ); } /* @@ -432,7 +462,8 @@ public List getNamedXContent() { new ActionHandler<>(AnomalyResultAction.INSTANCE, AnomalyResultTransportAction.class), new 
ActionHandler<>(CronAction.INSTANCE, CronTransportAction.class), new ActionHandler<>(ADStatsNodesAction.INSTANCE, ADStatsNodesTransportAction.class), - new ActionHandler<>(ProfileAction.INSTANCE, ProfileTransportAction.class) + new ActionHandler<>(ProfileAction.INSTANCE, ProfileTransportAction.class), + new ActionHandler<>(RCFPollingAction.INSTANCE, RCFPollingTransportAction.class) ); } @@ -468,7 +499,8 @@ public Collection getSystemIndexDescriptors(Settings sett new SystemIndexDescriptor(AnomalyDetectionIndices.ALL_AD_RESULTS_INDEX_PATTERN, "anomaly result"), new SystemIndexDescriptor(AnomalyDetector.ANOMALY_DETECTORS_INDEX, "detector definition"), new SystemIndexDescriptor(AnomalyDetectorJob.ANOMALY_DETECTOR_JOB_INDEX, "detector job"), - new SystemIndexDescriptor(CommonName.CHECKPOINT_INDEX_NAME, "model checkpoint") + new SystemIndexDescriptor(CommonName.CHECKPOINT_INDEX_NAME, "model checkpoint"), + new SystemIndexDescriptor(DetectorInternalState.DETECTOR_STATE_INDEX, "detector information like total rcf updates") ) ); } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunner.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunner.java index 943a1a4f..aa92d855 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunner.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunner.java @@ -20,54 +20,42 @@ import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; import java.io.IOException; -import java.time.Instant; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.List; -import java.util.Map; import java.util.Set; -import java.util.TimeZone; -import java.util.TreeMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import 
org.apache.logging.log4j.core.util.Throwables; +import org.apache.logging.log4j.message.ParameterizedMessage; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.get.GetRequest; import org.elasticsearch.action.get.GetResponse; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.action.support.IndicesOptions; import org.elasticsearch.client.Client; -import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContentParseException; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.IndexNotFoundException; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.index.query.RangeQueryBuilder; -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.SearchHits; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.elasticsearch.search.sort.FieldSortBuilder; -import org.elasticsearch.search.sort.SortOrder; - -import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; + +import com.amazon.opendistroforelasticsearch.ad.common.exception.ResourceNotFoundException; +import com.amazon.opendistroforelasticsearch.ad.constant.CommonName; +import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; -import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import 
com.amazon.opendistroforelasticsearch.ad.model.DetectorProfile; import com.amazon.opendistroforelasticsearch.ad.model.DetectorState; +import com.amazon.opendistroforelasticsearch.ad.model.InitProgressProfile; +import com.amazon.opendistroforelasticsearch.ad.model.IntervalTimeConfiguration; import com.amazon.opendistroforelasticsearch.ad.model.ProfileName; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileAction; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileRequest; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileResponse; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingAction; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingRequest; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingResponse; import com.amazon.opendistroforelasticsearch.ad.util.DiscoveryNodeFilterer; +import com.amazon.opendistroforelasticsearch.ad.util.ExceptionUtil; import com.amazon.opendistroforelasticsearch.ad.util.MultiResponsesDelegateActionListener; public class AnomalyDetectorProfileRunner { @@ -75,31 +63,25 @@ public class AnomalyDetectorProfileRunner { private Client client; private NamedXContentRegistry xContentRegistry; private DiscoveryNodeFilterer nodeFilter; - private final IndexNameExpressionResolver indexNameExpressionResolver; static String FAIL_TO_FIND_DETECTOR_MSG = "Fail to find detector with id: "; static String FAIL_TO_GET_PROFILE_MSG = "Fail to get profile for detector "; - private final ClusterService clusterService; - private Calendar calendar; + private long requiredSamples; public AnomalyDetectorProfileRunner( Client client, NamedXContentRegistry xContentRegistry, DiscoveryNodeFilterer nodeFilter, - IndexNameExpressionResolver indexNameExpressionResolver, - ClusterService clusterService, - Calendar calendar + long requiredSamples ) { this.client = client; this.xContentRegistry = xContentRegistry; this.nodeFilter = nodeFilter; - this.indexNameExpressionResolver 
= indexNameExpressionResolver; - this.clusterService = clusterService; - this.calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + this.requiredSamples = requiredSamples; } - public void profile(String detectorId, ActionListener listener, Set profiles) { + public void profile(String detectorId, ActionListener listener, Set profilesToCollect) { - if (profiles.isEmpty()) { + if (profilesToCollect.isEmpty()) { listener.onFailure(new RuntimeException("Unsupported profile types.")); return; } @@ -108,18 +90,22 @@ public void profile(String detectorId, ActionListener listener, // and return to users int totalListener = 0; - if (profiles.contains(ProfileName.STATE)) { + if (profilesToCollect.contains(ProfileName.STATE)) { + totalListener++; + } + + if (profilesToCollect.contains(ProfileName.ERROR)) { totalListener++; } - if (profiles.contains(ProfileName.ERROR)) { + if (profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { totalListener++; } - if (profiles.contains(ProfileName.COORDINATING_NODE) - || profiles.contains(ProfileName.SHINGLE_SIZE) - || profiles.contains(ProfileName.TOTAL_SIZE_IN_BYTES) - || profiles.contains(ProfileName.MODELS)) { + if (profilesToCollect.contains(ProfileName.COORDINATING_NODE) + || profilesToCollect.contains(ProfileName.SHINGLE_SIZE) + || profilesToCollect.contains(ProfileName.TOTAL_SIZE_IN_BYTES) + || profilesToCollect.contains(ProfileName.MODELS)) { totalListener++; } @@ -129,13 +115,13 @@ public void profile(String detectorId, ActionListener listener, "Fail to fetch profile for " + detectorId ); - prepareProfile(detectorId, delegateListener, profiles); + prepareProfile(detectorId, delegateListener, profilesToCollect); } private void prepareProfile( String detectorId, MultiResponsesDelegateActionListener listener, - Set profiles + Set profilesToCollect ) { GetRequest getRequest = new GetRequest(ANOMALY_DETECTOR_JOB_INDEX, detectorId); client.get(getRequest, ActionListener.wrap(getResponse -> { @@ -149,18 +135,20 @@ private 
void prepareProfile( AnomalyDetectorJob job = AnomalyDetectorJob.parse(parser); long enabledTimeMs = job.getEnabledTime().toEpochMilli(); - if (profiles.contains(ProfileName.STATE)) { - profileState(detectorId, enabledTimeMs, listener, job.isEnabled()); + if (profilesToCollect.contains(ProfileName.ERROR)) { + GetRequest getStateRequest = new GetRequest(DetectorInternalState.DETECTOR_STATE_INDEX, detectorId); + client.get(getStateRequest, onGetDetectorState(listener, detectorId, enabledTimeMs)); } - if (profiles.contains(ProfileName.ERROR)) { - profileError(detectorId, enabledTimeMs, job.getDisabledTime(), listener); + + if (profilesToCollect.contains(ProfileName.STATE) || profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { + profileStateRelated(detectorId, listener, job.isEnabled(), profilesToCollect); } - if (profiles.contains(ProfileName.COORDINATING_NODE) - || profiles.contains(ProfileName.SHINGLE_SIZE) - || profiles.contains(ProfileName.TOTAL_SIZE_IN_BYTES) - || profiles.contains(ProfileName.MODELS)) { - profileModels(detectorId, profiles, listener); + if (profilesToCollect.contains(ProfileName.COORDINATING_NODE) + || profilesToCollect.contains(ProfileName.SHINGLE_SIZE) + || profilesToCollect.contains(ProfileName.TOTAL_SIZE_IN_BYTES) + || profilesToCollect.contains(ProfileName.MODELS)) { + profileModels(detectorId, profilesToCollect, listener); } } catch (IOException | XContentParseException | NullPointerException e) { logger.error(e); @@ -168,13 +156,13 @@ private void prepareProfile( } } else { GetRequest getDetectorRequest = new GetRequest(ANOMALY_DETECTORS_INDEX, detectorId); - client.get(getDetectorRequest, onGetDetectorResponse(listener, detectorId, profiles)); + client.get(getDetectorRequest, onGetDetectorForPrepare(listener, detectorId, profilesToCollect)); } }, exception -> { if (exception instanceof IndexNotFoundException) { logger.info(exception.getMessage()); GetRequest getDetectorRequest = new GetRequest(ANOMALY_DETECTORS_INDEX, detectorId); - 
client.get(getDetectorRequest, onGetDetectorResponse(listener, detectorId, profiles)); + client.get(getDetectorRequest, onGetDetectorForPrepare(listener, detectorId, profilesToCollect)); } else { logger.error(FAIL_TO_GET_PROFILE_MSG + detectorId); listener.onFailure(exception); @@ -182,18 +170,18 @@ private void prepareProfile( })); } - private ActionListener onGetDetectorResponse( + private ActionListener onGetDetectorForPrepare( MultiResponsesDelegateActionListener listener, String detectorId, Set profiles ) { return ActionListener.wrap(getResponse -> { if (getResponse != null && getResponse.isExists()) { - DetectorProfile profile = new DetectorProfile(); + DetectorProfile.Builder profileBuilder = new DetectorProfile.Builder(); if (profiles.contains(ProfileName.STATE)) { - profile.setState(DetectorState.DISABLED); + profileBuilder.state(DetectorState.DISABLED); } - listener.respondImmediately(profile); + listener.respondImmediately(profileBuilder.build()); } else { listener.failImmediately(FAIL_TO_FIND_DETECTOR_MSG + detectorId); } @@ -203,242 +191,121 @@ private ActionListener onGetDetectorResponse( /** * We expect three kinds of states: * -Disabled: if get ad job api says the job is disabled; - * -Init: if anomaly score after the last update time of the detector is larger than 0 + * -Init: if rcf model's total updates is less than required * -Running: if neither of the above applies and no exceptions. 
* @param detectorId detector id - * @param enabledTime the time when AD job is enabled in milliseconds * @param listener listener to process the returned state or exception * @param enabled whether the detector job is enabled or not + * @param profilesToCollect target profiles to fetch */ - private void profileState( + private void profileStateRelated( String detectorId, - long enabledTime, MultiResponsesDelegateActionListener listener, - boolean enabled + boolean enabled, + Set profilesToCollect ) { if (enabled) { - SearchRequest searchLatestResult = createInittedEverRequest(detectorId, enabledTime); - client.search(searchLatestResult, onInittedEver(listener, detectorId, enabledTime)); + RCFPollingRequest request = new RCFPollingRequest(detectorId); + client.execute(RCFPollingAction.INSTANCE, request, onPollRCFUpdates(detectorId, profilesToCollect, listener)); } else { - DetectorProfile profile = new DetectorProfile(); - profile.setState(DetectorState.DISABLED); - listener.onResponse(profile); - } - } - - private ActionListener onInittedEver( - MultiResponsesDelegateActionListener listener, - String detectorId, - long lastUpdateTimeMs - ) { - return ActionListener.wrap(searchResponse -> { - SearchHits hits = searchResponse.getHits(); - DetectorProfile profile = new DetectorProfile(); - if (hits.getHits().length == 0L) { - profile.setState(DetectorState.INIT); - } else { - profile.setState(DetectorState.RUNNING); + if (profilesToCollect.contains(ProfileName.STATE)) { + listener.onResponse(new DetectorProfile.Builder().state(DetectorState.DISABLED).build()); } - - listener.onResponse(profile); - - }, exception -> { - if (exception instanceof IndexNotFoundException) { - DetectorProfile profile = new DetectorProfile(); - // anomaly result index is not created yet - profile.setState(DetectorState.INIT); - listener.onResponse(profile); - } else { - logger - .error( - "Fail to find any anomaly result with anomaly score larger than 0 after AD job enabled time for detector 
{}", - detectorId - ); - listener.onFailure(new RuntimeException("Fail to find detector state: " + detectorId, exception)); + if (profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { + listener.onResponse(new DetectorProfile.Builder().build()); } - }); + } } /** - * Precondition: - * 1. Index are rotated with name pattern ".opendistro-anomaly-results-history-{now/d}-1" and now is using UTC. - * 2. Latest entry with error is recorded within enabled and disabled time. Note disabled time can be null. - * - * Error is populated if error of the latest anomaly result is not empty. - * - * Two optimization to avoid scanning all anomaly result indices to get a detector's most recent error - * - * First, when a detector is running, we only need to scan the current index, not all of the rolled over ones - * since we are interested in the latest error. - * Second, when a detector is disabled, we only need to scan the latest anomaly result indices created before the - * detector's enable time. - * + * Action listener for a detector in running or init state + * @param listener listener to consolidate results and return a final response * @param detectorId detector id - * @param enabledTimeMillis the time when AD job is enabled in milliseconds - * @param listener listener to process the returned error or exception + * @param enabledTimeMs AD job enabled time + * @return the listener for a detector in disabled state */ - private void profileError( + private ActionListener onGetDetectorState( + MultiResponsesDelegateActionListener listener, String detectorId, - long enabledTimeMillis, - Instant disabledTime, - MultiResponsesDelegateActionListener listener + long enabledTimeMs ) { - String[] latestIndex = null; - - long disabledTimeMillis = 0; - if (disabledTime != null) { - disabledTimeMillis = disabledTime.toEpochMilli(); - } - if (enabledTimeMillis > disabledTimeMillis) { - // detector is still running - latestIndex = new String[1]; - latestIndex[0] = 
AnomalyResult.ANOMALY_RESULT_INDEX; - } else { - String[] concreteIndices = indexNameExpressionResolver - .concreteIndexNames( - clusterService.state(), - IndicesOptions.lenientExpandOpen(), - AnomalyDetectionIndices.ALL_AD_RESULTS_INDEX_PATTERN - ); - - // find the latest from result indices such as .opendistro-anomaly-results-history-2020.04.06-1 and - // /.opendistro-anomaly-results-history-2020.04.07-000002 - long maxTimestamp = -1; - TreeMap> candidateIndices = new TreeMap<>(); - for (String indexName : concreteIndices) { - Matcher m = Pattern.compile("\\.opendistro-anomaly-results-history-(\\d{4})\\.(\\d{2})\\.(\\d{2})-\\d+").matcher(indexName); - if (m.matches()) { - int year = Integer.parseInt(m.group(1)); - int month = Integer.parseInt(m.group(2)); - int date = Integer.parseInt(m.group(3)); - // month starts with 0 - calendar.clear(); - calendar.set(year, month - 1, date); - // 2020.05.08 is translated to 1588896000000 - long timestamp = calendar.getTimeInMillis(); - - // a candidate index can be created before or after enabled time, but the index is definitely created before disabled - // time - if (timestamp <= disabledTimeMillis && maxTimestamp <= timestamp) { - maxTimestamp = timestamp; - // we can have two rotations on the same day and we don't know which one has our data, so we keep all - List indexList = candidateIndices.computeIfAbsent(timestamp, k -> new ArrayList()); - indexList.add(indexName); - } - } - } - List candidates = new ArrayList(); - List latestCandidate = candidateIndices.get(maxTimestamp); - - if (latestCandidate != null) { - candidates.addAll(latestCandidate); - } - - // look back one more index for an edge case: - // Suppose detector interval is 1 minute. Detector last run is at 2020-05-07, 11:59:50 PM, - // then AD result indices rolled over as .opendistro-anomaly-results-history-2020.05.07-001 - // Detector next run will be 2020-05-08, 00:00:50 AM. 
If a user stop the detector at - // 2020-05-08 00:00:10 AM, detector will not have AD result on 2020-05-08. - // We check AD result indices one day earlier to make sure we can always get AD result. - Map.Entry> earlierCandidate = candidateIndices.lowerEntry(maxTimestamp); - if (earlierCandidate != null) { - candidates.addAll(earlierCandidate.getValue()); - } - latestIndex = candidates.toArray(new String[0]); - } - - if (latestIndex == null || latestIndex.length == 0) { - // no result index found: can be due to anomaly result is not created yet or result indices for the detector have been deleted. - listener.onResponse(new DetectorProfile()); - return; - } - SearchRequest searchLatestResult = createLatestAnomalyResultRequest(detectorId, enabledTimeMillis, disabledTimeMillis, latestIndex); - client.search(searchLatestResult, onGetLatestAnomalyResult(listener, detectorId)); - } - - private ActionListener onGetLatestAnomalyResult(ActionListener listener, String detectorId) { - return ActionListener.wrap(searchResponse -> { - SearchHits hits = searchResponse.getHits(); - if (hits.getHits().length == 0L) { - listener.onResponse(new DetectorProfile()); - } else { - SearchHit hit = hits.getAt(0); - + return ActionListener.wrap(getResponse -> { + DetectorProfile.Builder profileBuilder = new DetectorProfile.Builder(); + if (getResponse != null && getResponse.isExists()) { try ( XContentParser parser = XContentType.JSON .xContent() - .createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, hit.getSourceAsString()) + .createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, getResponse.getSourceAsString()) ) { ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser::getTokenLocation); - AnomalyResult result = parser.namedObject(AnomalyResult.class, AnomalyResult.PARSE_FIELD_NAME, null); - DetectorProfile profile = new DetectorProfile(); - if (result.getError() != null) { - profile.setError(result.getError()); + 
DetectorInternalState detectorState = DetectorInternalState.parse(parser); + long lastUpdateTimeMs = detectorState.getLastUpdateTime().toEpochMilli(); + + // if state index hasn't been updated, we should not use the error field + // For example, before a detector is enabled, if the error message contains + // the phrase "stopped due to blah", we should not show this when the detector + // is enabled. + if (lastUpdateTimeMs > enabledTimeMs && detectorState.getError() != null) { + profileBuilder.error(detectorState.getError()); } - listener.onResponse(profile); + + listener.onResponse(profileBuilder.build()); } catch (IOException | XContentParseException | NullPointerException e) { - logger.error("Fail to parse anomaly result with " + hit.toString()); - listener.onFailure(new RuntimeException("Fail to find detector error: " + detectorId, e)); + logger.error(e); + listener.failImmediately(FAIL_TO_GET_PROFILE_MSG, e); } + } else { + // detector state for this detector does not exist + listener.onResponse(profileBuilder.build()); } }, exception -> { if (exception instanceof IndexNotFoundException) { - listener.onResponse(new DetectorProfile()); + // detector state index is not created yet + listener.onResponse(new DetectorProfile.Builder().build()); } else { - logger.error("Fail to find any anomaly result after AD job enabled time for detector {}", detectorId); - listener.onFailure(new RuntimeException("Fail to find detector error: " + detectorId, exception)); + logger.error("Fail to find any detector info for detector {}", detectorId); + listener.onFailure(exception); } }); } - /** - * Create search request to check if we have at least 1 anomaly score larger than 0 after AD job enabled time - * @param detectorId detector id - * @param enabledTime the time when AD job is enabled in milliseconds - * @return the search request - */ - private SearchRequest createInittedEverRequest(String detectorId, long enabledTime) { - BoolQueryBuilder filterQuery = new 
BoolQueryBuilder(); - filterQuery.filter(QueryBuilders.termQuery(AnomalyResult.DETECTOR_ID_FIELD, detectorId)); - filterQuery.filter(QueryBuilders.rangeQuery(AnomalyResult.EXECUTION_END_TIME_FIELD).gte(enabledTime)); - filterQuery.filter(QueryBuilders.rangeQuery(AnomalyResult.ANOMALY_SCORE_FIELD).gt(0)); - - // I am only looking for last 1 occurrence and have no interest in the total number of documents that match the query. - // ES will not try to count the number of documents and will be able to terminate the query as soon as 1 document - // have been collected per segment. - SearchSourceBuilder source = new SearchSourceBuilder().query(filterQuery).size(1).trackTotalHits(false); - - SearchRequest request = new SearchRequest(AnomalyResult.ANOMALY_RESULT_INDEX); - request.source(source); - return request; - } - - /** - * Create search request to get the latest anomaly result after AD job enabled time - * @param detectorId detector id - * @param enabledTime the time when AD job is enabled in milliseconds - * @return the search request - */ - private SearchRequest createLatestAnomalyResultRequest(String detectorId, long enabledTime, long disabledTime, String[] index) { - BoolQueryBuilder filterQuery = new BoolQueryBuilder(); - filterQuery.filter(QueryBuilders.termQuery(AnomalyResult.DETECTOR_ID_FIELD, detectorId)); - RangeQueryBuilder rangeBuilder = QueryBuilders.rangeQuery(AnomalyResult.EXECUTION_END_TIME_FIELD).gte(enabledTime); - if (disabledTime >= enabledTime) { - rangeBuilder.lte(disabledTime); - } - filterQuery.filter(rangeBuilder); - - FieldSortBuilder sortQuery = new FieldSortBuilder(AnomalyResult.EXECUTION_END_TIME_FIELD).order(SortOrder.DESC); - - // I am only looking for last 1 occurrence and have no interest in the total number of documents that match the query. - // ES will not try to count the number of documents and will be able to terminate the query as soon as 1 document - // have been collected per segment. 
- SearchSourceBuilder source = new SearchSourceBuilder().query(filterQuery).size(1).sort(sortQuery).trackTotalHits(false); + private ActionListener onGetDetectorForInitProgress( + MultiResponsesDelegateActionListener listener, + String detectorId, + Set profilesToCollect, + long totalUpdates, + long requiredSamples + ) { + return ActionListener.wrap(getResponse -> { + if (getResponse != null && getResponse.isExists()) { + try ( + XContentParser parser = XContentType.JSON + .xContent() + .createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, getResponse.getSourceAsString()) + ) { + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser::getTokenLocation); + AnomalyDetector detector = AnomalyDetector.parse(parser, detectorId); + long intervalMins = ((IntervalTimeConfiguration) detector.getDetectionInterval()).toDuration().toMinutes(); + float percent = (100.0f * totalUpdates) / requiredSamples; + int neededPoints = (int) (requiredSamples - totalUpdates); + InitProgressProfile initProgress = new InitProgressProfile( + // rounding: 93.456 => 93%, 93.556 => 94% + String.format("%.0f%%", percent), + intervalMins * neededPoints, + neededPoints + ); - SearchRequest request = new SearchRequest(index); - request.source(source); - return request; + listener.onResponse(new DetectorProfile.Builder().initProgress(initProgress).build()); + } catch (Exception t) { + logger.error("Fail to parse detector {}", detectorId); + logger.error("Stack trace:", t); + listener.failImmediately(FAIL_TO_FIND_DETECTOR_MSG + detectorId, t); + } + } else { + listener.failImmediately(FAIL_TO_FIND_DETECTOR_MSG + detectorId); + } + }, exception -> { listener.failImmediately(FAIL_TO_FIND_DETECTOR_MSG + detectorId, exception); }); } private void profileModels( @@ -457,21 +324,86 @@ private ActionListener onModelResponse( MultiResponsesDelegateActionListener listener ) { return ActionListener.wrap(profileResponse -> { - DetectorProfile profile = new 
DetectorProfile(); + DetectorProfile.Builder profile = new DetectorProfile.Builder(); if (profiles.contains(ProfileName.COORDINATING_NODE)) { - profile.setCoordinatingNode(profileResponse.getCoordinatingNode()); + profile.coordinatingNode(profileResponse.getCoordinatingNode()); } if (profiles.contains(ProfileName.SHINGLE_SIZE)) { - profile.setShingleSize(profileResponse.getShingleSize()); + profile.shingleSize(profileResponse.getShingleSize()); } if (profiles.contains(ProfileName.TOTAL_SIZE_IN_BYTES)) { - profile.setTotalSizeInBytes(profileResponse.getTotalSizeInBytes()); + profile.totalSizeInBytes(profileResponse.getTotalSizeInBytes()); } if (profiles.contains(ProfileName.MODELS)) { - profile.setModelProfile(profileResponse.getModelProfile()); + profile.modelProfile(profileResponse.getModelProfile()); } - listener.onResponse(profile); + listener.onResponse(profile.build()); }, listener::onFailure); } + + /** + * Listener for polling rcf updates through transport messaging + * @param detectorId detector Id + * @param profilesToCollect profiles to collect like state + * @param listener delegate listener + * @return Listener for polling rcf updates through transport messaging + */ + private ActionListener onPollRCFUpdates( + String detectorId, + Set profilesToCollect, + MultiResponsesDelegateActionListener listener + ) { + return ActionListener.wrap(rcfPollResponse -> { + long totalUpdates = rcfPollResponse.getTotalUpdates(); + if (totalUpdates < requiredSamples) { + processInitResponse(detectorId, profilesToCollect, listener, totalUpdates); + } else { + if (profilesToCollect.contains(ProfileName.STATE)) { + listener.onResponse(new DetectorProfile.Builder().state(DetectorState.RUNNING).build()); + } + + if (profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { + InitProgressProfile initProgress = new InitProgressProfile("100%", 0, 0); + listener.onResponse(new DetectorProfile.Builder().initProgress(initProgress).build()); + } + } + }, exception -> { + // we will 
get an AnomalyDetectionException wrapping the real exception inside + Throwable cause = Throwables.getRootCause(exception); + + // exception can be a RemoteTransportException + Exception causeException = (Exception) cause; + if (ExceptionUtil + .isException(causeException, ResourceNotFoundException.class, ExceptionUtil.RESOURCE_NOT_FOUND_EXCEPTION_NAME_UNDERSCORE) + || (causeException instanceof IndexNotFoundException + && causeException.getMessage().contains(CommonName.CHECKPOINT_INDEX_NAME))) { + // cannot find checkpoint + processInitResponse(detectorId, profilesToCollect, listener, 0L); + } else { + logger.error(new ParameterizedMessage("Fail to get init progress through messaging for {}", detectorId), exception); + listener.failImmediately(FAIL_TO_GET_PROFILE_MSG + detectorId, exception); + } + }); + } + + private void processInitResponse( + String detectorId, + Set profilesToCollect, + MultiResponsesDelegateActionListener listener, + long totalUpdates + ) { + if (profilesToCollect.contains(ProfileName.STATE)) { + listener.onResponse(new DetectorProfile.Builder().state(DetectorState.INIT).build()); + } + + if (profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { + GetRequest getDetectorRequest = new GetRequest(ANOMALY_DETECTORS_INDEX, detectorId); + client + .get( + getDetectorRequest, + onGetDetectorForInitProgress(listener, detectorId, profilesToCollect, totalUpdates, requiredSamples) + ); + } + } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/constant/CommonName.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/constant/CommonName.java index 8f730ccb..c6336fd6 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/constant/CommonName.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/constant/CommonName.java @@ -54,4 +54,5 @@ public class CommonName { public static final String SHINGLE_SIZE = "shingle_size"; public static final String TOTAL_SIZE_IN_BYTES = "total_size_in_bytes"; public 
static final String MODELS = "models"; + public static final String INIT_PROGRESS = "init_progress"; } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/indices/AnomalyDetectionIndices.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/indices/AnomalyDetectionIndices.java index 61e13825..3cc730dd 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/indices/AnomalyDetectionIndices.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/indices/AnomalyDetectionIndices.java @@ -18,6 +18,7 @@ import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.AD_RESULT_HISTORY_MAX_DOCS; import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.AD_RESULT_HISTORY_RETENTION_PERIOD; import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.AD_RESULT_HISTORY_ROLLOVER_PERIOD; +import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.ANOMALY_DETECTION_STATE_INDEX_MAPPING_FILE; import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.ANOMALY_DETECTORS_INDEX_MAPPING_FILE; import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.ANOMALY_DETECTOR_JOBS_INDEX_MAPPING_FILE; import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.ANOMALY_RESULTS_INDEX_MAPPING_FILE; @@ -56,12 +57,13 @@ import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.carrotsearch.hppc.cursors.ObjectCursor; import com.google.common.base.Charsets; import com.google.common.io.Resources; /** - * This class manages creation of anomaly detector index. 
+ * This class provides utility methods for various anomaly detection indices. */ public class AnomalyDetectionIndices implements LocalNodeMasterListener { @@ -148,6 +150,17 @@ private String getAnomalyDetectorJobMappings() throws IOException { return Resources.toString(url, Charsets.UTF_8); } + /** + * Get anomaly detector state index mapping json content. + * + * @return anomaly detector state index mapping + * @throws IOException IOException if mapping file can't be read correctly + */ + private String getDetectorStateMappings() throws IOException { + URL url = AnomalyDetectionIndices.class.getClassLoader().getResource(ANOMALY_DETECTION_STATE_INDEX_MAPPING_FILE); + return Resources.toString(url, Charsets.UTF_8); + } + /** * Anomaly detector index exist or not. * @@ -175,6 +188,15 @@ public boolean doesAnomalyResultIndexExist() { return clusterService.state().metadata().hasAlias(AnomalyResult.ANOMALY_RESULT_INDEX); } + /** + * Detector state index exists or not. + * + * @return true if detector state index exists + */ + public boolean doesDetectorStateIndexExist() { + return clusterService.state().getRoutingTable().hasIndex(DetectorInternalState.DETECTOR_STATE_INDEX); + } + /** * Create anomaly detector index if not exist. * @@ -238,6 +260,18 @@ public void initAnomalyDetectorJobIndex(ActionListener acti adminClient.indices().create(request, actionListener); } + /** + * Create the detector state index. 
+ * + * @param actionListener action called after create index + * @throws IOException IOException from {@link AnomalyDetectionIndices#getDetectorStateMappings} + */ + public void initDetectorStateIndex(ActionListener actionListener) throws IOException { + CreateIndexRequest request = new CreateIndexRequest(DetectorInternalState.DETECTOR_STATE_INDEX) + .mapping(AnomalyDetector.TYPE, getDetectorStateMappings(), XContentType.JSON); + adminClient.indices().create(request, actionListener); + } + @Override public void onMaster() { try { diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/ModelManager.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/ModelManager.java index cfe6ce0c..8fd81864 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/ModelManager.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/ModelManager.java @@ -397,6 +397,13 @@ private void getRcfResult(ModelState modelState, double[] point listener.onResponse(new RcfResult(score, confidence, forestSize)); } + private Optional> restoreCheckpoint(Optional rcfCheckpoint, String modelId, String detectorId) { + return rcfCheckpoint + .map(checkpoint -> AccessController.doPrivileged((PrivilegedAction) () -> rcfSerde.fromJson(checkpoint))) + .filter(rcf -> isHostingAllowed(detectorId, rcf)) + .map(rcf -> new ModelState<>(rcf, modelId, detectorId, ModelType.RCF.getName(), clock.instant())); + } + private void processRcfCheckpoint( Optional rcfCheckpoint, String modelId, @@ -404,10 +411,7 @@ private void processRcfCheckpoint( double[] point, ActionListener listener ) { - Optional> model = rcfCheckpoint - .map(checkpoint -> AccessController.doPrivileged((PrivilegedAction) () -> rcfSerde.fromJson(checkpoint))) - .filter(rcf -> isHostingAllowed(detectorId, rcf)) - .map(rcf -> new ModelState<>(rcf, modelId, detectorId, ModelType.RCF.getName(), clock.instant())); + Optional> model = restoreCheckpoint(rcfCheckpoint, modelId, detectorId); if 
(model.isPresent()) { forests.put(modelId, model.get()); getRcfResult(model.get(), point, listener); @@ -416,6 +420,24 @@ private void processRcfCheckpoint( } } + /** + * Process rcf checkpoint for total rcf updates polling + * @param rcfCheckpoint rcf checkpoint json string + * @param modelId model Id + * @param detectorId detector Id + * @param listener listener to return total updates of rcf + */ + private void processRcfCheckpoint(Optional rcfCheckpoint, String modelId, String detectorId, ActionListener listener) { + logger.info("Restoring checkpoint for {}", modelId); + Optional> model = restoreCheckpoint(rcfCheckpoint, modelId, detectorId); + if (model.isPresent()) { + forests.put(modelId, model.get()); + listener.onResponse(model.get().getModel().getTotalUpdates()); + } else { + listener.onFailure(new ResourceNotFoundException(detectorId, CommonErrorMessages.NO_CHECKPOINT_ERR_MSG + modelId)); + } + } + /** * Gets the result using the specified thresholding model. * @@ -1045,4 +1067,24 @@ public Map getModelSize(String detectorId) { .forEach(entry -> { res.put(entry.getKey(), 0L); }); return res; } + + /** + * Get a RCF model's total updates. 
+ * @param modelId the RCF model's id + * @param detectorId detector Id + * @param listener listener to return the result + */ + public void getTotalUpdates(String modelId, String detectorId, ActionListener listener) { + ModelState model = forests.get(modelId); + if (model != null) { + listener.onResponse(model.getModel().getTotalUpdates()); + } else { + checkpointDao + .getModelCheckpoint( + modelId, + ActionListener.wrap(checkpoint -> processRcfCheckpoint(checkpoint, modelId, detectorId, listener), listener::onFailure) + ); + } + + } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/AnomalyDetectorJob.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/AnomalyDetectorJob.java index 30f36939..9a99e60c 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/AnomalyDetectorJob.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/AnomalyDetectorJob.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.time.Instant; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; @@ -36,6 +38,13 @@ */ public class AnomalyDetectorJob implements ToXContentObject, ScheduledJobParameter { + public static final String PARSE_FIELD_NAME = "AnomalyDetectorJob"; + public static final NamedXContentRegistry.Entry XCONTENT_REGISTRY = new NamedXContentRegistry.Entry( + AnomalyDetectorJob.class, + new ParseField(PARSE_FIELD_NAME), + it -> parse(it) + ); + public static final String ANOMALY_DETECTOR_JOB_INDEX = ".opendistro-anomaly-detector-jobs"; public static final String NAME_FIELD = "name"; public static final String LAST_UPDATE_TIME_FIELD = "last_update_time"; diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorInternalState.java 
b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorInternalState.java new file mode 100644 index 00000000..03633c79 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorInternalState.java @@ -0,0 +1,160 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.model; + +import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; + +import java.io.IOException; +import java.time.Instant; + +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; + +import com.amazon.opendistroforelasticsearch.ad.annotation.Generated; +import com.amazon.opendistroforelasticsearch.ad.util.ParseUtils; +import com.google.common.base.Objects; + +/** + * Include anomaly detector's state + */ +public class DetectorInternalState implements ToXContentObject, Cloneable { + + public static final String PARSE_FIELD_NAME = "DetectorInternalState"; + public static final NamedXContentRegistry.Entry XCONTENT_REGISTRY = new NamedXContentRegistry.Entry( + DetectorInternalState.class, + new ParseField(PARSE_FIELD_NAME), + it -> parse(it) + ); + + public static final String DETECTOR_STATE_INDEX = 
".opendistro-anomaly-detection-state"; + + public static final String LAST_UPDATE_TIME_FIELD = "last_update_time"; + public static final String ERROR_FIELD = "error"; + + private Instant lastUpdateTime = null; + private String error = null; + + private DetectorInternalState() {} + + public static class Builder { + private Instant lastUpdateTime = null; + private String error = null; + + public Builder() {} + + public Builder lastUpdateTime(Instant lastUpdateTime) { + this.lastUpdateTime = lastUpdateTime; + return this; + } + + public Builder error(String error) { + this.error = error; + return this; + } + + public DetectorInternalState build() { + DetectorInternalState state = new DetectorInternalState(); + state.lastUpdateTime = this.lastUpdateTime; + state.error = this.error; + + return state; + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + XContentBuilder xContentBuilder = builder.startObject(); + + if (lastUpdateTime != null) { + xContentBuilder.field(LAST_UPDATE_TIME_FIELD, lastUpdateTime.toEpochMilli()); + } + if (error != null) { + xContentBuilder.field(ERROR_FIELD, error); + } + return xContentBuilder.endObject(); + } + + public static DetectorInternalState parse(XContentParser parser) throws IOException { + Instant lastUpdateTime = null; + String error = null; + + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser::getTokenLocation); + while (parser.nextToken() != XContentParser.Token.END_OBJECT) { + String fieldName = parser.currentName(); + parser.nextToken(); + + switch (fieldName) { + case LAST_UPDATE_TIME_FIELD: + lastUpdateTime = ParseUtils.toInstant(parser); + break; + case ERROR_FIELD: + error = parser.text(); + break; + default: + parser.skipChildren(); + break; + } + } + return new DetectorInternalState.Builder().lastUpdateTime(lastUpdateTime).error(error).build(); + } + + @Generated + @Override + public boolean equals(Object o) { + if (this 
== o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + DetectorInternalState that = (DetectorInternalState) o; + return Objects.equal(getLastUpdateTime(), that.getLastUpdateTime()) && Objects.equal(getError(), that.getError()); + } + + @Generated + @Override + public int hashCode() { + return Objects.hashCode(lastUpdateTime, error); + } + + @Override + public Object clone() { + DetectorInternalState state = null; + try { + state = (DetectorInternalState) super.clone(); + } catch (CloneNotSupportedException e) { + state = new DetectorInternalState.Builder().lastUpdateTime(lastUpdateTime).error(error).build(); + } + return state; + } + + public Instant getLastUpdateTime() { + return lastUpdateTime; + } + + public void setLastUpdateTime(Instant lastUpdateTime) { + this.lastUpdateTime = lastUpdateTime; + } + + public String getError() { + return error; + } + + public void setError(String error) { + this.error = error; + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorProfile.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorProfile.java index 8ee3efd4..6b066491 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorProfile.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorProfile.java @@ -33,18 +33,72 @@ public class DetectorProfile implements ToXContentObject, Mergeable { private int shingleSize; private String coordinatingNode; private long totalSizeInBytes; + private InitProgressProfile initProgress; public XContentBuilder toXContent(XContentBuilder builder) throws IOException { return toXContent(builder, ToXContent.EMPTY_PARAMS); } - public DetectorProfile() { - state = null; - error = null; - modelProfile = null; - shingleSize = -1; - coordinatingNode = null; - totalSizeInBytes = -1; + private DetectorProfile() {} + + public static class Builder { + private DetectorState state = null; + private String error = 
null; + private ModelProfile[] modelProfile = null; + private int shingleSize = -1; + private String coordinatingNode = null; + private long totalSizeInBytes = -1; + private InitProgressProfile initProgress = null; + + public Builder() {} + + public Builder state(DetectorState state) { + this.state = state; + return this; + } + + public Builder error(String error) { + this.error = error; + return this; + } + + public Builder modelProfile(ModelProfile[] modelProfile) { + this.modelProfile = modelProfile; + return this; + } + + public Builder shingleSize(int shingleSize) { + this.shingleSize = shingleSize; + return this; + } + + public Builder coordinatingNode(String coordinatingNode) { + this.coordinatingNode = coordinatingNode; + return this; + } + + public Builder totalSizeInBytes(long totalSizeInBytes) { + this.totalSizeInBytes = totalSizeInBytes; + return this; + } + + public Builder initProgress(InitProgressProfile initProgress) { + this.initProgress = initProgress; + return this; + } + + public DetectorProfile build() { + DetectorProfile profile = new DetectorProfile(); + profile.state = this.state; + profile.error = this.error; + profile.modelProfile = modelProfile; + profile.shingleSize = shingleSize; + profile.coordinatingNode = coordinatingNode; + profile.totalSizeInBytes = totalSizeInBytes; + profile.initProgress = initProgress; + + return profile; + } } @Override @@ -73,7 +127,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (totalSizeInBytes != -1) { xContentBuilder.field(CommonName.TOTAL_SIZE_IN_BYTES, totalSizeInBytes); } - + if (initProgress != null) { + xContentBuilder.field(CommonName.INIT_PROGRESS, initProgress); + } return xContentBuilder.endObject(); } @@ -125,6 +181,14 @@ public void setTotalSizeInBytes(long totalSizeInBytes) { this.totalSizeInBytes = totalSizeInBytes; } + public InitProgressProfile getInitProgress() { + return initProgress; + } + + public void setInitProgress(InitProgressProfile 
initProgress) { + this.initProgress = initProgress; + } + @Override public void merge(Mergeable other) { if (this == other || other == null || getClass() != other.getClass()) { @@ -149,6 +213,9 @@ public void merge(Mergeable other) { if (otherProfile.getTotalSizeInBytes() != -1) { this.totalSizeInBytes = otherProfile.getTotalSizeInBytes(); } + if (otherProfile.getInitProgress() != null) { + this.initProgress = otherProfile.getInitProgress(); + } } @Override @@ -162,18 +229,71 @@ public boolean equals(Object obj) { if (obj instanceof DetectorProfile) { DetectorProfile other = (DetectorProfile) obj; - return new EqualsBuilder().append(state, other.state).append(error, other.error).isEquals(); + EqualsBuilder equalsBuilder = new EqualsBuilder(); + if (state != null) { + equalsBuilder.append(state, other.state); + } + if (error != null) { + equalsBuilder.append(error, other.error); + } + if (modelProfile != null && modelProfile.length > 0) { + equalsBuilder.append(modelProfile, other.modelProfile); + } + if (shingleSize != -1) { + equalsBuilder.append(shingleSize, other.shingleSize); + } + if (coordinatingNode != null) { + equalsBuilder.append(coordinatingNode, other.coordinatingNode); + } + if (totalSizeInBytes != -1) { + equalsBuilder.append(totalSizeInBytes, other.totalSizeInBytes); + } + if (initProgress != null) { + equalsBuilder.append(initProgress, other.initProgress); + } + return equalsBuilder.isEquals(); } return false; } @Override public int hashCode() { - return new HashCodeBuilder().append(state).append(error).toHashCode(); + return new HashCodeBuilder() + .append(state) + .append(error) + .append(modelProfile) + .append(shingleSize) + .append(coordinatingNode) + .append(totalSizeInBytes) + .append(initProgress) + .toHashCode(); } @Override public String toString() { - return new ToStringBuilder(this).append("state", state).append("error", error).toString(); + ToStringBuilder toStringBuilder = new ToStringBuilder(this); + + if (state != null) { + 
toStringBuilder.append(CommonName.STATE, state); + } + if (error != null) { + toStringBuilder.append(CommonName.ERROR, error); + } + if (modelProfile != null && modelProfile.length > 0) { + toStringBuilder.append(modelProfile); + } + if (shingleSize != -1) { + toStringBuilder.append(CommonName.SHINGLE_SIZE, shingleSize); + } + if (coordinatingNode != null) { + toStringBuilder.append(CommonName.COORDINATING_NODE, coordinatingNode); + } + if (totalSizeInBytes != -1) { + toStringBuilder.append(CommonName.TOTAL_SIZE_IN_BYTES, totalSizeInBytes); + } + if (initProgress != null) { + toStringBuilder.append(CommonName.INIT_PROGRESS, initProgress); + } + return toStringBuilder.toString(); } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/InitProgressProfile.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/InitProgressProfile.java new file mode 100644 index 00000000..2047439f --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/InitProgressProfile.java @@ -0,0 +1,146 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.model; + +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +import java.io.IOException; + +import org.apache.commons.lang.builder.EqualsBuilder; +import org.apache.commons.lang.builder.HashCodeBuilder; +import org.apache.commons.lang.builder.ToStringBuilder; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; + +/** + * Profile output for detector initialization progress. When the new detector is created, it is possible that + * there hasn’t been enough continuous data in the index. We need to use live data to initialize. + * During initialization, we need to tell users progress (using a percentage), how many more + * shingles to go, and approximately how many minutes before the detector becomes operational + * if they keep their data stream continuous. 
+ * + */ +public class InitProgressProfile implements Writeable, ToXContent { + // field name in toXContent + public static final String PERCENTAGE = "percentage"; + public static final String ESTIMATED_MINUTES_LEFT = "estimated_minutes_left"; + public static final String NEEDED_SHINGLES = "needed_shingles"; + + private final String percentage; + private final long estimatedMinutesLeft; + private final int neededShingles; + + public InitProgressProfile(String percentage, long estimatedMinutesLeft, int neededDataPoints) { + super(); + this.percentage = percentage; + this.estimatedMinutesLeft = estimatedMinutesLeft; + this.neededShingles = neededDataPoints; + } + + public InitProgressProfile(StreamInput in) throws IOException { + percentage = in.readString(); + estimatedMinutesLeft = in.readVLong(); + neededShingles = in.readVInt(); + } + + public String getPercentage() { + return percentage; + } + + public long getEstimatedMinutesLeft() { + return estimatedMinutesLeft; + } + + public int getNeededDataPoints() { + return neededShingles; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(PERCENTAGE, percentage); + if (estimatedMinutesLeft > 0) { + builder.field(ESTIMATED_MINUTES_LEFT, estimatedMinutesLeft); + } + if (neededShingles > 0) { + builder.field(NEEDED_SHINGLES, neededShingles); + } + builder.endObject(); + return builder; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(percentage); + out.writeVLong(estimatedMinutesLeft); + out.writeVInt(neededShingles); + } + + @Override + public String toString() { + ToStringBuilder builder = new ToStringBuilder(this); + builder.append(PERCENTAGE, percentage); + if (estimatedMinutesLeft > 0) { + builder.append(ESTIMATED_MINUTES_LEFT, estimatedMinutesLeft); + } + if (neededShingles > 0) { + builder.append(NEEDED_SHINGLES, neededShingles); + } + return builder.toString(); + 
} + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + if (obj instanceof InitProgressProfile) { + InitProgressProfile other = (InitProgressProfile) obj; + + EqualsBuilder equalsBuilder = new EqualsBuilder(); + equalsBuilder.append(percentage, other.percentage); + equalsBuilder.append(estimatedMinutesLeft, other.estimatedMinutesLeft); + equalsBuilder.append(neededShingles, other.neededShingles); + + return equalsBuilder.isEquals(); + } + return false; + } + + @Override + public int hashCode() { + return new HashCodeBuilder().append(percentage).append(estimatedMinutesLeft).append(neededShingles).toHashCode(); + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ModelProfile.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ModelProfile.java index 71d61530..f0c8b9e6 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ModelProfile.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ModelProfile.java @@ -32,6 +32,7 @@ import java.io.IOException; +import org.apache.commons.lang.builder.ToStringBuilder; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; @@ -91,4 +92,15 @@ public void writeTo(StreamOutput out) throws IOException { out.writeVLong(modelSizeInBytes); out.writeString(nodeId); } + + @Override + public String toString() { + ToStringBuilder builder = new ToStringBuilder(this); + builder.append(MODEL_ID, modelId); + if (modelSizeInBytes > 0) { + builder.append(MODEL_SIZE_IN_BYTES, modelSizeInBytes); + } + builder.append(NODE_ID, nodeId); + return builder.toString(); + } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ProfileName.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ProfileName.java index 
3c3fa93b..1ab1c19d 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ProfileName.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ProfileName.java @@ -27,7 +27,8 @@ public enum ProfileName { COORDINATING_NODE(CommonName.COORDINATING_NODE), SHINGLE_SIZE(CommonName.SHINGLE_SIZE), TOTAL_SIZE_IN_BYTES(CommonName.TOTAL_SIZE_IN_BYTES), - MODELS(CommonName.MODELS); + MODELS(CommonName.MODELS), + INIT_PROGRESS(CommonName.INIT_PROGRESS); private String name; @@ -58,6 +59,8 @@ public static ProfileName getName(String name) { return TOTAL_SIZE_IN_BYTES; case CommonName.MODELS: return MODELS; + case CommonName.INIT_PROGRESS: + return INIT_PROGRESS; default: throw new IllegalArgumentException("Unsupported profile types"); } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestAnomalyDetectorJobAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestAnomalyDetectorJobAction.java index 5410b532..a418a81f 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestAnomalyDetectorJobAction.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestAnomalyDetectorJobAction.java @@ -19,7 +19,6 @@ import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.DETECTOR_ID; import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.IF_PRIMARY_TERM; import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.IF_SEQ_NO; -import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.REFRESH; import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.START_JOB; import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.STOP_JOB; @@ -27,7 +26,6 @@ import java.util.List; import java.util.Locale; -import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.client.node.NodeClient; import org.elasticsearch.cluster.service.ClusterService; import 
org.elasticsearch.common.settings.Settings; @@ -51,12 +49,10 @@ public class RestAnomalyDetectorJobAction extends BaseRestHandler { public static final String AD_JOB_ACTION = "anomaly_detector_job_action"; private volatile TimeValue requestTimeout; private final AnomalyDetectionIndices anomalyDetectionIndices; - private final ClusterService clusterService; public RestAnomalyDetectorJobAction(Settings settings, ClusterService clusterService, AnomalyDetectionIndices anomalyDetectionIndices) { this.anomalyDetectionIndices = anomalyDetectionIndices; this.requestTimeout = REQUEST_TIMEOUT.get(settings); - this.clusterService = clusterService; clusterService.getClusterSettings().addSettingsUpdateConsumer(REQUEST_TIMEOUT, it -> requestTimeout = it); } @@ -76,19 +72,14 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli return channel -> { long seqNo = request.paramAsLong(IF_SEQ_NO, SequenceNumbers.UNASSIGNED_SEQ_NO); long primaryTerm = request.paramAsLong(IF_PRIMARY_TERM, SequenceNumbers.UNASSIGNED_PRIMARY_TERM); - WriteRequest.RefreshPolicy refreshPolicy = request.hasParam(REFRESH) - ? 
WriteRequest.RefreshPolicy.parse(request.param(REFRESH)) - : WriteRequest.RefreshPolicy.IMMEDIATE; IndexAnomalyDetectorJobActionHandler handler = new IndexAnomalyDetectorJobActionHandler( - clusterService, client, channel, anomalyDetectionIndices, detectorId, seqNo, primaryTerm, - refreshPolicy, requestTimeout ); diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestDeleteAnomalyDetectorAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestDeleteAnomalyDetectorAction.java index b7b2f17c..a4b8bd70 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestDeleteAnomalyDetectorAction.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestDeleteAnomalyDetectorAction.java @@ -40,6 +40,7 @@ import com.amazon.opendistroforelasticsearch.ad.constant.CommonErrorMessages; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.amazon.opendistroforelasticsearch.ad.rest.handler.AnomalyDetectorActionHandler; import com.amazon.opendistroforelasticsearch.ad.settings.EnabledSetting; import com.google.common.collect.ImmutableList; @@ -91,13 +92,13 @@ private void deleteAnomalyDetectorJobDoc(NodeClient client, String detectorId, R .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); client.delete(deleteRequest, ActionListener.wrap(response -> { if (response.getResult() == DocWriteResponse.Result.DELETED || response.getResult() == DocWriteResponse.Result.NOT_FOUND) { - deleteAnomalyDetectorDoc(client, detectorId, channel); + deleteDetectorStateDoc(client, detectorId, channel); } else { logger.error("Fail to delete anomaly detector job {}", detectorId); } }, exception -> { if (exception instanceof IndexNotFoundException) { - deleteAnomalyDetectorDoc(client, detectorId, channel); + deleteDetectorStateDoc(client, detectorId, 
channel); } else { logger.error("Failed to delete anomaly detector job", exception); try { @@ -109,6 +110,34 @@ private void deleteAnomalyDetectorJobDoc(NodeClient client, String detectorId, R })); } + private void deleteDetectorStateDoc(NodeClient client, String detectorId, RestChannel channel) { + logger.info("Delete detector info {}", detectorId); + DeleteRequest deleteRequest = new DeleteRequest(DetectorInternalState.DETECTOR_STATE_INDEX, detectorId); + client + .delete( + deleteRequest, + ActionListener + .wrap( + response -> { + // whether deleted state doc or not, continue as state doc may not exist + deleteAnomalyDetectorDoc(client, detectorId, channel); + }, + exception -> { + if (exception instanceof IndexNotFoundException) { + deleteAnomalyDetectorDoc(client, detectorId, channel); + } else { + logger.error("Failed to delete detector state", exception); + try { + channel.sendResponse(new BytesRestResponse(channel, exception)); + } catch (IOException e) { + logger.error("Failed to send response of deletedetector state", e); + } + } + } + ) + ); + } + private void deleteAnomalyDetectorDoc(NodeClient client, String detectorId, RestChannel channel) { logger.info("Delete anomaly detector {}", detectorId); DeleteRequest deleteRequest = new DeleteRequest(AnomalyDetector.ANOMALY_DETECTORS_INDEX, detectorId) diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorActionHandler.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorActionHandler.java index d712967c..5f6fd438 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorActionHandler.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorActionHandler.java @@ -93,6 +93,8 @@ public class IndexAnomalyDetectorActionHandler extends AbstractActionHandler { * @param refreshPolicy refresh policy * @param anomalyDetector anomaly detector 
instance * @param requestTimeout request time out configuration + * @param maxAnomalyDetectors max anomaly detector allowed + * @param maxAnomalyFeatures max features allowed per detector */ public IndexAnomalyDetectorActionHandler( Settings settings, diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorJobActionHandler.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorJobActionHandler.java index be72172a..4cf0894c 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorJobActionHandler.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorJobActionHandler.java @@ -37,7 +37,6 @@ import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.client.node.NodeClient; -import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; @@ -65,8 +64,6 @@ public class IndexAnomalyDetectorJobActionHandler extends AbstractActionHandler private final String detectorId; private final Long seqNo; private final Long primaryTerm; - private final WriteRequest.RefreshPolicy refreshPolicy; - private final ClusterService clusterService; private final Logger logger = LogManager.getLogger(IndexAnomalyDetectorJobActionHandler.class); private final TimeValue requestTimeout; @@ -74,34 +71,28 @@ public class IndexAnomalyDetectorJobActionHandler extends AbstractActionHandler /** * Constructor function. 
* - * @param clusterService ClusterService * @param client ES node client that executes actions on the local node * @param channel ES channel used to construct bytes / builder based outputs, and send responses * @param anomalyDetectionIndices anomaly detector index manager * @param detectorId detector identifier * @param seqNo sequence number of last modification * @param primaryTerm primary term of last modification - * @param refreshPolicy refresh policy * @param requestTimeout request time out configuration */ public IndexAnomalyDetectorJobActionHandler( - ClusterService clusterService, NodeClient client, RestChannel channel, AnomalyDetectionIndices anomalyDetectionIndices, String detectorId, Long seqNo, Long primaryTerm, - WriteRequest.RefreshPolicy refreshPolicy, TimeValue requestTimeout ) { super(client, channel); - this.clusterService = clusterService; this.anomalyDetectionIndices = anomalyDetectionIndices; this.detectorId = detectorId; this.seqNo = seqNo; this.primaryTerm = primaryTerm; - this.refreshPolicy = refreshPolicy; this.requestTimeout = requestTimeout; } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/settings/AnomalyDetectorSettings.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/settings/AnomalyDetectorSettings.java index a0067e93..805e5d6a 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/settings/AnomalyDetectorSettings.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/settings/AnomalyDetectorSettings.java @@ -141,6 +141,7 @@ private AnomalyDetectorSettings() {} public static final String ANOMALY_DETECTORS_INDEX_MAPPING_FILE = "mappings/anomaly-detectors.json"; public static final String ANOMALY_DETECTOR_JOBS_INDEX_MAPPING_FILE = "mappings/anomaly-detector-jobs.json"; public static final String ANOMALY_RESULTS_INDEX_MAPPING_FILE = "mappings/anomaly-results.json"; + public static final String ANOMALY_DETECTION_STATE_INDEX_MAPPING_FILE = "mappings/anomaly-detection-state.json"; 
public static final Duration HOURLY_MAINTENANCE = Duration.ofHours(1); diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTransportAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTransportAction.java index 08afa578..4a4a200a 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTransportAction.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTransportAction.java @@ -42,7 +42,6 @@ import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.common.io.stream.NotSerializableExceptionWrapper; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexNotFoundException; import org.elasticsearch.node.NodeClosedException; @@ -75,6 +74,7 @@ import com.amazon.opendistroforelasticsearch.ad.stats.ADStats; import com.amazon.opendistroforelasticsearch.ad.stats.StatNames; import com.amazon.opendistroforelasticsearch.ad.util.ColdStartRunner; +import com.amazon.opendistroforelasticsearch.ad.util.ExceptionUtil; public class AnomalyResultTransportAction extends HandledTransportAction { @@ -88,12 +88,10 @@ public class AnomalyResultTransportAction extends HandledTransportAction onFeatureResponse( } if (!featureOptional.getProcessedFeatures().isPresent()) { + stateManager.getDetectorCheckpoint(adID, ActionListener.wrap(checkpointExists -> { + if (!checkpointExists) { + LOG.info("Trigger cold start for {}", adID); + globalRunner.compute(new ColdStartJob(detector)); + } + }, exception -> { + Throwable cause = ExceptionsHelper.unwrapCause(exception); + if (cause instanceof IndexNotFoundException) { + LOG.info("Trigger cold start for {}", adID); + globalRunner.compute(new ColdStartJob(detector)); + } else { + LOG.error(String.format("Fail to get checkpoint state for %s", adID), 
exception); + } + })); if (!featureOptional.getUnprocessedFeatures().isPresent()) { // Feature not available is common when we have data holes. Respond empty response // so that alerting will not print stack trace to avoid bloating our logs. @@ -403,7 +415,7 @@ private boolean coldStartIfNoModel(AtomicReference fa AnomalyDetectionException exp = failure.get(); if (exp != null) { if (exp instanceof ResourceNotFoundException) { - LOG.info("Cold start for {}", detector.getDetectorId()); + LOG.info("Trigger cold start for {}", detector.getDetectorId()); globalRunner.compute(new ColdStartJob(detector)); return true; } else { @@ -421,11 +433,12 @@ private void findException(Throwable cause, String adID, AtomicReference expected, String expectedErrorName) { - if (exception == null) { - return false; - } - - if (expected.isAssignableFrom(exception.getClass())) { - return true; - } - - // all exception that has not been registered to sent over wire can be wrapped - // inside NotSerializableExceptionWrapper. - // see StreamOutput.writeException - // ElasticsearchException.getExceptionName(exception) returns exception - // separated by underscore. For example, ResourceNotFoundException is converted - // to "resource_not_found_exception". 
- if (exception instanceof NotSerializableExceptionWrapper && exception.getMessage().trim().startsWith(expectedErrorName)) { - return true; - } - return false; - } - private CombinedRcfResult getCombinedResult(List rcfResults) { List rcfResultLib = new ArrayList<>(); for (RCFResultResponse result : rcfResults) { @@ -799,13 +780,15 @@ class ColdStartJob implements Callable { @Override public Boolean call() { + String detectorId = detector.getDetectorId(); try { Optional traingData = featureManager.getColdStartData(detector); if (traingData.isPresent()) { - modelManager.trainModel(detector, traingData.get()); + double[][] trainingPoints = traingData.get(); + modelManager.trainModel(detector, trainingPoints); return true; } else { - throw new EndRunException(detector.getDetectorId(), "Cannot get training data", false); + throw new EndRunException(detectorId, "Cannot get training data", false); } } catch (ElasticsearchTimeoutException timeoutEx) { diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/CronTransportAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/CronTransportAction.java index 3d63f77c..190f15d7 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/CronTransportAction.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/CronTransportAction.java @@ -32,7 +32,7 @@ public class CronTransportAction extends TransportNodesAction { - private ADStateManager transportStateManager; + private TransportStateManager transportStateManager; private ModelManager modelManager; private FeatureManager featureManager; @@ -42,7 +42,7 @@ public CronTransportAction( ClusterService clusterService, TransportService transportService, ActionFilters actionFilters, - ADStateManager tarnsportStatemanager, + TransportStateManager tarnsportStatemanager, ModelManager modelManager, FeatureManager featureManager ) { diff --git 
a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/DeleteModelTransportAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/DeleteModelTransportAction.java index 3256b70e..409c6400 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/DeleteModelTransportAction.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/DeleteModelTransportAction.java @@ -35,7 +35,7 @@ public class DeleteModelTransportAction extends TransportNodesAction { private static final Logger LOG = LogManager.getLogger(DeleteModelTransportAction.class); - private ADStateManager transportStateManager; + private TransportStateManager transportStateManager; private ModelManager modelManager; private FeatureManager featureManager; @@ -45,7 +45,7 @@ public DeleteModelTransportAction( ClusterService clusterService, TransportService transportService, ActionFilters actionFilters, - ADStateManager tarnsportStatemanager, + TransportStateManager tarnsportStatemanager, ModelManager modelManager, FeatureManager featureManager ) { @@ -85,8 +85,6 @@ protected DeleteModelNodeResponse newNodeResponse(StreamInput in) throws IOExcep } /** - * Precondition: - * associated alerting monitors have been deleted * * Delete checkpoint document (including both RCF and thresholding model), in-memory models, * buffered shingle data, transport state, and anomaly result diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingAction.java new file mode 100644 index 00000000..103aa2e3 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingAction.java @@ -0,0 +1,28 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import org.elasticsearch.action.ActionType; + +public class RCFPollingAction extends ActionType { + public static final RCFPollingAction INSTANCE = new RCFPollingAction(); + public static final String NAME = "cluster:admin/ad/rcfpolling"; + + private RCFPollingAction() { + super(NAME, RCFPollingResponse::new); + } + +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingRequest.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingRequest.java new file mode 100644 index 00000000..fb5e7ea6 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingRequest.java @@ -0,0 +1,72 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import static org.elasticsearch.action.ValidateActions.addValidationError; + +import java.io.IOException; + +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import com.amazon.opendistroforelasticsearch.ad.constant.CommonErrorMessages; +import com.amazon.opendistroforelasticsearch.ad.constant.CommonMessageAttributes; + +public class RCFPollingRequest extends ActionRequest implements ToXContentObject { + private String adID; + + public RCFPollingRequest(StreamInput in) throws IOException { + super(in); + adID = in.readString(); + } + + public RCFPollingRequest(String adID) { + super(); + this.adID = adID; + } + + public String getAdID() { + return adID; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(adID); + } + + @Override + public ActionRequestValidationException validate() { + ActionRequestValidationException validationException = null; + if (Strings.isEmpty(adID)) { + validationException = addValidationError(CommonErrorMessages.AD_ID_MISSING_MSG, validationException); + } + return validationException; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(CommonMessageAttributes.ID_JSON_KEY, adID); + builder.endObject(); + return builder; + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingResponse.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingResponse.java new file mode 100644 index 00000000..52fbd0f7 --- 
/dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingResponse.java @@ -0,0 +1,56 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import java.io.IOException; + +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; + +public class RCFPollingResponse extends ActionResponse implements ToXContentObject { + public static final String TOTAL_UPDATES_KEY = "totalUpdates"; + + private final long totalUpdates; + + public RCFPollingResponse(long totalUpdates) { + this.totalUpdates = totalUpdates; + } + + public RCFPollingResponse(StreamInput in) throws IOException { + super(in); + totalUpdates = in.readVLong(); + } + + public long getTotalUpdates() { + return totalUpdates; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(totalUpdates); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(TOTAL_UPDATES_KEY, totalUpdates); + builder.endObject(); + return builder; + } +} diff --git 
a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTransportAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTransportAction.java new file mode 100644 index 00000000..f288534f --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTransportAction.java @@ -0,0 +1,144 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import java.io.IOException; +import java.util.Optional; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.HandledTransportAction; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportException; +import org.elasticsearch.transport.TransportRequestOptions; +import org.elasticsearch.transport.TransportResponseHandler; +import org.elasticsearch.transport.TransportService; + +import 
com.amazon.opendistroforelasticsearch.ad.cluster.HashRing; +import com.amazon.opendistroforelasticsearch.ad.common.exception.AnomalyDetectionException; +import com.amazon.opendistroforelasticsearch.ad.ml.ModelManager; +import com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings; + +/** + * Transport action to get total rcf updates from hosted models or checkpoint + * + */ +public class RCFPollingTransportAction extends HandledTransportAction { + + private static final Logger LOG = LogManager.getLogger(RCFPollingTransportAction.class); + static final String NO_NODE_FOUND_MSG = "Cannot find model hosting node"; + static final String FAIL_TO_GET_RCF_UPDATE_MSG = "Cannot find hosted model or related checkpoint"; + + private final TransportService transportService; + private final ModelManager modelManager; + private final HashRing hashRing; + private final TransportRequestOptions option; + private final ClusterService clusterService; + + @Inject + public RCFPollingTransportAction( + ActionFilters actionFilters, + TransportService transportService, + Settings settings, + ModelManager modelManager, + HashRing hashRing, + ClusterService clusterService + ) { + super(RCFPollingAction.NAME, transportService, actionFilters, RCFPollingRequest::new); + this.transportService = transportService; + this.modelManager = modelManager; + this.hashRing = hashRing; + this.option = TransportRequestOptions + .builder() + .withType(TransportRequestOptions.Type.REG) + .withTimeout(AnomalyDetectorSettings.REQUEST_TIMEOUT.get(settings)) + .build(); + this.clusterService = clusterService; + } + + @Override + protected void doExecute(Task task, RCFPollingRequest request, ActionListener listener) { + + String adID = request.getAdID(); + + String rcfModelID = modelManager.getRcfModelId(adID, 0); + + Optional rcfNode = hashRing.getOwningNode(rcfModelID.toString()); + if (!rcfNode.isPresent()) { + listener.onFailure(new AnomalyDetectionException(adID, NO_NODE_FOUND_MSG)); + 
return; + } + + String rcfNodeId = rcfNode.get().getId(); + + DiscoveryNode localNode = clusterService.localNode(); + + if (localNode.getId().equals(rcfNodeId)) { + modelManager + .getTotalUpdates( + rcfModelID, + adID, + ActionListener + .wrap( + totalUpdates -> listener.onResponse(new RCFPollingResponse(totalUpdates)), + e -> listener.onFailure(new AnomalyDetectionException(adID, FAIL_TO_GET_RCF_UPDATE_MSG, e)) + ) + ); + } else { + // redirect + LOG.info("Sending RCF polling request to {} for model {}", rcfNodeId, rcfModelID); + + try { + transportService + .sendRequest(rcfNode.get(), RCFPollingAction.NAME, request, option, new TransportResponseHandler() { + + @Override + public RCFPollingResponse read(StreamInput in) throws IOException { + return new RCFPollingResponse(in); + } + + @Override + public void handleResponse(RCFPollingResponse response) { + listener.onResponse(response); + } + + @Override + public void handleException(TransportException exp) { + listener.onFailure(exp); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + + }); + } catch (Exception e) { + LOG.error(String.format("Fail to poll RCF models for {}", adID), e); + listener.onFailure(new AnomalyDetectionException(adID, FAIL_TO_GET_RCF_UPDATE_MSG, e)); + } + + } + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportState.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportState.java new file mode 100644 index 00000000..18d0e6ab --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportState.java @@ -0,0 +1,100 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import java.time.Duration; +import java.time.Instant; +import java.util.Map.Entry; + +import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; + +public class TransportState { + private String detectorId; + // detector definition and the definition fetch time + private Entry detectorDef; + // number of partitions and the number's fetch time + private Entry partitonNumber; + // checkpoint fetch time + private Instant checkpoint; + // last error. Used by DetectorStateHandler to check if the error for a + // detector has changed or not. If changed, trigger indexing. 
+ private Entry lastError; + + public TransportState(String detectorId) { + this.detectorId = detectorId; + detectorDef = null; + partitonNumber = null; + checkpoint = null; + lastError = null; + } + + public String getDetectorId() { + return detectorId; + } + + public Entry getDetectorDef() { + return detectorDef; + } + + public void setDetectorDef(Entry detectorDef) { + this.detectorDef = detectorDef; + } + + public Entry getPartitonNumber() { + return partitonNumber; + } + + public void setPartitonNumber(Entry partitonNumber) { + this.partitonNumber = partitonNumber; + } + + public Instant getCheckpoint() { + return checkpoint; + } + + public void setCheckpoint(Instant checkpoint) { + this.checkpoint = checkpoint; + }; + + public Entry getLastError() { + return lastError; + } + + public void setLastError(Entry lastError) { + this.lastError = lastError; + } + + public boolean expired(Duration stateTtl, Instant now) { + boolean ans = true; + if (detectorDef != null) { + ans = ans && expired(stateTtl, now, detectorDef.getValue()); + } + if (partitonNumber != null) { + ans = ans && expired(stateTtl, now, partitonNumber.getValue()); + } + if (checkpoint != null) { + ans = ans && expired(stateTtl, now, checkpoint); + } + if (lastError != null) { + ans = ans && expired(stateTtl, now, lastError.getValue()); + } + return ans; + } + + private boolean expired(Duration stateTtl, Instant now, Instant toCheck) { + return toCheck.plus(stateTtl).isBefore(now); + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManager.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateManager.java similarity index 60% rename from src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManager.java rename to src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateManager.java index 9eb5bbcc..ce06fc8c 100644 --- 
a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManager.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateManager.java @@ -39,6 +39,7 @@ import org.elasticsearch.common.xcontent.XContentType; import com.amazon.opendistroforelasticsearch.ad.common.exception.LimitExceededException; +import com.amazon.opendistroforelasticsearch.ad.constant.CommonName; import com.amazon.opendistroforelasticsearch.ad.ml.ModelManager; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; @@ -48,10 +49,9 @@ * and the number of partitions for a detector id. * */ -public class ADStateManager { - private static final Logger LOG = LogManager.getLogger(ADStateManager.class); - private ConcurrentHashMap> currentDetectors; - private ConcurrentHashMap> partitionNumber; +public class TransportStateManager { + private static final Logger LOG = LogManager.getLogger(TransportStateManager.class); + private ConcurrentHashMap transportStates; private Client client; private ModelManager modelManager; private NamedXContentRegistry xContentRegistry; @@ -62,7 +62,9 @@ public class ADStateManager { private final Settings settings; private final Duration stateTtl; - public ADStateManager( + public static final String NO_ERROR = "no_error"; + + public TransportStateManager( Client client, NamedXContentRegistry xContentRegistry, ModelManager modelManager, @@ -71,11 +73,10 @@ public ADStateManager( Clock clock, Duration stateTtl ) { - this.currentDetectors = new ConcurrentHashMap<>(); + this.transportStates = new ConcurrentHashMap<>(); this.client = client; this.modelManager = modelManager; this.xContentRegistry = xContentRegistry; - this.partitionNumber = new ConcurrentHashMap<>(); this.clientUtil = clientUtil; this.backpressureMuter = new ConcurrentHashMap<>(); this.clock = clock; @@ -91,31 +92,31 @@ public ADStateManager( * @throws LimitExceededException when 
there is no sufficient resource available */ public int getPartitionNumber(String adID, AnomalyDetector detector) { - Entry partitonAndTime = partitionNumber.get(adID); - if (partitonAndTime != null) { + if (transportStates.containsKey(adID) && transportStates.get(adID).getPartitonNumber() != null) { + Entry partitonAndTime = transportStates.get(adID).getPartitonNumber(); partitonAndTime.setValue(clock.instant()); return partitonAndTime.getKey(); } int partitionNum = modelManager.getPartitionedForestSizes(detector).getKey(); - partitionNumber.putIfAbsent(adID, new SimpleEntry<>(partitionNum, clock.instant())); + TransportState state = transportStates.computeIfAbsent(adID, id -> new TransportState(id)); + state.setPartitonNumber(new SimpleEntry<>(partitionNum, clock.instant())); + return partitionNum; } public void getAnomalyDetector(String adID, ActionListener> listener) { - Entry detectorAndTime = currentDetectors.get(adID); - if (detectorAndTime != null) { + if (transportStates.containsKey(adID) && transportStates.get(adID).getDetectorDef() != null) { + Entry detectorAndTime = transportStates.get(adID).getDetectorDef(); detectorAndTime.setValue(clock.instant()); listener.onResponse(Optional.of(detectorAndTime.getKey())); - return; + } else { + GetRequest request = new GetRequest(AnomalyDetector.ANOMALY_DETECTORS_INDEX, adID); + clientUtil.asyncRequest(request, client::get, onGetDetectorResponse(adID, listener)); } - - GetRequest request = new GetRequest(AnomalyDetector.ANOMALY_DETECTORS_INDEX, adID); - - clientUtil.asyncRequest(request, client::get, onGetResponse(adID, listener)); } - private ActionListener onGetResponse(String adID, ActionListener> listener) { + private ActionListener onGetDetectorResponse(String adID, ActionListener> listener) { return ActionListener.wrap(response -> { if (response == null || !response.isExists()) { listener.onResponse(Optional.empty()); @@ -130,7 +131,9 @@ private ActionListener onGetResponse(String adID, 
ActionListener(detector, clock.instant())); + TransportState state = transportStates.computeIfAbsent(adID, id -> new TransportState(id)); + state.setDetectorDef(new SimpleEntry<>(detector, clock.instant())); + listener.onResponse(Optional.of(detector)); } catch (Exception t) { LOG.error("Fail to parse detector {}", adID); @@ -140,35 +143,61 @@ private ActionListener onGetResponse(String adID, ActionListener listener) { + if (transportStates.containsKey(adID) && transportStates.get(adID).getCheckpoint() != null) { + transportStates.get(adID).setCheckpoint(clock.instant()); + listener.onResponse(Boolean.TRUE); + return; + } + + GetRequest request = new GetRequest(CommonName.CHECKPOINT_INDEX_NAME, modelManager.getRcfModelId(adID, 0)); + + clientUtil.asyncRequest(request, client::get, onGetCheckpointResponse(adID, listener)); + } + + private ActionListener onGetCheckpointResponse(String adID, ActionListener listener) { + return ActionListener.wrap(response -> { + if (response == null || !response.isExists()) { + listener.onResponse(Boolean.FALSE); + } else { + TransportState state = transportStates.get(adID); + if (state == null) { + state = new TransportState(adID); + transportStates.put(adID, state); + } + state.setCheckpoint(clock.instant()); + listener.onResponse(Boolean.TRUE); + } + }, listener::onFailure); + } + /** * Used in delete workflow * * @param adID detector ID */ public void clear(String adID) { - currentDetectors.remove(adID); - partitionNumber.remove(adID); - } - - public void maintenance() { - maintenance(currentDetectors); - maintenance(partitionNumber); + transportStates.remove(adID); } /** - * Clean states if it is older than our stateTtl. The input has to be a + * Clean states if it is older than our stateTtl. transportState has to be a * ConcurrentHashMap otherwise we will have * java.util.ConcurrentModificationException. 
* - * @param states states to be maintained */ - void maintenance(ConcurrentHashMap> states) { - states.entrySet().stream().forEach(entry -> { + public void maintenance() { + transportStates.entrySet().stream().forEach(entry -> { String detectorId = entry.getKey(); try { - Entry stateAndTime = entry.getValue(); - if (stateAndTime.getValue().plus(stateTtl).isBefore(clock.instant())) { - states.remove(detectorId); + TransportState state = entry.getValue(); + if (state.expired(stateTtl, clock.instant())) { + transportStates.remove(detectorId); } } catch (Exception e) { LOG.warn("Failed to finish maintenance for detector id " + detectorId, e); @@ -204,4 +233,29 @@ public void resetBackpressureCounter(String nodeId) { public boolean hasRunningQuery(AnomalyDetector detector) { return clientUtil.hasRunningQuery(detector); } + + /** + * Get last error of a detector + * @param adID detector id + * @return last error for the detector + */ + public String getLastError(String adID) { + if (transportStates.containsKey(adID) && transportStates.get(adID).getLastError() != null) { + Entry errorAndTime = transportStates.get(adID).getLastError(); + errorAndTime.setValue(clock.instant()); + return errorAndTime.getKey(); + } + + return NO_ERROR; + } + + /** + * Set last error of a detector + * @param adID detector id + * @param error error, can be null + */ + public void setLastError(String adID, String error) { + TransportState state = transportStates.computeIfAbsent(adID, id -> new TransportState(id)); + state.setLastError(new SimpleEntry<>(error, clock.instant())); + } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyIndexHandler.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyIndexHandler.java new file mode 100644 index 00000000..95be0690 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyIndexHandler.java @@ -0,0 +1,188 @@ +/* + * Copyright 2020 
Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.transport.handler; + +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; + +import java.util.Iterator; +import java.util.Locale; +import java.util.function.BooleanSupplier; +import java.util.function.Consumer; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.ResourceAlreadyExistsException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; +import org.elasticsearch.action.bulk.BackoffPolicy; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.index.IndexResponse; +import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.threadpool.ThreadPool; + +import com.amazon.opendistroforelasticsearch.ad.common.exception.AnomalyDetectionException; +import 
com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings; +import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; +import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils; +import com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils; + +public class AnomalyIndexHandler { + private static final Logger LOG = LogManager.getLogger(AnomalyIndexHandler.class); + + static final String CANNOT_SAVE_ERR_MSG = "Cannot save %s due to write block."; + static final String FAIL_TO_SAVE_ERR_MSG = "Fail to save %s: "; + static final String RETRY_SAVING_ERR_MSG = "Retry in saving %s: "; + static final String SUCCESS_SAVING_MSG = "Succeed in saving %s"; + + protected final Client client; + + private final ThreadPool threadPool; + private final BackoffPolicy savingBackoffPolicy; + protected final String indexName; + private final Consumer> createIndex; + private final BooleanSupplier indexExists; + // whether save to a specific doc id or not + private final boolean fixedDoc; + protected final ClientUtil clientUtil; + private final IndexUtils indexUtils; + private final ClusterService clusterService; + + public AnomalyIndexHandler( + Client client, + Settings settings, + ThreadPool threadPool, + String indexName, + Consumer> createIndex, + BooleanSupplier indexExists, + boolean fixedDoc, + ClientUtil clientUtil, + IndexUtils indexUtils, + ClusterService clusterService + ) { + this.client = client; + this.threadPool = threadPool; + this.savingBackoffPolicy = BackoffPolicy + .exponentialBackoff( + AnomalyDetectorSettings.BACKOFF_INITIAL_DELAY.get(settings), + AnomalyDetectorSettings.MAX_RETRY_FOR_BACKOFF.get(settings) + ); + this.indexName = indexName; + this.createIndex = createIndex; + this.indexExists = indexExists; + this.fixedDoc = fixedDoc; + this.clientUtil = clientUtil; + this.indexUtils = indexUtils; + this.clusterService = clusterService; + } + + public void index(T toSave, String detectorId) { + if 
(indexUtils.checkIndicesBlocked(clusterService.state(), ClusterBlockLevel.WRITE, this.indexName)) { + LOG.warn(String.format(Locale.ROOT, CANNOT_SAVE_ERR_MSG, detectorId)); + return; + } + + try { + if (!indexExists.getAsBoolean()) { + createIndex + .accept(ActionListener.wrap(initResponse -> onCreateIndexResponse(initResponse, toSave, detectorId), exception -> { + if (ExceptionsHelper.unwrapCause(exception) instanceof ResourceAlreadyExistsException) { + // It is possible the index has been created while we sending the create request + save(toSave, detectorId); + } else { + throw new AnomalyDetectionException( + detectorId, + String.format("Unexpected error creating index %s", indexName), + exception + ); + } + })); + } else { + save(toSave, detectorId); + } + } catch (Exception e) { + throw new AnomalyDetectionException( + detectorId, + String.format(Locale.ROOT, "Error in saving %s for detector %s", indexName, detectorId), + e + ); + } + } + + private void onCreateIndexResponse(CreateIndexResponse response, T toSave, String detectorId) { + if (response.isAcknowledged()) { + save(toSave, detectorId); + } else { + throw new AnomalyDetectionException(detectorId, "Creating %s with mappings call not acknowledged."); + } + } + + protected void save(T toSave, String detectorId) { + try (XContentBuilder builder = jsonBuilder()) { + IndexRequest indexRequest = new IndexRequest(indexName).source(toSave.toXContent(builder, RestHandlerUtils.XCONTENT_WITH_TYPE)); + if (fixedDoc) { + indexRequest.id(detectorId); + } + + saveIteration(indexRequest, detectorId, savingBackoffPolicy.iterator()); + } catch (Exception e) { + LOG.error(String.format("Failed to save %s", indexName), e); + throw new AnomalyDetectionException(detectorId, String.format("Cannot save %s", indexName)); + } + } + + void saveIteration(IndexRequest indexRequest, String detectorId, Iterator backoff) { + clientUtil + .asyncRequest( + indexRequest, + client::index, + ActionListener.wrap(response -> { 
LOG.debug(String.format(SUCCESS_SAVING_MSG, detectorId)); }, exception -> { + // Elasticsearch has a thread pool and a queue for write per node. A thread + // pool will have N number of workers ready to handle the requests. When a + // request comes and if a worker is free , this is handled by the worker. Now by + // default the number of workers is equal to the number of cores on that CPU. + // When the workers are full and there are more write requests, the request + // will go to queue. The size of queue is also limited. If by default size is, + // say, 200 and if there happens more parallel requests than this, then those + // requests would be rejected as you can see EsRejectedExecutionException. + // So EsRejectedExecutionException is the way that Elasticsearch tells us that + // it cannot keep up with the current indexing rate. + // When it happens, we should pause indexing a bit before trying again, ideally + // with randomized exponential backoff. + Throwable cause = ExceptionsHelper.unwrapCause(exception); + if (!(cause instanceof EsRejectedExecutionException) || !backoff.hasNext()) { + LOG.error(String.format(FAIL_TO_SAVE_ERR_MSG, detectorId), cause); + } else { + TimeValue nextDelay = backoff.next(); + LOG.warn(String.format(RETRY_SAVING_ERR_MSG, detectorId), cause); + // copy original request's source without other information like autoGeneratedTimestamp + // otherwise, an exception will be thrown indicating autoGeneratedTimestamp should not be set + // while request id is already set (id is set because we have already sent the request before). 
+ IndexRequest newReuqest = new IndexRequest(indexRequest.index()); + newReuqest.source(indexRequest.source(), indexRequest.getContentType()); + threadPool.schedule(() -> saveIteration(newReuqest, detectorId, backoff), nextDelay, ThreadPool.Names.SAME); + } + }) + ); + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandler.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandler.java deleted file mode 100644 index 670503c6..00000000 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandler.java +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. 
- */ - -package com.amazon.opendistroforelasticsearch.ad.transport.handler; - -import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; - -import java.util.Iterator; -import java.util.Locale; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.elasticsearch.ExceptionsHelper; -import org.elasticsearch.ResourceAlreadyExistsException; -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; -import org.elasticsearch.action.bulk.BackoffPolicy; -import org.elasticsearch.action.index.IndexRequest; -import org.elasticsearch.action.index.IndexResponse; -import org.elasticsearch.action.support.IndicesOptions; -import org.elasticsearch.client.Client; -import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.block.ClusterBlockLevel; -import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; -import org.elasticsearch.cluster.service.ClusterService; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.threadpool.ThreadPool; - -import com.amazon.opendistroforelasticsearch.ad.common.exception.AnomalyDetectionException; -import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; -import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; -import com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings; -import com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils; - -public class AnomalyResultHandler { - private static final Logger LOG = LogManager.getLogger(AnomalyResultHandler.class); - - static final String CANNOT_SAVE_ERR_MSG = "Cannot save anomaly result due to write block."; - static final String FAIL_TO_SAVE_ERR_MSG = "Fail to save anomaly 
index: "; - static final String RETRY_SAVING_ERR_MSG = "Retry in saving anomaly index: "; - static final String SUCCESS_SAVING_MSG = "SSUCCESS_SAVING_MSGuccess in saving anomaly index: "; - - private final Client client; - private final ClusterService clusterService; - private final IndexNameExpressionResolver indexNameExpressionResolver; - private final AnomalyDetectionIndices anomalyDetectionIndices; - private final ThreadPool threadPool; - private final BackoffPolicy resultSavingBackoffPolicy; - - public AnomalyResultHandler( - Client client, - Settings settings, - ClusterService clusterService, - IndexNameExpressionResolver indexNameExpressionResolver, - AnomalyDetectionIndices anomalyDetectionIndices, - ThreadPool threadPool - ) { - this.client = client; - this.clusterService = clusterService; - this.indexNameExpressionResolver = indexNameExpressionResolver; - this.anomalyDetectionIndices = anomalyDetectionIndices; - this.threadPool = threadPool; - this.resultSavingBackoffPolicy = BackoffPolicy - .exponentialBackoff( - AnomalyDetectorSettings.BACKOFF_INITIAL_DELAY.get(settings), - AnomalyDetectorSettings.MAX_RETRY_FOR_BACKOFF.get(settings) - ); - } - - public void indexAnomalyResult(AnomalyResult anomalyResult) { - try { - if (checkIndicesBlocked(clusterService.state(), ClusterBlockLevel.WRITE, AnomalyResult.ANOMALY_RESULT_INDEX)) { - LOG.warn(CANNOT_SAVE_ERR_MSG); - return; - } - if (!anomalyDetectionIndices.doesAnomalyResultIndexExist()) { - anomalyDetectionIndices - .initAnomalyResultIndexDirectly( - ActionListener.wrap(initResponse -> onCreateAnomalyResultIndexResponse(initResponse, anomalyResult), exception -> { - if (ExceptionsHelper.unwrapCause(exception) instanceof ResourceAlreadyExistsException) { - // It is possible the index has been created while we sending the create request - saveDetectorResult(anomalyResult); - } else { - throw new AnomalyDetectionException( - anomalyResult.getDetectorId(), - "Unexpected error creating anomaly result index", - 
exception - ); - } - }) - ); - } else { - saveDetectorResult(anomalyResult); - } - } catch (Exception e) { - throw new AnomalyDetectionException( - anomalyResult.getDetectorId(), - String - .format( - Locale.ROOT, - "Error in saving anomaly index for ID %s from %s to %s", - anomalyResult.getDetectorId(), - anomalyResult.getDataStartTime(), - anomalyResult.getDataEndTime() - ), - e - ); - } - } - - /** - * Similar to checkGlobalBlock, we check block on the indices level. - * - * @param state Cluster state - * @param level block level - * @param indices the indices on which to check block - * @return whether any of the index has block on the level. - */ - private boolean checkIndicesBlocked(ClusterState state, ClusterBlockLevel level, String... indices) { - // the original index might be an index expression with wildcards like "log*", - // so we need to expand the expression to concrete index name - String[] concreteIndices = indexNameExpressionResolver.concreteIndexNames(state, IndicesOptions.lenientExpandOpen(), indices); - - return state.blocks().indicesBlockedException(level, concreteIndices) != null; - } - - private void onCreateAnomalyResultIndexResponse(CreateIndexResponse response, AnomalyResult anomalyResult) { - if (response.isAcknowledged()) { - saveDetectorResult(anomalyResult); - } else { - throw new AnomalyDetectionException( - anomalyResult.getDetectorId(), - "Creating anomaly result index with mappings call not acknowledged." 
- ); - } - } - - private void saveDetectorResult(AnomalyResult anomalyResult) { - try (XContentBuilder builder = jsonBuilder()) { - IndexRequest indexRequest = new IndexRequest(AnomalyResult.ANOMALY_RESULT_INDEX) - .source(anomalyResult.toXContent(builder, RestHandlerUtils.XCONTENT_WITH_TYPE)); - saveDetectorResult( - indexRequest, - String - .format( - Locale.ROOT, - "ID %s from %s to %s", - anomalyResult.getDetectorId(), - anomalyResult.getDataStartTime(), - anomalyResult.getDataEndTime() - ), - resultSavingBackoffPolicy.iterator() - ); - } catch (Exception e) { - LOG.error("Failed to save anomaly result", e); - throw new AnomalyDetectionException(anomalyResult.getDetectorId(), "Cannot save result"); - } - } - - void saveDetectorResult(IndexRequest indexRequest, String context, Iterator backoff) { - client.index(indexRequest, ActionListener.wrap(response -> LOG.debug(SUCCESS_SAVING_MSG + context), exception -> { - // Elasticsearch has a thread pool and a queue for write per node. A thread - // pool will have N number of workers ready to handle the requests. When a - // request comes and if a worker is free , this is handled by the worker. Now by - // default the number of workers is equal to the number of cores on that CPU. - // When the workers are full and there are more write requests, the request - // will go to queue. The size of queue is also limited. If by default size is, - // say, 200 and if there happens more parallel requests than this, then those - // requests would be rejected as you can see EsRejectedExecutionException. - // So EsRejectedExecutionException is the way that Elasticsearch tells us that - // it cannot keep up with the current indexing rate. - // When it happens, we should pause indexing a bit before trying again, ideally - // with randomized exponential backoff. 
- Throwable cause = ExceptionsHelper.unwrapCause(exception); - if (!(cause instanceof EsRejectedExecutionException) || !backoff.hasNext()) { - LOG.error(FAIL_TO_SAVE_ERR_MSG + context, cause); - } else { - TimeValue nextDelay = backoff.next(); - LOG.warn(RETRY_SAVING_ERR_MSG + context, cause); - // copy original request's source without other information like autoGeneratedTimestamp - // otherwise, an exception will be thrown indicating autoGeneratedTimestamp should not be set - // while request id is already set (id is set because we have already sent the request before). - IndexRequest newReuqest = new IndexRequest(AnomalyResult.ANOMALY_RESULT_INDEX); - newReuqest.source(indexRequest.source(), indexRequest.getContentType()); - threadPool.schedule(() -> saveDetectorResult(newReuqest, context, backoff), nextDelay, ThreadPool.Names.SAME); - } - })); - } -} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectionStateHandler.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectionStateHandler.java new file mode 100644 index 00000000..d1aebbb8 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectionStateHandler.java @@ -0,0 +1,165 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.opendistroforelasticsearch.ad.transport.handler; + +import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; + +import java.io.IOException; +import java.time.Instant; +import java.util.function.BooleanSupplier; +import java.util.function.Consumer; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; +import org.elasticsearch.action.get.GetRequest; +import org.elasticsearch.action.get.GetResponse; +import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.IndexNotFoundException; +import org.elasticsearch.threadpool.ThreadPool; + +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; +import com.amazon.opendistroforelasticsearch.ad.transport.TransportStateManager; +import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; +import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils; +import com.google.common.base.Objects; + +public class DetectionStateHandler extends AnomalyIndexHandler { + interface GetStateStrategy { + /** + * Strategy to create new state to save. Return null if state does not change and don't need to save. 
+ * @param state old state + * @return new state or null if state does not change + */ + DetectorInternalState createNewState(DetectorInternalState state); + } + + class ErrorStrategy implements GetStateStrategy { + private String error; + + ErrorStrategy(String error) { + this.error = error; + } + + @Override + public DetectorInternalState createNewState(DetectorInternalState state) { + DetectorInternalState newState = null; + if (state == null) { + newState = new DetectorInternalState.Builder().error(error).lastUpdateTime(Instant.now()).build(); + } else if (!Objects.equal(state.getError(), error)) { + newState = (DetectorInternalState) state.clone(); + newState.setError(error); + newState.setLastUpdateTime(Instant.now()); + } + + return newState; + } + } + + private static final Logger LOG = LogManager.getLogger(DetectionStateHandler.class); + private NamedXContentRegistry xContentRegistry; + private TransportStateManager adStateManager; + + public DetectionStateHandler( + Client client, + Settings settings, + ThreadPool threadPool, + Consumer> createIndex, + BooleanSupplier indexExists, + ClientUtil clientUtil, + IndexUtils indexUtils, + ClusterService clusterService, + NamedXContentRegistry xContentRegistry, + TransportStateManager adStateManager + ) { + super( + client, + settings, + threadPool, + DetectorInternalState.DETECTOR_STATE_INDEX, + createIndex, + indexExists, + true, + clientUtil, + indexUtils, + clusterService + ); + this.xContentRegistry = xContentRegistry; + this.adStateManager = adStateManager; + } + + public void saveError(String error, String detectorId) { + // trigger indexing if no error recorded (e.g., this detector got enabled just now) + // or the recorded error is different than this one. 
+ if (!Objects.equal(adStateManager.getLastError(detectorId), error)) { + update(detectorId, new ErrorStrategy(error)); + adStateManager.setLastError(detectorId, error); + } + } + + /** + * Updates a detector's state according to the given GetStateStrategy + * @param detectorId detector id + * @param handler specify how to convert from existing state object to an object we want to save + */ + private void update(String detectorId, GetStateStrategy handler) { + try { + GetRequest getRequest = new GetRequest(this.indexName).id(detectorId); + + clientUtil.asyncRequest(getRequest, client::get, ActionListener.wrap(response -> { + DetectorInternalState newState = null; + if (response.isExists()) { + try ( + XContentParser parser = XContentType.JSON + .xContent() + .createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, response.getSourceAsString()) + ) { + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser::getTokenLocation); + DetectorInternalState state = DetectorInternalState.parse(parser); + newState = handler.createNewState(state); + } catch (IOException e) { + LOG.error("Failed to update AD state for " + detectorId, e); + return; + } + } else { + newState = handler.createNewState(null); + } + + if (newState != null) { + super.index(newState, detectorId); + } + + }, exception -> { + Throwable cause = ExceptionsHelper.unwrapCause(exception); + if (cause instanceof IndexNotFoundException) { + super.index(handler.createNewState(null), detectorId); + } else { + LOG.error("Failed to get detector state " + detectorId, exception); + } + })); + } catch (Exception e) { + LOG.error("Failed to update AD state for " + detectorId, e); + } + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ExceptionUtil.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ExceptionUtil.java new file mode 100644 index 00000000..7ae1ffaa --- /dev/null +++ 
b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ExceptionUtil.java @@ -0,0 +1,60 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.util; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.io.stream.NotSerializableExceptionWrapper; + +import com.amazon.opendistroforelasticsearch.ad.common.exception.ResourceNotFoundException; + +public class ExceptionUtil { + public static final String RESOURCE_NOT_FOUND_EXCEPTION_NAME_UNDERSCORE = ElasticsearchException + .getExceptionName(new ResourceNotFoundException("", "")); + + /** + * Elasticsearch restricts the kinds of exceptions that can be thrown over the wire + * (See ElasticsearchException.ElasticsearchExceptionHandle). Since we cannot + * add our own exception like ResourceNotFoundException without modifying + * Elasticsearch's code, we have to unwrap the remote transport exception and + * check its root cause message. 
+ * + * @param exception exception thrown locally or over the wire + * @param expected expected root cause + * @param expectedExceptionName expected exception name + * @return whether the exception wraps the expected exception as the cause + */ + public static boolean isException(Throwable exception, Class expected, String expectedExceptionName) { + if (exception == null) { + return false; + } + + if (expected.isAssignableFrom(exception.getClass())) { + return true; + } + + // all exceptions that have not been registered to be sent over the wire can be wrapped + // inside NotSerializableExceptionWrapper. + // see StreamOutput.writeException + // ElasticsearchException.getExceptionName(exception) returns the exception name + // separated by underscores. For example, ResourceNotFoundException is converted + // to "resource_not_found_exception". + if (exception instanceof NotSerializableExceptionWrapper && exception.getMessage().trim().startsWith(expectedExceptionName)) { + return true; + } + return false; + } + +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtils.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtils.java index b54d9b86..82b881c4 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtils.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtils.java @@ -22,9 +22,13 @@ import org.apache.logging.log4j.Logger; import org.elasticsearch.action.admin.indices.stats.IndicesStatsRequest; import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse; +import org.elasticsearch.action.support.IndicesOptions; import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.block.ClusterBlockLevel; import org.elasticsearch.cluster.health.ClusterIndexHealth; import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import 
org.elasticsearch.cluster.service.ClusterService; public class IndexUtils { @@ -44,6 +48,7 @@ public class IndexUtils { private Client client; private ClientUtil clientUtil; private ClusterService clusterService; + private final IndexNameExpressionResolver indexNameExpressionResolver; /** * Constructor @@ -51,11 +56,18 @@ public class IndexUtils { * @param client Client to make calls to ElasticSearch * @param clientUtil AD Client utility * @param clusterService ES ClusterService + * @param indexNameExpressionResolver index name resolver */ - public IndexUtils(Client client, ClientUtil clientUtil, ClusterService clusterService) { + public IndexUtils( + Client client, + ClientUtil clientUtil, + ClusterService clusterService, + IndexNameExpressionResolver indexNameExpressionResolver + ) { this.client = client; this.clientUtil = clientUtil; this.clusterService = clusterService; + this.indexNameExpressionResolver = indexNameExpressionResolver; } /** @@ -117,4 +129,20 @@ public Long getNumberOfDocumentsInIndex(String indexName) { Optional response = clientUtil.timedRequest(indicesStatsRequest, logger, client.admin().indices()::stats); return response.map(r -> r.getIndex(indexName).getPrimaries().docs.getCount()).orElse(-1L); } + + /** + * Similar to checkGlobalBlock, we check block on the indices level. + * + * @param state Cluster state + * @param level block level + * @param indices the indices on which to check block + * @return whether any of the index has block on the level. + */ + public boolean checkIndicesBlocked(ClusterState state, ClusterBlockLevel level, String... 
indices) { + // the original index might be an index expression with wildcards like "log*", + // so we need to expand the expression to concrete index name + String[] concreteIndices = indexNameExpressionResolver.concreteIndexNames(state, IndicesOptions.lenientExpandOpen(), indices); + + return state.blocks().indicesBlockedException(level, concreteIndices) != null; + } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListener.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListener.java index 3f42a18c..39c829a9 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListener.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListener.java @@ -32,6 +32,8 @@ */ public class MultiResponsesDelegateActionListener implements ActionListener { private static final Logger LOG = LogManager.getLogger(MultiResponsesDelegateActionListener.class); + static final String NO_RESPONSE = "No response collected"; + private final ActionListener delegate; private final AtomicInteger collectedResponseCount; private final int maxResponseCount; @@ -81,7 +83,7 @@ public void onFailure(Exception e) { private void finish() { if (this.exceptions.size() == 0) { if (savedResponses.size() == 0) { - this.delegate.onFailure(new RuntimeException("No response collected")); + this.delegate.onFailure(new RuntimeException(NO_RESPONSE)); } else { T response0 = savedResponses.get(0); for (int i = 1; i < savedResponses.size(); i++) { diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumer.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumer.java new file mode 100644 index 00000000..afb1b551 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumer.java @@ -0,0 +1,27 @@ +/* + * Copyright 2020 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.util; + +/** + * A consumer that can throw a checked exception + * + * @param <T> method parameter type + * @param <E> Exception type + */ +@FunctionalInterface +public interface ThrowingConsumer { + void accept(T t) throws E; +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumerWrapper.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumerWrapper.java new file mode 100644 index 00000000..2facdc92 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumerWrapper.java @@ -0,0 +1,41 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.opendistroforelasticsearch.ad.util; + +import java.util.function.Consumer; + +public class ThrowingConsumerWrapper { + /** + * Utility method to use a method throwing a checked exception inside a function + * that does not throw the corresponding checked exception. This happens + * when we are in an ES function whose signature we have no control over. + * Convert the checked exception thrown by throwingConsumer to a RuntimeException + * so that the compiler won't complain. + * @param <T> the method's parameter type + * @param throwingConsumer the method reference that can throw checked exception + * @return converted method reference + */ + public static Consumer throwingConsumerWrapper(ThrowingConsumer throwingConsumer) { + + return i -> { + try { + throwingConsumer.accept(i); + } catch (Exception ex) { + throw new RuntimeException(ex); + } + }; + } +} diff --git a/src/main/resources/mappings/anomaly-detection-state.json b/src/main/resources/mappings/anomaly-detection-state.json new file mode 100644 index 00000000..dcb0f7c0 --- /dev/null +++ b/src/main/resources/mappings/anomaly-detection-state.json @@ -0,0 +1,18 @@ +{ + "dynamic": false, + "_meta": { + "schema_version": 1 + }, + "properties": { + "schema_version": { + "type": "integer" + }, + "last_update_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "error": { + "type": "text" + } + } +} \ No newline at end of file diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AbstractADTest.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AbstractADTest.java index 9cb23a24..dc60acf9 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AbstractADTest.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AbstractADTest.java @@ -15,9 +15,13 @@ package com.amazon.opendistroforelasticsearch.ad; +import static org.hamcrest.Matchers.containsString; + import java.util.ArrayList; import java.util.List; import 
java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; @@ -26,10 +30,14 @@ import org.apache.logging.log4j.core.appender.AbstractAppender; import org.apache.logging.log4j.core.layout.PatternLayout; import org.apache.logging.log4j.util.StackLocatorUtil; +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.TestThreadPool; import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportInterceptor; +import org.elasticsearch.transport.TransportService; import test.com.amazon.opendistroforelasticsearch.ad.util.FakeNode; @@ -52,31 +60,70 @@ protected TestAppender(String name) { public List messages = new ArrayList(); - public boolean containsMessage(String msg) { + public boolean containsMessage(String msg, boolean formatString) { + Pattern p = null; + if (formatString) { + String regex = convertToRegex(msg); + p = Pattern.compile(regex); + } for (String logMsg : messages) { LOG.info(logMsg); - if (logMsg.contains(msg)) { + if (p != null) { + Matcher m = p.matcher(logMsg); + if (m.matches()) { + return true; + } + } else if (logMsg.contains(msg)) { return true; } } return false; } - public int countMessage(String msg) { + public boolean containsMessage(String msg) { + return containsMessage(msg, false); + } + + public int countMessage(String msg, boolean formatString) { + Pattern p = null; + if (formatString) { + String regex = convertToRegex(msg); + p = Pattern.compile(regex); + } int count = 0; for (String logMsg : messages) { LOG.info(logMsg); - if (logMsg.contains(msg)) { + if (p != null) { + Matcher m = p.matcher(logMsg); + if (m.matches()) { + count++; + } + } else if (logMsg.contains(msg)) { count++; } } return count; } + public 
int countMessage(String msg) { + return countMessage(msg, false); + } + @Override public void append(LogEvent event) { messages.add(event.getMessage().getFormattedMessage()); } + + /** + * Convert a string with format like "Cannot save %s due to write block." + * to a regex with .* like "Cannot save .* due to write block." + * @return converted regex + */ + private String convertToRegex(String formattedStr) { + int percentIndex = formattedStr.indexOf("%"); + return formattedStr.substring(0, percentIndex) + ".*" + formattedStr.substring(percentIndex + 2); + } + } protected static ThreadPool threadPool; @@ -130,19 +177,32 @@ protected static void tearDownThreadPool() { threadPool = null; } - public void setupTestNodes(Settings settings) { + public void setupTestNodes(Settings settings, TransportInterceptor transportInterceptor) { nodesCount = randomIntBetween(2, 10); testNodes = new FakeNode[nodesCount]; for (int i = 0; i < testNodes.length; i++) { - testNodes[i] = new FakeNode("node" + i, threadPool, settings); + testNodes[i] = new FakeNode("node" + i, threadPool, settings, transportInterceptor); } FakeNode.connectNodes(testNodes); } + public void setupTestNodes(Settings settings) { + setupTestNodes(settings, TransportService.NOOP_TRANSPORT_INTERCEPTOR); + } + public void tearDownTestNodes() { for (FakeNode testNode : testNodes) { testNode.close(); } testNodes = null; } + + public void assertException( + PlainActionFuture listener, + Class exceptionType, + String msg + ) { + Exception e = expectThrows(exceptionType, () -> listener.actionGet()); + assertThat(e.getMessage(), containsString(msg)); + } } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunnerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunnerTests.java index 8be5cdb5..35dd8a65 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunnerTests.java +++ 
b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunnerTests.java @@ -40,12 +40,14 @@ import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.common.util.concurrent.ThreadContext; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.index.Index; import org.elasticsearch.index.get.GetResult; @@ -62,10 +64,16 @@ import org.mockito.MockitoAnnotations; import com.amazon.opendistroforelasticsearch.ad.common.exception.EndRunException; +import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; +import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; import com.amazon.opendistroforelasticsearch.ad.model.IntervalTimeConfiguration; -import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyResultHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.TransportStateManager; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyIndexHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.DetectionStateHandler; import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; +import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils; +import com.amazon.opendistroforelasticsearch.ad.util.ThrowingConsumerWrapper; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.JobExecutionContext; import 
com.amazon.opendistroforelasticsearch.jobscheduler.spi.LockModel; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.ScheduledJobParameter; @@ -103,7 +111,9 @@ public class AnomalyDetectorJobRunnerTests extends AbstractADTest { private Iterator backoff; @Mock - private AnomalyResultHandler anomalyResultHandler; + private AnomalyIndexHandler anomalyResultHandler; + + private DetectionStateHandler detectorStateHandler; @BeforeClass public static void setUpBeforeClass() { @@ -129,17 +139,33 @@ public void setup() throws Exception { runner.setClientUtil(clientUtil); runner.setAnomalyResultHandler(anomalyResultHandler); + Settings settings = Settings + .builder() + .put("opendistro.anomaly_detection.max_retry_for_backoff", 2) + .put("opendistro.anomaly_detection.backoff_initial_delay", TimeValue.timeValueMillis(1)) + .put("opendistro.anomaly_detection.max_retry_for_end_run_exception", 3) + .build(); setUpJobParameter(); - runner - .setSettings( - Settings - .builder() - .put("opendistro.anomaly_detection.max_retry_for_backoff", 2) - .put("opendistro.anomaly_detection.backoff_initial_delay", TimeValue.timeValueMillis(1)) - .put("opendistro.anomaly_detection.max_retry_for_end_run_exception", 3) - .build() - ); + runner.setSettings(settings); + + AnomalyDetectionIndices anomalyDetectionIndices = mock(AnomalyDetectionIndices.class); + IndexNameExpressionResolver indexNameResolver = mock(IndexNameExpressionResolver.class); + IndexUtils indexUtils = new IndexUtils(client, clientUtil, clusterService, indexNameResolver); + TransportStateManager stateManager = mock(TransportStateManager.class); + detectorStateHandler = new DetectionStateHandler( + client, + settings, + threadPool, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initDetectorStateIndex), + anomalyDetectionIndices::doesDetectorStateIndexExist, + this.clientUtil, + indexUtils, + clusterService, + NamedXContentRegistry.EMPTY, + stateManager + ); + 
runner.setDetectionStateHandler(detectorStateHandler); lockService = new LockService(client, clusterService); doReturn(lockService).when(context).getLockService(); @@ -215,13 +241,13 @@ public void testRunAdJobWithEndRunExceptionNow() { LockModel lock = new LockModel("indexName", "jobId", Instant.now(), 10, false); Exception exception = new EndRunException(jobParameter.getName(), randomAlphaOfLength(5), true); runner.handleAdException(jobParameter, lockService, lock, Instant.now().minusMillis(1000 * 60), Instant.now(), exception); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); } @Test public void testRunAdJobWithEndRunExceptionNowAndExistingAdJob() { testRunAdJobWithEndRunExceptionNowAndStopAdJob(true, true, true); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); verify(clientUtil).asyncRequest(any(IndexRequest.class), any(), any()); assertTrue(testAppender.containsMessage("AD Job was disabled by JobRunner for")); } @@ -229,7 +255,7 @@ public void testRunAdJobWithEndRunExceptionNowAndExistingAdJob() { @Test public void testRunAdJobWithEndRunExceptionNowAndExistingAdJobAndIndexException() { testRunAdJobWithEndRunExceptionNowAndStopAdJob(true, true, false); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); verify(clientUtil).asyncRequest(any(IndexRequest.class), any(), any()); assertTrue(testAppender.containsMessage("Failed to disable AD job for")); } @@ -237,7 +263,7 @@ public void testRunAdJobWithEndRunExceptionNowAndExistingAdJobAndIndexException( @Test public void testRunAdJobWithEndRunExceptionNowAndNotExistingEnabledAdJob() { testRunAdJobWithEndRunExceptionNowAndStopAdJob(false, true, true); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); verify(client, never()).index(any(), any()); 
assertFalse(testAppender.containsMessage("AD Job was disabled by JobRunner for")); assertFalse(testAppender.containsMessage("Failed to disable AD job for")); @@ -246,7 +272,7 @@ public void testRunAdJobWithEndRunExceptionNowAndNotExistingEnabledAdJob() { @Test public void testRunAdJobWithEndRunExceptionNowAndExistingDisabledAdJob() { testRunAdJobWithEndRunExceptionNowAndStopAdJob(true, false, true); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); verify(client, never()).index(any(), any()); assertFalse(testAppender.containsMessage("AD Job was disabled by JobRunner for")); } @@ -254,7 +280,7 @@ public void testRunAdJobWithEndRunExceptionNowAndExistingDisabledAdJob() { @Test public void testRunAdJobWithEndRunExceptionNowAndNotExistingDisabledAdJob() { testRunAdJobWithEndRunExceptionNowAndStopAdJob(false, false, true); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); verify(client, never()).index(any(), any()); assertFalse(testAppender.containsMessage("AD Job was disabled by JobRunner for")); } @@ -323,7 +349,7 @@ public void testRunAdJobWithEndRunExceptionNowAndGetJobException() { }).when(clientUtil).asyncRequest(any(GetRequest.class), any(), any()); runner.handleAdException(jobParameter, lockService, lock, Instant.now().minusMillis(1000 * 60), Instant.now(), exception); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); assertEquals(1, testAppender.countMessage("JobRunner failed to get detector job")); } @@ -335,7 +361,7 @@ public void testRunAdJobWithEndRunExceptionNowAndFailToGetJob() { doThrow(new RuntimeException("fail to get AD job")).when(clientUtil).asyncRequest(any(GetRequest.class), any(), any()); runner.handleAdException(jobParameter, lockService, lock, Instant.now().minusMillis(1000 * 60), Instant.now(), exception); - verify(anomalyResultHandler).indexAnomalyResult(any()); + 
verify(anomalyResultHandler).index(any(), any()); assertEquals(1, testAppender.countMessage("JobRunner failed to stop AD job")); } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunnerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunnerTests.java index 1f135bba..73142fc4 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunnerTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunnerTests.java @@ -26,8 +26,8 @@ import java.io.IOException; import java.time.Instant; +import java.time.temporal.ChronoUnit; import java.util.Arrays; -import java.util.Calendar; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -37,81 +37,98 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.elasticsearch.Version; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.FailedNodeException; import org.elasticsearch.action.get.GetRequest; import org.elasticsearch.action.get.GetResponse; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.Client; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.io.stream.NotSerializableExceptionWrapper; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.index.IndexNotFoundException; import 
org.elasticsearch.search.SearchModule; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.transport.RemoteTransportException; import org.junit.Before; import org.junit.BeforeClass; -import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; +import com.amazon.opendistroforelasticsearch.ad.common.exception.AnomalyDetectionException; +import com.amazon.opendistroforelasticsearch.ad.common.exception.ResourceNotFoundException; +import com.amazon.opendistroforelasticsearch.ad.constant.CommonName; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.amazon.opendistroforelasticsearch.ad.model.DetectorProfile; import com.amazon.opendistroforelasticsearch.ad.model.DetectorState; +import com.amazon.opendistroforelasticsearch.ad.model.InitProgressProfile; +import com.amazon.opendistroforelasticsearch.ad.model.IntervalTimeConfiguration; import com.amazon.opendistroforelasticsearch.ad.model.ModelProfile; import com.amazon.opendistroforelasticsearch.ad.model.ProfileName; +import com.amazon.opendistroforelasticsearch.ad.transport.ProfileAction; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileNodeResponse; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileResponse; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingAction; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingResponse; import com.amazon.opendistroforelasticsearch.ad.util.DiscoveryNodeFilterer; public class AnomalyDetectorProfileRunnerTests extends ESTestCase { - private static final Logger LOG = LogManager.getLogger(AnomalyDetectorProfileRunnerTests.class); private AnomalyDetectorProfileRunner runner; private Client client; private DiscoveryNodeFilterer nodeFilter; 
private AnomalyDetector detector; - private IndexNameExpressionResolver resolver; private ClusterService clusterService; private static Set stateOnly; private static Set stateNError; private static Set modelProfile; + private static Set stateInitProgress; private static String noFullShingleError = "No full shingle in current detection window"; private static String stoppedError = "Stopped detector as job failed consecutively for more than 3 times: Having trouble querying data." + " Maybe all of your features have been disabled."; - private Calendar calendar; - private String indexWithRequiredError1 = ".opendistro-anomaly-results-history-2020.04.06-1"; - private String indexWithRequiredError2 = ".opendistro-anomaly-results-history-2020.04.07-000002"; + + private int requiredSamples; + private int neededSamples; // profile model related - String node1; - String nodeName1; - DiscoveryNode discoveryNode1; + private String node1; + private String nodeName1; + private DiscoveryNode discoveryNode1; + + private String node2; + private String nodeName2; + private DiscoveryNode discoveryNode2; - String node2; - String nodeName2; - DiscoveryNode discoveryNode2; + private long modelSize; + private String model1Id; + private String model0Id; - long modelSize; - String model1Id; - String model0Id; + private int shingleSize; - int shingleSize; + private int detectorIntervalMin; + private GetResponse detectorGetReponse; + private String messaingExceptionError = "blah"; @Override protected NamedXContentRegistry xContentRegistry() { SearchModule searchModule = new SearchModule(Settings.EMPTY, false, Collections.emptyList()); List entries = searchModule.getNamedXContents(); - entries.addAll(Arrays.asList(AnomalyDetector.XCONTENT_REGISTRY, AnomalyResult.XCONTENT_REGISTRY)); + entries + .addAll( + Arrays + .asList( + AnomalyDetector.XCONTENT_REGISTRY, + AnomalyResult.XCONTENT_REGISTRY, + DetectorInternalState.XCONTENT_REGISTRY, + AnomalyDetectorJob.XCONTENT_REGISTRY + ) + ); return new 
NamedXContentRegistry(entries); } @@ -122,6 +139,9 @@ public static void setUpOnce() { stateNError = new HashSet(); stateNError.add(ProfileName.ERROR); stateNError.add(ProfileName.STATE); + stateInitProgress = new HashSet(); + stateInitProgress.add(ProfileName.INIT_PROGRESS); + stateInitProgress.add(ProfileName.STATE); modelProfile = new HashSet( Arrays.asList(ProfileName.SHINGLE_SIZE, ProfileName.MODELS, ProfileName.COORDINATING_NODE, ProfileName.TOTAL_SIZE_IN_BYTES) ); @@ -133,57 +153,81 @@ public void setUp() throws Exception { super.setUp(); client = mock(Client.class); nodeFilter = mock(DiscoveryNodeFilterer.class); - calendar = mock(Calendar.class); - resolver = mock(IndexNameExpressionResolver.class); clusterService = mock(ClusterService.class); - when(resolver.concreteIndexNames(any(), any(), any())) - .thenReturn( - new String[] { indexWithRequiredError1, indexWithRequiredError2, ".opendistro-anomaly-results-history-2020.04.08-000003" } - ); when(clusterService.state()).thenReturn(ClusterState.builder(new ClusterName("test cluster")).build()); - runner = new AnomalyDetectorProfileRunner(client, xContentRegistry(), nodeFilter, resolver, clusterService, calendar); + requiredSamples = 128; + neededSamples = 5; + + runner = new AnomalyDetectorProfileRunner(client, xContentRegistry(), nodeFilter, requiredSamples); + + detectorIntervalMin = 3; + detectorGetReponse = mock(GetResponse.class); + } + + enum DetectorStatus { + INDEX_NOT_EXIST, + NO_DOC, + EXIST } enum JobStatus { INDEX_NOT_EXIT, DISABLED, - ENABLED, - DISABLED_ROTATED_1, - DISABLED_ROTATED_2, - DISABLED_ROTATED_3 + ENABLED } - enum InittedEverResultStatus { - INDEX_NOT_EXIT, - GREATER_THAN_ZERO, + enum RCFPollingStatus { + INIT_NOT_EXIT, + REMOTE_INIT_NOT_EXIT, + INDEX_NOT_FOUND, + REMOTE_INDEX_NOT_FOUND, + INIT_DONE, EMPTY, - EXCEPTION + EXCEPTION, + INITTING } enum ErrorResultStatus { INDEX_NOT_EXIT, NO_ERROR, SHINGLE_ERROR, - STOPPED_ERROR_1, - STOPPED_ERROR_2 + STOPPED_ERROR } 
@SuppressWarnings("unchecked") - private void setUpClientGet(boolean detectorExists, JobStatus jobStatus) throws IOException { - detector = TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), Instant.now()); + private void setUpClientGet( + DetectorStatus detectorStatus, + JobStatus jobStatus, + RCFPollingStatus rcfPollingStatus, + ErrorResultStatus errorResultStatus + ) throws IOException { + detector = TestHelpers.randomAnomalyDetectorWithInterval(new IntervalTimeConfiguration(detectorIntervalMin, ChronoUnit.MINUTES)); doAnswer(invocation -> { Object[] args = invocation.getArguments(); GetRequest request = (GetRequest) args[0]; ActionListener listener = (ActionListener) args[1]; if (request.index().equals(ANOMALY_DETECTORS_INDEX)) { - if (detectorExists) { - listener.onResponse(TestHelpers.createGetResponse(detector, detector.getDetectorId())); - } else { - listener.onFailure(new IndexNotFoundException(ANOMALY_DETECTORS_INDEX)); + switch (detectorStatus) { + case EXIST: + listener + .onResponse( + TestHelpers.createGetResponse(detector, detector.getDetectorId(), AnomalyDetector.ANOMALY_DETECTORS_INDEX) + ); + break; + case INDEX_NOT_EXIST: + listener.onFailure(new IndexNotFoundException(ANOMALY_DETECTORS_INDEX)); + break; + case NO_DOC: + when(detectorGetReponse.isExists()).thenReturn(false); + listener.onResponse(detectorGetReponse); + break; + default: + assertTrue("should not reach here", false); + break; } - } else { + } else if (request.index().equals(ANOMALY_DETECTOR_JOB_INDEX)) { AnomalyDetectorJob job = null; switch (jobStatus) { case INDEX_NOT_EXIT: @@ -191,116 +235,57 @@ private void setUpClientGet(boolean detectorExists, JobStatus jobStatus) throws break; case DISABLED: job = TestHelpers.randomAnomalyDetectorJob(false); - listener.onResponse(TestHelpers.createGetResponse(job, detector.getDetectorId())); + listener + .onResponse( + TestHelpers.createGetResponse(job, detector.getDetectorId(), AnomalyDetectorJob.ANOMALY_DETECTOR_JOB_INDEX) + 
); break; case ENABLED: job = TestHelpers.randomAnomalyDetectorJob(true); - listener.onResponse(TestHelpers.createGetResponse(job, detector.getDetectorId())); - break; - case DISABLED_ROTATED_1: - // enabled time is smaller than 1586217600000, while disabled time is larger than 1586217600000 - // which is April 7, 2020 12:00:00 AM. - job = TestHelpers - .randomAnomalyDetectorJob(false, Instant.ofEpochMilli(1586217500000L), Instant.ofEpochMilli(1586227600000L)); - listener.onResponse(TestHelpers.createGetResponse(job, detector.getDetectorId())); - break; - case DISABLED_ROTATED_2: - // both enabled and disabled time are larger than 1586217600000, - // which is April 7, 2020 12:00:00 AM. - job = TestHelpers - .randomAnomalyDetectorJob(false, Instant.ofEpochMilli(1586217500000L), Instant.ofEpochMilli(1586227600000L)); - listener.onResponse(TestHelpers.createGetResponse(job, detector.getDetectorId())); - break; - case DISABLED_ROTATED_3: - // both enabled and disabled time are larger than 1586131200000, - // which is April 6, 2020 12:00:00 AM. 
- job = TestHelpers - .randomAnomalyDetectorJob(false, Instant.ofEpochMilli(1586131300000L), Instant.ofEpochMilli(1586131400000L)); - listener.onResponse(TestHelpers.createGetResponse(job, detector.getDetectorId())); - break; - default: - assertTrue("should not reach here", false); - break; - } - } - - return null; - }).when(client).get(any(), any()); - } - - @SuppressWarnings("unchecked") - private void setUpClientSearch(InittedEverResultStatus inittedEverResultStatus, ErrorResultStatus errorResultStatus) { - doAnswer(invocation -> { - Object[] args = invocation.getArguments(); - SearchRequest request = (SearchRequest) args[0]; - ActionListener listener = (ActionListener) args[1]; - if (errorResultStatus == ErrorResultStatus.INDEX_NOT_EXIT - || inittedEverResultStatus == InittedEverResultStatus.INDEX_NOT_EXIT) { - listener.onFailure(new IndexNotFoundException(AnomalyResult.ANOMALY_RESULT_INDEX)); - return null; - } - AnomalyResult result = null; - if (request.source().query().toString().contains(AnomalyResult.ANOMALY_SCORE_FIELD)) { - switch (inittedEverResultStatus) { - case GREATER_THAN_ZERO: - result = TestHelpers.randomAnomalyDetectResult(0.87); - listener.onResponse(TestHelpers.createSearchResponse(result)); - break; - case EMPTY: - listener.onResponse(TestHelpers.createEmptySearchResponse()); - break; - case EXCEPTION: - listener.onFailure(new RuntimeException()); + listener + .onResponse( + TestHelpers.createGetResponse(job, detector.getDetectorId(), AnomalyDetectorJob.ANOMALY_DETECTOR_JOB_INDEX) + ); break; default: assertTrue("should not reach here", false); break; } } else { + if (errorResultStatus == ErrorResultStatus.INDEX_NOT_EXIT) { + listener.onFailure(new IndexNotFoundException(DetectorInternalState.DETECTOR_STATE_INDEX)); + return null; + } + DetectorInternalState.Builder result = new DetectorInternalState.Builder().lastUpdateTime(Instant.now()); + switch (errorResultStatus) { case NO_ERROR: - result = TestHelpers.randomAnomalyDetectResult(null); 
- listener.onResponse(TestHelpers.createSearchResponse(result)); break; case SHINGLE_ERROR: - result = TestHelpers.randomAnomalyDetectResult(noFullShingleError); - listener.onResponse(TestHelpers.createSearchResponse(result)); - break; - case STOPPED_ERROR_2: - if (request.indices().length == 2) { - for (int i = 0; i < 2; i++) { - assertTrue( - request.indices()[i].equals(indexWithRequiredError1) - || request.indices()[i].equals(indexWithRequiredError2) - ); - } - result = TestHelpers.randomAnomalyDetectResult(stoppedError); - listener.onResponse(TestHelpers.createSearchResponse(result)); - } else { - assertTrue("should not reach here", false); - } + result.error(noFullShingleError); break; - case STOPPED_ERROR_1: - if (request.indices().length == 1 && request.indices()[0].equals(indexWithRequiredError1)) { - result = TestHelpers.randomAnomalyDetectResult(stoppedError); - listener.onResponse(TestHelpers.createSearchResponse(result)); - } else { - assertTrue("should not reach here", false); - } + case STOPPED_ERROR: + result.error(stoppedError); break; default: assertTrue("should not reach here", false); break; } + listener + .onResponse( + TestHelpers.createGetResponse(result.build(), detector.getDetectorId(), DetectorInternalState.DETECTOR_STATE_INDEX) + ); + } return null; - }).when(client).search(any(), any()); + }).when(client).get(any(), any()); + setUpClientExecuteRCFPollingAction(rcfPollingStatus); } public void testDetectorNotExist() throws IOException, InterruptedException { - setUpClientGet(false, JobStatus.INDEX_NOT_EXIT); + setUpClientGet(DetectorStatus.INDEX_NOT_EXIST, JobStatus.INDEX_NOT_EXIT, RCFPollingStatus.EMPTY, ErrorResultStatus.NO_ERROR); final CountDownLatch inProgressLatch = new CountDownLatch(1); runner.profile("x123", ActionListener.wrap(response -> { @@ -314,9 +299,8 @@ public void testDetectorNotExist() throws IOException, InterruptedException { } public void testDisabledJobIndexTemplate(JobStatus status) throws IOException, 
InterruptedException { - setUpClientGet(true, status); - DetectorProfile expectedProfile = new DetectorProfile(); - expectedProfile.setState(DetectorState.DISABLED); + setUpClientGet(DetectorStatus.EXIST, status, RCFPollingStatus.EMPTY, ErrorResultStatus.NO_ERROR); + DetectorProfile expectedProfile = new DetectorProfile.Builder().state(DetectorState.DISABLED).build(); final CountDownLatch inProgressLatch = new CountDownLatch(1); runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { @@ -337,18 +321,20 @@ public void testJobDisabled() throws IOException, InterruptedException { testDisabledJobIndexTemplate(JobStatus.DISABLED); } - public void testInitOrRunningStateTemplate(InittedEverResultStatus status, DetectorState expectedState) throws IOException, + public void testInitOrRunningStateTemplate(RCFPollingStatus status, DetectorState expectedState) throws IOException, InterruptedException { - setUpClientGet(true, JobStatus.ENABLED); - setUpClientSearch(status, ErrorResultStatus.NO_ERROR); - DetectorProfile expectedProfile = new DetectorProfile(); - expectedProfile.setState(expectedState); + setUpClientGet(DetectorStatus.EXIST, JobStatus.ENABLED, status, ErrorResultStatus.NO_ERROR); + DetectorProfile expectedProfile = new DetectorProfile.Builder().state(expectedState).build(); final CountDownLatch inProgressLatch = new CountDownLatch(1); runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { assertEquals(expectedProfile, response); inProgressLatch.countDown(); }, exception -> { + logger.error(exception); + for (StackTraceElement ste : exception.getStackTrace()) { + logger.info(ste); + } assertTrue("Should not reach here ", false); inProgressLatch.countDown(); }), stateOnly); @@ -356,76 +342,122 @@ public void testInitOrRunningStateTemplate(InittedEverResultStatus status, Detec } public void testResultNotExist() throws IOException, InterruptedException { - testInitOrRunningStateTemplate(InittedEverResultStatus.INDEX_NOT_EXIT, 
DetectorState.INIT); + testInitOrRunningStateTemplate(RCFPollingStatus.INIT_NOT_EXIT, DetectorState.INIT); + } + + public void testRemoteResultNotExist() throws IOException, InterruptedException { + testInitOrRunningStateTemplate(RCFPollingStatus.REMOTE_INIT_NOT_EXIT, DetectorState.INIT); + } + + public void testCheckpointIndexNotExist() throws IOException, InterruptedException { + testInitOrRunningStateTemplate(RCFPollingStatus.INDEX_NOT_FOUND, DetectorState.INIT); + } + + public void testRemoteCheckpointIndexNotExist() throws IOException, InterruptedException { + testInitOrRunningStateTemplate(RCFPollingStatus.REMOTE_INDEX_NOT_FOUND, DetectorState.INIT); } public void testResultEmpty() throws IOException, InterruptedException { - testInitOrRunningStateTemplate(InittedEverResultStatus.EMPTY, DetectorState.INIT); + testInitOrRunningStateTemplate(RCFPollingStatus.EMPTY, DetectorState.INIT); } public void testResultGreaterThanZero() throws IOException, InterruptedException { - testInitOrRunningStateTemplate(InittedEverResultStatus.GREATER_THAN_ZERO, DetectorState.RUNNING); + testInitOrRunningStateTemplate(RCFPollingStatus.INIT_DONE, DetectorState.RUNNING); } - public void testErrorStateTemplate(InittedEverResultStatus initStatus, ErrorResultStatus status, DetectorState state, String error) - throws IOException, + public void testErrorStateTemplate( + RCFPollingStatus initStatus, + ErrorResultStatus status, + DetectorState state, + String error, + JobStatus jobStatus, + Set profilesToCollect + ) throws IOException, InterruptedException { - setUpClientGet(true, JobStatus.ENABLED); - setUpClientSearch(initStatus, status); - DetectorProfile expectedProfile = new DetectorProfile(); - expectedProfile.setState(state); - expectedProfile.setError(error); + setUpClientExecuteRCFPollingAction(initStatus); + setUpClientGet(DetectorStatus.EXIST, jobStatus, initStatus, status); + DetectorProfile.Builder builder = new DetectorProfile.Builder(); + if 
(profilesToCollect.contains(ProfileName.STATE)) { + builder.state(state); + } + if (profilesToCollect.contains(ProfileName.ERROR)) { + builder.error(error); + } + DetectorProfile expectedProfile = builder.build(); final CountDownLatch inProgressLatch = new CountDownLatch(1); runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { assertEquals(expectedProfile, response); inProgressLatch.countDown(); }, exception -> { - assertTrue("Should not reach here ", false); + logger.info(exception); + for (StackTraceElement ste : exception.getStackTrace()) { + logger.info(ste); + } + assertTrue("Should not reach here", false); inProgressLatch.countDown(); - }), stateNError); + }), profilesToCollect); assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); } - public void testInitNoError() throws IOException, InterruptedException { - testErrorStateTemplate(InittedEverResultStatus.INDEX_NOT_EXIT, ErrorResultStatus.INDEX_NOT_EXIT, DetectorState.INIT, null); + public void testErrorStateTemplate( + RCFPollingStatus initStatus, + ErrorResultStatus status, + DetectorState state, + String error, + JobStatus jobStatus + ) throws IOException, + InterruptedException { + testErrorStateTemplate(initStatus, status, state, error, jobStatus, stateNError); } public void testRunningNoError() throws IOException, InterruptedException { - testErrorStateTemplate(InittedEverResultStatus.GREATER_THAN_ZERO, ErrorResultStatus.NO_ERROR, DetectorState.RUNNING, null); + testErrorStateTemplate(RCFPollingStatus.INIT_DONE, ErrorResultStatus.NO_ERROR, DetectorState.RUNNING, null, JobStatus.ENABLED); } public void testRunningWithError() throws IOException, InterruptedException { testErrorStateTemplate( - InittedEverResultStatus.GREATER_THAN_ZERO, + RCFPollingStatus.INIT_DONE, ErrorResultStatus.SHINGLE_ERROR, DetectorState.RUNNING, - noFullShingleError + noFullShingleError, + JobStatus.ENABLED ); } - public void testInitWithError() throws IOException, InterruptedException { - 
testErrorStateTemplate(InittedEverResultStatus.EMPTY, ErrorResultStatus.SHINGLE_ERROR, DetectorState.INIT, noFullShingleError); + public void testDisabledForStateError() throws IOException, InterruptedException { + testErrorStateTemplate( + RCFPollingStatus.INITTING, + ErrorResultStatus.STOPPED_ERROR, + DetectorState.DISABLED, + stoppedError, + JobStatus.DISABLED + ); } - public void testExceptionOnStateFetching() throws IOException, InterruptedException { - setUpClientGet(true, JobStatus.ENABLED); - setUpClientSearch(InittedEverResultStatus.EXCEPTION, ErrorResultStatus.NO_ERROR); - - final CountDownLatch inProgressLatch = new CountDownLatch(1); + public void testDisabledForStateInit() throws IOException, InterruptedException { + testErrorStateTemplate( + RCFPollingStatus.INITTING, + ErrorResultStatus.STOPPED_ERROR, + DetectorState.DISABLED, + stoppedError, + JobStatus.DISABLED, + stateInitProgress + ); + } - runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { - assertTrue("Should not reach here ", false); - inProgressLatch.countDown(); - }, exception -> { - assertTrue("Unexcpeted exception " + exception.getMessage(), exception instanceof RuntimeException); - inProgressLatch.countDown(); - }), stateOnly); - assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); + public void testInitWithError() throws IOException, InterruptedException { + testErrorStateTemplate( + RCFPollingStatus.EMPTY, + ErrorResultStatus.SHINGLE_ERROR, + DetectorState.INIT, + noFullShingleError, + JobStatus.ENABLED + ); } @SuppressWarnings("unchecked") - private void setUpClientExecute() { + private void setUpClientExecuteProfileAction() { doAnswer(invocation -> { Object[] args = invocation.getArguments(); ActionListener listener = (ActionListener) args[2]; @@ -472,7 +504,6 @@ private void setUpClientExecute() { } }; - LOG.info("hello"); ProfileNodeResponse profileNodeResponse1 = new ProfileNodeResponse(discoveryNode1, modelSizeMap1, shingleSize); ProfileNodeResponse 
profileNodeResponse2 = new ProfileNodeResponse(discoveryNode2, modelSizeMap2, -1); List profileNodeResponses = Arrays.asList(profileNodeResponse1, profileNodeResponse2); @@ -482,13 +513,71 @@ private void setUpClientExecute() { listener.onResponse(profileResponse); return null; - }).when(client).execute(any(), any(), any()); + }).when(client).execute(any(ProfileAction.class), any(), any()); + + } + + @SuppressWarnings("unchecked") + private void setUpClientExecuteRCFPollingAction(RCFPollingStatus inittedEverResultStatus) { + doAnswer(invocation -> { + Object[] args = invocation.getArguments(); + ActionListener listener = (ActionListener) args[2]; + + Exception cause = null; + String detectorId = "123"; + if (inittedEverResultStatus == RCFPollingStatus.INIT_NOT_EXIT + || inittedEverResultStatus == RCFPollingStatus.REMOTE_INIT_NOT_EXIT + || inittedEverResultStatus == RCFPollingStatus.INDEX_NOT_FOUND + || inittedEverResultStatus == RCFPollingStatus.REMOTE_INDEX_NOT_FOUND) { + switch (inittedEverResultStatus) { + case INIT_NOT_EXIT: + case REMOTE_INIT_NOT_EXIT: + cause = new ResourceNotFoundException(detectorId, messaingExceptionError); + break; + case INDEX_NOT_FOUND: + case REMOTE_INDEX_NOT_FOUND: + cause = new IndexNotFoundException(detectorId, CommonName.CHECKPOINT_INDEX_NAME); + break; + default: + assertTrue("should not reach here", false); + break; + } + cause = new AnomalyDetectionException(detectorId, cause); + if (inittedEverResultStatus == RCFPollingStatus.REMOTE_INIT_NOT_EXIT + || inittedEverResultStatus == RCFPollingStatus.REMOTE_INDEX_NOT_FOUND) { + cause = new RemoteTransportException(RCFPollingAction.NAME, new NotSerializableExceptionWrapper(cause)); + } + listener.onFailure(cause); + } else { + RCFPollingResponse result = null; + switch (inittedEverResultStatus) { + case INIT_DONE: + result = new RCFPollingResponse(requiredSamples + 1); + break; + case INITTING: + result = new RCFPollingResponse(requiredSamples - neededSamples); + break; + case EMPTY: 
+ result = new RCFPollingResponse(0); + break; + case EXCEPTION: + listener.onFailure(new RuntimeException()); + break; + default: + assertTrue("should not reach here", false); + break; + } + + listener.onResponse(result); + } + return null; + }).when(client).execute(any(RCFPollingAction.class), any(), any()); } public void testProfileModels() throws InterruptedException, IOException { - setUpClientGet(true, JobStatus.ENABLED); - setUpClientExecute(); + setUpClientGet(DetectorStatus.EXIST, JobStatus.ENABLED, RCFPollingStatus.EMPTY, ErrorResultStatus.NO_ERROR); + setUpClientExecuteProfileAction(); final CountDownLatch inProgressLatch = new CountDownLatch(1); @@ -515,21 +604,13 @@ public void testProfileModels() throws InterruptedException, IOException { assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); } - /** - * A detector's error message can be on a rotated index. This test makes sure we get error info - * from .opendistro-anomaly-results index that has been rolled over. - * @param state expected detector state - * @param jobStatus job status to config in the test case - * @throws IOException when profile API throws it - * @throws InterruptedException when our CountDownLatch has been interruptted - */ - private void stoppedDetectorErrorTemplate(DetectorState state, JobStatus jobStatus, ErrorResultStatus errorStatus) throws IOException, - InterruptedException { - setUpClientGet(true, jobStatus); - setUpClientSearch(InittedEverResultStatus.GREATER_THAN_ZERO, errorStatus); - DetectorProfile expectedProfile = new DetectorProfile(); - expectedProfile.setState(state); - expectedProfile.setError(stoppedError); + public void testInitProgress() throws IOException, InterruptedException { + setUpClientGet(DetectorStatus.EXIST, JobStatus.ENABLED, RCFPollingStatus.INITTING, ErrorResultStatus.NO_ERROR); + DetectorProfile expectedProfile = new DetectorProfile.Builder().state(DetectorState.INIT).build(); + + // 123 / 128 rounded to 96% + InitProgressProfile profile = new 
InitProgressProfile("96%", neededSamples * detectorIntervalMin, neededSamples); + expectedProfile.setInitProgress(profile); final CountDownLatch inProgressLatch = new CountDownLatch(1); runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { @@ -538,36 +619,48 @@ private void stoppedDetectorErrorTemplate(DetectorState state, JobStatus jobStat }, exception -> { assertTrue("Should not reach here ", false); inProgressLatch.countDown(); - }), stateNError); + }), stateInitProgress); assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); } - /** - * Job enabled time is earlier than and disabled time is later than index 2 creation date, we expect to search 2 indices - */ - public void testDetectorStoppedEnabledTimeLtIndex2Date() throws IOException, InterruptedException { - stoppedDetectorErrorTemplate(DetectorState.DISABLED, JobStatus.DISABLED_ROTATED_1, ErrorResultStatus.STOPPED_ERROR_2); - } + public void testInitProgressFailImmediately() throws IOException, InterruptedException { + setUpClientGet(DetectorStatus.NO_DOC, JobStatus.ENABLED, RCFPollingStatus.INITTING, ErrorResultStatus.NO_ERROR); + DetectorProfile expectedProfile = new DetectorProfile.Builder().state(DetectorState.INIT).build(); - /** - * Both job enabled and disabled time are later than index 2 creation date, we expect to search 2 indices - */ - public void testDetectorStoppedEnabledTimeGtIndex2Date() throws IOException, InterruptedException { - stoppedDetectorErrorTemplate(DetectorState.DISABLED, JobStatus.DISABLED_ROTATED_2, ErrorResultStatus.STOPPED_ERROR_2); - } + // 123 / 128 rounded to 96% + InitProgressProfile profile = new InitProgressProfile("96%", neededSamples * detectorIntervalMin, neededSamples); + expectedProfile.setInitProgress(profile); + final CountDownLatch inProgressLatch = new CountDownLatch(1); - /** - * Both job enabled and disabled time are earlier than index 2 creation date, we expect to search 1 indices - */ - public void 
testDetectorStoppedEnabledTimeGtIndex1Date() throws IOException, InterruptedException { - stoppedDetectorErrorTemplate(DetectorState.DISABLED, JobStatus.DISABLED_ROTATED_3, ErrorResultStatus.STOPPED_ERROR_1); + runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { + assertTrue("Should not reach here ", false); + inProgressLatch.countDown(); + }, exception -> { + assertTrue(exception.getMessage().contains(AnomalyDetectorProfileRunner.FAIL_TO_FIND_DETECTOR_MSG)); + inProgressLatch.countDown(); + }), stateInitProgress); + assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); } - public void testAssumption() { - assertEquals( - "profileError depends on this assumption.", - ".opendistro-anomaly-results*", - AnomalyDetectionIndices.ALL_AD_RESULTS_INDEX_PATTERN - ); + public void testInitNoUpdateNoIndex() throws IOException, InterruptedException { + setUpClientGet(DetectorStatus.EXIST, JobStatus.ENABLED, RCFPollingStatus.EMPTY, ErrorResultStatus.NO_ERROR); + DetectorProfile expectedProfile = new DetectorProfile.Builder() + .state(DetectorState.INIT) + .initProgress(new InitProgressProfile("0%", detectorIntervalMin * requiredSamples, requiredSamples)) + .build(); + final CountDownLatch inProgressLatch = new CountDownLatch(1); + + runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { + assertEquals(expectedProfile, response); + inProgressLatch.countDown(); + }, exception -> { + logger.error(exception); + for (StackTraceElement ste : exception.getStackTrace()) { + logger.info(ste); + } + assertTrue("Should not reach here ", false); + inProgressLatch.countDown(); + }), stateInitProgress); + assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); } } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/TestHelpers.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/TestHelpers.java index c56ccdd6..1cc9167d 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/TestHelpers.java +++ 
b/src/test/java/com/amazon/opendistroforelasticsearch/ad/TestHelpers.java @@ -99,6 +99,7 @@ import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorExecutionInput; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.amazon.opendistroforelasticsearch.ad.model.Feature; import com.amazon.opendistroforelasticsearch.ad.model.FeatureData; import com.amazon.opendistroforelasticsearch.ad.model.IntervalTimeConfiguration; @@ -230,6 +231,24 @@ public static AnomalyDetector randomAnomalyDetectorWithEmptyFeature() throws IOE ); } + public static AnomalyDetector randomAnomalyDetectorWithInterval(TimeConfiguration interval) throws IOException { + return new AnomalyDetector( + randomAlphaOfLength(10), + randomLong(), + randomAlphaOfLength(20), + randomAlphaOfLength(30), + randomAlphaOfLength(5), + ImmutableList.of(randomAlphaOfLength(10).toLowerCase()), + ImmutableList.of(randomFeature()), + randomQuery(), + interval, + randomIntervalTimeConfiguration(), + null, + randomInt(), + Instant.now().truncatedTo(ChronoUnit.SECONDS) + ); + } + public static SearchSourceBuilder randomFeatureQuery() throws IOException { String query = "{\"query\":{\"match\":{\"user\":{\"query\":\"kimchy\",\"operator\":\"OR\",\"prefix_length\":0," + "\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\"," @@ -455,12 +474,11 @@ public static void createIndex(RestClient client, String indexName, HttpEntity d ); } - public static GetResponse createGetResponse(ToXContentObject o, String id) throws IOException { + public static GetResponse createGetResponse(ToXContentObject o, String id, String indexName) throws IOException { XContentBuilder content = o.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS); - return new GetResponse( new GetResult( - 
AnomalyDetector.ANOMALY_DETECTORS_INDEX, + indexName, MapperService.SINGLE_MAPPING_NAME, id, UNASSIGNED_SEQ_NO, @@ -520,4 +538,20 @@ public static SearchResponse createEmptySearchResponse() throws IOException { SearchResponse.Clusters.EMPTY ); } + + public static AnomalyResult randomDetectState() { + return randomAnomalyDetectResult(randomDouble(), randomAlphaOfLength(5)); + } + + public static DetectorInternalState randomDetectState(String error) { + return randomDetectState(error, Instant.now()); + } + + public static DetectorInternalState randomDetectState(Instant lastUpdateTime) { + return randomDetectState(randomAlphaOfLength(5), lastUpdateTime); + } + + public static DetectorInternalState randomDetectState(String error, Instant lastUpdateTime) { + return new DetectorInternalState.Builder().lastUpdateTime(lastUpdateTime).error(error).build(); + } } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/feature/FeatureManagerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/feature/FeatureManagerTests.java index 0d14105d..e7533a16 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/feature/FeatureManagerTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/feature/FeatureManagerTests.java @@ -59,7 +59,7 @@ import com.amazon.opendistroforelasticsearch.ad.dataprocessor.SingleFeatureLinearUniformInterpolator; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.IntervalTimeConfiguration; -import com.amazon.opendistroforelasticsearch.ad.transport.ADStateManager; +import com.amazon.opendistroforelasticsearch.ad.transport.TransportStateManager; import com.amazon.opendistroforelasticsearch.ad.util.ArrayEqMatcher; @RunWith(JUnitParamsRunner.class) @@ -91,7 +91,7 @@ public class FeatureManagerTests { private Clock clock; @Mock - private ADStateManager stateManager; + private TransportStateManager stateManager; private FeatureManager 
featureManager; diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/feature/SearchFeatureDaoTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/feature/SearchFeatureDaoTests.java index 610bc7da..c6976c2d 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/feature/SearchFeatureDaoTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/feature/SearchFeatureDaoTests.java @@ -89,7 +89,7 @@ import com.amazon.opendistroforelasticsearch.ad.dataprocessor.SingleFeatureLinearUniformInterpolator; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.IntervalTimeConfiguration; -import com.amazon.opendistroforelasticsearch.ad.transport.ADStateManager; +import com.amazon.opendistroforelasticsearch.ad.transport.TransportStateManager; import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; import com.amazon.opendistroforelasticsearch.ad.util.ParseUtils; @@ -128,7 +128,7 @@ public class SearchFeatureDaoTests { @Mock private Max max; @Mock - private ADStateManager stateManager; + private TransportStateManager stateManager; @Mock private AnomalyDetector detector; diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStatsNodesTransportActionTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStatsNodesTransportActionTests.java index 49fd78c8..8c66031b 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStatsNodesTransportActionTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStatsNodesTransportActionTests.java @@ -26,6 +26,7 @@ import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.ESIntegTestCase; import 
org.elasticsearch.threadpool.ThreadPool; @@ -51,6 +52,7 @@ public class ADStatsNodesTransportActionTests extends ESIntegTestCase { private String clusterStatName1, clusterStatName2; private String nodeStatName1, nodeStatName2; + @Override @Before public void setUp() throws Exception { super.setUp(); @@ -59,7 +61,13 @@ public void setUp() throws Exception { Clock clock = mock(Clock.class); Throttler throttler = new Throttler(clock); ThreadPool threadPool = mock(ThreadPool.class); - IndexUtils indexUtils = new IndexUtils(client, new ClientUtil(Settings.EMPTY, client, throttler, threadPool), clusterService()); + IndexNameExpressionResolver indexNameResolver = mock(IndexNameExpressionResolver.class); + IndexUtils indexUtils = new IndexUtils( + client, + new ClientUtil(Settings.EMPTY, client, throttler, threadPool), + clusterService(), + indexNameResolver + ); ModelManager modelManager = mock(ModelManager.class); clusterStatName1 = "clusterStat1"; diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTests.java index f8ebc54b..faa552b3 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTests.java @@ -42,6 +42,7 @@ import java.io.IOException; import java.time.Clock; +import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -55,6 +56,8 @@ import org.elasticsearch.ElasticsearchTimeoutException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.action.get.GetRequest; +import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.action.support.ActionFilters; 
@@ -92,6 +95,7 @@ import test.com.amazon.opendistroforelasticsearch.ad.util.JsonDeserializer; import com.amazon.opendistroforelasticsearch.ad.AbstractADTest; +import com.amazon.opendistroforelasticsearch.ad.TestHelpers; import com.amazon.opendistroforelasticsearch.ad.breaker.ADCircuitBreakerService; import com.amazon.opendistroforelasticsearch.ad.cluster.HashRing; import com.amazon.opendistroforelasticsearch.ad.common.exception.AnomalyDetectionException; @@ -111,6 +115,7 @@ import com.amazon.opendistroforelasticsearch.ad.ml.rcf.CombinedRcfResult; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.amazon.opendistroforelasticsearch.ad.model.FeatureData; import com.amazon.opendistroforelasticsearch.ad.stats.ADStat; import com.amazon.opendistroforelasticsearch.ad.stats.ADStats; @@ -126,7 +131,7 @@ public class AnomalyResultTests extends AbstractADTest { private static Settings settings = Settings.EMPTY; private TransportService transportService; private ClusterService clusterService; - private ADStateManager stateManager; + private TransportStateManager stateManager; private ColdStartRunner runner; private FeatureManager featureQuery; private ModelManager normalModelManager; @@ -163,7 +168,7 @@ public void setUp() throws Exception { runner = new ColdStartRunner(); transportService = testNodes[0].transportService; clusterService = testNodes[0].clusterService; - stateManager = mock(ADStateManager.class); + stateManager = mock(TransportStateManager.class); // return 2 RCF partitions when(stateManager.getPartitionNumber(any(String.class), any(AnomalyDetector.class))).thenReturn(2); when(stateManager.isMuted(any(String.class))).thenReturn(false); @@ -242,7 +247,7 @@ public void setUp() throws Exception { Throttler throttler = new Throttler(clock); ThreadPool threadpool = mock(ThreadPool.class); 
ClientUtil clientUtil = new ClientUtil(Settings.EMPTY, client, throttler, threadpool); - IndexUtils indexUtils = new IndexUtils(client, clientUtil, clusterService); + IndexUtils indexUtils = new IndexUtils(client, clientUtil, clusterService, indexNameResolver); Map> statsMap = new HashMap>() { { @@ -252,6 +257,25 @@ public void setUp() throws Exception { }; adStats = new ADStats(indexUtils, normalModelManager, statsMap); + + doAnswer(invocation -> { + Object[] args = invocation.getArguments(); + GetRequest request = (GetRequest) args[0]; + ActionListener listener = (ActionListener) args[1]; + + if (request.index().equals(DetectorInternalState.DETECTOR_STATE_INDEX)) { + + DetectorInternalState.Builder result = new DetectorInternalState.Builder().lastUpdateTime(Instant.now()); + + listener + .onResponse( + TestHelpers.createGetResponse(result.build(), detector.getDetectorId(), DetectorInternalState.DETECTOR_STATE_INDEX) + ); + + } + + return null; + }).when(client).get(any(), any()); } @Override @@ -269,11 +293,6 @@ private Throwable assertException(PlainActionFuture liste return expectThrows(exceptionType, () -> listener.actionGet()); } - private void assertException(PlainActionFuture listener, Class exceptionType, String msg) { - Exception e = expectThrows(exceptionType, () -> listener.actionGet()); - assertThat(e.getMessage(), containsString(msg)); - } - public void testNormal() throws IOException { // These constructors register handler in transport service @@ -643,7 +662,7 @@ public void testTemporaryThresholdNodeNotConnectedException() { @SuppressWarnings("unchecked") public void testMute() { - ADStateManager muteStateManager = mock(ADStateManager.class); + TransportStateManager muteStateManager = mock(TransportStateManager.class); when(muteStateManager.isMuted(any(String.class))).thenReturn(true); doAnswer(invocation -> { ActionListener> listener = invocation.getArgument(1); diff --git 
a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/CronTransportActionTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/CronTransportActionTests.java index 38ce9fa6..172df264 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/CronTransportActionTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/CronTransportActionTests.java @@ -64,7 +64,7 @@ public void setUp() throws Exception { TransportService transportService = mock(TransportService.class); ActionFilters actionFilters = mock(ActionFilters.class); - ADStateManager tarnsportStatemanager = mock(ADStateManager.class); + TransportStateManager tarnsportStatemanager = mock(TransportStateManager.class); ModelManager modelManager = mock(ModelManager.class); FeatureManager featureManager = mock(FeatureManager.class); diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/DeleteModelTransportActionTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/DeleteModelTransportActionTests.java index e5b1dd55..8defa7b4 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/DeleteModelTransportActionTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/DeleteModelTransportActionTests.java @@ -69,7 +69,7 @@ public void setUp() throws Exception { TransportService transportService = mock(TransportService.class); ActionFilters actionFilters = mock(ActionFilters.class); - ADStateManager tarnsportStatemanager = mock(ADStateManager.class); + TransportStateManager tarnsportStatemanager = mock(TransportStateManager.class); ModelManager modelManager = mock(ModelManager.class); FeatureManager featureManager = mock(FeatureManager.class); diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTests.java new file mode 100644 
index 00000000..cc8b8799 --- /dev/null +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTests.java @@ -0,0 +1,354 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.util.Collections; +import java.util.Optional; + +import org.elasticsearch.Version; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.transport.TransportAddress; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.transport.ConnectTransportException; +import org.elasticsearch.transport.Transport; +import org.elasticsearch.transport.TransportException; +import org.elasticsearch.transport.TransportInterceptor; +import 
org.elasticsearch.transport.TransportRequest; +import org.elasticsearch.transport.TransportRequestOptions; +import org.elasticsearch.transport.TransportResponse; +import org.elasticsearch.transport.TransportResponseHandler; +import org.elasticsearch.transport.TransportService; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; + +import test.com.amazon.opendistroforelasticsearch.ad.util.FakeNode; +import test.com.amazon.opendistroforelasticsearch.ad.util.JsonDeserializer; + +import com.amazon.opendistroforelasticsearch.ad.AbstractADTest; +import com.amazon.opendistroforelasticsearch.ad.TestHelpers; +import com.amazon.opendistroforelasticsearch.ad.cluster.HashRing; +import com.amazon.opendistroforelasticsearch.ad.common.exception.AnomalyDetectionException; +import com.amazon.opendistroforelasticsearch.ad.common.exception.JsonPathNotFoundException; +import com.amazon.opendistroforelasticsearch.ad.constant.CommonMessageAttributes; +import com.amazon.opendistroforelasticsearch.ad.ml.ModelManager; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; + +public class RCFPollingTests extends AbstractADTest { + Gson gson = new GsonBuilder().create(); + private String detectorId = "jqIG6XIBEyaF3zCMZfcB"; + private String model0Id = detectorId + "_rcf_0"; + private long totalUpdates = 3L; + private String nodeId = "abc"; + private ClusterService clusterService; + private HashRing hashRing; + private TransportAddress transportAddress1; + private ModelManager manager; + private TransportService transportService; + private PlainActionFuture future; + private RCFPollingTransportAction action; + private RCFPollingRequest request; + private TransportInterceptor normalTransportInterceptor, failureTransportInterceptor; + + @BeforeClass + public static void setUpBeforeClass() { + setUpThreadPool(RCFPollingTests.class.getSimpleName()); + + } + + @AfterClass + public static void tearDownAfterClass() { + tearDownThreadPool(); + } + + 
private void registerHandler(FakeNode node) { + new RCFPollingTransportAction( + new ActionFilters(Collections.emptySet()), + node.transportService, + Settings.EMPTY, + manager, + hashRing, + node.clusterService + ); + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + clusterService = mock(ClusterService.class); + hashRing = mock(HashRing.class); + transportAddress1 = new TransportAddress(new InetSocketAddress(InetAddress.getByName("1.2.3.4"), 9300)); + manager = mock(ModelManager.class); + transportService = new TransportService( + Settings.EMPTY, + mock(Transport.class), + null, + TransportService.NOOP_TRANSPORT_INTERCEPTOR, + x -> null, + null, + Collections.emptySet() + ); + future = new PlainActionFuture<>(); + + request = new RCFPollingRequest(detectorId); + when(manager.getRcfModelId(any(String.class), anyInt())).thenReturn(model0Id); + + doAnswer(invocation -> { + Object[] args = invocation.getArguments(); + @SuppressWarnings("unchecked") + ActionListener listener = (ActionListener) args[2]; + listener.onResponse(totalUpdates); + return null; + }).when(manager).getTotalUpdates(any(String.class), any(String.class), any()); + + normalTransportInterceptor = new TransportInterceptor() { + @Override + public AsyncSender interceptSender(AsyncSender sender) { + return new AsyncSender() { + @Override + public void sendRequest( + Transport.Connection connection, + String action, + TransportRequest request, + TransportRequestOptions options, + TransportResponseHandler handler + ) { + if (RCFPollingAction.NAME.equals(action)) { + sender.sendRequest(connection, action, request, options, rcfRollingHandler(handler)); + } else { + sender.sendRequest(connection, action, request, options, handler); + } + } + }; + } + }; + + failureTransportInterceptor = new TransportInterceptor() { + @Override + public AsyncSender interceptSender(AsyncSender sender) { + return new AsyncSender() { + @Override + public void sendRequest( + 
Transport.Connection connection, + String action, + TransportRequest request, + TransportRequestOptions options, + TransportResponseHandler handler + ) { + if (RCFPollingAction.NAME.equals(action)) { + sender.sendRequest(connection, action, request, options, rcfFailureRollingHandler(handler)); + } else { + sender.sendRequest(connection, action, request, options, handler); + } + } + }; + } + }; + } + + public void testNormal() { + DiscoveryNode localNode = new DiscoveryNode(nodeId, transportAddress1, Version.CURRENT.minimumCompatibilityVersion()); + when(hashRing.getOwningNode(any(String.class))).thenReturn(Optional.of(localNode)); + + when(clusterService.localNode()).thenReturn(localNode); + + action = new RCFPollingTransportAction( + mock(ActionFilters.class), + transportService, + Settings.EMPTY, + manager, + hashRing, + clusterService + ); + action.doExecute(mock(Task.class), request, future); + + RCFPollingResponse response = future.actionGet(); + assertEquals(totalUpdates, response.getTotalUpdates()); + } + + public void testNoNodeFoundForModel() { + when(manager.getRcfModelId(any(String.class), anyInt())).thenReturn(model0Id); + when(hashRing.getOwningNode(any(String.class))).thenReturn(Optional.empty()); + action = new RCFPollingTransportAction( + mock(ActionFilters.class), + transportService, + Settings.EMPTY, + manager, + hashRing, + clusterService + ); + action.doExecute(mock(Task.class), request, future); + assertException(future, AnomalyDetectionException.class, RCFPollingTransportAction.NO_NODE_FOUND_MSG); + } + + /** + * Precondition: receiver's model manager responds with a response. See + * manager.getRcfModelId mocked output in setUp method. + * When receiving a response, respond back with totalUpdates. 
+ * @param handler handler for receiver + * @return handler for request sender + */ + private TransportResponseHandler rcfRollingHandler(TransportResponseHandler handler) { + return new TransportResponseHandler() { + @Override + public T read(StreamInput in) throws IOException { + return handler.read(in); + } + + @Override + @SuppressWarnings("unchecked") + public void handleResponse(T response) { + handler.handleResponse((T) new RCFPollingResponse(totalUpdates)); + } + + @Override + public void handleException(TransportException exp) { + handler.handleException(exp); + } + + @Override + public String executor() { + return handler.executor(); + } + }; + } + + /** + * Precondition: receiver's model manager responds with a response. See + * manager.getRcfModelId mocked output in setUp method. + * Create handler that would return a connection failure + * @param handler callback handler + * @return handler that would return a connection failure + */ + private TransportResponseHandler rcfFailureRollingHandler(TransportResponseHandler handler) { + return new TransportResponseHandler() { + @Override + public T read(StreamInput in) throws IOException { + return handler.read(in); + } + + @Override + public void handleResponse(T response) { + handler + .handleException( + new ConnectTransportException( + new DiscoveryNode(nodeId, transportAddress1, Version.CURRENT.minimumCompatibilityVersion()), + RCFPollingAction.NAME + ) + ); + } + + @Override + public void handleException(TransportException exp) { + handler.handleException(exp); + } + + @Override + public String executor() { + return handler.executor(); + } + }; + } + + public void testGetRemoteNormalResponse() { + setupTestNodes(Settings.EMPTY, normalTransportInterceptor); + try { + TransportService realTransportService = testNodes[0].transportService; + clusterService = testNodes[0].clusterService; + + action = new RCFPollingTransportAction( + new ActionFilters(Collections.emptySet()), + realTransportService, + 
Settings.EMPTY, + manager, + hashRing, + clusterService + ); + + when(hashRing.getOwningNode(any(String.class))).thenReturn(Optional.of(testNodes[1].discoveryNode())); + registerHandler(testNodes[1]); + + action.doExecute(null, request, future); + + RCFPollingResponse response = future.actionGet(); + assertEquals(totalUpdates, response.getTotalUpdates()); + } finally { + tearDownTestNodes(); + } + } + + public void testGetRemoteFailureResponse() { + setupTestNodes(Settings.EMPTY, failureTransportInterceptor); + try { + TransportService realTransportService = testNodes[0].transportService; + clusterService = testNodes[0].clusterService; + + action = new RCFPollingTransportAction( + new ActionFilters(Collections.emptySet()), + realTransportService, + Settings.EMPTY, + manager, + hashRing, + clusterService + ); + + when(hashRing.getOwningNode(any(String.class))).thenReturn(Optional.of(testNodes[1].discoveryNode())); + registerHandler(testNodes[1]); + + action.doExecute(null, request, future); + + expectThrows(ConnectTransportException.class, () -> future.actionGet()); + } finally { + tearDownTestNodes(); + } + } + + public void testResponseToXContent() throws IOException, JsonPathNotFoundException { + RCFPollingResponse response = new RCFPollingResponse(totalUpdates); + String json = TestHelpers.xContentBuilderToString(response.toXContent(TestHelpers.builder(), ToXContent.EMPTY_PARAMS)); + assertEquals(totalUpdates, JsonDeserializer.getLongValue(json, RCFPollingResponse.TOTAL_UPDATES_KEY)); + } + + public void testRequestToXContent() throws IOException, JsonPathNotFoundException { + RCFPollingRequest response = new RCFPollingRequest(detectorId); + String json = TestHelpers.xContentBuilderToString(response.toXContent(TestHelpers.builder(), ToXContent.EMPTY_PARAMS)); + assertEquals(detectorId, JsonDeserializer.getTextValue(json, CommonMessageAttributes.ID_JSON_KEY)); + } + + public void testNullDetectorId() { + String nullDetectorId = null; + RCFPollingRequest 
emptyRequest = new RCFPollingRequest(nullDetectorId); + assertTrue(emptyRequest.validate() != null); + } +} diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManagerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateManagerTests.java similarity index 57% rename from src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManagerTests.java rename to src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateManagerTests.java index a088ec39..eeacd5d5 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManagerTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateManagerTests.java @@ -15,10 +15,11 @@ package com.amazon.opendistroforelasticsearch.ad.transport; -import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; import static org.mockito.Matchers.any; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyZeroInteractions; import static org.mockito.Mockito.when; @@ -29,8 +30,6 @@ import java.util.AbstractMap.SimpleImmutableEntry; import java.util.Arrays; import java.util.Collections; -import java.util.Map.Entry; -import java.util.concurrent.ConcurrentHashMap; import java.util.stream.IntStream; import org.elasticsearch.action.ActionListener; @@ -38,15 +37,9 @@ import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.client.Client; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.NamedXContentRegistry; -import org.elasticsearch.common.xcontent.ToXContent; -import 
org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.index.get.GetResult; -import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.search.SearchModule; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.ThreadPool; @@ -60,8 +53,8 @@ import com.amazon.opendistroforelasticsearch.ad.util.Throttler; import com.google.common.collect.ImmutableMap; -public class ADStateManagerTests extends ESTestCase { - private ADStateManager stateManager; +public class TransportStateManagerTests extends ESTestCase { + private TransportStateManager stateManager; private ModelManager modelManager; private Client client; private ClientUtil clientUtil; @@ -71,6 +64,9 @@ public class ADStateManagerTests extends ESTestCase { private ThreadPool context; private AnomalyDetector detectorToCheck; private Settings settings; + private String adId = "123"; + + private GetResponse checkpointResponse; @Override protected NamedXContentRegistry xContentRegistry() { @@ -85,7 +81,6 @@ public void setUp() throws Exception { modelManager = mock(ModelManager.class); when(modelManager.getPartitionedForestSizes(any(AnomalyDetector.class))).thenReturn(new SimpleImmutableEntry<>(2, 20)); client = mock(Client.class); - clientUtil = mock(ClientUtil.class); settings = Settings .builder() .put("opendistro.anomaly_detection.max_retry_for_unresponsive_node", 3) @@ -96,8 +91,10 @@ public void setUp() throws Exception { context = TestHelpers.createThreadPool(); throttler = new Throttler(clock); - stateManager = new ADStateManager(client, xContentRegistry(), modelManager, settings, clientUtil, clock, duration); + clientUtil = new ClientUtil(Settings.EMPTY, client, throttler, mock(ThreadPool.class)); + stateManager = new TransportStateManager(client, xContentRegistry(), modelManager, settings, clientUtil, clock, duration); + checkpointResponse = mock(GetResponse.class); } @Override @@ -112,9 
+109,8 @@ public void tearDown() throws Exception { } @SuppressWarnings("unchecked") - private String setupDetector(boolean responseExists) throws IOException { + private String setupDetector() throws IOException { detectorToCheck = TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null); - XContentBuilder content = detectorToCheck.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS); doAnswer(invocation -> { Object[] args = invocation.getArguments(); @@ -125,39 +121,63 @@ private String setupDetector(boolean responseExists) throws IOException { if (args[0] instanceof GetRequest) { request = (GetRequest) args[0]; } - if (args[2] instanceof ActionListener) { - listener = (ActionListener) args[2]; + if (args[1] instanceof ActionListener) { + listener = (ActionListener) args[1]; } assertTrue(request != null && listener != null); listener .onResponse( - new GetResponse( - new GetResult( - AnomalyDetector.ANOMALY_DETECTORS_INDEX, - MapperService.SINGLE_MAPPING_NAME, - detectorToCheck.getDetectorId(), - UNASSIGNED_SEQ_NO, - 0, - -1, - responseExists, - BytesReference.bytes(content), - Collections.emptyMap(), - Collections.emptyMap() - ) - ) + TestHelpers.createGetResponse(detectorToCheck, detectorToCheck.getDetectorId(), AnomalyDetector.ANOMALY_DETECTORS_INDEX) ); return null; - }).when(clientUtil).asyncRequest(any(GetRequest.class), any(), any(ActionListener.class)); + }).when(client).get(any(), any(ActionListener.class)); return detectorToCheck.getDetectorId(); } + @SuppressWarnings("unchecked") + private void setupCheckpoint(boolean responseExists) throws IOException { + when(checkpointResponse.isExists()).thenReturn(responseExists); + + doAnswer(invocation -> { + Object[] args = invocation.getArguments(); + assertTrue(String.format("The size of args is %d. 
Its content is %s", args.length, Arrays.toString(args)), args.length >= 2); + + GetRequest request = null; + ActionListener listener = null; + if (args[0] instanceof GetRequest) { + request = (GetRequest) args[0]; + } + if (args[1] instanceof ActionListener) { + listener = (ActionListener) args[1]; + } + + assertTrue(request != null && listener != null); + listener.onResponse(checkpointResponse); + + return null; + }).when(client).get(any(), any(ActionListener.class)); + } + public void testGetPartitionNumber() throws IOException, InterruptedException { - String detectorId = setupDetector(true); - int partitionNumber = stateManager - .getPartitionNumber(detectorId, TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null)); - assertEquals(2, partitionNumber); + String detectorId = setupDetector(); + AnomalyDetector detector = TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null); + for (int i = 0; i < 2; i++) { + // call two times should return the same result + int partitionNumber = stateManager.getPartitionNumber(detectorId, detector); + assertEquals(2, partitionNumber); + } + + // the 2nd call should directly fetch cached result + verify(modelManager, times(1)).getPartitionedForestSizes(any()); + } + + public void testGetLastError() throws IOException, InterruptedException { + String error = "blah"; + assertEquals(TransportStateManager.NO_ERROR, stateManager.getLastError(adId)); + stateManager.setLastError(adId, error); + assertEquals(error, stateManager.getLastError(adId)); } public void testShouldMute() { @@ -186,26 +206,8 @@ public void testMaintenanceDoNothing() { verifyZeroInteractions(clock); } - public void testMaintenanceNotRemove() throws IOException { - ConcurrentHashMap> states = new ConcurrentHashMap<>(); - when(clock.instant()).thenReturn(Instant.MIN); - states.put("123", new SimpleImmutableEntry<>(TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null), Instant.MAX)); - 
stateManager.maintenance(states); - assertEquals(1, states.size()); - - } - - public void testMaintenancRemove() throws IOException { - ConcurrentHashMap> states = new ConcurrentHashMap<>(); - when(clock.instant()).thenReturn(Instant.MAX); - states.put("123", new SimpleImmutableEntry<>(TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null), Instant.MIN)); - stateManager.maintenance(states); - assertEquals(0, states.size()); - - } - public void testHasRunningQuery() throws IOException { - stateManager = new ADStateManager( + stateManager = new TransportStateManager( client, xContentRegistry(), modelManager, @@ -223,11 +225,64 @@ public void testHasRunningQuery() throws IOException { } public void testGetAnomalyDetector() throws IOException { - String detectorId = setupDetector(true); + String detectorId = setupDetector(); stateManager .getAnomalyDetector( detectorId, ActionListener.wrap(asDetector -> { assertEquals(detectorToCheck, asDetector.get()); }, exception -> assertTrue(false)) ); } + + public void getCheckpointTestTemplate(boolean exists) throws IOException { + setupCheckpoint(exists); + when(clock.instant()).thenReturn(Instant.MIN); + stateManager + .getDetectorCheckpoint(adId, ActionListener.wrap(checkpointExists -> { assertEquals(exists, checkpointExists); }, exception -> { + for (StackTraceElement ste : exception.getStackTrace()) { + logger.info(ste); + } + assertTrue(false); + })); + } + + public void testCheckpointExists() throws IOException { + getCheckpointTestTemplate(true); + } + + public void testCheckpointNotExists() throws IOException { + getCheckpointTestTemplate(false); + } + + public void testMaintenanceNotRemove() throws IOException { + setupCheckpoint(true); + when(clock.instant()).thenReturn(Instant.ofEpochMilli(1)); + stateManager + .getDetectorCheckpoint( + adId, + ActionListener.wrap(gotCheckpoint -> { assertTrue(gotCheckpoint); }, exception -> assertTrue(false)) + ); + 
when(clock.instant()).thenReturn(Instant.ofEpochMilli(1)); + stateManager.maintenance(); + stateManager + .getDetectorCheckpoint(adId, ActionListener.wrap(gotCheckpoint -> assertTrue(gotCheckpoint), exception -> assertTrue(false))); + verify(client, times(1)).get(any(), any()); + } + + public void testMaintenanceRemove() throws IOException { + setupCheckpoint(true); + when(clock.instant()).thenReturn(Instant.ofEpochMilli(1)); + stateManager + .getDetectorCheckpoint( + adId, + ActionListener.wrap(gotCheckpoint -> { assertTrue(gotCheckpoint); }, exception -> assertTrue(false)) + ); + when(clock.instant()).thenReturn(Instant.ofEpochSecond(7200L)); + stateManager.maintenance(); + stateManager + .getDetectorCheckpoint( + adId, + ActionListener.wrap(gotCheckpoint -> { assertTrue(gotCheckpoint); }, exception -> assertTrue(false)) + ); + verify(client, times(2)).get(any(), any()); + } } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateTests.java new file mode 100644 index 00000000..88087d77 --- /dev/null +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateTests.java @@ -0,0 +1,96 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import java.io.IOException; +import java.time.Duration; +import java.time.Instant; +import java.util.AbstractMap.SimpleImmutableEntry; + +import org.elasticsearch.test.ESTestCase; + +import com.amazon.opendistroforelasticsearch.ad.TestHelpers; + +public class TransportStateTests extends ESTestCase { + private TransportState state; + + @Override + public void setUp() throws Exception { + super.setUp(); + state = new TransportState("123"); + } + + private Duration duration = Duration.ofHours(1); + + public void testMaintenanceNotRemoveSingle() throws IOException { + state + .setDetectorDef( + new SimpleImmutableEntry<>( + TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null), + Instant.ofEpochMilli(1000) + ) + ); + + assertTrue(!state.expired(duration, Instant.MIN)); + } + + public void testMaintenanceNotRemove() throws IOException { + state + .setDetectorDef( + new SimpleImmutableEntry<>( + TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null), + Instant.ofEpochSecond(1000) + ) + ); + state.setLastError(new SimpleImmutableEntry<>(null, Instant.ofEpochMilli(1000))); + + assertTrue(!state.expired(duration, Instant.ofEpochSecond(3700))); + } + + public void testMaintenanceRemoveLastError() throws IOException { + state + .setDetectorDef( + new SimpleImmutableEntry<>( + TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null), + Instant.ofEpochMilli(1000) + ) + ); + state.setLastError(new SimpleImmutableEntry<>(null, Instant.ofEpochMilli(1000))); + + assertTrue(state.expired(duration, Instant.ofEpochSecond(3700))); + } + + public void testMaintenancRemoveDetector() throws IOException { + state + .setDetectorDef( + new SimpleImmutableEntry<>(TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null), Instant.MIN) + ); + assertTrue(state.expired(duration, Instant.MAX)); + + } + + public void testMaintenanceFlagNotRemove() throws IOException 
{ + state.setCheckpoint(Instant.ofEpochMilli(1000)); + assertTrue(!state.expired(duration, Instant.MIN)); + + } + + public void testMaintenancFlagRemove() throws IOException { + state.setCheckpoint(Instant.MIN); + assertTrue(!state.expired(duration, Instant.MIN)); + + } +} diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandlerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandlerTests.java index 65565bf1..075e704a 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandlerTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandlerTests.java @@ -41,6 +41,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; +import org.elasticsearch.threadpool.ThreadPool; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; @@ -57,6 +58,10 @@ import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; import com.amazon.opendistroforelasticsearch.ad.transport.AnomalyResultTests; +import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; +import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils; +import com.amazon.opendistroforelasticsearch.ad.util.Throttler; +import com.amazon.opendistroforelasticsearch.ad.util.ThrowingConsumerWrapper; public class AnomalyResultHandlerTests extends AbstractADTest { private static Settings settings; @@ -66,11 +71,22 @@ public class AnomalyResultHandlerTests extends AbstractADTest { @Mock private Client client; + private ClientUtil clientUtil; + + @Mock + private IndexNameExpressionResolver indexNameResolver; + @Mock private AnomalyDetectionIndices anomalyDetectionIndices; + private String 
detectorId = "123"; + @Mock - private IndexNameExpressionResolver indexNameResolver; + private Throttler throttler; + + private ThreadPool context; + + private IndexUtils indexUtil; @BeforeClass public static void setUpBeforeClass() { @@ -88,9 +104,12 @@ public static void tearDownAfterClass() { @Before public void setUp() throws Exception { super.setUp(); - super.setUpLog4jForJUnit(AnomalyResultHandler.class); + super.setUpLog4jForJUnit(AnomalyIndexHandler.class); MockitoAnnotations.initMocks(this); setWriteBlockAdResultIndex(false); + context = TestHelpers.createThreadPool(); + clientUtil = new ClientUtil(settings, client, throttler, context); + indexUtil = new IndexUtils(client, clientUtil, clusterService, indexNameResolver); } @Override @@ -114,25 +133,29 @@ public void testSavingAdResult() throws IOException { listener.onResponse(mock(IndexResponse.class)); return null; }).when(client).index(any(IndexRequest.class), ArgumentMatchers.>any()); - AnomalyResultHandler handler = new AnomalyResultHandler( + AnomalyIndexHandler handler = new AnomalyIndexHandler( client, settings, - clusterService, - indexNameResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + clientUtil, + indexUtil, + clusterService ); - handler.indexAnomalyResult(TestHelpers.randomAnomalyDetectResult()); - assertEquals(1, testAppender.countMessage((AnomalyResultHandler.SUCCESS_SAVING_MSG))); + handler.index(TestHelpers.randomAnomalyDetectResult(), detectorId); + assertEquals(1, testAppender.countMessage(AnomalyIndexHandler.SUCCESS_SAVING_MSG, true)); } @Test public void testSavingFailureNotRetry() throws InterruptedException, IOException { savingFailureTemplate(false, 1, true); - assertEquals(1, testAppender.countMessage((AnomalyResultHandler.FAIL_TO_SAVE_ERR_MSG))); - 
assertTrue(!testAppender.containsMessage(AnomalyResultHandler.SUCCESS_SAVING_MSG)); - assertTrue(!testAppender.containsMessage(AnomalyResultHandler.RETRY_SAVING_ERR_MSG)); + assertEquals(1, testAppender.countMessage(AnomalyIndexHandler.FAIL_TO_SAVE_ERR_MSG, true)); + assertTrue(!testAppender.containsMessage(AnomalyIndexHandler.SUCCESS_SAVING_MSG, true)); + assertTrue(!testAppender.containsMessage(AnomalyIndexHandler.RETRY_SAVING_ERR_MSG, true)); } @Test @@ -140,57 +163,69 @@ public void testSavingFailureRetry() throws InterruptedException, IOException { setWriteBlockAdResultIndex(false); savingFailureTemplate(true, 3, true); - assertEquals(2, testAppender.countMessage((AnomalyResultHandler.RETRY_SAVING_ERR_MSG))); - assertEquals(1, testAppender.countMessage((AnomalyResultHandler.FAIL_TO_SAVE_ERR_MSG))); - assertTrue(!testAppender.containsMessage(AnomalyResultHandler.SUCCESS_SAVING_MSG)); + assertEquals(2, testAppender.countMessage(AnomalyIndexHandler.RETRY_SAVING_ERR_MSG, true)); + assertEquals(1, testAppender.countMessage(AnomalyIndexHandler.FAIL_TO_SAVE_ERR_MSG, true)); + assertTrue(!testAppender.containsMessage(AnomalyIndexHandler.SUCCESS_SAVING_MSG, true)); } @Test public void testIndexWriteBlock() { setWriteBlockAdResultIndex(true); - AnomalyResultHandler handler = new AnomalyResultHandler( + AnomalyIndexHandler handler = new AnomalyIndexHandler( client, settings, - clusterService, - indexNameResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + clientUtil, + indexUtil, + clusterService ); - handler.indexAnomalyResult(TestHelpers.randomAnomalyDetectResult()); + handler.index(TestHelpers.randomAnomalyDetectResult(), detectorId); - assertTrue(testAppender.containsMessage(AnomalyResultHandler.CANNOT_SAVE_ERR_MSG)); + 
assertTrue(testAppender.containsMessage(AnomalyIndexHandler.CANNOT_SAVE_ERR_MSG, true)); } @Test public void testAdResultIndexExist() throws IOException { setInitAnomalyResultIndexException(true); - AnomalyResultHandler handler = new AnomalyResultHandler( + AnomalyIndexHandler handler = new AnomalyIndexHandler( client, settings, - clusterService, - indexNameResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + clientUtil, + indexUtil, + clusterService ); - handler.indexAnomalyResult(TestHelpers.randomAnomalyDetectResult()); + handler.index(TestHelpers.randomAnomalyDetectResult(), detectorId); verify(client, times(1)).index(any(), any()); } @Test public void testAdResultIndexOtherException() throws IOException { expectedEx.expect(AnomalyDetectionException.class); - expectedEx.expectMessage("Error in saving anomaly index for ID"); + expectedEx.expectMessage("Error in saving .opendistro-anomaly-results for detector " + detectorId); setInitAnomalyResultIndexException(false); - AnomalyResultHandler handler = new AnomalyResultHandler( + AnomalyIndexHandler handler = new AnomalyIndexHandler( client, settings, - clusterService, - indexNameResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + clientUtil, + indexUtil, + clusterService ); - handler.indexAnomalyResult(TestHelpers.randomAnomalyDetectResult()); + handler.index(TestHelpers.randomAnomalyDetectResult(), detectorId); verify(client, never()).index(any(), any()); } @@ -257,16 +292,20 @@ private void savingFailureTemplate(boolean throwEsRejectedExecutionException, in 
.put("opendistro.anomaly_detection.backoff_initial_delay", TimeValue.timeValueMillis(1)) .build(); - AnomalyResultHandler handler = new AnomalyResultHandler( + AnomalyIndexHandler handler = new AnomalyIndexHandler( client, backoffSettings, - clusterService, - indexNameResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + clientUtil, + indexUtil, + clusterService ); - handler.indexAnomalyResult(TestHelpers.randomAnomalyDetectResult()); + handler.index(TestHelpers.randomAnomalyDetectResult(), detectorId); backoffLatch.await(); }
diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectorStateHandlerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectorStateHandlerTests.java
new file mode 100644
index 00000000..8a52b2d3
--- /dev/null
+++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectorStateHandlerTests.java
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * or in the "license" file accompanying this file. This file is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+package com.amazon.opendistroforelasticsearch.ad.transport.handler;
+
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import java.time.Clock;
+import java.time.Instant;
+
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.action.get.GetResponse;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.NamedXContentRegistry;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.threadpool.ThreadPool;
+
+import com.amazon.opendistroforelasticsearch.ad.TestHelpers;
+import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices;
+import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState;
+import com.amazon.opendistroforelasticsearch.ad.transport.TransportStateManager;
+import com.amazon.opendistroforelasticsearch.ad.transport.handler.DetectionStateHandler.ErrorStrategy;
+import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil;
+import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils;
+import com.amazon.opendistroforelasticsearch.ad.util.Throttler;
+import com.amazon.opendistroforelasticsearch.ad.util.ThrowingConsumerWrapper;
+
+/**
+ * Unit tests for {@link DetectionStateHandler}: the states built by its {@code ErrorStrategy}
+ * and whether {@code saveError} goes on to the index-block check.
+ */
+public class DetectorStateHandlerTests extends ESTestCase {
+    private DetectionStateHandler detectorStateHandler;
+    private String detectorId = "123";
+    private Client client;
+    private String error = "Stopped due to blah";
+    private IndexUtils indexUtils;
+    private TransportStateManager stateManager;
+
+    /** Wires a {@link DetectionStateHandler} to mocked collaborators before every test. */
+    @Override
+    public void setUp() throws Exception {
+        super.setUp();
+        AnomalyDetectionIndices anomalyDetectionIndices = mock(AnomalyDetectionIndices.class);
+        client = mock(Client.class);
+        Settings settings = Settings.EMPTY;
+        Clock clock = mock(Clock.class);
+        Throttler throttler = new Throttler(clock);
+        // One mocked pool is shared by ClientUtil and the handler; no test stubs or verifies it.
+        ThreadPool threadPool = mock(ThreadPool.class);
+        ClientUtil clientUtil = new ClientUtil(settings, client, throttler, threadPool);
+        indexUtils = mock(IndexUtils.class);
+        ClusterService clusterService = mock(ClusterService.class);
+        stateManager = mock(TransportStateManager.class);
+        detectorStateHandler = new DetectionStateHandler(
+            client,
+            settings,
+            threadPool,
+            ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initDetectorStateIndex),
+            anomalyDetectionIndices::doesDetectorStateIndexExist,
+            clientUtil,
+            indexUtils,
+            clusterService,
+            NamedXContentRegistry.EMPTY,
+            stateManager
+        );
+    }
+
+    /** With no stored state, the strategy builds a state that carries the error and an update time. */
+    public void testNullState() {
+        ErrorStrategy errorStrategy = detectorStateHandler.new ErrorStrategy(error);
+        DetectorInternalState state = errorStrategy.createNewState(null);
+        assertEquals(error, state.getError());
+        assertNotNull(state.getLastUpdateTime());
+    }
+
+    /** When the stored state already holds the same error, the strategy returns {@code null}. */
+    public void testNonNullState() {
+        String unchangedError = "blah";
+        DetectorInternalState oldState = new DetectorInternalState.Builder()
+            .error(unchangedError)
+            .lastUpdateTime(Instant.ofEpochSecond(1L))
+            .build();
+        ErrorStrategy errorStrategy = detectorStateHandler.new ErrorStrategy(unchangedError);
+        DetectorInternalState state = errorStrategy.createNewState(oldState);
+        assertNull(state);
+    }
+
+    /** A stored state without an error is replaced by one that carries the new error and an update time. */
+    public void testOldErrorNull() {
+        ErrorStrategy errorStrategy = detectorStateHandler.new ErrorStrategy(error);
+        // old state's error is null
+        DetectorInternalState state = errorStrategy.createNewState(new DetectorInternalState.Builder().build());
+        assertEquals(error, state.getError());
+        assertNotNull(state.getLastUpdateTime());
+    }
+
+    /** When neither the stored state nor the strategy carries an error, the strategy returns {@code null}. */
+    public void testBothErrorNull() {
+        ErrorStrategy errorStrategy = detectorStateHandler.new ErrorStrategy(null);
+        // old state's error is null
+        DetectorInternalState state = errorStrategy.createNewState(new DetectorInternalState.Builder().build());
+        assertNull(state);
+    }
+
+    /** State manager and stored state both hold the error being saved, so the index-block check never runs. */
+    public void testNoUpdateWithoutErrorChange() {
+        when(stateManager.getLastError(anyString())).thenReturn(error);
+        doAnswer(invocation -> {
+            Object[] args = invocation.getArguments();
+            @SuppressWarnings("unchecked")
+            ActionListener<GetResponse> listener = (ActionListener<GetResponse>) args[1];
+            DetectorInternalState.Builder result = new DetectorInternalState.Builder().lastUpdateTime(Instant.now()).error(error);
+            listener.onResponse(TestHelpers.createGetResponse(result.build(), detectorId, DetectorInternalState.DETECTOR_STATE_INDEX));
+            return null;
+        }).when(client).get(any(), any());
+
+        detectorStateHandler.saveError(error, detectorId);
+
+        verify(indexUtils, never()).checkIndicesBlocked(any(), any(), any());
+    }
+
+    /** State manager and stored state hold "blah" while a different error is saved, so the index-block check runs once. */
+    public void testUpdateWithErrorChange() {
+        when(stateManager.getLastError(anyString())).thenReturn("blah");
+        doAnswer(invocation -> {
+            Object[] args = invocation.getArguments();
+            @SuppressWarnings("unchecked")
+            ActionListener<GetResponse> listener = (ActionListener<GetResponse>) args[1];
+            DetectorInternalState.Builder result = new DetectorInternalState.Builder().lastUpdateTime(Instant.now()).error("blah");
+            listener.onResponse(TestHelpers.createGetResponse(result.build(), detectorId, DetectorInternalState.DETECTOR_STATE_INDEX));
+            return null;
+        }).when(client).get(any(), any());
+
+        detectorStateHandler.saveError(error, detectorId);
+
+        verify(indexUtils, times(1)).checkIndicesBlocked(any(), any(), any());
+    }
+
+    /** State manager reports {@code NO_ERROR} and the stored error differs from the one saved, so the index-block check runs once. */
+    public void testUpdateWithFirstChange() {
+        when(stateManager.getLastError(anyString())).thenReturn(TransportStateManager.NO_ERROR);
+        doAnswer(invocation -> {
+            Object[] args = invocation.getArguments();
+            @SuppressWarnings("unchecked")
+            ActionListener<GetResponse> listener = (ActionListener<GetResponse>) args[1];
+            DetectorInternalState.Builder result = new DetectorInternalState.Builder()
+                .lastUpdateTime(Instant.ofEpochMilli(1))
+                .error("blah");
+            listener.onResponse(TestHelpers.createGetResponse(result.build(), detectorId, DetectorInternalState.DETECTOR_STATE_INDEX));
+            return null;
+        }).when(client).get(any(), any());
+
+        detectorStateHandler.saveError(error, detectorId);
+
+        verify(indexUtils, times(1)).checkIndicesBlocked(any(), any(), any());
+    }
+}
diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtilsTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtilsTests.java index fc250f3f..d9386da4 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtilsTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtilsTests.java @@ -21,6 +21,7 @@ import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.threadpool.ThreadPool; @@ -33,6 +34,8 @@ public class IndexUtilsTests extends ESIntegTestCase { private ClientUtil clientUtil; + private IndexNameExpressionResolver indexNameResolver; + @Before public void setup() { Client client = client(); @@ -40,11 +43,12 @@ public void setup() { Throttler throttler = new Throttler(clock); ThreadPool context = TestHelpers.createThreadPool(); clientUtil = new ClientUtil(Settings.EMPTY, client, throttler, context); + indexNameResolver = mock(IndexNameExpressionResolver.class); } @Test public void testGetIndexHealth_NoIndex() { - IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService()); + IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService(), indexNameResolver); String output = indexUtils.getIndexHealthStatus("test"); assertEquals(IndexUtils.NONEXISTENT_INDEX_STATUS, output); } @@ -54,7 +58,7 @@ public void
testGetIndexHealth_Index() { String indexName = "test-2"; createIndex(indexName); flush(); - IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService()); + IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService(), indexNameResolver); String status = indexUtils.getIndexHealthStatus(indexName); assertTrue(status.equals("green") || status.equals("yellow")); } @@ -67,14 +71,14 @@ public void testGetIndexHealth_Alias() { flush(); AcknowledgedResponse response = client().admin().indices().prepareAliases().addAlias(indexName, aliasName).execute().actionGet(); assertTrue(response.isAcknowledged()); - IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService()); + IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService(), indexNameResolver); String status = indexUtils.getIndexHealthStatus(aliasName); assertTrue(status.equals("green") || status.equals("yellow")); } @Test public void testGetNumberOfDocumentsInIndex_NonExistentIndex() { - IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService()); + IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService(), indexNameResolver); assertEquals((Long) 0L, indexUtils.getNumberOfDocumentsInIndex("index")); } @@ -89,7 +93,7 @@ public void testGetNumberOfDocumentsInIndex_RegularIndex() { index(indexName, "_doc", String.valueOf(i), "{}"); } flushAndRefresh(indexName); - IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService()); + IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService(), indexNameResolver); assertEquals((Long) count, indexUtils.getNumberOfDocumentsInIndex(indexName)); } } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListenerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListenerTests.java new file mode 100644 index 00000000..7bd905ca --- /dev/null +++ 
b/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListenerTests.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * or in the "license" file accompanying this file. This file is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+package com.amazon.opendistroforelasticsearch.ad.util;
+
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.test.ESTestCase;
+
+import com.amazon.opendistroforelasticsearch.ad.model.DetectorProfile;
+
+/** Unit tests for {@link MultiResponsesDelegateActionListener}. */
+public class MultiResponsesDelegateActionListenerTests extends ESTestCase {
+
+    /**
+     * Delivers two {@code null} responses to a listener constructed with the count 2; the wrapped
+     * listener must then be failed with a message containing {@code NO_RESPONSE}.
+     */
+    public void testEmptyResponse() throws InterruptedException {
+        final CountDownLatch inProgressLatch = new CountDownLatch(1);
+        ActionListener<DetectorProfile> actualListener = ActionListener.wrap(response -> {
+            fail("Should not reach here");
+        }, exception -> {
+            String exceptionMsg = exception.getMessage();
+            assertTrue(exceptionMsg, exceptionMsg.contains(MultiResponsesDelegateActionListener.NO_RESPONSE));
+            inProgressLatch.countDown();
+        });
+
+        MultiResponsesDelegateActionListener<DetectorProfile> multiListener = new MultiResponsesDelegateActionListener<>(
+            actualListener,
+            2,
+            "blah"
+        );
+        multiListener.onResponse(null);
+        multiListener.onResponse(null);
+        assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS));
+    }
+}
diff --git a/src/test/java/test/com/amazon/opendistroforelasticsearch/ad/util/FakeNode.java
b/src/test/java/test/com/amazon/opendistroforelasticsearch/ad/util/FakeNode.java index 7e0fe2b1..1dffbeda 100644 --- a/src/test/java/test/com/amazon/opendistroforelasticsearch/ad/util/FakeNode.java +++ b/src/test/java/test/com/amazon/opendistroforelasticsearch/ad/util/FakeNode.java @@ -23,7 +23,9 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.function.Function; import org.apache.lucene.util.SetOnce; @@ -40,16 +42,19 @@ import org.elasticsearch.common.network.NetworkService; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.BoundTransportAddress; +import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.util.PageCacheRecycler; import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; import org.elasticsearch.tasks.TaskManager; +import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.tasks.MockTaskManager; import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportInterceptor; import org.elasticsearch.transport.TransportService; import org.elasticsearch.transport.nio.MockNioTransport; public class FakeNode implements Releasable { - public FakeNode(String name, ThreadPool threadPool, Settings settings) { + public FakeNode(String name, ThreadPool threadPool, Settings settings, TransportInterceptor transportInterceptor) { final Function boundTransportAddressDiscoveryNodeFunction = address -> { discoveryNode.set(new DiscoveryNode(name, address.publishAddress(), emptyMap(), emptySet(), Version.CURRENT)); return discoveryNode.get(); @@ -64,9 +69,14 @@ public FakeNode(String name, ThreadPool threadPool, Settings settings) { PageCacheRecycler.NON_RECYCLING_INSTANCE, new NamedWriteableRegistry(ClusterModule.getNamedWriteables()), new NoneCircuitBreakerService() - ), + ) { + @Override + public 
TransportAddress[] addressesFromString(String address) { + return new TransportAddress[] { dns.getOrDefault(address, ESTestCase.buildNewFakeTransportAddress()) }; + } + }, threadPool, - TransportService.NOOP_TRANSPORT_INTERCEPTOR, + transportInterceptor, boundTransportAddressDiscoveryNodeFunction, null, Collections.emptySet() @@ -80,6 +90,7 @@ protected TaskManager createTaskManager(Settings settings, ThreadPool threadPool } } }; + transportService.start(); clusterService = createClusterService(threadPool, discoveryNode.get()); clusterService.addStateApplier(transportService.getTaskManager()); @@ -89,11 +100,16 @@ protected TaskManager createTaskManager(Settings settings, ThreadPool threadPool transportService.acceptIncomingRequests(); } + public FakeNode(String name, ThreadPool threadPool, Settings settings) { + this(name, threadPool, settings, TransportService.NOOP_TRANSPORT_INTERCEPTOR); + } + public final ClusterService clusterService; public final TransportService transportService; private final SetOnce discoveryNode = new SetOnce<>(); public final TransportListTasksAction transportListTasksAction; public final TransportCancelTasksAction transportCancelTasksAction; + private final Map dns = new ConcurrentHashMap<>(); @Override public void close() {