From e33265c74543911afd775e26607a3b3b3563939b Mon Sep 17 00:00:00 2001 From: Kaituo Li Date: Fri, 12 Jun 2020 12:55:24 -0700 Subject: [PATCH] Adds initialization progress to profile API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR adds init_progress to the profile API. init_progress helps users track the initialization percentage, the number of shingles still needed, and the estimated time to go, assuming the future data stream is continuous (no missing data). The initialization percentage measures how close the RCF models are to emitting scores.  The implementation fetches the RCF model's total updates while the AD job is running and materializes the value to the newly added index .opendistro-anomaly-info. Total updates records the number of times this RCF model has been updated.  Initialization percent is computed as x/128:  * if total updates > 128, x = 128. Otherwise, x is the total updates  * 128 is RCF's "output after" number: after observing 128 samples, RCF starts emitting scores. Needed shingles are computed as 128 - x.  Estimated minutes to go is computed as needed shingles * detector interval. This PR also materializes the error message of the most recent run to speed up the profile API's error fetching. During each AD execution, we also check if a checkpoint is there (the result is saved and maintained as other AD states), if yes, we cold start immediately. Testing done: 1. adds unit tests 2. runs e2e tests to verify that the init_progress numbers make sense. 
--- .../ad/AnomalyDetectorJobRunner.java | 18 +- .../ad/AnomalyDetectorPlugin.java | 62 ++- .../ad/AnomalyDetectorProfileRunner.java | 491 +++++++++--------- .../ad/constant/CommonName.java | 1 + .../ad/indices/AnomalyDetectionIndices.java | 36 +- .../ad/ml/ModelManager.java | 30 +- .../ad/ml/RcfResult.java | 19 +- .../ad/ml/rcf/CombinedRcfResult.java | 20 +- .../ad/model/AnomalyDetectorJob.java | 9 + .../ad/model/DetectorInternalState.java | 186 +++++++ .../ad/model/DetectorProfile.java | 142 ++++- .../ad/model/InitProgressProfile.java | 147 ++++++ .../ad/model/ModelProfile.java | 12 + .../ad/model/ProfileName.java | 5 +- .../ad/rest/RestAnomalyDetectorJobAction.java | 9 - .../rest/RestDeleteAnomalyDetectorAction.java | 33 +- .../IndexAnomalyDetectorActionHandler.java | 2 + .../IndexAnomalyDetectorJobActionHandler.java | 9 - .../ad/settings/AnomalyDetectorSettings.java | 1 + .../ad/transport/ADStateManager.java | 54 +- .../AnomalyResultTransportAction.java | 32 +- .../ad/transport/RCFPollingAction.java | 28 + .../ad/transport/RCFPollingRequest.java | 72 +++ .../ad/transport/RCFPollingResponse.java | 57 ++ .../transport/RCFPollingTransportAction.java | 135 +++++ .../ad/transport/RCFResultResponse.java | 12 +- .../transport/RCFResultTransportAction.java | 9 +- .../handler/AnomalyIndexHandler.java | 188 +++++++ .../handler/AnomalyResultHandler.java | 204 -------- .../handler/DetectorStateHandler.java | 175 +++++++ .../ad/util/IndexUtils.java | 30 +- .../MultiResponsesDelegateActionListener.java | 4 +- .../ad/util/ThrowingConsumer.java | 27 + .../ad/util/ThrowingConsumerWrapper.java | 41 ++ .../resources/mappings/anomaly-state.json | 21 + .../ad/AbstractADTest.java | 49 +- .../ad/AnomalyDetectorJobRunnerTests.java | 59 ++- .../ad/AnomalyDetectorProfileRunnerTests.java | 369 +++++++------ .../ad/TestHelpers.java | 44 +- .../ad/ml/ModelManagerTests.java | 30 +- .../ad/ml/RcfResultTests.java | 21 +- .../ad/ml/rcf/CombinedRcfResultTests.java | 19 +- 
.../ad/transport/ADStateManagerTests.java | 68 +++ .../ADStatsNodesTransportActionTests.java | 10 +- .../ad/transport/AnomalyResultTests.java | 100 +++- .../ad/transport/RCFResultTests.java | 8 +- .../handler/AnomalyResultHandlerTests.java | 121 +++-- .../handler/DetectorInfoHandlerTests.java | 65 +++ .../ad/util/IndexUtilsTests.java | 14 +- ...iResponsesDelegateActionListenerTests.java | 48 ++ 50 files changed, 2494 insertions(+), 852 deletions(-) create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorInternalState.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/model/InitProgressProfile.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingAction.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingRequest.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingResponse.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTransportAction.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyIndexHandler.java delete mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandler.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectorStateHandler.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumer.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumerWrapper.java create mode 100644 src/main/resources/mappings/anomaly-state.json create mode 100644 src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectorInfoHandlerTests.java create mode 100644 src/test/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListenerTests.java diff --git 
a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunner.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunner.java index 05099c48..12d68ff4 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunner.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunner.java @@ -56,7 +56,8 @@ import com.amazon.opendistroforelasticsearch.ad.transport.AnomalyResultRequest; import com.amazon.opendistroforelasticsearch.ad.transport.AnomalyResultResponse; import com.amazon.opendistroforelasticsearch.ad.transport.AnomalyResultTransportAction; -import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyResultHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyIndexHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.DetectorStateHandler; import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.JobExecutionContext; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.LockModel; @@ -77,8 +78,9 @@ public class AnomalyDetectorJobRunner implements ScheduledJobRunner { private Client client; private ClientUtil clientUtil; private ThreadPool threadPool; - private AnomalyResultHandler anomalyResultHandler; + private AnomalyIndexHandler anomalyResultHandler; private ConcurrentHashMap detectorEndRunExceptionCount; + private DetectorStateHandler detectorStateHandler; public static AnomalyDetectorJobRunner getJobRunnerInstance() { if (INSTANCE != null) { @@ -110,7 +112,7 @@ public void setThreadPool(ThreadPool threadPool) { this.threadPool = threadPool; } - public void setAnomalyResultHandler(AnomalyResultHandler anomalyResultHandler) { + public void setAnomalyResultHandler(AnomalyIndexHandler anomalyResultHandler) { this.anomalyResultHandler = anomalyResultHandler; } @@ -119,6 +121,10 @@ public void setSettings(Settings 
settings) { this.maxRetryForEndRunException = AnomalyDetectorSettings.MAX_RETRY_FOR_END_RUN_EXCEPTION.get(settings); } + public void setDetectorStateHandler(DetectorStateHandler detectorStateHandler) { + this.detectorStateHandler = detectorStateHandler; + } + @Override public void runJob(ScheduledJobParameter jobParameter, JobExecutionContext context) { String detectorId = jobParameter.getName(); @@ -436,7 +442,8 @@ private void indexAnomalyResult( Instant.now(), response.getError() ); - anomalyResultHandler.indexAnomalyResult(anomalyResult); + anomalyResultHandler.index(anomalyResult, detectorId); + detectorStateHandler.saveError(response.getError(), detectorId); } catch (Exception e) { log.error("Failed to index anomaly result for " + detectorId, e); } finally { @@ -490,7 +497,8 @@ private void indexAnomalyResultException( Instant.now(), errorMessage ); - anomalyResultHandler.indexAnomalyResult(anomalyResult); + anomalyResultHandler.index(anomalyResult, detectorId); + detectorStateHandler.saveError(errorMessage, detectorId); } catch (Exception e) { log.error("Failed to index anomaly result for " + detectorId, e); } finally { diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorPlugin.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorPlugin.java index c22e7da8..a8123039 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorPlugin.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorPlugin.java @@ -21,12 +21,10 @@ import java.security.PrivilegedAction; import java.time.Clock; import java.util.Arrays; -import java.util.Calendar; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.TimeZone; import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -83,6 +81,7 @@ import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import 
com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.amazon.opendistroforelasticsearch.ad.rest.RestAnomalyDetectorJobAction; import com.amazon.opendistroforelasticsearch.ad.rest.RestDeleteAnomalyDetectorAction; import com.amazon.opendistroforelasticsearch.ad.rest.RestExecuteAnomalyDetectorAction; @@ -111,18 +110,22 @@ import com.amazon.opendistroforelasticsearch.ad.transport.DeleteModelTransportAction; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileAction; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileTransportAction; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingAction; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingTransportAction; import com.amazon.opendistroforelasticsearch.ad.transport.RCFResultAction; import com.amazon.opendistroforelasticsearch.ad.transport.RCFResultTransportAction; import com.amazon.opendistroforelasticsearch.ad.transport.StopDetectorAction; import com.amazon.opendistroforelasticsearch.ad.transport.StopDetectorTransportAction; import com.amazon.opendistroforelasticsearch.ad.transport.ThresholdResultAction; import com.amazon.opendistroforelasticsearch.ad.transport.ThresholdResultTransportAction; -import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyResultHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyIndexHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.DetectorStateHandler; import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; import com.amazon.opendistroforelasticsearch.ad.util.ColdStartRunner; import com.amazon.opendistroforelasticsearch.ad.util.DiscoveryNodeFilterer; import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils; import com.amazon.opendistroforelasticsearch.ad.util.Throttler; 
+import com.amazon.opendistroforelasticsearch.ad.util.ThrowingConsumerWrapper; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.JobSchedulerExtension; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.ScheduledJobParser; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.ScheduledJobRunner; @@ -150,6 +153,8 @@ public class AnomalyDetectorPlugin extends Plugin implements ActionPlugin, Scrip private NamedXContentRegistry xContentRegistry; private ClientUtil clientUtil; private DiscoveryNodeFilterer nodeFilter; + private IndexUtils indexUtils; + private DetectorStateHandler detectorStateHandler; static { SpecialPermission.check(); @@ -170,28 +175,34 @@ public List getRestHandlers( IndexNameExpressionResolver indexNameExpressionResolver, Supplier nodesInCluster ) { - AnomalyResultHandler anomalyResultHandler = new AnomalyResultHandler( + + AnomalyIndexHandler anomalyResultHandler; + anomalyResultHandler = new AnomalyIndexHandler( client, settings, - clusterService, - indexNameExpressionResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + this.clientUtil, + this.indexUtils, + clusterService ); + AnomalyDetectorJobRunner jobRunner = AnomalyDetectorJobRunner.getJobRunnerInstance(); jobRunner.setClient(client); jobRunner.setClientUtil(clientUtil); jobRunner.setThreadPool(threadPool); jobRunner.setAnomalyResultHandler(anomalyResultHandler); + jobRunner.setDetectorStateHandler(detectorStateHandler); jobRunner.setSettings(settings); AnomalyDetectorProfileRunner profileRunner = new AnomalyDetectorProfileRunner( client, this.xContentRegistry, this.nodeFilter, - indexNameExpressionResolver, - clusterService, - Calendar.getInstance(TimeZone.getTimeZone("UTC")) + AnomalyDetectorSettings.NUM_MIN_SAMPLES ); 
RestGetAnomalyDetectorAction restGetAnomalyDetectorAction = new RestGetAnomalyDetectorAction(profileRunner); RestIndexAnomalyDetectorAction restIndexAnomalyDetectorAction = new RestIndexAnomalyDetectorAction( @@ -257,7 +268,7 @@ public Collection createComponents( Clock clock = Clock.systemUTC(); Throttler throttler = new Throttler(clock); this.clientUtil = new ClientUtil(settings, client, throttler, threadPool); - IndexUtils indexUtils = new IndexUtils(client, clientUtil, clusterService); + this.indexUtils = new IndexUtils(client, clientUtil, clusterService, indexNameExpressionResolver); anomalyDetectionIndices = new AnomalyDetectionIndices(client, clusterService, threadPool, settings); this.clusterService = clusterService; this.xContentRegistry = xContentRegistry; @@ -347,6 +358,16 @@ public Collection createComponents( adStats = new ADStats(indexUtils, modelManager, stats); ADCircuitBreakerService adCircuitBreakerService = new ADCircuitBreakerService(jvmService).init(); + this.detectorStateHandler = new DetectorStateHandler( + client, + settings, + threadPool, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initDetectorStateIndex), + anomalyDetectionIndices::doesDetectorStateIndexExist, + this.clientUtil, + this.indexUtils, + clusterService + ); return ImmutableList .of( @@ -367,7 +388,8 @@ public Collection createComponents( adCircuitBreakerService, adStats, new MasterEventListener(clusterService, threadPool, client, clock, clientUtil, nodeFilter), - nodeFilter + nodeFilter, + detectorStateHandler ); } @@ -411,7 +433,13 @@ public List> getSettings() { @Override public List getNamedXContent() { - return ImmutableList.of(AnomalyDetector.XCONTENT_REGISTRY, AnomalyResult.XCONTENT_REGISTRY); + return ImmutableList + .of( + AnomalyDetector.XCONTENT_REGISTRY, + AnomalyResult.XCONTENT_REGISTRY, + DetectorInternalState.XCONTENT_REGISTRY, + AnomalyDetectorJob.XCONTENT_REGISTRY + ); } /* @@ -428,7 +456,8 @@ public List getNamedXContent() { new 
ActionHandler<>(AnomalyResultAction.INSTANCE, AnomalyResultTransportAction.class), new ActionHandler<>(CronAction.INSTANCE, CronTransportAction.class), new ActionHandler<>(ADStatsNodesAction.INSTANCE, ADStatsNodesTransportAction.class), - new ActionHandler<>(ProfileAction.INSTANCE, ProfileTransportAction.class) + new ActionHandler<>(ProfileAction.INSTANCE, ProfileTransportAction.class), + new ActionHandler<>(RCFPollingAction.INSTANCE, RCFPollingTransportAction.class) ); } @@ -464,7 +493,8 @@ public Collection getSystemIndexDescriptors(Settings sett new SystemIndexDescriptor(AnomalyDetectionIndices.ALL_AD_RESULTS_INDEX_PATTERN, "anomaly result"), new SystemIndexDescriptor(AnomalyDetector.ANOMALY_DETECTORS_INDEX, "detector definition"), new SystemIndexDescriptor(AnomalyDetectorJob.ANOMALY_DETECTOR_JOB_INDEX, "detector job"), - new SystemIndexDescriptor(CommonName.CHECKPOINT_INDEX_NAME, "model checkpoint") + new SystemIndexDescriptor(CommonName.CHECKPOINT_INDEX_NAME, "model checkpoint"), + new SystemIndexDescriptor(DetectorInternalState.DETECTOR_STATE_INDEX, "detector information like total rcf updates") ) ); } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunner.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunner.java index 943a1a4f..3352ae60 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunner.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunner.java @@ -20,53 +20,37 @@ import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; import java.io.IOException; -import java.time.Instant; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.List; -import java.util.Map; import java.util.Set; -import java.util.TimeZone; -import java.util.TreeMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import org.apache.logging.log4j.LogManager; 
import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.get.GetRequest; import org.elasticsearch.action.get.GetResponse; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.action.support.IndicesOptions; import org.elasticsearch.client.Client; -import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContentParseException; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.IndexNotFoundException; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.index.query.RangeQueryBuilder; -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.SearchHits; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.elasticsearch.search.sort.FieldSortBuilder; -import org.elasticsearch.search.sort.SortOrder; - -import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; + +import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; -import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.amazon.opendistroforelasticsearch.ad.model.DetectorProfile; import com.amazon.opendistroforelasticsearch.ad.model.DetectorState; +import 
com.amazon.opendistroforelasticsearch.ad.model.InitProgressProfile; +import com.amazon.opendistroforelasticsearch.ad.model.IntervalTimeConfiguration; import com.amazon.opendistroforelasticsearch.ad.model.ProfileName; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileAction; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileRequest; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileResponse; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingAction; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingRequest; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingResponse; import com.amazon.opendistroforelasticsearch.ad.util.DiscoveryNodeFilterer; import com.amazon.opendistroforelasticsearch.ad.util.MultiResponsesDelegateActionListener; @@ -75,31 +59,25 @@ public class AnomalyDetectorProfileRunner { private Client client; private NamedXContentRegistry xContentRegistry; private DiscoveryNodeFilterer nodeFilter; - private final IndexNameExpressionResolver indexNameExpressionResolver; static String FAIL_TO_FIND_DETECTOR_MSG = "Fail to find detector with id: "; static String FAIL_TO_GET_PROFILE_MSG = "Fail to get profile for detector "; - private final ClusterService clusterService; - private Calendar calendar; + private long requiredSamples; public AnomalyDetectorProfileRunner( Client client, NamedXContentRegistry xContentRegistry, DiscoveryNodeFilterer nodeFilter, - IndexNameExpressionResolver indexNameExpressionResolver, - ClusterService clusterService, - Calendar calendar + long requiredSamples ) { this.client = client; this.xContentRegistry = xContentRegistry; this.nodeFilter = nodeFilter; - this.indexNameExpressionResolver = indexNameExpressionResolver; - this.clusterService = clusterService; - this.calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + this.requiredSamples = requiredSamples; } - public void profile(String detectorId, ActionListener listener, Set 
profiles) { + public void profile(String detectorId, ActionListener listener, Set profilesToCollect) { - if (profiles.isEmpty()) { + if (profilesToCollect.isEmpty()) { listener.onFailure(new RuntimeException("Unsupported profile types.")); return; } @@ -108,18 +86,22 @@ public void profile(String detectorId, ActionListener listener, // and return to users int totalListener = 0; - if (profiles.contains(ProfileName.STATE)) { + if (profilesToCollect.contains(ProfileName.STATE)) { + totalListener++; + } + + if (profilesToCollect.contains(ProfileName.ERROR)) { totalListener++; } - if (profiles.contains(ProfileName.ERROR)) { + if (profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { totalListener++; } - if (profiles.contains(ProfileName.COORDINATING_NODE) - || profiles.contains(ProfileName.SHINGLE_SIZE) - || profiles.contains(ProfileName.TOTAL_SIZE_IN_BYTES) - || profiles.contains(ProfileName.MODELS)) { + if (profilesToCollect.contains(ProfileName.COORDINATING_NODE) + || profilesToCollect.contains(ProfileName.SHINGLE_SIZE) + || profilesToCollect.contains(ProfileName.TOTAL_SIZE_IN_BYTES) + || profilesToCollect.contains(ProfileName.MODELS)) { totalListener++; } @@ -129,13 +111,13 @@ public void profile(String detectorId, ActionListener listener, "Fail to fetch profile for " + detectorId ); - prepareProfile(detectorId, delegateListener, profiles); + prepareProfile(detectorId, delegateListener, profilesToCollect); } private void prepareProfile( String detectorId, MultiResponsesDelegateActionListener listener, - Set profiles + Set profilesToCollect ) { GetRequest getRequest = new GetRequest(ANOMALY_DETECTOR_JOB_INDEX, detectorId); client.get(getRequest, ActionListener.wrap(getResponse -> { @@ -149,18 +131,21 @@ private void prepareProfile( AnomalyDetectorJob job = AnomalyDetectorJob.parse(parser); long enabledTimeMs = job.getEnabledTime().toEpochMilli(); - if (profiles.contains(ProfileName.STATE)) { - profileState(detectorId, enabledTimeMs, listener, job.isEnabled()); + 
if (profilesToCollect.contains(ProfileName.ERROR)) { + GetRequest getErrorRequest = new GetRequest(DetectorInternalState.DETECTOR_STATE_INDEX, detectorId); + client.get(getErrorRequest, onGetError(listener, detectorId)); } - if (profiles.contains(ProfileName.ERROR)) { - profileError(detectorId, enabledTimeMs, job.getDisabledTime(), listener); + + if (profilesToCollect.contains(ProfileName.STATE) + || profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { + profileStateRelated(detectorId, enabledTimeMs, listener, job.isEnabled(), profilesToCollect); } - if (profiles.contains(ProfileName.COORDINATING_NODE) - || profiles.contains(ProfileName.SHINGLE_SIZE) - || profiles.contains(ProfileName.TOTAL_SIZE_IN_BYTES) - || profiles.contains(ProfileName.MODELS)) { - profileModels(detectorId, profiles, listener); + if (profilesToCollect.contains(ProfileName.COORDINATING_NODE) + || profilesToCollect.contains(ProfileName.SHINGLE_SIZE) + || profilesToCollect.contains(ProfileName.TOTAL_SIZE_IN_BYTES) + || profilesToCollect.contains(ProfileName.MODELS)) { + profileModels(detectorId, profilesToCollect, listener); } } catch (IOException | XContentParseException | NullPointerException e) { logger.error(e); @@ -168,13 +153,13 @@ private void prepareProfile( } } else { GetRequest getDetectorRequest = new GetRequest(ANOMALY_DETECTORS_INDEX, detectorId); - client.get(getDetectorRequest, onGetDetectorResponse(listener, detectorId, profiles)); + client.get(getDetectorRequest, onGetDetectorForPrepare(listener, detectorId, profilesToCollect)); } }, exception -> { if (exception instanceof IndexNotFoundException) { logger.info(exception.getMessage()); GetRequest getDetectorRequest = new GetRequest(ANOMALY_DETECTORS_INDEX, detectorId); - client.get(getDetectorRequest, onGetDetectorResponse(listener, detectorId, profiles)); + client.get(getDetectorRequest, onGetDetectorForPrepare(listener, detectorId, profilesToCollect)); } else { logger.error(FAIL_TO_GET_PROFILE_MSG + detectorId); 
listener.onFailure(exception); @@ -182,18 +167,18 @@ private void prepareProfile( })); } - private ActionListener onGetDetectorResponse( + private ActionListener onGetDetectorForPrepare( MultiResponsesDelegateActionListener listener, String detectorId, Set profiles ) { return ActionListener.wrap(getResponse -> { if (getResponse != null && getResponse.isExists()) { - DetectorProfile profile = new DetectorProfile(); + DetectorProfile.Builder profileBuilder = new DetectorProfile.Builder(); if (profiles.contains(ProfileName.STATE)) { - profile.setState(DetectorState.DISABLED); + profileBuilder.state(DetectorState.DISABLED); } - listener.respondImmediately(profile); + listener.respondImmediately(profileBuilder.build()); } else { listener.failImmediately(FAIL_TO_FIND_DETECTOR_MSG + detectorId); } @@ -203,242 +188,209 @@ private ActionListener onGetDetectorResponse( /** * We expect three kinds of states: * -Disabled: if get ad job api says the job is disabled; - * -Init: if anomaly score after the last update time of the detector is larger than 0 + * -Init: if rcf model's total updates is less than required * -Running: if neither of the above applies and no exceptions. 
* @param detectorId detector id * @param enabledTime the time when AD job is enabled in milliseconds * @param listener listener to process the returned state or exception * @param enabled whether the detector job is enabled or not + * @param profilesToCollect target profiles to fetch */ - private void profileState( + private void profileStateRelated( String detectorId, long enabledTime, MultiResponsesDelegateActionListener listener, - boolean enabled + boolean enabled, + Set profilesToCollect ) { if (enabled) { - SearchRequest searchLatestResult = createInittedEverRequest(detectorId, enabledTime); - client.search(searchLatestResult, onInittedEver(listener, detectorId, enabledTime)); - } else { - DetectorProfile profile = new DetectorProfile(); - profile.setState(DetectorState.DISABLED); - listener.onResponse(profile); + RCFPollingRequest request = new RCFPollingRequest(detectorId); + client.execute(RCFPollingAction.INSTANCE, request, onPollRCFUpdates(detectorId, profilesToCollect, listener, enabledTime)); + } else if (profilesToCollect.contains(ProfileName.STATE)) { + listener.onResponse(new DetectorProfile.Builder().state(DetectorState.DISABLED).build()); } } - private ActionListener onInittedEver( + /** + * Action listener for a detector in running or init state + * @param listener listener to consolidate results and return a final response + * @param detectorId detector id + * @param enabledTimeMs AD job enabled time + * @param profilesToCollect target profiles to fetch + * @return the listener for a detector in running or init state + */ + private ActionListener onGetEnabledDetectorState( MultiResponsesDelegateActionListener listener, String detectorId, - long lastUpdateTimeMs + long enabledTimeMs, + Set profilesToCollect ) { - return ActionListener.wrap(searchResponse -> { - SearchHits hits = searchResponse.getHits(); - DetectorProfile profile = new DetectorProfile(); - if (hits.getHits().length == 0L) { - profile.setState(DetectorState.INIT); + return 
ActionListener.wrap(getResponse -> { + if (getResponse != null && getResponse.isExists()) { + try ( + XContentParser parser = XContentType.JSON + .xContent() + .createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, getResponse.getSourceAsString()) + ) { + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser::getTokenLocation); + DetectorInternalState detectorState = DetectorInternalState.parse(parser); + long stateLastUpdateTimeMs = detectorState.getLastUpdateTime().toEpochMilli(); + if (stateLastUpdateTimeMs < enabledTimeMs) { + // state index hasn't been updated yet + respondEmptyInitProfile(listener, profilesToCollect); + } else { + long totalUpdates = detectorState.getRcfUpdates(); + if (totalUpdates < requiredSamples) { + if (profilesToCollect.contains(ProfileName.STATE)) { + listener.onResponse(new DetectorProfile.Builder().state(DetectorState.INIT).build()); + } + + if (profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { + if (totalUpdates < 0) { + // no totalUpdates record in the detector state index + listener.onResponse(getEmptyInitProfile(ProfileName.INIT_PROGRESS)); + } else { + GetRequest getDetectorRequest = new GetRequest(ANOMALY_DETECTORS_INDEX, detectorId); + client.get( + getDetectorRequest, + onGetDetectorForInitProgress(listener, detectorId, profilesToCollect, totalUpdates, requiredSamples) + ); + } + } + + } else { + if (profilesToCollect.contains(ProfileName.STATE)) { + listener.onResponse(new DetectorProfile.Builder().state(DetectorState.RUNNING).build()); + } + if (profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { + InitProgressProfile initProgress = new InitProgressProfile("100%", 0, 0); + listener.onResponse(new DetectorProfile.Builder().initProgress(initProgress).build()); + } + } + + } + } catch (IOException | XContentParseException | NullPointerException e) { + logger.error(e); + listener.failImmediately(FAIL_TO_GET_PROFILE_MSG, e); + } } else { - 
profile.setState(DetectorState.RUNNING); + // detector state for this detector does not exist + respondEmptyInitProfile(listener, profilesToCollect); } - - listener.onResponse(profile); - }, exception -> { if (exception instanceof IndexNotFoundException) { - DetectorProfile profile = new DetectorProfile(); - // anomaly result index is not created yet - profile.setState(DetectorState.INIT); - listener.onResponse(profile); + // detector state index is not created yet + respondEmptyInitProfile(listener, profilesToCollect); } else { - logger - .error( - "Fail to find any anomaly result with anomaly score larger than 0 after AD job enabled time for detector {}", - detectorId - ); - listener.onFailure(new RuntimeException("Fail to find detector state: " + detectorId, exception)); + listener.failImmediately(String.format("Fail to find any detector state for detector {}", detectorId), exception); } }); } /** - * Precondition: - * 1. Index are rotated with name pattern ".opendistro-anomaly-results-history-{now/d}-1" and now is using UTC. - * 2. Latest entry with error is recorded within enabled and disabled time. Note disabled time can be null. - * - * Error is populated if error of the latest anomaly result is not empty. - * - * Two optimization to avoid scanning all anomaly result indices to get a detector's most recent error - * - * First, when a detector is running, we only need to scan the current index, not all of the rolled over ones - * since we are interested in the latest error. - * Second, when a detector is disabled, we only need to scan the latest anomaly result indices created before the - * detector's enable time. 
- * + * Action listener for getting error in internal state index + * @param listener listener to consolidate results and return a final response * @param detectorId detector id - * @param enabledTimeMillis the time when AD job is enabled in milliseconds - * @param listener listener to process the returned error or exception + * @return the listener for a detector in disabled state */ - private void profileError( - String detectorId, - long enabledTimeMillis, - Instant disabledTime, - MultiResponsesDelegateActionListener listener + private ActionListener onGetError( + MultiResponsesDelegateActionListener listener, + String detectorId ) { - String[] latestIndex = null; - - long disabledTimeMillis = 0; - if (disabledTime != null) { - disabledTimeMillis = disabledTime.toEpochMilli(); - } - if (enabledTimeMillis > disabledTimeMillis) { - // detector is still running - latestIndex = new String[1]; - latestIndex[0] = AnomalyResult.ANOMALY_RESULT_INDEX; - } else { - String[] concreteIndices = indexNameExpressionResolver - .concreteIndexNames( - clusterService.state(), - IndicesOptions.lenientExpandOpen(), - AnomalyDetectionIndices.ALL_AD_RESULTS_INDEX_PATTERN - ); - - // find the latest from result indices such as .opendistro-anomaly-results-history-2020.04.06-1 and - // /.opendistro-anomaly-results-history-2020.04.07-000002 - long maxTimestamp = -1; - TreeMap> candidateIndices = new TreeMap<>(); - for (String indexName : concreteIndices) { - Matcher m = Pattern.compile("\\.opendistro-anomaly-results-history-(\\d{4})\\.(\\d{2})\\.(\\d{2})-\\d+").matcher(indexName); - if (m.matches()) { - int year = Integer.parseInt(m.group(1)); - int month = Integer.parseInt(m.group(2)); - int date = Integer.parseInt(m.group(3)); - // month starts with 0 - calendar.clear(); - calendar.set(year, month - 1, date); - // 2020.05.08 is translated to 1588896000000 - long timestamp = calendar.getTimeInMillis(); - - // a candidate index can be created before or after enabled time, but the index 
is definitely created before disabled - // time - if (timestamp <= disabledTimeMillis && maxTimestamp <= timestamp) { - maxTimestamp = timestamp; - // we can have two rotations on the same day and we don't know which one has our data, so we keep all - List indexList = candidateIndices.computeIfAbsent(timestamp, k -> new ArrayList()); - indexList.add(indexName); - } - } - } - List candidates = new ArrayList(); - List latestCandidate = candidateIndices.get(maxTimestamp); - - if (latestCandidate != null) { - candidates.addAll(latestCandidate); - } - - // look back one more index for an edge case: - // Suppose detector interval is 1 minute. Detector last run is at 2020-05-07, 11:59:50 PM, - // then AD result indices rolled over as .opendistro-anomaly-results-history-2020.05.07-001 - // Detector next run will be 2020-05-08, 00:00:50 AM. If a user stop the detector at - // 2020-05-08 00:00:10 AM, detector will not have AD result on 2020-05-08. - // We check AD result indices one day earlier to make sure we can always get AD result. - Map.Entry> earlierCandidate = candidateIndices.lowerEntry(maxTimestamp); - if (earlierCandidate != null) { - candidates.addAll(earlierCandidate.getValue()); - } - latestIndex = candidates.toArray(new String[0]); - } - - if (latestIndex == null || latestIndex.length == 0) { - // no result index found: can be due to anomaly result is not created yet or result indices for the detector have been deleted. 
- listener.onResponse(new DetectorProfile()); - return; - } - SearchRequest searchLatestResult = createLatestAnomalyResultRequest(detectorId, enabledTimeMillis, disabledTimeMillis, latestIndex); - client.search(searchLatestResult, onGetLatestAnomalyResult(listener, detectorId)); - } - - private ActionListener onGetLatestAnomalyResult(ActionListener listener, String detectorId) { - return ActionListener.wrap(searchResponse -> { - SearchHits hits = searchResponse.getHits(); - if (hits.getHits().length == 0L) { - listener.onResponse(new DetectorProfile()); - } else { - SearchHit hit = hits.getAt(0); - + return ActionListener.wrap(getResponse -> { + DetectorProfile.Builder profileBuilder = new DetectorProfile.Builder(); + if (getResponse != null && getResponse.isExists()) { try ( XContentParser parser = XContentType.JSON .xContent() - .createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, hit.getSourceAsString()) + .createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, getResponse.getSourceAsString()) ) { ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser::getTokenLocation); - AnomalyResult result = parser.namedObject(AnomalyResult.class, AnomalyResult.PARSE_FIELD_NAME, null); - DetectorProfile profile = new DetectorProfile(); - if (result.getError() != null) { - profile.setError(result.getError()); + DetectorInternalState detectorState = DetectorInternalState.parse(parser); + + if (detectorState.getError() != null) { + profileBuilder.error(detectorState.getError()); } - listener.onResponse(profile); + + listener.onResponse(profileBuilder.build()); } catch (IOException | XContentParseException | NullPointerException e) { - logger.error("Fail to parse anomaly result with " + hit.toString()); - listener.onFailure(new RuntimeException("Fail to find detector error: " + detectorId, e)); + logger.error(e); + listener.failImmediately(FAIL_TO_GET_PROFILE_MSG, e); } + } else { + // detector state for this detector does 
not exist + listener.onResponse(profileBuilder.build()); } }, exception -> { if (exception instanceof IndexNotFoundException) { - listener.onResponse(new DetectorProfile()); + // detector state index is not created yet + listener.onResponse(new DetectorProfile.Builder().build()); } else { - logger.error("Fail to find any anomaly result after AD job enabled time for detector {}", detectorId); - listener.onFailure(new RuntimeException("Fail to find detector error: " + detectorId, exception)); + logger.error("Fail to find any detector state for detector {}", detectorId); + listener.onFailure(exception); } }); } - /** - * Create search request to check if we have at least 1 anomaly score larger than 0 after AD job enabled time - * @param detectorId detector id - * @param enabledTime the time when AD job is enabled in milliseconds - * @return the search request - */ - private SearchRequest createInittedEverRequest(String detectorId, long enabledTime) { - BoolQueryBuilder filterQuery = new BoolQueryBuilder(); - filterQuery.filter(QueryBuilders.termQuery(AnomalyResult.DETECTOR_ID_FIELD, detectorId)); - filterQuery.filter(QueryBuilders.rangeQuery(AnomalyResult.EXECUTION_END_TIME_FIELD).gte(enabledTime)); - filterQuery.filter(QueryBuilders.rangeQuery(AnomalyResult.ANOMALY_SCORE_FIELD).gt(0)); - - // I am only looking for last 1 occurrence and have no interest in the total number of documents that match the query. - // ES will not try to count the number of documents and will be able to terminate the query as soon as 1 document - // have been collected per segment. 
- SearchSourceBuilder source = new SearchSourceBuilder().query(filterQuery).size(1).trackTotalHits(false); - - SearchRequest request = new SearchRequest(AnomalyResult.ANOMALY_RESULT_INDEX); - request.source(source); - return request; + private DetectorProfile getEmptyInitProfile(ProfileName profileToCollect) { + DetectorProfile.Builder profile = new DetectorProfile.Builder(); + if (profileToCollect.equals(ProfileName.STATE)) { + profile.state(DetectorState.INIT); + } + if (profileToCollect.equals(ProfileName.INIT_PROGRESS)) { + InitProgressProfile initProgress = new InitProgressProfile("0%", 0, 0); + profile.initProgress(initProgress); + } + return profile.build(); } - /** - * Create search request to get the latest anomaly result after AD job enabled time - * @param detectorId detector id - * @param enabledTime the time when AD job is enabled in milliseconds - * @return the search request - */ - private SearchRequest createLatestAnomalyResultRequest(String detectorId, long enabledTime, long disabledTime, String[] index) { - BoolQueryBuilder filterQuery = new BoolQueryBuilder(); - filterQuery.filter(QueryBuilders.termQuery(AnomalyResult.DETECTOR_ID_FIELD, detectorId)); - RangeQueryBuilder rangeBuilder = QueryBuilders.rangeQuery(AnomalyResult.EXECUTION_END_TIME_FIELD).gte(enabledTime); - if (disabledTime >= enabledTime) { - rangeBuilder.lte(disabledTime); + private void respondEmptyInitProfile(MultiResponsesDelegateActionListener listener, + Set profilesToCollect) { + if (profilesToCollect.contains(ProfileName.STATE)) { + listener.onResponse(getEmptyInitProfile(ProfileName.STATE)); } - filterQuery.filter(rangeBuilder); - - FieldSortBuilder sortQuery = new FieldSortBuilder(AnomalyResult.EXECUTION_END_TIME_FIELD).order(SortOrder.DESC); + if (profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { + listener.onResponse(getEmptyInitProfile(ProfileName.INIT_PROGRESS)); + } + } - // I am only looking for last 1 occurrence and have no interest in the total number of 
documents that match the query. - // ES will not try to count the number of documents and will be able to terminate the query as soon as 1 document - // have been collected per segment. - SearchSourceBuilder source = new SearchSourceBuilder().query(filterQuery).size(1).sort(sortQuery).trackTotalHits(false); + private ActionListener onGetDetectorForInitProgress( + MultiResponsesDelegateActionListener listener, + String detectorId, + Set profilesToCollect, + long totalUpdates, + long requiredSamples + ) { + return ActionListener.wrap(getResponse -> { + if (getResponse != null && getResponse.isExists()) { + try ( + XContentParser parser = XContentType.JSON + .xContent() + .createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, getResponse.getSourceAsString()) + ) { + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser::getTokenLocation); + AnomalyDetector detector = AnomalyDetector.parse(parser, detectorId); + long intervalMins = ((IntervalTimeConfiguration) detector.getDetectionInterval()).toDuration().toMinutes(); + float percent = (100.0f * totalUpdates) / requiredSamples; + int neededPoints = (int) (requiredSamples - totalUpdates); + InitProgressProfile initProgress = new InitProgressProfile( + // rounding: 93.456 => 93%, 93.556 => 94% + String.format("%.0f%%", percent), + intervalMins * neededPoints, + neededPoints + ); - SearchRequest request = new SearchRequest(index); - request.source(source); - return request; + listener.onResponse(new DetectorProfile.Builder().initProgress(initProgress).build()); + } catch (Exception t) { + logger.error("Fail to parse detector {}", detectorId); + logger.error("Stack trace:", t); + listener.failImmediately(FAIL_TO_FIND_DETECTOR_MSG + detectorId, t); + } + } else { + listener.failImmediately(FAIL_TO_FIND_DETECTOR_MSG + detectorId); + } + }, exception -> { listener.failImmediately(FAIL_TO_FIND_DETECTOR_MSG + detectorId, exception); }); } private void profileModels( @@ -457,21 +409,62 @@ 
private ActionListener onModelResponse( MultiResponsesDelegateActionListener listener ) { return ActionListener.wrap(profileResponse -> { - DetectorProfile profile = new DetectorProfile(); + DetectorProfile.Builder profile = new DetectorProfile.Builder(); if (profiles.contains(ProfileName.COORDINATING_NODE)) { - profile.setCoordinatingNode(profileResponse.getCoordinatingNode()); + profile.coordinatingNode(profileResponse.getCoordinatingNode()); } if (profiles.contains(ProfileName.SHINGLE_SIZE)) { - profile.setShingleSize(profileResponse.getShingleSize()); + profile.shingleSize(profileResponse.getShingleSize()); } if (profiles.contains(ProfileName.TOTAL_SIZE_IN_BYTES)) { - profile.setTotalSizeInBytes(profileResponse.getTotalSizeInBytes()); + profile.totalSizeInBytes(profileResponse.getTotalSizeInBytes()); } if (profiles.contains(ProfileName.MODELS)) { - profile.setModelProfile(profileResponse.getModelProfile()); + profile.modelProfile(profileResponse.getModelProfile()); } - listener.onResponse(profile); + listener.onResponse(profile.build()); }, listener::onFailure); } + + private ActionListener onPollRCFUpdates( + String detectorId, + Set profilesToCollect, + MultiResponsesDelegateActionListener listener, + long enabledTime + ) { + return ActionListener.wrap(rcfPollResponse -> { + long totalUpdates = rcfPollResponse.getTotalUpdates(); + if (totalUpdates == 0) { + // no totalUpdates found maybe because either cold start hasn't started/finished or the first shingle + // is not ready to trigger model initialization among nodes + GetRequest getRequest = new GetRequest(DetectorInternalState.DETECTOR_STATE_INDEX, detectorId); + client.get(getRequest, onGetEnabledDetectorState(listener, detectorId, enabledTime, profilesToCollect)); + } + else if (totalUpdates < requiredSamples) { + if (profilesToCollect.contains(ProfileName.STATE)) { + listener.onResponse(new DetectorProfile.Builder().state(DetectorState.INIT).build()); + } + if 
(profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { + GetRequest getDetectorRequest = new GetRequest(ANOMALY_DETECTORS_INDEX, detectorId); + client.get( + getDetectorRequest, + onGetDetectorForInitProgress(listener, detectorId, profilesToCollect, totalUpdates, requiredSamples) + ); + } + } else { + if (profilesToCollect.contains(ProfileName.STATE)) { + listener.onResponse(new DetectorProfile.Builder().state(DetectorState.RUNNING).build()); + } + + if (profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { + InitProgressProfile initProgress = new InitProgressProfile("100%", 0, 0); + listener.onResponse(new DetectorProfile.Builder().initProgress(initProgress).build()); + } + } + }, exception -> { + logger.error(new ParameterizedMessage("Fail to get state and init progress for {}", detectorId), exception); + listener.failImmediately(FAIL_TO_GET_PROFILE_MSG + detectorId, exception); + }); + } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/constant/CommonName.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/constant/CommonName.java index 8f730ccb..c6336fd6 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/constant/CommonName.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/constant/CommonName.java @@ -54,4 +54,5 @@ public class CommonName { public static final String SHINGLE_SIZE = "shingle_size"; public static final String TOTAL_SIZE_IN_BYTES = "total_size_in_bytes"; public static final String MODELS = "models"; + public static final String INIT_PROGRESS = "init_progress"; } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/indices/AnomalyDetectionIndices.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/indices/AnomalyDetectionIndices.java index ff80bd47..1a3a00ef 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/indices/AnomalyDetectionIndices.java +++ 
b/src/main/java/com/amazon/opendistroforelasticsearch/ad/indices/AnomalyDetectionIndices.java @@ -19,6 +19,7 @@ import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.AD_RESULT_HISTORY_MAX_DOCS; import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.AD_RESULT_HISTORY_ROLLOVER_PERIOD; import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.ANOMALY_DETECTORS_INDEX_MAPPING_FILE; +import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.ANOMALY_DETECTOR_STATE_INDEX_MAPPING_FILE; import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.ANOMALY_DETECTOR_JOBS_INDEX_MAPPING_FILE; import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.ANOMALY_RESULTS_INDEX_MAPPING_FILE; import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.REQUEST_TIMEOUT; @@ -47,11 +48,12 @@ import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.google.common.base.Charsets; import com.google.common.io.Resources; /** - * This class manages creation of anomaly detector index. + * This class provides utility methods for various anomaly detection indices. */ public class AnomalyDetectionIndices implements LocalNodeMasterListener { @@ -139,6 +141,17 @@ private String getAnomalyDetectorJobMappings() throws IOException { return Resources.toString(url, Charsets.UTF_8); } + /** + * Get anomaly detector state index mapping json content. 
+ * + * @return anomaly detector state index mapping + * @throws IOException IOException if mapping file can't be read correctly + */ + private String getDetectorStateMappings() throws IOException { + URL url = AnomalyDetectionIndices.class.getClassLoader().getResource(ANOMALY_DETECTOR_STATE_INDEX_MAPPING_FILE); + return Resources.toString(url, Charsets.UTF_8); + } + /** * Anomaly detector index exist or not. * @@ -166,6 +179,15 @@ public boolean doesAnomalyResultIndexExist() { return clusterService.state().metadata().hasAlias(AnomalyResult.ANOMALY_RESULT_INDEX); } + /** + * Detector state index exists or not. + * + * @return true if detector state index exists + */ + public boolean doesDetectorStateIndexExist() { + return clusterService.state().getRoutingTable().hasIndex(DetectorInternalState.DETECTOR_STATE_INDEX); + } + /** * Create anomaly detector index if not exist. * @@ -229,6 +251,18 @@ public void initAnomalyDetectorJobIndex(ActionListener acti adminClient.indices().create(request, actionListener); } + /** + * Create the detector state index. 
+ * + * @param actionListener action called after create index + * @throws IOException IOException from {@link AnomalyDetectionIndices#getDetectorStateMappings} + */ + public void initDetectorStateIndex(ActionListener actionListener) throws IOException { + CreateIndexRequest request = new CreateIndexRequest(DetectorInternalState.DETECTOR_STATE_INDEX) + .mapping(AnomalyDetector.TYPE, getDetectorStateMappings(), XContentType.JSON); + adminClient.indices().create(request, actionListener); + } + @Override public void onMaster() { try { diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/ModelManager.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/ModelManager.java index bc6d2e25..04d2defd 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/ModelManager.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/ModelManager.java @@ -211,17 +211,19 @@ public ModelManager( public CombinedRcfResult combineRcfResults(List rcfResults) { CombinedRcfResult combinedResult = null; if (rcfResults.isEmpty()) { - combinedResult = new CombinedRcfResult(0, 0); + combinedResult = new CombinedRcfResult(0, 0, 0); } else { int totalForestSize = rcfResults.stream().mapToInt(RcfResult::getForestSize).sum(); if (totalForestSize == 0) { - combinedResult = new CombinedRcfResult(0, 0); + combinedResult = new CombinedRcfResult(0, 0, 0); } else { double score = rcfResults.stream().mapToDouble(r -> r.getScore() * r.getForestSize()).sum() / totalForestSize; double confidence = rcfResults.stream().mapToDouble(r -> r.getConfidence() * r.getForestSize()).sum() / Math .max(rcfNumTrees, totalForestSize); - combinedResult = new CombinedRcfResult(score, confidence); + long totalUpdates = rcfResults.stream().mapToLong(RcfResult::getTotalUpdates).max().orElse(0L); + combinedResult = new CombinedRcfResult(score, confidence, totalUpdates); } + } return combinedResult; } @@ -355,7 +357,7 @@ public RcfResult getRcfResult(String detectorId, 
String modelId, double[] point) int forestSize = rcf.getNumberOfTrees(); rcf.update(point); modelState.setLastUsedTime(clock.instant()); - return new RcfResult(score, confidence, forestSize); + return new RcfResult(score, confidence, forestSize, rcf.getTotalUpdates()); } /** @@ -388,7 +390,7 @@ private void getRcfResult(ModelState modelState, double[] point int forestSize = rcf.getNumberOfTrees(); rcf.update(point); modelState.setLastUsedTime(clock.instant()); - listener.onResponse(new RcfResult(score, confidence, forestSize)); + listener.onResponse(new RcfResult(score, confidence, forestSize, rcf.getTotalUpdates())); } private void processRcfCheckpoint( @@ -1032,11 +1034,19 @@ public Map getModelSize(String detectorId) { .stream() .filter(entry -> getDetectorIdForModelId(entry.getKey()).equals(detectorId)) .forEach(entry -> { res.put(entry.getKey(), estimateModelSize(entry.getValue().getModel())); }); - thresholds - .entrySet() - .stream() - .filter(entry -> getDetectorIdForModelId(entry.getKey()).equals(detectorId)) - .forEach(entry -> { res.put(entry.getKey(), 0L); }); return res; } + + /** + * Get the total number of updates applied to the RCF model with the given model id, or 0 if no such model is found. 
+ * @param modelId model id + */ + public long getTotalUpdates(String modelId) { + ModelState model = forests.get(modelId); + if (model != null) { + return model.getModel().getTotalUpdates(); + } + + return 0; + } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/RcfResult.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/RcfResult.java index 8164fefc..be311015 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/RcfResult.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/RcfResult.java @@ -25,6 +25,7 @@ public class RcfResult { private final double score; private final double confidence; private final int forestSize; + private final long totalUpdates; /** * Constructor with all arguments. @@ -32,11 +33,13 @@ public class RcfResult { * @param score RCF score * @param confidence RCF confidence * @param forestSize number of RCF trees used for the score + * @param totalUpdates total updates made to rcf partitions so far */ - public RcfResult(double score, double confidence, int forestSize) { + public RcfResult(double score, double confidence, int forestSize, long totalUpdates) { this.score = score; this.confidence = confidence; this.forestSize = forestSize; + this.totalUpdates = totalUpdates; } /** @@ -66,6 +69,15 @@ public int getForestSize() { return forestSize; } + /** + * Return the number of RCF forest updates + * + * @return the number of RCF forest updates + */ + public long getTotalUpdates() { + return totalUpdates; + } + @Override public boolean equals(Object o) { if (this == o) @@ -75,11 +87,12 @@ public boolean equals(Object o) { RcfResult that = (RcfResult) o; return Objects.equals(this.score, that.score) && Objects.equals(this.confidence, that.confidence) - && Objects.equals(this.forestSize, that.forestSize); + && Objects.equals(this.forestSize, that.forestSize) + && Objects.equals(this.totalUpdates, that.totalUpdates); } @Override public int hashCode() { - return 
Objects.hash(score, confidence, forestSize); + return Objects.hash(score, confidence, forestSize, totalUpdates); } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/rcf/CombinedRcfResult.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/rcf/CombinedRcfResult.java index 412e60c4..a43ebe54 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/rcf/CombinedRcfResult.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/rcf/CombinedRcfResult.java @@ -24,16 +24,19 @@ public class CombinedRcfResult { private final double score; private final double confidence; + private final long totalUpdates; /** * Constructor with all arguments. * * @param score combined RCF score * @param confidence confidence of the score + * @param totalUpdates max total updates made to all rcf partitions so far */ - public CombinedRcfResult(double score, double confidence) { + public CombinedRcfResult(double score, double confidence, long totalUpdates) { this.score = score; this.confidence = confidence; + this.totalUpdates = totalUpdates; } /** @@ -54,6 +57,15 @@ public double getConfidence() { return confidence; } + /** + * Return max total updates made to all rcf partitions so far + * + * @return max total updates made to all rcf partitions so far + */ + public long getTotalUpdates() { + return totalUpdates; + } + @Override public boolean equals(Object o) { if (this == o) @@ -61,11 +73,13 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; CombinedRcfResult that = (CombinedRcfResult) o; - return Objects.equals(this.score, that.score) && Objects.equals(this.confidence, that.confidence); + return Objects.equals(this.score, that.score) + && Objects.equals(this.confidence, that.confidence) + && Objects.equals(this.totalUpdates, that.totalUpdates); } @Override public int hashCode() { - return Objects.hash(score, confidence); + return Objects.hash(score, confidence, totalUpdates); } } diff 
--git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/AnomalyDetectorJob.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/AnomalyDetectorJob.java index 30f36939..9a99e60c 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/AnomalyDetectorJob.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/AnomalyDetectorJob.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.time.Instant; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; @@ -36,6 +38,13 @@ */ public class AnomalyDetectorJob implements ToXContentObject, ScheduledJobParameter { + public static final String PARSE_FIELD_NAME = "AnomalyDetectorJob"; + public static final NamedXContentRegistry.Entry XCONTENT_REGISTRY = new NamedXContentRegistry.Entry( + AnomalyDetectorJob.class, + new ParseField(PARSE_FIELD_NAME), + it -> parse(it) + ); + public static final String ANOMALY_DETECTOR_JOB_INDEX = ".opendistro-anomaly-detector-jobs"; public static final String NAME_FIELD = "name"; public static final String LAST_UPDATE_TIME_FIELD = "last_update_time"; diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorInternalState.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorInternalState.java new file mode 100644 index 00000000..0cada9a9 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorInternalState.java @@ -0,0 +1,186 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.model; + +import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; + +import java.io.IOException; +import java.time.Instant; + +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; + +import com.amazon.opendistroforelasticsearch.ad.annotation.Generated; +import com.amazon.opendistroforelasticsearch.ad.util.ParseUtils; +import com.google.common.base.Objects; + +/** + * Include anomaly detector's state + */ +public class DetectorInternalState implements ToXContentObject, Cloneable { + + public static final String PARSE_FIELD_NAME = "DetectorInternalState"; + public static final NamedXContentRegistry.Entry XCONTENT_REGISTRY = new NamedXContentRegistry.Entry( + DetectorInternalState.class, + new ParseField(PARSE_FIELD_NAME), + it -> parse(it) + ); + + public static final String DETECTOR_STATE_INDEX = ".opendistro-anomaly-state"; + + public static final String RCF_UPDATES_FIELD = "rcf_updates"; + public static final String LAST_UPDATE_TIME_FIELD = "last_update_time"; + public static final String ERROR_FIELD = "error"; + + private long rcfUpdates = -1L; + private Instant lastUpdateTime = null; + private String error = null; + + private DetectorInternalState() {} + + public static class Builder { + private long rcfUpdates = -1; + private Instant lastUpdateTime = null; + private String 
error = null; + + public Builder() {} + + public Builder rcfUpdates(long rcfUpdates) { + this.rcfUpdates = rcfUpdates; + return this; + } + + public Builder lastUpdateTime(Instant lastUpdateTime) { + this.lastUpdateTime = lastUpdateTime; + return this; + } + + public Builder error(String error) { + this.error = error; + return this; + } + + public DetectorInternalState build() { + DetectorInternalState info = new DetectorInternalState(); + info.rcfUpdates = this.rcfUpdates; + info.lastUpdateTime = this.lastUpdateTime; + info.error = this.error; + + return info; + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + XContentBuilder xContentBuilder = builder.startObject(); + + if (rcfUpdates >= 0) { + xContentBuilder.field(RCF_UPDATES_FIELD, rcfUpdates); + } + if (lastUpdateTime != null) { + xContentBuilder.field(LAST_UPDATE_TIME_FIELD, lastUpdateTime.toEpochMilli()); + } + if (error != null) { + xContentBuilder.field(ERROR_FIELD, error); + } + return xContentBuilder.endObject(); + } + + public static DetectorInternalState parse(XContentParser parser) throws IOException { + long rcfUpdates = -1L; + Instant lastUpdateTime = null; + String error = null; + + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser::getTokenLocation); + while (parser.nextToken() != XContentParser.Token.END_OBJECT) { + String fieldName = parser.currentName(); + parser.nextToken(); + + switch (fieldName) { + case RCF_UPDATES_FIELD: + rcfUpdates = parser.longValue(); + break; + case LAST_UPDATE_TIME_FIELD: + lastUpdateTime = ParseUtils.toInstant(parser); + break; + case ERROR_FIELD: + error = parser.text(); + break; + default: + parser.skipChildren(); + break; + } + } + return new DetectorInternalState.Builder().rcfUpdates(rcfUpdates).lastUpdateTime(lastUpdateTime).error(error).build(); + } + + @Generated + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == 
null || getClass() != o.getClass()) + return false; + DetectorInternalState that = (DetectorInternalState) o; + return Objects.equal(getRcfUpdates(), that.getRcfUpdates()) + && Objects.equal(getLastUpdateTime(), that.getLastUpdateTime()) + && Objects.equal(getError(), that.getError()); + } + + @Generated + @Override + public int hashCode() { + return Objects.hashCode(rcfUpdates, lastUpdateTime, error); + } + + @Override + public Object clone() { + DetectorInternalState info = null; + try { + info = (DetectorInternalState) super.clone(); + } catch (CloneNotSupportedException e) { + info = new DetectorInternalState.Builder().rcfUpdates(rcfUpdates).lastUpdateTime(lastUpdateTime).error(error).build(); + } + return info; + } + + public long getRcfUpdates() { + return rcfUpdates; + } + + public void setRcfUpdates(long rcfUpdates) { + this.rcfUpdates = rcfUpdates; + } + + public Instant getLastUpdateTime() { + return lastUpdateTime; + } + + public void setLastUpdateTime(Instant lastUpdateTime) { + this.lastUpdateTime = lastUpdateTime; + } + + public String getError() { + return error; + } + + public void setError(String error) { + this.error = error; + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorProfile.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorProfile.java index 8ee3efd4..6b066491 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorProfile.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorProfile.java @@ -33,18 +33,72 @@ public class DetectorProfile implements ToXContentObject, Mergeable { private int shingleSize; private String coordinatingNode; private long totalSizeInBytes; + private InitProgressProfile initProgress; public XContentBuilder toXContent(XContentBuilder builder) throws IOException { return toXContent(builder, ToXContent.EMPTY_PARAMS); } - public DetectorProfile() { - state = null; - error = null; - modelProfile = null; - 
shingleSize = -1; - coordinatingNode = null; - totalSizeInBytes = -1; + private DetectorProfile() {} + + public static class Builder { + private DetectorState state = null; + private String error = null; + private ModelProfile[] modelProfile = null; + private int shingleSize = -1; + private String coordinatingNode = null; + private long totalSizeInBytes = -1; + private InitProgressProfile initProgress = null; + + public Builder() {} + + public Builder state(DetectorState state) { + this.state = state; + return this; + } + + public Builder error(String error) { + this.error = error; + return this; + } + + public Builder modelProfile(ModelProfile[] modelProfile) { + this.modelProfile = modelProfile; + return this; + } + + public Builder shingleSize(int shingleSize) { + this.shingleSize = shingleSize; + return this; + } + + public Builder coordinatingNode(String coordinatingNode) { + this.coordinatingNode = coordinatingNode; + return this; + } + + public Builder totalSizeInBytes(long totalSizeInBytes) { + this.totalSizeInBytes = totalSizeInBytes; + return this; + } + + public Builder initProgress(InitProgressProfile initProgress) { + this.initProgress = initProgress; + return this; + } + + public DetectorProfile build() { + DetectorProfile profile = new DetectorProfile(); + profile.state = this.state; + profile.error = this.error; + profile.modelProfile = modelProfile; + profile.shingleSize = shingleSize; + profile.coordinatingNode = coordinatingNode; + profile.totalSizeInBytes = totalSizeInBytes; + profile.initProgress = initProgress; + + return profile; + } } @Override @@ -73,7 +127,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (totalSizeInBytes != -1) { xContentBuilder.field(CommonName.TOTAL_SIZE_IN_BYTES, totalSizeInBytes); } - + if (initProgress != null) { + xContentBuilder.field(CommonName.INIT_PROGRESS, initProgress); + } return xContentBuilder.endObject(); } @@ -125,6 +181,14 @@ public void setTotalSizeInBytes(long 
totalSizeInBytes) { this.totalSizeInBytes = totalSizeInBytes; } + public InitProgressProfile getInitProgress() { + return initProgress; + } + + public void setInitProgress(InitProgressProfile initProgress) { + this.initProgress = initProgress; + } + @Override public void merge(Mergeable other) { if (this == other || other == null || getClass() != other.getClass()) { @@ -149,6 +213,9 @@ public void merge(Mergeable other) { if (otherProfile.getTotalSizeInBytes() != -1) { this.totalSizeInBytes = otherProfile.getTotalSizeInBytes(); } + if (otherProfile.getInitProgress() != null) { + this.initProgress = otherProfile.getInitProgress(); + } } @Override @@ -162,18 +229,58 @@ public boolean equals(Object obj) { if (obj instanceof DetectorProfile) { DetectorProfile other = (DetectorProfile) obj; - return new EqualsBuilder().append(state, other.state).append(error, other.error).isEquals(); + // Compare every field unconditionally so equals stays symmetric and agrees with hashCode below. + return new EqualsBuilder() + .append(state, other.state) + .append(error, other.error) + .append(modelProfile, other.modelProfile) + .append(shingleSize, other.shingleSize) + .append(coordinatingNode, other.coordinatingNode) + .append(totalSizeInBytes, other.totalSizeInBytes) + .append(initProgress, other.initProgress) + .isEquals(); } return false; } @Override public int hashCode() { - return new HashCodeBuilder().append(state).append(error).toHashCode(); + return new HashCodeBuilder() + .append(state) + .append(error) + .append(modelProfile) + .append(shingleSize) + .append(coordinatingNode) + .append(totalSizeInBytes) + .append(initProgress) + .toHashCode(); } @Override public String toString() {
- return new ToStringBuilder(this).append("state", state).append("error", error).toString(); + ToStringBuilder toStringBuilder = new ToStringBuilder(this); + + if (state != null) { + toStringBuilder.append(CommonName.STATE, state); + } + if (error != null) { + toStringBuilder.append(CommonName.ERROR, error); + } + if (modelProfile != null && modelProfile.length > 0) { + toStringBuilder.append(CommonName.MODELS, modelProfile); + } + if (shingleSize != -1) { + toStringBuilder.append(CommonName.SHINGLE_SIZE, shingleSize); + } + if (coordinatingNode != null) { + toStringBuilder.append(CommonName.COORDINATING_NODE, coordinatingNode); + } + if (totalSizeInBytes != -1) { + toStringBuilder.append(CommonName.TOTAL_SIZE_IN_BYTES, totalSizeInBytes); + } + if (initProgress != null) { + toStringBuilder.append(CommonName.INIT_PROGRESS, initProgress); + } + return toStringBuilder.toString(); } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/InitProgressProfile.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/InitProgressProfile.java new file mode 100644 index 00000000..2feee350 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/InitProgressProfile.java @@ -0,0 +1,147 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.model; + +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +import java.io.IOException; + +import org.apache.commons.lang.builder.EqualsBuilder; +import org.apache.commons.lang.builder.HashCodeBuilder; +import org.apache.commons.lang.builder.ToStringBuilder; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; + +/** + * Profile output for detector initialization progress. When the new detector is created, it is possible that + * there hasn’t been enough continuous data in the index. We need to use live data to initialize. + * During initialization, we need to tell users progress (using a percentage), how many more + * shingles to go, and approximately how many minutes before the detector becomes operational + * if they keep their data stream continuous. 
+ * @author kaituo + * + */ +public class InitProgressProfile implements Writeable, ToXContent { + // field name in toXContent + public static final String PERCENTAGE = "percentage"; + public static final String ESTIMATED_MINUTES_LEFT = "estimated_minutes_left"; + public static final String NEEDED_SHINGLES = "needed_shingles"; + + private final String percentage; + private final long estimatedMinutesLeft; + private final int neededShingles; + + public InitProgressProfile(String percentage, long estimatedMinutesLeft, int neededDataPoints) { + super(); + this.percentage = percentage; + this.estimatedMinutesLeft = estimatedMinutesLeft; + this.neededShingles = neededDataPoints; + } + + public InitProgressProfile(StreamInput in) throws IOException { + percentage = in.readString(); + estimatedMinutesLeft = in.readVLong(); + neededShingles = in.readVInt(); + } + + public String getPercentage() { + return percentage; + } + + public long getEstimatedMinutesLeft() { + return estimatedMinutesLeft; + } + + public int getNeededDataPoints() { + return neededShingles; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(PERCENTAGE, percentage); + if (estimatedMinutesLeft > 0) { + builder.field(ESTIMATED_MINUTES_LEFT, estimatedMinutesLeft); + } + if (neededShingles > 0) { + builder.field(NEEDED_SHINGLES, neededShingles); + } + builder.endObject(); + return builder; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(percentage); + out.writeVLong(estimatedMinutesLeft); + out.writeVInt(neededShingles); + } + + @Override + public String toString() { + ToStringBuilder builder = new ToStringBuilder(this); + builder.append(PERCENTAGE, percentage); + if (estimatedMinutesLeft > 0) { + builder.append(ESTIMATED_MINUTES_LEFT, estimatedMinutesLeft); + } + if (neededShingles > 0) { + builder.append(NEEDED_SHINGLES, neededShingles); + } + return 
builder.toString(); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + if (obj instanceof InitProgressProfile) { + InitProgressProfile other = (InitProgressProfile) obj; + + EqualsBuilder equalsBuilder = new EqualsBuilder(); + equalsBuilder.append(percentage, other.percentage); + equalsBuilder.append(estimatedMinutesLeft, other.estimatedMinutesLeft); + equalsBuilder.append(neededShingles, other.neededShingles); + + return equalsBuilder.isEquals(); + } + return false; + } + + @Override + public int hashCode() { + return new HashCodeBuilder().append(percentage).append(estimatedMinutesLeft).append(neededShingles).toHashCode(); + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ModelProfile.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ModelProfile.java index 71d61530..f0c8b9e6 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ModelProfile.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ModelProfile.java @@ -32,6 +32,7 @@ import java.io.IOException; +import org.apache.commons.lang.builder.ToStringBuilder; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; @@ -91,4 +92,15 @@ public void writeTo(StreamOutput out) throws IOException { out.writeVLong(modelSizeInBytes); out.writeString(nodeId); } + + @Override + public String toString() { + ToStringBuilder builder = new ToStringBuilder(this); + builder.append(MODEL_ID, modelId); + if (modelSizeInBytes > 0) { + builder.append(MODEL_SIZE_IN_BYTES, modelSizeInBytes); + } + builder.append(NODE_ID, nodeId); + return builder.toString(); + } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ProfileName.java 
b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ProfileName.java index 3c3fa93b..1ab1c19d 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ProfileName.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ProfileName.java @@ -27,7 +27,8 @@ public enum ProfileName { COORDINATING_NODE(CommonName.COORDINATING_NODE), SHINGLE_SIZE(CommonName.SHINGLE_SIZE), TOTAL_SIZE_IN_BYTES(CommonName.TOTAL_SIZE_IN_BYTES), - MODELS(CommonName.MODELS); + MODELS(CommonName.MODELS), + INIT_PROGRESS(CommonName.INIT_PROGRESS); private String name; @@ -58,6 +59,8 @@ public static ProfileName getName(String name) { return TOTAL_SIZE_IN_BYTES; case CommonName.MODELS: return MODELS; + case CommonName.INIT_PROGRESS: + return INIT_PROGRESS; default: throw new IllegalArgumentException("Unsupported profile types"); } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestAnomalyDetectorJobAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestAnomalyDetectorJobAction.java index 5410b532..a418a81f 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestAnomalyDetectorJobAction.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestAnomalyDetectorJobAction.java @@ -19,7 +19,6 @@ import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.DETECTOR_ID; import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.IF_PRIMARY_TERM; import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.IF_SEQ_NO; -import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.REFRESH; import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.START_JOB; import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.STOP_JOB; @@ -27,7 +26,6 @@ import java.util.List; import java.util.Locale; -import org.elasticsearch.action.support.WriteRequest; import 
org.elasticsearch.client.node.NodeClient; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Settings; @@ -51,12 +49,10 @@ public class RestAnomalyDetectorJobAction extends BaseRestHandler { public static final String AD_JOB_ACTION = "anomaly_detector_job_action"; private volatile TimeValue requestTimeout; private final AnomalyDetectionIndices anomalyDetectionIndices; - private final ClusterService clusterService; public RestAnomalyDetectorJobAction(Settings settings, ClusterService clusterService, AnomalyDetectionIndices anomalyDetectionIndices) { this.anomalyDetectionIndices = anomalyDetectionIndices; this.requestTimeout = REQUEST_TIMEOUT.get(settings); - this.clusterService = clusterService; clusterService.getClusterSettings().addSettingsUpdateConsumer(REQUEST_TIMEOUT, it -> requestTimeout = it); } @@ -76,19 +72,14 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli return channel -> { long seqNo = request.paramAsLong(IF_SEQ_NO, SequenceNumbers.UNASSIGNED_SEQ_NO); long primaryTerm = request.paramAsLong(IF_PRIMARY_TERM, SequenceNumbers.UNASSIGNED_PRIMARY_TERM); - WriteRequest.RefreshPolicy refreshPolicy = request.hasParam(REFRESH) - ? 
WriteRequest.RefreshPolicy.parse(request.param(REFRESH)) - : WriteRequest.RefreshPolicy.IMMEDIATE; IndexAnomalyDetectorJobActionHandler handler = new IndexAnomalyDetectorJobActionHandler( - clusterService, client, channel, anomalyDetectionIndices, detectorId, seqNo, primaryTerm, - refreshPolicy, requestTimeout ); diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestDeleteAnomalyDetectorAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestDeleteAnomalyDetectorAction.java index b7b2f17c..5fd89781 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestDeleteAnomalyDetectorAction.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestDeleteAnomalyDetectorAction.java @@ -40,6 +40,7 @@ import com.amazon.opendistroforelasticsearch.ad.constant.CommonErrorMessages; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.amazon.opendistroforelasticsearch.ad.rest.handler.AnomalyDetectorActionHandler; import com.amazon.opendistroforelasticsearch.ad.settings.EnabledSetting; import com.google.common.collect.ImmutableList; @@ -91,13 +92,13 @@ private void deleteAnomalyDetectorJobDoc(NodeClient client, String detectorId, R .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); client.delete(deleteRequest, ActionListener.wrap(response -> { if (response.getResult() == DocWriteResponse.Result.DELETED || response.getResult() == DocWriteResponse.Result.NOT_FOUND) { - deleteAnomalyDetectorDoc(client, detectorId, channel); + deleteDetectorInfoDoc(client, detectorId, channel); } else { logger.error("Fail to delete anomaly detector job {}", detectorId); } }, exception -> { if (exception instanceof IndexNotFoundException) { - deleteAnomalyDetectorDoc(client, detectorId, channel); + deleteDetectorInfoDoc(client, detectorId, 
channel); } else { logger.error("Failed to delete anomaly detector job", exception); try { @@ -109,6 +110,41 @@ private void deleteAnomalyDetectorJobDoc(NodeClient client, String detectorId, R })); } + /** + * Deletes the detector's internal state document, then continues with deleting the detector document. + * + * @param client ES node client used to issue the delete request + * @param detectorId detector identifier, also used as the id of the state document + * @param channel ES channel used to send an error response if the delete fails + */ + private void deleteDetectorInfoDoc(NodeClient client, String detectorId, RestChannel channel) { + logger.info("Delete detector info {}", detectorId); + DeleteRequest deleteRequest = new DeleteRequest(DetectorInternalState.DETECTOR_STATE_INDEX, detectorId); + client + .delete( + deleteRequest, + ActionListener + .wrap( + response -> { + // Whether or not the info doc was deleted, continue: the info doc may not exist + deleteAnomalyDetectorDoc(client, detectorId, channel); + }, + exception -> { + if (exception instanceof IndexNotFoundException) { + deleteAnomalyDetectorDoc(client, detectorId, channel); + } else { + logger.error("Failed to delete detector info", exception); + try { + channel.sendResponse(new BytesRestResponse(channel, exception)); + } catch (IOException e) { + logger.error("Failed to send response of delete detector info", e); + } + } + } + ) + ); + } + private void deleteAnomalyDetectorDoc(NodeClient client, String detectorId, RestChannel channel) { logger.info("Delete anomaly detector {}", detectorId); DeleteRequest deleteRequest = new DeleteRequest(AnomalyDetector.ANOMALY_DETECTORS_INDEX, detectorId) diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorActionHandler.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorActionHandler.java index d712967c..5f6fd438 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorActionHandler.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorActionHandler.java @@ -93,6 +93,8 @@ public class IndexAnomalyDetectorActionHandler extends AbstractActionHandler { * @param refreshPolicy refresh policy * @param anomalyDetector anomaly detector instance
* @param requestTimeout request time out configuration + * @param maxAnomalyDetectors max anomaly detector allowed + * @param maxAnomalyFeatures max features allowed per detector */ public IndexAnomalyDetectorActionHandler( Settings settings, diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorJobActionHandler.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorJobActionHandler.java index be72172a..4cf0894c 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorJobActionHandler.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorJobActionHandler.java @@ -37,7 +37,6 @@ import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.client.node.NodeClient; -import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; @@ -65,8 +64,6 @@ public class IndexAnomalyDetectorJobActionHandler extends AbstractActionHandler private final String detectorId; private final Long seqNo; private final Long primaryTerm; - private final WriteRequest.RefreshPolicy refreshPolicy; - private final ClusterService clusterService; private final Logger logger = LogManager.getLogger(IndexAnomalyDetectorJobActionHandler.class); private final TimeValue requestTimeout; @@ -74,34 +71,28 @@ public class IndexAnomalyDetectorJobActionHandler extends AbstractActionHandler /** * Constructor function. 
* - * @param clusterService ClusterService * @param client ES node client that executes actions on the local node * @param channel ES channel used to construct bytes / builder based outputs, and send responses * @param anomalyDetectionIndices anomaly detector index manager * @param detectorId detector identifier * @param seqNo sequence number of last modification * @param primaryTerm primary term of last modification - * @param refreshPolicy refresh policy * @param requestTimeout request time out configuration */ public IndexAnomalyDetectorJobActionHandler( - ClusterService clusterService, NodeClient client, RestChannel channel, AnomalyDetectionIndices anomalyDetectionIndices, String detectorId, Long seqNo, Long primaryTerm, - WriteRequest.RefreshPolicy refreshPolicy, TimeValue requestTimeout ) { super(client, channel); - this.clusterService = clusterService; this.anomalyDetectionIndices = anomalyDetectionIndices; this.detectorId = detectorId; this.seqNo = seqNo; this.primaryTerm = primaryTerm; - this.refreshPolicy = refreshPolicy; this.requestTimeout = requestTimeout; } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/settings/AnomalyDetectorSettings.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/settings/AnomalyDetectorSettings.java index 59ba31dd..d908ba7f 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/settings/AnomalyDetectorSettings.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/settings/AnomalyDetectorSettings.java @@ -142,6 +142,7 @@ private AnomalyDetectorSettings() {} public static final String ANOMALY_DETECTORS_INDEX_MAPPING_FILE = "mappings/anomaly-detectors.json"; public static final String ANOMALY_DETECTOR_JOBS_INDEX_MAPPING_FILE = "mappings/anomaly-detector-jobs.json"; public static final String ANOMALY_RESULTS_INDEX_MAPPING_FILE = "mappings/anomaly-results.json"; + public static final String ANOMALY_DETECTOR_STATE_INDEX_MAPPING_FILE = "mappings/anomaly-state.json"; public 
static final Duration HOURLY_MAINTENANCE = Duration.ofHours(1); diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManager.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManager.java index 9eb5bbcc..22fd2584 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManager.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManager.java @@ -39,6 +39,7 @@ import org.elasticsearch.common.xcontent.XContentType; import com.amazon.opendistroforelasticsearch.ad.common.exception.LimitExceededException; +import com.amazon.opendistroforelasticsearch.ad.constant.CommonName; import com.amazon.opendistroforelasticsearch.ad.ml.ModelManager; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; @@ -52,6 +53,7 @@ public class ADStateManager { private static final Logger LOG = LogManager.getLogger(ADStateManager.class); private ConcurrentHashMap> currentDetectors; private ConcurrentHashMap> partitionNumber; + private ConcurrentHashMap currentCheckpoints; private Client client; private ModelManager modelManager; private NamedXContentRegistry xContentRegistry; @@ -81,6 +83,7 @@ public ADStateManager( this.clock = clock; this.settings = settings; this.stateTtl = stateTtl; + this.currentCheckpoints = new ConcurrentHashMap<>(); } /** @@ -112,10 +115,10 @@ public void getAnomalyDetector(String adID, ActionListenerasyncRequest(request, client::get, onGetResponse(adID, listener)); + clientUtil.asyncRequest(request, client::get, onGetDetectorResponse(adID, listener)); } - private ActionListener onGetResponse(String adID, ActionListener> listener) { + private ActionListener onGetDetectorResponse(String adID, ActionListener> listener) { return ActionListener.wrap(response -> { if (response == null || !response.isExists()) { listener.onResponse(Optional.empty()); @@ -140,6 +143,30 @@ 
private ActionListener onGetResponse(String adID, ActionListener listener) { + Instant timeGettingCheckpoint = currentCheckpoints.get(adID); + if (timeGettingCheckpoint != null) { + currentCheckpoints.put(adID, clock.instant()); + listener.onResponse(Boolean.TRUE); + return; + } + + GetRequest request = new GetRequest(CommonName.CHECKPOINT_INDEX_NAME, modelManager.getRcfModelId(adID, 0)); + + clientUtil.asyncRequest(request, client::get, onGetCheckpointResponse(adID, listener)); + } + + private ActionListener onGetCheckpointResponse(String adID, ActionListener listener) { + return ActionListener.wrap(response -> { + if (response == null || !response.isExists()) { + listener.onResponse(Boolean.FALSE); + } else { + currentCheckpoints.put(adID, clock.instant()); + listener.onResponse(Boolean.TRUE); + } + }, listener::onFailure); + } + /** * Used in delete workflow * @@ -148,11 +175,13 @@ private ActionListener onGetResponse(String adID, ActionListener void maintenance(ConcurrentHashMap> states) { }); } + /** + * Clean states if it is older than our stateTtl. The input has to be a + * ConcurrentHashMap otherwise we will have + * java.util.ConcurrentModificationException. 
+ * + * @param flags flags to be maintained + */ + void maintenanceFlag(ConcurrentHashMap flags) { + flags.entrySet().stream().forEach(entry -> { + String detectorId = entry.getKey(); + try { + Instant time = entry.getValue(); + if (time.plus(stateTtl).isBefore(clock.instant())) { + flags.remove(detectorId); + } + } catch (Exception e) { + LOG.warn("Failed to finish maintenance for detector id " + detectorId, e); + } + }); + } + public boolean isMuted(String nodeId) { return backpressureMuter.containsKey(nodeId) && backpressureMuter.get(nodeId).isMuted(); } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTransportAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTransportAction.java index 08afa578..0d532de3 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTransportAction.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTransportAction.java @@ -74,6 +74,7 @@ import com.amazon.opendistroforelasticsearch.ad.settings.EnabledSetting; import com.amazon.opendistroforelasticsearch.ad.stats.ADStats; import com.amazon.opendistroforelasticsearch.ad.stats.StatNames; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.DetectorStateHandler; import com.amazon.opendistroforelasticsearch.ad.util.ColdStartRunner; public class AnomalyResultTransportAction extends HandledTransportAction { @@ -103,6 +104,7 @@ public class AnomalyResultTransportAction extends HandledTransportAction getFeatureData(double[] currentFeature, AnomalyDetector detector) { @@ -287,6 +291,20 @@ private ActionListener onFeatureResponse( } if (!featureOptional.getProcessedFeatures().isPresent()) { + stateManager.getDetectorCheckpoint(adID, ActionListener.wrap(checkpointExists -> { + if (!checkpointExists) { + LOG.info("Trigger cold start for {}", adID); + globalRunner.compute(new ColdStartJob(detector)); + } + }, exception -> { + 
Throwable cause = ExceptionsHelper.unwrapCause(exception); + if (cause instanceof IndexNotFoundException) { + LOG.info("Trigger cold start for {}", adID); + globalRunner.compute(new ColdStartJob(detector)); + } else { + LOG.error(String.format("Fail to get checkpoint state for %s", adID), exception); + } + })); if (!featureOptional.getUnprocessedFeatures().isPresent()) { // Feature not available is common when we have data holes. Respond empty response // so that alerting will not print stack trace to avoid bloating our logs. @@ -403,7 +421,7 @@ private boolean coldStartIfNoModel(AtomicReference fa AnomalyDetectionException exp = failure.get(); if (exp != null) { if (exp instanceof ResourceNotFoundException) { - LOG.info("Cold start for {}", detector.getDetectorId()); + LOG.info("Trigger cold start for {}", detector.getDetectorId()); globalRunner.compute(new ColdStartJob(detector)); return true; } else { @@ -472,7 +490,7 @@ private boolean isException(Throwable exception, Class expe private CombinedRcfResult getCombinedResult(List rcfResults) { List rcfResultLib = new ArrayList<>(); for (RCFResultResponse result : rcfResults) { - rcfResultLib.add(new RcfResult(result.getRCFScore(), result.getConfidence(), result.getForestSize())); + rcfResultLib.add(new RcfResult(result.getRCFScore(), result.getConfidence(), result.getForestSize(), result.getTotalUpdates())); } return modelManager.combineRcfResults(rcfResultLib); } @@ -582,6 +600,7 @@ private void handleRCFResults() { } CombinedRcfResult combinedResult = getCombinedResult(rcfResults); + // detectorInfoHandler.saveRcfUpdates(combinedResult.getTotalUpdates(), adID); double combinedScore = combinedResult.getScore(); final AtomicReference anomalyResultResponse = new AtomicReference<>(); @@ -799,13 +818,16 @@ class ColdStartJob implements Callable { @Override public Boolean call() { + String detectorId = detector.getDetectorId(); try { Optional traingData = featureManager.getColdStartData(detector); if 
(traingData.isPresent()) { - modelManager.trainModel(detector, traingData.get()); + double[][] trainingPoints = traingData.get(); + modelManager.trainModel(detector, trainingPoints); + detectorInfoHandler.saveRcfUpdates(trainingPoints.length, detectorId); return true; } else { - throw new EndRunException(detector.getDetectorId(), "Cannot get training data", false); + throw new EndRunException(detectorId, "Cannot get training data", false); } } catch (ElasticsearchTimeoutException timeoutEx) { diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingAction.java new file mode 100644 index 00000000..103aa2e3 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingAction.java @@ -0,0 +1,28 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import org.elasticsearch.action.ActionType; + +public class RCFPollingAction extends ActionType { + public static final RCFPollingAction INSTANCE = new RCFPollingAction(); + public static final String NAME = "cluster:admin/ad/rcfpolling"; + + private RCFPollingAction() { + super(NAME, RCFPollingResponse::new); + } + +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingRequest.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingRequest.java new file mode 100644 index 00000000..fb5e7ea6 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingRequest.java @@ -0,0 +1,72 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import static org.elasticsearch.action.ValidateActions.addValidationError; + +import java.io.IOException; + +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import com.amazon.opendistroforelasticsearch.ad.constant.CommonErrorMessages; +import com.amazon.opendistroforelasticsearch.ad.constant.CommonMessageAttributes; + +public class RCFPollingRequest extends ActionRequest implements ToXContentObject { + private String adID; + + public RCFPollingRequest(StreamInput in) throws IOException { + super(in); + adID = in.readString(); + } + + public RCFPollingRequest(String adID) { + super(); + this.adID = adID; + } + + public String getAdID() { + return adID; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(adID); + } + + @Override + public ActionRequestValidationException validate() { + ActionRequestValidationException validationException = null; + if (Strings.isEmpty(adID)) { + validationException = addValidationError(CommonErrorMessages.AD_ID_MISSING_MSG, validationException); + } + return validationException; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(CommonMessageAttributes.ID_JSON_KEY, adID); + builder.endObject(); + return builder; + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingResponse.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingResponse.java new file mode 100644 index 00000000..3bc4640f --- 
/dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingResponse.java @@ -0,0 +1,57 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import java.io.IOException; + +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; + + +public class RCFPollingResponse extends ActionResponse implements ToXContentObject { + public static final String TOTAL_UPDATES_KEY = "totalUpdates"; + + private final long totalUpdates; + + public RCFPollingResponse(long totalUpdates) { + this.totalUpdates = totalUpdates; + } + + public RCFPollingResponse(StreamInput in) throws IOException { + super(in); + totalUpdates = in.readVLong(); + } + + public long getTotalUpdates() { + return totalUpdates; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(totalUpdates); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(TOTAL_UPDATES_KEY, totalUpdates); + builder.endObject(); + return builder; + } +} diff --git 
a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTransportAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTransportAction.java new file mode 100644 index 00000000..18fbdbe1 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTransportAction.java @@ -0,0 +1,135 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import java.io.IOException; +import java.util.Optional; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.HandledTransportAction; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportException; +import org.elasticsearch.transport.TransportRequestOptions; +import org.elasticsearch.transport.TransportResponseHandler; +import org.elasticsearch.transport.TransportService; + +import 
com.amazon.opendistroforelasticsearch.ad.cluster.HashRing; +import com.amazon.opendistroforelasticsearch.ad.common.exception.EndRunException; +import com.amazon.opendistroforelasticsearch.ad.constant.CommonErrorMessages; +import com.amazon.opendistroforelasticsearch.ad.ml.ModelManager; +import com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings; +import com.amazon.opendistroforelasticsearch.ad.settings.EnabledSetting; + +public class RCFPollingTransportAction extends HandledTransportAction { + + private static final Logger LOG = LogManager.getLogger(RCFPollingTransportAction.class); + + private final TransportService transportService; + private final ModelManager modelManager; + private final HashRing hashRing; + private final TransportRequestOptions option; + private final ClusterService clusterService; + + @Inject + public RCFPollingTransportAction( + ActionFilters actionFilters, + TransportService transportService, + Settings settings, + ModelManager modelManager, + HashRing hashRing, + ClusterService clusterService + ) { + super(RCFPollingAction.NAME, transportService, actionFilters, RCFPollingRequest::new); + this.transportService = transportService; + this.modelManager = modelManager; + this.hashRing = hashRing; + this.option = TransportRequestOptions + .builder() + .withType(TransportRequestOptions.Type.REG) + .withTimeout(AnomalyDetectorSettings.REQUEST_TIMEOUT.get(settings)) + .build(); + this.clusterService = clusterService; + } + + @Override + protected void doExecute(Task task, RCFPollingRequest request, ActionListener listener) { + + String adID = request.getAdID(); + + if (!EnabledSetting.isADPluginEnabled()) { + throw new EndRunException(adID, CommonErrorMessages.DISABLED_ERR_MSG, true); + } + + String rcfModelID = modelManager.getRcfModelId(adID, 0); + + Optional rcfNode = hashRing.getOwningNode(rcfModelID.toString()); + if (!rcfNode.isPresent()) { + LOG.error("Cannot find model hosting node for {}", adID); + return; + } + + 
String rcfNodeId = rcfNode.get().getId(); + + DiscoveryNode localNode = clusterService.localNode(); + + if (localNode.getId().equals(rcfNodeId)) { + listener.onResponse(new RCFPollingResponse(modelManager.getTotalUpdates(rcfModelID))); + } else { + // redirect + LOG.info("Sending RCF polling request to {} for model {}", rcfNodeId, rcfModelID); + + transportService + .sendRequest( + rcfNode.get(), + RCFPollingAction.NAME, + request, + option, + new TransportResponseHandler() { + + @Override + public RCFPollingResponse read(StreamInput in) throws IOException { + return new RCFPollingResponse(in); + } + + @Override + public void handleResponse(RCFPollingResponse response) { + listener.onResponse(response); + } + + @Override + public void handleException(TransportException exp) { + listener.onFailure(exp); + } + + @Override + public String executor() { + return ThreadPool.Names.GENERIC; + } + + } + ); + } + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFResultResponse.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFResultResponse.java index e5938225..d796d66b 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFResultResponse.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFResultResponse.java @@ -27,14 +27,17 @@ public class RCFResultResponse extends ActionResponse implements ToXContentObjec public static final String RCF_SCORE_JSON_KEY = "rcfScore"; public static final String CONFIDENCE_JSON_KEY = "confidence"; public static final String FOREST_SIZE_JSON_KEY = "forestSize"; + public static final String TOTAL_UPDATES_KEY = "totalUpdates"; private double rcfScore; private double confidence; private int forestSize; + private long totalUpdates; - public RCFResultResponse(double rcfScore, double confidence, int forestSize) { + public RCFResultResponse(double rcfScore, double confidence, int forestSize, long totalUpdates) { this.rcfScore = rcfScore; 
this.confidence = confidence; this.forestSize = forestSize; + this.totalUpdates = totalUpdates; } public RCFResultResponse(StreamInput in) throws IOException { @@ -42,6 +45,7 @@ public RCFResultResponse(StreamInput in) throws IOException { rcfScore = in.readDouble(); confidence = in.readDouble(); forestSize = in.readVInt(); + totalUpdates = in.readVLong(); } public double getRCFScore() { @@ -56,11 +60,16 @@ public int getForestSize() { return forestSize; } + public long getTotalUpdates() { + return totalUpdates; + } + @Override public void writeTo(StreamOutput out) throws IOException { out.writeDouble(rcfScore); out.writeDouble(confidence); out.writeVInt(forestSize); + out.writeVLong(totalUpdates); } @Override @@ -69,6 +78,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(RCF_SCORE_JSON_KEY, rcfScore); builder.field(CONFIDENCE_JSON_KEY, confidence); builder.field(FOREST_SIZE_JSON_KEY, forestSize); + builder.field(TOTAL_UPDATES_KEY, totalUpdates); builder.endObject(); return builder; } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFResultTransportAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFResultTransportAction.java index c2b28058..51468c56 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFResultTransportAction.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFResultTransportAction.java @@ -65,7 +65,14 @@ protected void doExecute(Task task, RCFResultRequest request, ActionListener listener - .onResponse(new RCFResultResponse(result.getScore(), result.getConfidence(), result.getForestSize())), + .onResponse( + new RCFResultResponse( + result.getScore(), + result.getConfidence(), + result.getForestSize(), + result.getTotalUpdates() + ) + ), exception -> { LOG.warn(exception); listener.onFailure(exception); diff --git 
a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyIndexHandler.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyIndexHandler.java new file mode 100644 index 00000000..95be0690 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyIndexHandler.java @@ -0,0 +1,188 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.transport.handler; + +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; + +import java.util.Iterator; +import java.util.Locale; +import java.util.function.BooleanSupplier; +import java.util.function.Consumer; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.ResourceAlreadyExistsException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; +import org.elasticsearch.action.bulk.BackoffPolicy; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.index.IndexResponse; +import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.settings.Settings; +import 
org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.threadpool.ThreadPool; + +import com.amazon.opendistroforelasticsearch.ad.common.exception.AnomalyDetectionException; +import com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings; +import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; +import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils; +import com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils; + +public class AnomalyIndexHandler { + private static final Logger LOG = LogManager.getLogger(AnomalyIndexHandler.class); + + static final String CANNOT_SAVE_ERR_MSG = "Cannot save %s due to write block."; + static final String FAIL_TO_SAVE_ERR_MSG = "Fail to save %s: "; + static final String RETRY_SAVING_ERR_MSG = "Retry in saving %s: "; + static final String SUCCESS_SAVING_MSG = "Succeed in saving %s"; + + protected final Client client; + + private final ThreadPool threadPool; + private final BackoffPolicy savingBackoffPolicy; + protected final String indexName; + private final Consumer> createIndex; + private final BooleanSupplier indexExists; + // whether save to a specific doc id or not + private final boolean fixedDoc; + protected final ClientUtil clientUtil; + private final IndexUtils indexUtils; + private final ClusterService clusterService; + + public AnomalyIndexHandler( + Client client, + Settings settings, + ThreadPool threadPool, + String indexName, + Consumer> createIndex, + BooleanSupplier indexExists, + boolean fixedDoc, + ClientUtil clientUtil, + IndexUtils indexUtils, + ClusterService clusterService + ) { + this.client = client; + this.threadPool = threadPool; + this.savingBackoffPolicy = BackoffPolicy + .exponentialBackoff( + AnomalyDetectorSettings.BACKOFF_INITIAL_DELAY.get(settings), + 
AnomalyDetectorSettings.MAX_RETRY_FOR_BACKOFF.get(settings) + ); + this.indexName = indexName; + this.createIndex = createIndex; + this.indexExists = indexExists; + this.fixedDoc = fixedDoc; + this.clientUtil = clientUtil; + this.indexUtils = indexUtils; + this.clusterService = clusterService; + } + + public void index(T toSave, String detectorId) { + if (indexUtils.checkIndicesBlocked(clusterService.state(), ClusterBlockLevel.WRITE, this.indexName)) { + LOG.warn(String.format(Locale.ROOT, CANNOT_SAVE_ERR_MSG, detectorId)); + return; + } + + try { + if (!indexExists.getAsBoolean()) { + createIndex + .accept(ActionListener.wrap(initResponse -> onCreateIndexResponse(initResponse, toSave, detectorId), exception -> { + if (ExceptionsHelper.unwrapCause(exception) instanceof ResourceAlreadyExistsException) { + // It is possible the index has been created while we sending the create request + save(toSave, detectorId); + } else { + throw new AnomalyDetectionException( + detectorId, + String.format("Unexpected error creating index %s", indexName), + exception + ); + } + })); + } else { + save(toSave, detectorId); + } + } catch (Exception e) { + throw new AnomalyDetectionException( + detectorId, + String.format(Locale.ROOT, "Error in saving %s for detector %s", indexName, detectorId), + e + ); + } + } + + private void onCreateIndexResponse(CreateIndexResponse response, T toSave, String detectorId) { + if (response.isAcknowledged()) { + save(toSave, detectorId); + } else { + throw new AnomalyDetectionException(detectorId, "Creating %s with mappings call not acknowledged."); + } + } + + protected void save(T toSave, String detectorId) { + try (XContentBuilder builder = jsonBuilder()) { + IndexRequest indexRequest = new IndexRequest(indexName).source(toSave.toXContent(builder, RestHandlerUtils.XCONTENT_WITH_TYPE)); + if (fixedDoc) { + indexRequest.id(detectorId); + } + + saveIteration(indexRequest, detectorId, savingBackoffPolicy.iterator()); + } catch (Exception e) { + 
LOG.error(String.format("Failed to save %s", indexName), e); + throw new AnomalyDetectionException(detectorId, String.format("Cannot save %s", indexName)); + } + } + + void saveIteration(IndexRequest indexRequest, String detectorId, Iterator backoff) { + clientUtil + .asyncRequest( + indexRequest, + client::index, + ActionListener.wrap(response -> { LOG.debug(String.format(SUCCESS_SAVING_MSG, detectorId)); }, exception -> { + // Elasticsearch has a thread pool and a queue for write per node. A thread + // pool will have N number of workers ready to handle the requests. When a + // request comes and if a worker is free , this is handled by the worker. Now by + // default the number of workers is equal to the number of cores on that CPU. + // When the workers are full and there are more write requests, the request + // will go to queue. The size of queue is also limited. If by default size is, + // say, 200 and if there happens more parallel requests than this, then those + // requests would be rejected as you can see EsRejectedExecutionException. + // So EsRejectedExecutionException is the way that Elasticsearch tells us that + // it cannot keep up with the current indexing rate. + // When it happens, we should pause indexing a bit before trying again, ideally + // with randomized exponential backoff. + Throwable cause = ExceptionsHelper.unwrapCause(exception); + if (!(cause instanceof EsRejectedExecutionException) || !backoff.hasNext()) { + LOG.error(String.format(FAIL_TO_SAVE_ERR_MSG, detectorId), cause); + } else { + TimeValue nextDelay = backoff.next(); + LOG.warn(String.format(RETRY_SAVING_ERR_MSG, detectorId), cause); + // copy original request's source without other information like autoGeneratedTimestamp + // otherwise, an exception will be thrown indicating autoGeneratedTimestamp should not be set + // while request id is already set (id is set because we have already sent the request before). 
+ IndexRequest newReuqest = new IndexRequest(indexRequest.index()); + newReuqest.source(indexRequest.source(), indexRequest.getContentType()); + threadPool.schedule(() -> saveIteration(newReuqest, detectorId, backoff), nextDelay, ThreadPool.Names.SAME); + } + }) + ); + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandler.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandler.java deleted file mode 100644 index 670503c6..00000000 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandler.java +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. 
- */ - -package com.amazon.opendistroforelasticsearch.ad.transport.handler; - -import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; - -import java.util.Iterator; -import java.util.Locale; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.elasticsearch.ExceptionsHelper; -import org.elasticsearch.ResourceAlreadyExistsException; -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; -import org.elasticsearch.action.bulk.BackoffPolicy; -import org.elasticsearch.action.index.IndexRequest; -import org.elasticsearch.action.index.IndexResponse; -import org.elasticsearch.action.support.IndicesOptions; -import org.elasticsearch.client.Client; -import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.block.ClusterBlockLevel; -import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; -import org.elasticsearch.cluster.service.ClusterService; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.threadpool.ThreadPool; - -import com.amazon.opendistroforelasticsearch.ad.common.exception.AnomalyDetectionException; -import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; -import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; -import com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings; -import com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils; - -public class AnomalyResultHandler { - private static final Logger LOG = LogManager.getLogger(AnomalyResultHandler.class); - - static final String CANNOT_SAVE_ERR_MSG = "Cannot save anomaly result due to write block."; - static final String FAIL_TO_SAVE_ERR_MSG = "Fail to save anomaly 
index: "; - static final String RETRY_SAVING_ERR_MSG = "Retry in saving anomaly index: "; - static final String SUCCESS_SAVING_MSG = "SSUCCESS_SAVING_MSGuccess in saving anomaly index: "; - - private final Client client; - private final ClusterService clusterService; - private final IndexNameExpressionResolver indexNameExpressionResolver; - private final AnomalyDetectionIndices anomalyDetectionIndices; - private final ThreadPool threadPool; - private final BackoffPolicy resultSavingBackoffPolicy; - - public AnomalyResultHandler( - Client client, - Settings settings, - ClusterService clusterService, - IndexNameExpressionResolver indexNameExpressionResolver, - AnomalyDetectionIndices anomalyDetectionIndices, - ThreadPool threadPool - ) { - this.client = client; - this.clusterService = clusterService; - this.indexNameExpressionResolver = indexNameExpressionResolver; - this.anomalyDetectionIndices = anomalyDetectionIndices; - this.threadPool = threadPool; - this.resultSavingBackoffPolicy = BackoffPolicy - .exponentialBackoff( - AnomalyDetectorSettings.BACKOFF_INITIAL_DELAY.get(settings), - AnomalyDetectorSettings.MAX_RETRY_FOR_BACKOFF.get(settings) - ); - } - - public void indexAnomalyResult(AnomalyResult anomalyResult) { - try { - if (checkIndicesBlocked(clusterService.state(), ClusterBlockLevel.WRITE, AnomalyResult.ANOMALY_RESULT_INDEX)) { - LOG.warn(CANNOT_SAVE_ERR_MSG); - return; - } - if (!anomalyDetectionIndices.doesAnomalyResultIndexExist()) { - anomalyDetectionIndices - .initAnomalyResultIndexDirectly( - ActionListener.wrap(initResponse -> onCreateAnomalyResultIndexResponse(initResponse, anomalyResult), exception -> { - if (ExceptionsHelper.unwrapCause(exception) instanceof ResourceAlreadyExistsException) { - // It is possible the index has been created while we sending the create request - saveDetectorResult(anomalyResult); - } else { - throw new AnomalyDetectionException( - anomalyResult.getDetectorId(), - "Unexpected error creating anomaly result index", - 
exception - ); - } - }) - ); - } else { - saveDetectorResult(anomalyResult); - } - } catch (Exception e) { - throw new AnomalyDetectionException( - anomalyResult.getDetectorId(), - String - .format( - Locale.ROOT, - "Error in saving anomaly index for ID %s from %s to %s", - anomalyResult.getDetectorId(), - anomalyResult.getDataStartTime(), - anomalyResult.getDataEndTime() - ), - e - ); - } - } - - /** - * Similar to checkGlobalBlock, we check block on the indices level. - * - * @param state Cluster state - * @param level block level - * @param indices the indices on which to check block - * @return whether any of the index has block on the level. - */ - private boolean checkIndicesBlocked(ClusterState state, ClusterBlockLevel level, String... indices) { - // the original index might be an index expression with wildcards like "log*", - // so we need to expand the expression to concrete index name - String[] concreteIndices = indexNameExpressionResolver.concreteIndexNames(state, IndicesOptions.lenientExpandOpen(), indices); - - return state.blocks().indicesBlockedException(level, concreteIndices) != null; - } - - private void onCreateAnomalyResultIndexResponse(CreateIndexResponse response, AnomalyResult anomalyResult) { - if (response.isAcknowledged()) { - saveDetectorResult(anomalyResult); - } else { - throw new AnomalyDetectionException( - anomalyResult.getDetectorId(), - "Creating anomaly result index with mappings call not acknowledged." 
- ); - } - } - - private void saveDetectorResult(AnomalyResult anomalyResult) { - try (XContentBuilder builder = jsonBuilder()) { - IndexRequest indexRequest = new IndexRequest(AnomalyResult.ANOMALY_RESULT_INDEX) - .source(anomalyResult.toXContent(builder, RestHandlerUtils.XCONTENT_WITH_TYPE)); - saveDetectorResult( - indexRequest, - String - .format( - Locale.ROOT, - "ID %s from %s to %s", - anomalyResult.getDetectorId(), - anomalyResult.getDataStartTime(), - anomalyResult.getDataEndTime() - ), - resultSavingBackoffPolicy.iterator() - ); - } catch (Exception e) { - LOG.error("Failed to save anomaly result", e); - throw new AnomalyDetectionException(anomalyResult.getDetectorId(), "Cannot save result"); - } - } - - void saveDetectorResult(IndexRequest indexRequest, String context, Iterator backoff) { - client.index(indexRequest, ActionListener.wrap(response -> LOG.debug(SUCCESS_SAVING_MSG + context), exception -> { - // Elasticsearch has a thread pool and a queue for write per node. A thread - // pool will have N number of workers ready to handle the requests. When a - // request comes and if a worker is free , this is handled by the worker. Now by - // default the number of workers is equal to the number of cores on that CPU. - // When the workers are full and there are more write requests, the request - // will go to queue. The size of queue is also limited. If by default size is, - // say, 200 and if there happens more parallel requests than this, then those - // requests would be rejected as you can see EsRejectedExecutionException. - // So EsRejectedExecutionException is the way that Elasticsearch tells us that - // it cannot keep up with the current indexing rate. - // When it happens, we should pause indexing a bit before trying again, ideally - // with randomized exponential backoff. 
- Throwable cause = ExceptionsHelper.unwrapCause(exception); - if (!(cause instanceof EsRejectedExecutionException) || !backoff.hasNext()) { - LOG.error(FAIL_TO_SAVE_ERR_MSG + context, cause); - } else { - TimeValue nextDelay = backoff.next(); - LOG.warn(RETRY_SAVING_ERR_MSG + context, cause); - // copy original request's source without other information like autoGeneratedTimestamp - // otherwise, an exception will be thrown indicating autoGeneratedTimestamp should not be set - // while request id is already set (id is set because we have already sent the request before). - IndexRequest newReuqest = new IndexRequest(AnomalyResult.ANOMALY_RESULT_INDEX); - newReuqest.source(indexRequest.source(), indexRequest.getContentType()); - threadPool.schedule(() -> saveDetectorResult(newReuqest, context, backoff), nextDelay, ThreadPool.Names.SAME); - } - })); - } -} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectorStateHandler.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectorStateHandler.java new file mode 100644 index 00000000..6dd9d9bd --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectorStateHandler.java @@ -0,0 +1,175 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.opendistroforelasticsearch.ad.transport.handler; + +import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; + +import java.io.IOException; +import java.time.Instant; +import java.util.function.BooleanSupplier; +import java.util.function.Consumer; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; +import org.elasticsearch.action.get.GetRequest; +import org.elasticsearch.action.get.GetResponse; +import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.IndexNotFoundException; +import org.elasticsearch.threadpool.ThreadPool; + +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; +import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; +import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils; + +public class DetectorStateHandler extends AnomalyIndexHandler { + interface GetInfoStrategy { + DetectorInternalState createNewInfo(DetectorInternalState info); + } + + class TotalRcfUpdatesStrategy implements GetInfoStrategy { + private long totalRcfUpdates; + + TotalRcfUpdatesStrategy(long totalRcfUpdates) { + this.totalRcfUpdates = totalRcfUpdates; + } + + @Override + public DetectorInternalState createNewInfo(DetectorInternalState info) { + DetectorInternalState newInfo = null; + if (info == null) { + newInfo = new 
DetectorInternalState.Builder().rcfUpdates(totalRcfUpdates).lastUpdateTime(Instant.now()).build(); + } else { + newInfo = (DetectorInternalState) info.clone(); + newInfo.setRcfUpdates(totalRcfUpdates); + newInfo.setLastUpdateTime(Instant.now()); + } + return newInfo; + } + } + + class ErrorStrategy implements GetInfoStrategy { + private String error; + + ErrorStrategy(String error) { + this.error = error; + } + + @Override + public DetectorInternalState createNewInfo(DetectorInternalState info) { + DetectorInternalState newInfo = null; + if (info == null) { + newInfo = new DetectorInternalState.Builder().error(error).lastUpdateTime(Instant.now()).build(); + } else { + newInfo = (DetectorInternalState) info.clone(); + newInfo.setError(error); + newInfo.setLastUpdateTime(Instant.now()); + } + + return newInfo; + } + } + + private static final Logger LOG = LogManager.getLogger(DetectorStateHandler.class); + + public DetectorStateHandler( + Client client, + Settings settings, + ThreadPool threadPool, + Consumer> createIndex, + BooleanSupplier indexExists, + ClientUtil clientUtil, + IndexUtils indexUtils, + ClusterService clusterService + ) { + super( + client, + settings, + threadPool, + DetectorInternalState.DETECTOR_STATE_INDEX, + createIndex, + indexExists, + true, + clientUtil, + indexUtils, + clusterService + ); + } + + public void saveRcfUpdates(long totalRcfUpdates, String detectorId) { + if (totalRcfUpdates == 0L) { + // either initialization hasn't started or all rcf partitions are missing + LOG.info(String.format("Don't save the info of detector %s as its total updates is 0", detectorId)); + return; + } + + update(detectorId, new TotalRcfUpdatesStrategy(totalRcfUpdates)); + } + + public void saveError(String error, String detectorId) { + update(detectorId, new ErrorStrategy(error)); + } + + /** + * Updates a detector's info according to the given GetInfoStrategy + * @param detectorId detector id + * @param handler specifies how to convert from existing info object to an 
object we want to save + */ + private void update(String detectorId, GetInfoStrategy handler) { + try { + GetRequest getRequest = new GetRequest(this.indexName).id(detectorId); + + clientUtil.asyncRequest(getRequest, client::get, ActionListener.wrap(response -> { + DetectorInternalState newInfo = null; + if (response.isExists()) { + try ( + XContentParser parser = XContentType.JSON + .xContent() + .createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, response.getSourceAsString()) + ) { + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser::getTokenLocation); + DetectorInternalState info = DetectorInternalState.parse(parser); + newInfo = handler.createNewInfo(info); + + } catch (IOException e) { + LOG.error("Failed to update AD info for " + detectorId, e); + return; + } + } else { + newInfo = handler.createNewInfo(null); + } + super.index(newInfo, detectorId); + }, exception -> { + Throwable cause = ExceptionsHelper.unwrapCause(exception); + if (cause instanceof IndexNotFoundException) { + super.index(handler.createNewInfo(null), detectorId); + } else { + LOG.error("Failed to get detector info " + detectorId, exception); + } + })); + } catch (Exception e) { + LOG.error("Failed to update AD info for " + detectorId, e); + } + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtils.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtils.java index b54d9b86..82b881c4 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtils.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtils.java @@ -22,9 +22,13 @@ import org.apache.logging.log4j.Logger; import org.elasticsearch.action.admin.indices.stats.IndicesStatsRequest; import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse; +import org.elasticsearch.action.support.IndicesOptions; import org.elasticsearch.client.Client; +import 
org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.block.ClusterBlockLevel; import org.elasticsearch.cluster.health.ClusterIndexHealth; import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.service.ClusterService; public class IndexUtils { @@ -44,6 +48,7 @@ public class IndexUtils { private Client client; private ClientUtil clientUtil; private ClusterService clusterService; + private final IndexNameExpressionResolver indexNameExpressionResolver; /** * Constructor @@ -51,11 +56,18 @@ public class IndexUtils { * @param client Client to make calls to ElasticSearch * @param clientUtil AD Client utility * @param clusterService ES ClusterService + * @param indexNameExpressionResolver index name resolver */ - public IndexUtils(Client client, ClientUtil clientUtil, ClusterService clusterService) { + public IndexUtils( + Client client, + ClientUtil clientUtil, + ClusterService clusterService, + IndexNameExpressionResolver indexNameExpressionResolver + ) { this.client = client; this.clientUtil = clientUtil; this.clusterService = clusterService; + this.indexNameExpressionResolver = indexNameExpressionResolver; } /** @@ -117,4 +129,20 @@ public Long getNumberOfDocumentsInIndex(String indexName) { Optional response = clientUtil.timedRequest(indicesStatsRequest, logger, client.admin().indices()::stats); return response.map(r -> r.getIndex(indexName).getPrimaries().docs.getCount()).orElse(-1L); } + + /** + * Similar to checkGlobalBlock, we check block on the indices level. + * + * @param state Cluster state + * @param level block level + * @param indices the indices on which to check block + * @return whether any of the index has block on the level. + */ + public boolean checkIndicesBlocked(ClusterState state, ClusterBlockLevel level, String... 
indices) { + // the original index might be an index expression with wildcards like "log*", + // so we need to expand the expression to concrete index name + String[] concreteIndices = indexNameExpressionResolver.concreteIndexNames(state, IndicesOptions.lenientExpandOpen(), indices); + + return state.blocks().indicesBlockedException(level, concreteIndices) != null; + } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListener.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListener.java index 3f42a18c..39c829a9 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListener.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListener.java @@ -32,6 +32,8 @@ */ public class MultiResponsesDelegateActionListener implements ActionListener { private static final Logger LOG = LogManager.getLogger(MultiResponsesDelegateActionListener.class); + static final String NO_RESPONSE = "No response collected"; + private final ActionListener delegate; private final AtomicInteger collectedResponseCount; private final int maxResponseCount; @@ -81,7 +83,7 @@ public void onFailure(Exception e) { private void finish() { if (this.exceptions.size() == 0) { if (savedResponses.size() == 0) { - this.delegate.onFailure(new RuntimeException("No response collected")); + this.delegate.onFailure(new RuntimeException(NO_RESPONSE)); } else { T response0 = savedResponses.get(0); for (int i = 1; i < savedResponses.size(); i++) { diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumer.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumer.java new file mode 100644 index 00000000..afb1b551 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumer.java @@ -0,0 +1,27 @@ +/* + * Copyright 2020 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.util; + +/** + * A consumer that can throw a checked exception + * + * @param <T> method parameter type + * @param <E> Exception type + */ +@FunctionalInterface +public interface ThrowingConsumer { + void accept(T t) throws E; +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumerWrapper.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumerWrapper.java new file mode 100644 index 00000000..2facdc92 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumerWrapper.java @@ -0,0 +1,41 @@ +/* + * Copyright 2020 Amazon.com, Inc. 
+ */ + +package com.amazon.opendistroforelasticsearch.ad.util; + +import java.util.function.Consumer; + +public class ThrowingConsumerWrapper { + /** + * Utility method to use a method throwing checked exception inside a function + * that does not throw the corresponding checked exception. This happens + * when we are in a ES function that we have no control over its signature. + * Convert the checked exception thrown by by throwingConsumer to a RuntimeException + * so that the compier won't complain. + * @param the method's parameter type + * @param throwingConsumer the method reference that can throw checked exception + * @return converted method reference + */ + public static Consumer throwingConsumerWrapper(ThrowingConsumer throwingConsumer) { + + return i -> { + try { + throwingConsumer.accept(i); + } catch (Exception ex) { + throw new RuntimeException(ex); + } + }; + } +} diff --git a/src/main/resources/mappings/anomaly-state.json b/src/main/resources/mappings/anomaly-state.json new file mode 100644 index 00000000..d26c46bb --- /dev/null +++ b/src/main/resources/mappings/anomaly-state.json @@ -0,0 +1,21 @@ +{ + "dynamic": false, + "_meta": { + "schema_version": 1 + }, + "properties": { + "schema_version": { + "type": "integer" + }, + "rcf_updates": { + "type": "integer" + }, + "last_update_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "error": { + "type": "text" + } + } +} \ No newline at end of file diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AbstractADTest.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AbstractADTest.java index 9cb23a24..0c3f169d 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AbstractADTest.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AbstractADTest.java @@ -18,6 +18,8 @@ import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; 
import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; @@ -52,31 +54,70 @@ protected TestAppender(String name) { public List messages = new ArrayList(); - public boolean containsMessage(String msg) { + public boolean containsMessage(String msg, boolean formatString) { + Pattern p = null; + if (formatString) { + String regex = convertToRegex(msg); + p = Pattern.compile(regex); + } for (String logMsg : messages) { LOG.info(logMsg); - if (logMsg.contains(msg)) { + if (p != null) { + Matcher m = p.matcher(logMsg); + if (m.matches()) { + return true; + } + } else if (logMsg.contains(msg)) { return true; } } return false; } - public int countMessage(String msg) { + public boolean containsMessage(String msg) { + return containsMessage(msg, false); + } + + public int countMessage(String msg, boolean formatString) { + Pattern p = null; + if (formatString) { + String regex = convertToRegex(msg); + p = Pattern.compile(regex); + } int count = 0; for (String logMsg : messages) { LOG.info(logMsg); - if (logMsg.contains(msg)) { + if (p != null) { + Matcher m = p.matcher(logMsg); + if (m.matches()) { + count++; + } + } else if (logMsg.contains(msg)) { count++; } } return count; } + public int countMessage(String msg) { + return countMessage(msg, false); + } + @Override public void append(LogEvent event) { messages.add(event.getMessage().getFormattedMessage()); } + + /** + * Convert a string with format like "Cannot save %s due to write block." + * to a regex with .* like "Cannot save .* due to write block." 
+ * @return converted regex + */ + private String convertToRegex(String formattedStr) { + int percentIndex = formattedStr.indexOf("%"); + return formattedStr.substring(0, percentIndex) + ".*" + formattedStr.substring(percentIndex + 2); + } + } protected static ThreadPool threadPool; diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunnerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunnerTests.java index 8be5cdb5..ec6164a9 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunnerTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunnerTests.java @@ -40,6 +40,7 @@ import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.settings.Settings; @@ -62,10 +63,15 @@ import org.mockito.MockitoAnnotations; import com.amazon.opendistroforelasticsearch.ad.common.exception.EndRunException; +import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; +import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; import com.amazon.opendistroforelasticsearch.ad.model.IntervalTimeConfiguration; -import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyResultHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyIndexHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.DetectorStateHandler; import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; +import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils; +import 
com.amazon.opendistroforelasticsearch.ad.util.ThrowingConsumerWrapper; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.JobExecutionContext; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.LockModel; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.ScheduledJobParameter; @@ -103,7 +109,9 @@ public class AnomalyDetectorJobRunnerTests extends AbstractADTest { private Iterator backoff; @Mock - private AnomalyResultHandler anomalyResultHandler; + private AnomalyIndexHandler anomalyResultHandler; + + private DetectorStateHandler detectorInfoHandler; @BeforeClass public static void setUpBeforeClass() { @@ -129,17 +137,30 @@ public void setup() throws Exception { runner.setClientUtil(clientUtil); runner.setAnomalyResultHandler(anomalyResultHandler); + Settings settings = Settings + .builder() + .put("opendistro.anomaly_detection.max_retry_for_backoff", 2) + .put("opendistro.anomaly_detection.backoff_initial_delay", TimeValue.timeValueMillis(1)) + .put("opendistro.anomaly_detection.max_retry_for_end_run_exception", 3) + .build(); setUpJobParameter(); - runner - .setSettings( - Settings - .builder() - .put("opendistro.anomaly_detection.max_retry_for_backoff", 2) - .put("opendistro.anomaly_detection.backoff_initial_delay", TimeValue.timeValueMillis(1)) - .put("opendistro.anomaly_detection.max_retry_for_end_run_exception", 3) - .build() - ); + runner.setSettings(settings); + + AnomalyDetectionIndices anomalyDetectionIndices = mock(AnomalyDetectionIndices.class); + IndexNameExpressionResolver indexNameResolver = mock(IndexNameExpressionResolver.class); + IndexUtils indexUtils = new IndexUtils(client, clientUtil, clusterService, indexNameResolver); + detectorInfoHandler = new DetectorStateHandler( + client, + settings, + threadPool, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initDetectorStateIndex), + anomalyDetectionIndices::doesDetectorStateIndexExist, + this.clientUtil, + indexUtils, + clusterService + 
); + runner.setDetectorStateHandler(detectorInfoHandler); lockService = new LockService(client, clusterService); doReturn(lockService).when(context).getLockService(); @@ -215,13 +236,13 @@ public void testRunAdJobWithEndRunExceptionNow() { LockModel lock = new LockModel("indexName", "jobId", Instant.now(), 10, false); Exception exception = new EndRunException(jobParameter.getName(), randomAlphaOfLength(5), true); runner.handleAdException(jobParameter, lockService, lock, Instant.now().minusMillis(1000 * 60), Instant.now(), exception); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); } @Test public void testRunAdJobWithEndRunExceptionNowAndExistingAdJob() { testRunAdJobWithEndRunExceptionNowAndStopAdJob(true, true, true); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); verify(clientUtil).asyncRequest(any(IndexRequest.class), any(), any()); assertTrue(testAppender.containsMessage("AD Job was disabled by JobRunner for")); } @@ -229,7 +250,7 @@ public void testRunAdJobWithEndRunExceptionNowAndExistingAdJob() { @Test public void testRunAdJobWithEndRunExceptionNowAndExistingAdJobAndIndexException() { testRunAdJobWithEndRunExceptionNowAndStopAdJob(true, true, false); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); verify(clientUtil).asyncRequest(any(IndexRequest.class), any(), any()); assertTrue(testAppender.containsMessage("Failed to disable AD job for")); } @@ -237,7 +258,7 @@ public void testRunAdJobWithEndRunExceptionNowAndExistingAdJobAndIndexException( @Test public void testRunAdJobWithEndRunExceptionNowAndNotExistingEnabledAdJob() { testRunAdJobWithEndRunExceptionNowAndStopAdJob(false, true, true); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); verify(client, never()).index(any(), any()); 
assertFalse(testAppender.containsMessage("AD Job was disabled by JobRunner for")); assertFalse(testAppender.containsMessage("Failed to disable AD job for")); @@ -246,7 +267,7 @@ public void testRunAdJobWithEndRunExceptionNowAndNotExistingEnabledAdJob() { @Test public void testRunAdJobWithEndRunExceptionNowAndExistingDisabledAdJob() { testRunAdJobWithEndRunExceptionNowAndStopAdJob(true, false, true); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); verify(client, never()).index(any(), any()); assertFalse(testAppender.containsMessage("AD Job was disabled by JobRunner for")); } @@ -254,7 +275,7 @@ public void testRunAdJobWithEndRunExceptionNowAndExistingDisabledAdJob() { @Test public void testRunAdJobWithEndRunExceptionNowAndNotExistingDisabledAdJob() { testRunAdJobWithEndRunExceptionNowAndStopAdJob(false, false, true); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); verify(client, never()).index(any(), any()); assertFalse(testAppender.containsMessage("AD Job was disabled by JobRunner for")); } @@ -323,7 +344,7 @@ public void testRunAdJobWithEndRunExceptionNowAndGetJobException() { }).when(clientUtil).asyncRequest(any(GetRequest.class), any(), any()); runner.handleAdException(jobParameter, lockService, lock, Instant.now().minusMillis(1000 * 60), Instant.now(), exception); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); assertEquals(1, testAppender.countMessage("JobRunner failed to get detector job")); } @@ -335,7 +356,7 @@ public void testRunAdJobWithEndRunExceptionNowAndFailToGetJob() { doThrow(new RuntimeException("fail to get AD job")).when(clientUtil).asyncRequest(any(GetRequest.class), any(), any()); runner.handleAdException(jobParameter, lockService, lock, Instant.now().minusMillis(1000 * 60), Instant.now(), exception); - verify(anomalyResultHandler).indexAnomalyResult(any()); + 
verify(anomalyResultHandler).index(any(), any()); assertEquals(1, testAppender.countMessage("JobRunner failed to stop AD job")); } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunnerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunnerTests.java index 1f135bba..8ec87356 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunnerTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunnerTests.java @@ -26,8 +26,8 @@ import java.io.IOException; import java.time.Instant; +import java.time.temporal.ChronoUnit; import java.util.Arrays; -import java.util.Calendar; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -37,19 +37,14 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.elasticsearch.Version; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.FailedNodeException; import org.elasticsearch.action.get.GetRequest; import org.elasticsearch.action.get.GetResponse; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.Client; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Settings; @@ -61,12 +56,14 @@ import org.junit.Before; import org.junit.BeforeClass; -import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import 
com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.amazon.opendistroforelasticsearch.ad.model.DetectorProfile; import com.amazon.opendistroforelasticsearch.ad.model.DetectorState; +import com.amazon.opendistroforelasticsearch.ad.model.InitProgressProfile; +import com.amazon.opendistroforelasticsearch.ad.model.IntervalTimeConfiguration; import com.amazon.opendistroforelasticsearch.ad.model.ModelProfile; import com.amazon.opendistroforelasticsearch.ad.model.ProfileName; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileNodeResponse; @@ -74,44 +71,55 @@ import com.amazon.opendistroforelasticsearch.ad.util.DiscoveryNodeFilterer; public class AnomalyDetectorProfileRunnerTests extends ESTestCase { - private static final Logger LOG = LogManager.getLogger(AnomalyDetectorProfileRunnerTests.class); private AnomalyDetectorProfileRunner runner; private Client client; private DiscoveryNodeFilterer nodeFilter; private AnomalyDetector detector; - private IndexNameExpressionResolver resolver; private ClusterService clusterService; private static Set stateOnly; private static Set stateNError; private static Set modelProfile; + private static Set stateInitProgress; private static String noFullShingleError = "No full shingle in current detection window"; private static String stoppedError = "Stopped detector as job failed consecutively for more than 3 times: Having trouble querying data." 
+ " Maybe all of your features have been disabled."; - private Calendar calendar; - private String indexWithRequiredError1 = ".opendistro-anomaly-results-history-2020.04.06-1"; - private String indexWithRequiredError2 = ".opendistro-anomaly-results-history-2020.04.07-000002"; + + private int requiredSamples; + private int neededSamples; // profile model related - String node1; - String nodeName1; - DiscoveryNode discoveryNode1; + private String node1; + private String nodeName1; + private DiscoveryNode discoveryNode1; + + private String node2; + private String nodeName2; + private DiscoveryNode discoveryNode2; - String node2; - String nodeName2; - DiscoveryNode discoveryNode2; + private long modelSize; + private String model1Id; + private String model0Id; - long modelSize; - String model1Id; - String model0Id; + private int shingleSize; - int shingleSize; + private int detectorIntervalMin; + private GetResponse detectorGetReponse; @Override protected NamedXContentRegistry xContentRegistry() { SearchModule searchModule = new SearchModule(Settings.EMPTY, false, Collections.emptyList()); List entries = searchModule.getNamedXContents(); - entries.addAll(Arrays.asList(AnomalyDetector.XCONTENT_REGISTRY, AnomalyResult.XCONTENT_REGISTRY)); + entries + .addAll( + Arrays + .asList( + AnomalyDetector.XCONTENT_REGISTRY, + AnomalyResult.XCONTENT_REGISTRY, + DetectorInternalState.XCONTENT_REGISTRY, + AnomalyDetectorJob.XCONTENT_REGISTRY + ) + ); return new NamedXContentRegistry(entries); } @@ -122,6 +130,9 @@ public static void setUpOnce() { stateNError = new HashSet(); stateNError.add(ProfileName.ERROR); stateNError.add(ProfileName.STATE); + stateInitProgress = new HashSet(); + stateInitProgress.add(ProfileName.INIT_PROGRESS); + stateInitProgress.add(ProfileName.STATE); modelProfile = new HashSet( Arrays.asList(ProfileName.SHINGLE_SIZE, ProfileName.MODELS, ProfileName.COORDINATING_NODE, ProfileName.TOTAL_SIZE_IN_BYTES) ); @@ -133,57 +144,77 @@ public void setUp() throws 
Exception { super.setUp(); client = mock(Client.class); nodeFilter = mock(DiscoveryNodeFilterer.class); - calendar = mock(Calendar.class); - resolver = mock(IndexNameExpressionResolver.class); clusterService = mock(ClusterService.class); - when(resolver.concreteIndexNames(any(), any(), any())) - .thenReturn( - new String[] { indexWithRequiredError1, indexWithRequiredError2, ".opendistro-anomaly-results-history-2020.04.08-000003" } - ); when(clusterService.state()).thenReturn(ClusterState.builder(new ClusterName("test cluster")).build()); - runner = new AnomalyDetectorProfileRunner(client, xContentRegistry(), nodeFilter, resolver, clusterService, calendar); + requiredSamples = 128; + neededSamples = 5; + runner = new AnomalyDetectorProfileRunner(client, xContentRegistry(), nodeFilter, requiredSamples); + + detectorIntervalMin = 3; + detectorGetReponse = mock(GetResponse.class); + } + + enum DetectorStatus { + INDEX_NOT_EXIST, + NO_DOC, + EXIST } enum JobStatus { INDEX_NOT_EXIT, DISABLED, - ENABLED, - DISABLED_ROTATED_1, - DISABLED_ROTATED_2, - DISABLED_ROTATED_3 + ENABLED } enum InittedEverResultStatus { INDEX_NOT_EXIT, - GREATER_THAN_ZERO, + INIT_DONE, EMPTY, - EXCEPTION + EXCEPTION, + INITTING } enum ErrorResultStatus { INDEX_NOT_EXIT, NO_ERROR, SHINGLE_ERROR, - STOPPED_ERROR_1, - STOPPED_ERROR_2 + STOPPED_ERROR } @SuppressWarnings("unchecked") - private void setUpClientGet(boolean detectorExists, JobStatus jobStatus) throws IOException { - detector = TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), Instant.now()); + private void setUpClientGet( + DetectorStatus detectorStatus, + JobStatus jobStatus, + InittedEverResultStatus inittedEverResultStatus, + ErrorResultStatus errorResultStatus + ) throws IOException { + detector = TestHelpers.randomAnomalyDetectorWithInterval(new IntervalTimeConfiguration(detectorIntervalMin, ChronoUnit.MINUTES)); doAnswer(invocation -> { Object[] args = invocation.getArguments(); GetRequest request = (GetRequest) 
args[0]; ActionListener listener = (ActionListener) args[1]; if (request.index().equals(ANOMALY_DETECTORS_INDEX)) { - if (detectorExists) { - listener.onResponse(TestHelpers.createGetResponse(detector, detector.getDetectorId())); - } else { - listener.onFailure(new IndexNotFoundException(ANOMALY_DETECTORS_INDEX)); + switch (detectorStatus) { + case EXIST: + listener + .onResponse( + TestHelpers.createGetResponse(detector, detector.getDetectorId(), AnomalyDetector.ANOMALY_DETECTORS_INDEX) + ); + break; + case INDEX_NOT_EXIST: + listener.onFailure(new IndexNotFoundException(ANOMALY_DETECTORS_INDEX)); + break; + case NO_DOC: + when(detectorGetReponse.isExists()).thenReturn(false); + listener.onResponse(detectorGetReponse); + break; + default: + assertTrue("should not reach here", false); + break; } - } else { + } else if (request.index().equals(ANOMALY_DETECTOR_JOB_INDEX)) { AnomalyDetectorJob job = null; switch (jobStatus) { case INDEX_NOT_EXIT: @@ -191,63 +222,37 @@ private void setUpClientGet(boolean detectorExists, JobStatus jobStatus) throws break; case DISABLED: job = TestHelpers.randomAnomalyDetectorJob(false); - listener.onResponse(TestHelpers.createGetResponse(job, detector.getDetectorId())); + listener + .onResponse( + TestHelpers.createGetResponse(job, detector.getDetectorId(), AnomalyDetectorJob.ANOMALY_DETECTOR_JOB_INDEX) + ); break; case ENABLED: job = TestHelpers.randomAnomalyDetectorJob(true); - listener.onResponse(TestHelpers.createGetResponse(job, detector.getDetectorId())); - break; - case DISABLED_ROTATED_1: - // enabled time is smaller than 1586217600000, while disabled time is larger than 1586217600000 - // which is April 7, 2020 12:00:00 AM. 
- job = TestHelpers - .randomAnomalyDetectorJob(false, Instant.ofEpochMilli(1586217500000L), Instant.ofEpochMilli(1586227600000L)); - listener.onResponse(TestHelpers.createGetResponse(job, detector.getDetectorId())); - break; - case DISABLED_ROTATED_2: - // both enabled and disabled time are larger than 1586217600000, - // which is April 7, 2020 12:00:00 AM. - job = TestHelpers - .randomAnomalyDetectorJob(false, Instant.ofEpochMilli(1586217500000L), Instant.ofEpochMilli(1586227600000L)); - listener.onResponse(TestHelpers.createGetResponse(job, detector.getDetectorId())); - break; - case DISABLED_ROTATED_3: - // both enabled and disabled time are larger than 1586131200000, - // which is April 6, 2020 12:00:00 AM. - job = TestHelpers - .randomAnomalyDetectorJob(false, Instant.ofEpochMilli(1586131300000L), Instant.ofEpochMilli(1586131400000L)); - listener.onResponse(TestHelpers.createGetResponse(job, detector.getDetectorId())); + listener + .onResponse( + TestHelpers.createGetResponse(job, detector.getDetectorId(), AnomalyDetectorJob.ANOMALY_DETECTOR_JOB_INDEX) + ); break; default: assertTrue("should not reach here", false); break; } - } - - return null; - }).when(client).get(any(), any()); - } - - @SuppressWarnings("unchecked") - private void setUpClientSearch(InittedEverResultStatus inittedEverResultStatus, ErrorResultStatus errorResultStatus) { - doAnswer(invocation -> { - Object[] args = invocation.getArguments(); - SearchRequest request = (SearchRequest) args[0]; - ActionListener listener = (ActionListener) args[1]; - if (errorResultStatus == ErrorResultStatus.INDEX_NOT_EXIT - || inittedEverResultStatus == InittedEverResultStatus.INDEX_NOT_EXIT) { - listener.onFailure(new IndexNotFoundException(AnomalyResult.ANOMALY_RESULT_INDEX)); - return null; - } - AnomalyResult result = null; - if (request.source().query().toString().contains(AnomalyResult.ANOMALY_SCORE_FIELD)) { + } else { + if (errorResultStatus == ErrorResultStatus.INDEX_NOT_EXIT + || 
inittedEverResultStatus == InittedEverResultStatus.INDEX_NOT_EXIT) { + listener.onFailure(new IndexNotFoundException(DetectorInternalState.DETECTOR_STATE_INDEX)); + return null; + } + DetectorInternalState.Builder result = new DetectorInternalState.Builder().lastUpdateTime(Instant.now()); switch (inittedEverResultStatus) { - case GREATER_THAN_ZERO: - result = TestHelpers.randomAnomalyDetectResult(0.87); - listener.onResponse(TestHelpers.createSearchResponse(result)); + case INIT_DONE: + result.rcfUpdates(requiredSamples + 1); + break; + case INITTING: + result.rcfUpdates(requiredSamples - neededSamples); break; case EMPTY: - listener.onResponse(TestHelpers.createEmptySearchResponse()); break; case EXCEPTION: listener.onFailure(new RuntimeException()); @@ -256,51 +261,31 @@ private void setUpClientSearch(InittedEverResultStatus inittedEverResultStatus, assertTrue("should not reach here", false); break; } - } else { + switch (errorResultStatus) { case NO_ERROR: - result = TestHelpers.randomAnomalyDetectResult(null); - listener.onResponse(TestHelpers.createSearchResponse(result)); break; case SHINGLE_ERROR: - result = TestHelpers.randomAnomalyDetectResult(noFullShingleError); - listener.onResponse(TestHelpers.createSearchResponse(result)); + result.error(noFullShingleError); break; - case STOPPED_ERROR_2: - if (request.indices().length == 2) { - for (int i = 0; i < 2; i++) { - assertTrue( - request.indices()[i].equals(indexWithRequiredError1) - || request.indices()[i].equals(indexWithRequiredError2) - ); - } - result = TestHelpers.randomAnomalyDetectResult(stoppedError); - listener.onResponse(TestHelpers.createSearchResponse(result)); - } else { - assertTrue("should not reach here", false); - } - break; - case STOPPED_ERROR_1: - if (request.indices().length == 1 && request.indices()[0].equals(indexWithRequiredError1)) { - result = TestHelpers.randomAnomalyDetectResult(stoppedError); - listener.onResponse(TestHelpers.createSearchResponse(result)); - } else { - 
assertTrue("should not reach here", false); - } + case STOPPED_ERROR: + result.error(stoppedError); break; default: assertTrue("should not reach here", false); break; } + listener + .onResponse(TestHelpers.createGetResponse(result.build(), detector.getDetectorId(), DetectorInternalState.DETECTOR_STATE_INDEX)); + } return null; - }).when(client).search(any(), any()); - + }).when(client).get(any(), any()); } public void testDetectorNotExist() throws IOException, InterruptedException { - setUpClientGet(false, JobStatus.INDEX_NOT_EXIT); + setUpClientGet(DetectorStatus.INDEX_NOT_EXIST, JobStatus.INDEX_NOT_EXIT, InittedEverResultStatus.EMPTY, ErrorResultStatus.NO_ERROR); final CountDownLatch inProgressLatch = new CountDownLatch(1); runner.profile("x123", ActionListener.wrap(response -> { @@ -314,9 +299,8 @@ public void testDetectorNotExist() throws IOException, InterruptedException { } public void testDisabledJobIndexTemplate(JobStatus status) throws IOException, InterruptedException { - setUpClientGet(true, status); - DetectorProfile expectedProfile = new DetectorProfile(); - expectedProfile.setState(DetectorState.DISABLED); + setUpClientGet(DetectorStatus.EXIST, status, InittedEverResultStatus.EMPTY, ErrorResultStatus.NO_ERROR); + DetectorProfile expectedProfile = new DetectorProfile.Builder().state(DetectorState.DISABLED).build(); final CountDownLatch inProgressLatch = new CountDownLatch(1); runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { @@ -339,10 +323,8 @@ public void testJobDisabled() throws IOException, InterruptedException { public void testInitOrRunningStateTemplate(InittedEverResultStatus status, DetectorState expectedState) throws IOException, InterruptedException { - setUpClientGet(true, JobStatus.ENABLED); - setUpClientSearch(status, ErrorResultStatus.NO_ERROR); - DetectorProfile expectedProfile = new DetectorProfile(); - expectedProfile.setState(expectedState); + setUpClientGet(DetectorStatus.EXIST, JobStatus.ENABLED, status, 
ErrorResultStatus.NO_ERROR); + DetectorProfile expectedProfile = new DetectorProfile.Builder().state(expectedState).build(); final CountDownLatch inProgressLatch = new CountDownLatch(1); runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { @@ -364,53 +346,87 @@ public void testResultEmpty() throws IOException, InterruptedException { } public void testResultGreaterThanZero() throws IOException, InterruptedException { - testInitOrRunningStateTemplate(InittedEverResultStatus.GREATER_THAN_ZERO, DetectorState.RUNNING); + testInitOrRunningStateTemplate(InittedEverResultStatus.INIT_DONE, DetectorState.RUNNING); } - public void testErrorStateTemplate(InittedEverResultStatus initStatus, ErrorResultStatus status, DetectorState state, String error) - throws IOException, + public void testErrorStateTemplate( + InittedEverResultStatus initStatus, + ErrorResultStatus status, + DetectorState state, + String error, + JobStatus jobStatus + ) throws IOException, InterruptedException { - setUpClientGet(true, JobStatus.ENABLED); - setUpClientSearch(initStatus, status); - DetectorProfile expectedProfile = new DetectorProfile(); - expectedProfile.setState(state); - expectedProfile.setError(error); + setUpClientGet(DetectorStatus.EXIST, jobStatus, initStatus, status); + DetectorProfile expectedProfile = new DetectorProfile.Builder().state(state).error(error).build(); final CountDownLatch inProgressLatch = new CountDownLatch(1); runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { assertEquals(expectedProfile, response); inProgressLatch.countDown(); }, exception -> { - assertTrue("Should not reach here ", false); + logger.info(exception); + for (StackTraceElement ste : exception.getStackTrace()) { + logger.info(ste); + } + assertTrue("Should not reach here", false); inProgressLatch.countDown(); }), stateNError); assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); } public void testInitNoError() throws IOException, InterruptedException { - 
testErrorStateTemplate(InittedEverResultStatus.INDEX_NOT_EXIT, ErrorResultStatus.INDEX_NOT_EXIT, DetectorState.INIT, null); + testErrorStateTemplate( + InittedEverResultStatus.INDEX_NOT_EXIT, + ErrorResultStatus.INDEX_NOT_EXIT, + DetectorState.INIT, + null, + JobStatus.ENABLED + ); } public void testRunningNoError() throws IOException, InterruptedException { - testErrorStateTemplate(InittedEverResultStatus.GREATER_THAN_ZERO, ErrorResultStatus.NO_ERROR, DetectorState.RUNNING, null); + testErrorStateTemplate( + InittedEverResultStatus.INIT_DONE, + ErrorResultStatus.NO_ERROR, + DetectorState.RUNNING, + null, + JobStatus.ENABLED + ); } public void testRunningWithError() throws IOException, InterruptedException { testErrorStateTemplate( - InittedEverResultStatus.GREATER_THAN_ZERO, + InittedEverResultStatus.INIT_DONE, ErrorResultStatus.SHINGLE_ERROR, DetectorState.RUNNING, - noFullShingleError + noFullShingleError, + JobStatus.ENABLED + ); + } + + public void testDisabledWithError() throws IOException, InterruptedException { + testErrorStateTemplate( + InittedEverResultStatus.INITTING, + ErrorResultStatus.STOPPED_ERROR, + DetectorState.DISABLED, + stoppedError, + JobStatus.DISABLED ); } public void testInitWithError() throws IOException, InterruptedException { - testErrorStateTemplate(InittedEverResultStatus.EMPTY, ErrorResultStatus.SHINGLE_ERROR, DetectorState.INIT, noFullShingleError); + testErrorStateTemplate( + InittedEverResultStatus.EMPTY, + ErrorResultStatus.SHINGLE_ERROR, + DetectorState.INIT, + noFullShingleError, + JobStatus.ENABLED + ); } public void testExceptionOnStateFetching() throws IOException, InterruptedException { - setUpClientGet(true, JobStatus.ENABLED); - setUpClientSearch(InittedEverResultStatus.EXCEPTION, ErrorResultStatus.NO_ERROR); + setUpClientGet(DetectorStatus.EXIST, JobStatus.ENABLED, InittedEverResultStatus.EXCEPTION, ErrorResultStatus.NO_ERROR); final CountDownLatch inProgressLatch = new CountDownLatch(1); @@ -472,7 +488,6 @@ private void 
setUpClientExecute() { } }; - LOG.info("hello"); ProfileNodeResponse profileNodeResponse1 = new ProfileNodeResponse(discoveryNode1, modelSizeMap1, shingleSize); ProfileNodeResponse profileNodeResponse2 = new ProfileNodeResponse(discoveryNode2, modelSizeMap2, -1); List profileNodeResponses = Arrays.asList(profileNodeResponse1, profileNodeResponse2); @@ -487,7 +502,7 @@ private void setUpClientExecute() { } public void testProfileModels() throws InterruptedException, IOException { - setUpClientGet(true, JobStatus.ENABLED); + setUpClientGet(DetectorStatus.EXIST, JobStatus.ENABLED, InittedEverResultStatus.EMPTY, ErrorResultStatus.NO_ERROR); setUpClientExecute(); final CountDownLatch inProgressLatch = new CountDownLatch(1); @@ -515,21 +530,13 @@ public void testProfileModels() throws InterruptedException, IOException { assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); } - /** - * A detector's error message can be on a rotated index. This test makes sure we get error info - * from .opendistro-anomaly-results index that has been rolled over. 
- * @param state expected detector state - * @param jobStatus job status to config in the test case - * @throws IOException when profile API throws it - * @throws InterruptedException when our CountDownLatch has been interruptted - */ - private void stoppedDetectorErrorTemplate(DetectorState state, JobStatus jobStatus, ErrorResultStatus errorStatus) throws IOException, - InterruptedException { - setUpClientGet(true, jobStatus); - setUpClientSearch(InittedEverResultStatus.GREATER_THAN_ZERO, errorStatus); - DetectorProfile expectedProfile = new DetectorProfile(); - expectedProfile.setState(state); - expectedProfile.setError(stoppedError); + public void testInitProgress() throws IOException, InterruptedException { + setUpClientGet(DetectorStatus.EXIST, JobStatus.ENABLED, InittedEverResultStatus.INITTING, ErrorResultStatus.NO_ERROR); + DetectorProfile expectedProfile = new DetectorProfile.Builder().state(DetectorState.INIT).build(); + + // 123 / 128 rounded to 96% + InitProgressProfile profile = new InitProgressProfile("96%", neededSamples * detectorIntervalMin, neededSamples); + expectedProfile.setInitProgress(profile); final CountDownLatch inProgressLatch = new CountDownLatch(1); runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { @@ -538,36 +545,26 @@ private void stoppedDetectorErrorTemplate(DetectorState state, JobStatus jobStat }, exception -> { assertTrue("Should not reach here ", false); inProgressLatch.countDown(); - }), stateNError); + }), stateInitProgress); assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); } - /** - * Job enabled time is earlier than and disabled time is later than index 2 creation date, we expect to search 2 indices - */ - public void testDetectorStoppedEnabledTimeLtIndex2Date() throws IOException, InterruptedException { - stoppedDetectorErrorTemplate(DetectorState.DISABLED, JobStatus.DISABLED_ROTATED_1, ErrorResultStatus.STOPPED_ERROR_2); - } - - /** - * Both job enabled and disabled time are later than 
index 2 creation date, we expect to search 2 indices - */ - public void testDetectorStoppedEnabledTimeGtIndex2Date() throws IOException, InterruptedException { - stoppedDetectorErrorTemplate(DetectorState.DISABLED, JobStatus.DISABLED_ROTATED_2, ErrorResultStatus.STOPPED_ERROR_2); - } + public void testInitProgressFailImmediately() throws IOException, InterruptedException { + setUpClientGet(DetectorStatus.NO_DOC, JobStatus.ENABLED, InittedEverResultStatus.INITTING, ErrorResultStatus.NO_ERROR); + DetectorProfile expectedProfile = new DetectorProfile.Builder().state(DetectorState.INIT).build(); - /** - * Both job enabled and disabled time are earlier than index 2 creation date, we expect to search 1 indices - */ - public void testDetectorStoppedEnabledTimeGtIndex1Date() throws IOException, InterruptedException { - stoppedDetectorErrorTemplate(DetectorState.DISABLED, JobStatus.DISABLED_ROTATED_3, ErrorResultStatus.STOPPED_ERROR_1); - } + // 123 / 128 rounded to 96% + InitProgressProfile profile = new InitProgressProfile("96%", neededSamples * detectorIntervalMin, neededSamples); + expectedProfile.setInitProgress(profile); + final CountDownLatch inProgressLatch = new CountDownLatch(1); - public void testAssumption() { - assertEquals( - "profileError depends on this assumption.", - ".opendistro-anomaly-results*", - AnomalyDetectionIndices.ALL_AD_RESULTS_INDEX_PATTERN - ); + runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { + assertTrue("Should not reach here ", false); + inProgressLatch.countDown(); + }, exception -> { + assertTrue(exception.getMessage().contains(AnomalyDetectorProfileRunner.FAIL_TO_FIND_DETECTOR_MSG)); + inProgressLatch.countDown(); + }), stateInitProgress); + assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); } } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/TestHelpers.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/TestHelpers.java index c56ccdd6..cd490bf0 100644 --- 
a/src/test/java/com/amazon/opendistroforelasticsearch/ad/TestHelpers.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/TestHelpers.java @@ -99,6 +99,7 @@ import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorExecutionInput; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.amazon.opendistroforelasticsearch.ad.model.Feature; import com.amazon.opendistroforelasticsearch.ad.model.FeatureData; import com.amazon.opendistroforelasticsearch.ad.model.IntervalTimeConfiguration; @@ -230,6 +231,24 @@ public static AnomalyDetector randomAnomalyDetectorWithEmptyFeature() throws IOE ); } + public static AnomalyDetector randomAnomalyDetectorWithInterval(TimeConfiguration interval) throws IOException { + return new AnomalyDetector( + randomAlphaOfLength(10), + randomLong(), + randomAlphaOfLength(20), + randomAlphaOfLength(30), + randomAlphaOfLength(5), + ImmutableList.of(randomAlphaOfLength(10).toLowerCase()), + ImmutableList.of(randomFeature()), + randomQuery(), + interval, + randomIntervalTimeConfiguration(), + null, + randomInt(), + Instant.now().truncatedTo(ChronoUnit.SECONDS) + ); + } + public static SearchSourceBuilder randomFeatureQuery() throws IOException { String query = "{\"query\":{\"match\":{\"user\":{\"query\":\"kimchy\",\"operator\":\"OR\",\"prefix_length\":0," + "\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\"," @@ -455,12 +474,11 @@ public static void createIndex(RestClient client, String indexName, HttpEntity d ); } - public static GetResponse createGetResponse(ToXContentObject o, String id) throws IOException { + public static GetResponse createGetResponse(ToXContentObject o, String id, String indexName) throws IOException { XContentBuilder content = o.toXContent(XContentFactory.jsonBuilder(), 
ToXContent.EMPTY_PARAMS); - return new GetResponse( new GetResult( - AnomalyDetector.ANOMALY_DETECTORS_INDEX, + indexName, MapperService.SINGLE_MAPPING_NAME, id, UNASSIGNED_SEQ_NO, @@ -520,4 +538,24 @@ public static SearchResponse createEmptySearchResponse() throws IOException { SearchResponse.Clusters.EMPTY ); } + + public static AnomalyResult randomDetectInfo() { + return randomAnomalyDetectResult(randomDouble(), randomAlphaOfLength(5)); + } + + public static DetectorInternalState randomDetectInfo(long rcfUpdates) { + return randomDetectInfo(rcfUpdates, randomAlphaOfLength(5), Instant.now()); + } + + public static DetectorInternalState randomDetectInfo(String error) { + return randomDetectInfo(randomLong(), error, Instant.now()); + } + + public static DetectorInternalState randomDetectInfo(Instant lastUpdateTime) { + return randomDetectInfo(randomLong(), randomAlphaOfLength(5), lastUpdateTime); + } + + public static DetectorInternalState randomDetectInfo(long rcfUpdates, String error, Instant lastUpdateTime) { + return new DetectorInternalState.Builder().rcfUpdates(rcfUpdates).lastUpdateTime(lastUpdateTime).error(error).build(); + } } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/ml/ModelManagerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/ml/ModelManagerTests.java index 4477f481..70fd4501 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/ml/ModelManagerTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/ml/ModelManagerTests.java @@ -226,21 +226,21 @@ public void getDetectorIdForModelId_throwIllegalArgument_forInvalidId(String mod private Object[] combineRcfResultsData() { return new Object[] { - new Object[] { asList(), new CombinedRcfResult(0, 0) }, - new Object[] { asList(new RcfResult(0, 0, 0)), new CombinedRcfResult(0, 0) }, - new Object[] { asList(new RcfResult(1, 0, 50)), new CombinedRcfResult(1, 0) }, - new Object[] { asList(new RcfResult(1, 0, 50), new RcfResult(2, 0, 50)), 
new CombinedRcfResult(1.5, 0) }, + new Object[] { asList(), new CombinedRcfResult(0, 0, 0) }, + new Object[] { asList(new RcfResult(0, 0, 0, 0)), new CombinedRcfResult(0, 0, 0) }, + new Object[] { asList(new RcfResult(1, 0, 50, 12)), new CombinedRcfResult(1, 0, 12) }, + new Object[] { asList(new RcfResult(1, 0, 50, 12), new RcfResult(2, 0, 50, 13)), new CombinedRcfResult(1.5, 0, 13) }, new Object[] { - asList(new RcfResult(1, 0, 40), new RcfResult(2, 0, 60), new RcfResult(3, 0, 100)), - new CombinedRcfResult(2.3, 0) }, - new Object[] { asList(new RcfResult(0, 1, 100)), new CombinedRcfResult(0, 1) }, - new Object[] { asList(new RcfResult(0, 1, 50)), new CombinedRcfResult(0, 0.5) }, - new Object[] { asList(new RcfResult(0, 0.5, 1000)), new CombinedRcfResult(0, 0.5) }, - new Object[] { asList(new RcfResult(0, 1, 50), new RcfResult(0, 0, 50)), new CombinedRcfResult(0, 0.5) }, - new Object[] { asList(new RcfResult(0, 0.5, 50), new RcfResult(0, 0.5, 50)), new CombinedRcfResult(0, 0.5) }, + asList(new RcfResult(1, 0, 40, 12), new RcfResult(2, 0, 60, 13), new RcfResult(3, 0, 100, 14)), + new CombinedRcfResult(2.3, 0, 14) }, + new Object[] { asList(new RcfResult(0, 1, 100, 5)), new CombinedRcfResult(0, 1, 5) }, + new Object[] { asList(new RcfResult(0, 1, 50, 100)), new CombinedRcfResult(0, 0.5, 100) }, + new Object[] { asList(new RcfResult(0, 0.5, 1000, 10000)), new CombinedRcfResult(0, 0.5, 10000) }, + new Object[] { asList(new RcfResult(0, 1, 50, 12), new RcfResult(0, 0, 50, 12)), new CombinedRcfResult(0, 0.5, 12) }, + new Object[] { asList(new RcfResult(0, 0.5, 50, 101), new RcfResult(0, 0.5, 50, 101)), new CombinedRcfResult(0, 0.5, 101) }, new Object[] { - asList(new RcfResult(0, 1, 20), new RcfResult(0, 1, 30), new RcfResult(0, 0.5, 50)), - new CombinedRcfResult(0, 0.75) }, }; + asList(new RcfResult(0, 1, 20, 60), new RcfResult(0, 1, 30, 70), new RcfResult(0, 0.5, 50, 80)), + new CombinedRcfResult(0, 0.75, 80) }, }; } @Test @@ -336,7 +336,7 @@ public void 
getRcfResult_returnExpected() { RcfResult result = modelManager.getRcfResult(detectorId, rcfModelId, point); - RcfResult expected = new RcfResult(score, 0, numTrees); + RcfResult expected = new RcfResult(score, 0, numTrees, numSamples); assertEquals(expected, result); when(forest.getTotalUpdates()).thenReturn(numSamples + 1L); @@ -391,7 +391,7 @@ public void getRcfResult_returnExpectedToListener() { ActionListener listener = mock(ActionListener.class); modelManager.getRcfResult(detectorId, rcfModelId, point, listener); - RcfResult expected = new RcfResult(score, 0, numTrees); + RcfResult expected = new RcfResult(score, 0, numTrees, numSamples); verify(listener).onResponse(eq(expected)); when(forest.getTotalUpdates()).thenReturn(numSamples + 1L); diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/ml/RcfResultTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/ml/RcfResultTests.java index 7720f929..5c4dcce6 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/ml/RcfResultTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/ml/RcfResultTests.java @@ -29,7 +29,8 @@ public class RcfResultTests { private double score = 1.; private double confidence = 0; private int forestSize = 10; - private RcfResult rcfResult = new RcfResult(score, confidence, forestSize); + private long totalUpdates = 1000; + private RcfResult rcfResult = new RcfResult(score, confidence, forestSize, totalUpdates); @Test public void getters_returnExcepted() { @@ -42,11 +43,11 @@ private Object[] equalsData() { new Object[] { rcfResult, null, false }, new Object[] { rcfResult, rcfResult, true }, new Object[] { rcfResult, 1, false }, - new Object[] { rcfResult, new RcfResult(score, confidence, forestSize), true }, - new Object[] { rcfResult, new RcfResult(score + 1, confidence, forestSize), false }, - new Object[] { rcfResult, new RcfResult(score, confidence, forestSize + 1), false }, - new Object[] { rcfResult, new RcfResult(score + 1, 
confidence, forestSize + 1), false }, - new Object[] { rcfResult, new RcfResult(score, confidence + 1, forestSize), false }, }; + new Object[] { rcfResult, new RcfResult(score, confidence, forestSize, totalUpdates), true }, + new Object[] { rcfResult, new RcfResult(score + 1, confidence, forestSize, totalUpdates), false }, + new Object[] { rcfResult, new RcfResult(score, confidence, forestSize + 1, totalUpdates), false }, + new Object[] { rcfResult, new RcfResult(score + 1, confidence, forestSize + 1, totalUpdates), false }, + new Object[] { rcfResult, new RcfResult(score, confidence + 1, forestSize, totalUpdates), false }, }; } @Test @@ -57,10 +58,10 @@ public void equals_returnExpected(RcfResult result, Object other, boolean expect private Object[] hashCodeData() { return new Object[] { - new Object[] { rcfResult, new RcfResult(score, confidence, forestSize), true }, - new Object[] { rcfResult, new RcfResult(score + 1, confidence, forestSize), false }, - new Object[] { rcfResult, new RcfResult(score, confidence, forestSize + 1), false }, - new Object[] { rcfResult, new RcfResult(score + 1, confidence, forestSize + 1), false }, }; + new Object[] { rcfResult, new RcfResult(score, confidence, forestSize, totalUpdates), true }, + new Object[] { rcfResult, new RcfResult(score + 1, confidence, forestSize, totalUpdates), false }, + new Object[] { rcfResult, new RcfResult(score, confidence, forestSize + 1, totalUpdates), false }, + new Object[] { rcfResult, new RcfResult(score + 1, confidence, forestSize + 1, totalUpdates), false }, }; } @Test diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/ml/rcf/CombinedRcfResultTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/ml/rcf/CombinedRcfResultTests.java index f1fd737c..38b17bec 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/ml/rcf/CombinedRcfResultTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/ml/rcf/CombinedRcfResultTests.java @@ -28,7 +28,8 
@@ public class CombinedRcfResultTests { private double score = 1.; private double confidence = .5; - private CombinedRcfResult rcfResult = new CombinedRcfResult(score, confidence); + private long totalUpdates = 1000; + private CombinedRcfResult rcfResult = new CombinedRcfResult(score, confidence, totalUpdates); @Test public void getters_returnExcepted() { @@ -41,10 +42,10 @@ private Object[] equalsData() { new Object[] { rcfResult, null, false }, new Object[] { rcfResult, rcfResult, true }, new Object[] { rcfResult, 1, false }, - new Object[] { rcfResult, new CombinedRcfResult(score, confidence), true }, - new Object[] { rcfResult, new CombinedRcfResult(score + 1, confidence), false }, - new Object[] { rcfResult, new CombinedRcfResult(score, confidence + 1), false }, - new Object[] { rcfResult, new CombinedRcfResult(score + 1, confidence + 1), false }, }; + new Object[] { rcfResult, new CombinedRcfResult(score, confidence, totalUpdates), true }, + new Object[] { rcfResult, new CombinedRcfResult(score + 1, confidence, totalUpdates), false }, + new Object[] { rcfResult, new CombinedRcfResult(score, confidence + 1, totalUpdates), false }, + new Object[] { rcfResult, new CombinedRcfResult(score + 1, confidence + 1, totalUpdates), false }, }; } @Test @@ -55,10 +56,10 @@ public void equals_returnExpected(CombinedRcfResult result, Object other, boolea private Object[] hashCodeData() { return new Object[] { - new Object[] { rcfResult, new CombinedRcfResult(score, confidence), true }, - new Object[] { rcfResult, new CombinedRcfResult(score + 1, confidence), false }, - new Object[] { rcfResult, new CombinedRcfResult(score, confidence + 1), false }, - new Object[] { rcfResult, new CombinedRcfResult(score + 1, confidence + 1), false }, }; + new Object[] { rcfResult, new CombinedRcfResult(score, confidence, totalUpdates), true }, + new Object[] { rcfResult, new CombinedRcfResult(score + 1, confidence, totalUpdates), false }, + new Object[] { rcfResult, new 
CombinedRcfResult(score, confidence + 1, totalUpdates), false }, + new Object[] { rcfResult, new CombinedRcfResult(score + 1, confidence + 1, totalUpdates), false }, }; } @Test diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManagerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManagerTests.java index a088ec39..bdc3d296 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManagerTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManagerTests.java @@ -72,6 +72,8 @@ public class ADStateManagerTests extends ESTestCase { private AnomalyDetector detectorToCheck; private Settings settings; + private GetResponse checkpointResponse; + @Override protected NamedXContentRegistry xContentRegistry() { SearchModule searchModule = new SearchModule(Settings.EMPTY, false, Collections.emptyList()); @@ -98,6 +100,7 @@ public void setUp() throws Exception { stateManager = new ADStateManager(client, xContentRegistry(), modelManager, settings, clientUtil, clock, duration); + checkpointResponse = mock(GetResponse.class); } @Override @@ -153,6 +156,30 @@ private String setupDetector(boolean responseExists) throws IOException { return detectorToCheck.getDetectorId(); } + @SuppressWarnings("unchecked") + private void setupCheckpoint(boolean responseExists) throws IOException { + when(checkpointResponse.isExists()).thenReturn(responseExists); + + doAnswer(invocation -> { + Object[] args = invocation.getArguments(); + assertTrue(String.format("The size of args is %d. 
Its content is %s", args.length, Arrays.toString(args)), args.length >= 2); + + GetRequest request = null; + ActionListener listener = null; + if (args[0] instanceof GetRequest) { + request = (GetRequest) args[0]; + } + if (args[2] instanceof ActionListener) { + listener = (ActionListener) args[2]; + } + + assertTrue(request != null && listener != null); + listener.onResponse(checkpointResponse); + + return null; + }).when(clientUtil).asyncRequest(any(GetRequest.class), any(), any(ActionListener.class)); + } + public void testGetPartitionNumber() throws IOException, InterruptedException { String detectorId = setupDetector(true); int partitionNumber = stateManager @@ -204,6 +231,24 @@ public void testMaintenancRemove() throws IOException { } + public void testMaintenanceFlagNotRemove() throws IOException { + ConcurrentHashMap flags = new ConcurrentHashMap<>(); + when(clock.instant()).thenReturn(Instant.MIN); + flags.put("123", Instant.MAX); + stateManager.maintenanceFlag(flags); + assertEquals(1, flags.size()); + + } + + public void testMaintenancFlagRemove() throws IOException { + ConcurrentHashMap flags = new ConcurrentHashMap<>(); + when(clock.instant()).thenReturn(Instant.MAX); + flags.put("123", Instant.MIN); + stateManager.maintenanceFlag(flags); + assertEquals(0, flags.size()); + + } + public void testHasRunningQuery() throws IOException { stateManager = new ADStateManager( client, @@ -230,4 +275,27 @@ public void testGetAnomalyDetector() throws IOException { ActionListener.wrap(asDetector -> { assertEquals(detectorToCheck, asDetector.get()); }, exception -> assertTrue(false)) ); } + + public void getCheckpointTestTemplate(boolean exists) throws IOException { + setupCheckpoint(exists); + when(clock.instant()).thenReturn(Instant.MIN); + stateManager + .getDetectorCheckpoint( + "123", + ActionListener.wrap(checkpointExists -> { assertEquals(exists, checkpointExists); }, exception -> { + for (StackTraceElement ste : exception.getStackTrace()) { + 
logger.info(ste); + } + assertTrue(false); + }) + ); + } + + public void testCheckpointExists() throws IOException { + getCheckpointTestTemplate(true); + } + + public void testCheckpointNotExists() throws IOException { + getCheckpointTestTemplate(false); + } } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStatsNodesTransportActionTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStatsNodesTransportActionTests.java index 49fd78c8..8c66031b 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStatsNodesTransportActionTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStatsNodesTransportActionTests.java @@ -26,6 +26,7 @@ import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.threadpool.ThreadPool; @@ -51,6 +52,7 @@ public class ADStatsNodesTransportActionTests extends ESIntegTestCase { private String clusterStatName1, clusterStatName2; private String nodeStatName1, nodeStatName2; + @Override @Before public void setUp() throws Exception { super.setUp(); @@ -59,7 +61,13 @@ public void setUp() throws Exception { Clock clock = mock(Clock.class); Throttler throttler = new Throttler(clock); ThreadPool threadPool = mock(ThreadPool.class); - IndexUtils indexUtils = new IndexUtils(client, new ClientUtil(Settings.EMPTY, client, throttler, threadPool), clusterService()); + IndexNameExpressionResolver indexNameResolver = mock(IndexNameExpressionResolver.class); + IndexUtils indexUtils = new IndexUtils( + client, + new ClientUtil(Settings.EMPTY, client, throttler, threadPool), + clusterService(), + indexNameResolver + ); ModelManager modelManager = mock(ModelManager.class); clusterStatName1 = "clusterStat1"; diff --git 
a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTests.java index f8ebc54b..158be404 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTests.java @@ -42,6 +42,7 @@ import java.io.IOException; import java.time.Clock; +import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -55,6 +56,8 @@ import org.elasticsearch.ElasticsearchTimeoutException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.action.get.GetRequest; +import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.action.support.ActionFilters; @@ -92,6 +95,7 @@ import test.com.amazon.opendistroforelasticsearch.ad.util.JsonDeserializer; import com.amazon.opendistroforelasticsearch.ad.AbstractADTest; +import com.amazon.opendistroforelasticsearch.ad.TestHelpers; import com.amazon.opendistroforelasticsearch.ad.breaker.ADCircuitBreakerService; import com.amazon.opendistroforelasticsearch.ad.cluster.HashRing; import com.amazon.opendistroforelasticsearch.ad.common.exception.AnomalyDetectionException; @@ -105,21 +109,25 @@ import com.amazon.opendistroforelasticsearch.ad.constant.CommonName; import com.amazon.opendistroforelasticsearch.ad.feature.FeatureManager; import com.amazon.opendistroforelasticsearch.ad.feature.SinglePointFeatures; +import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; import com.amazon.opendistroforelasticsearch.ad.ml.ModelManager; import com.amazon.opendistroforelasticsearch.ad.ml.RcfResult; import 
com.amazon.opendistroforelasticsearch.ad.ml.ThresholdingResult; import com.amazon.opendistroforelasticsearch.ad.ml.rcf.CombinedRcfResult; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.amazon.opendistroforelasticsearch.ad.model.FeatureData; import com.amazon.opendistroforelasticsearch.ad.stats.ADStat; import com.amazon.opendistroforelasticsearch.ad.stats.ADStats; import com.amazon.opendistroforelasticsearch.ad.stats.StatNames; import com.amazon.opendistroforelasticsearch.ad.stats.suppliers.CounterSupplier; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.DetectorStateHandler; import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; import com.amazon.opendistroforelasticsearch.ad.util.ColdStartRunner; import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils; import com.amazon.opendistroforelasticsearch.ad.util.Throttler; +import com.amazon.opendistroforelasticsearch.ad.util.ThrowingConsumerWrapper; import com.google.gson.JsonElement; public class AnomalyResultTests extends AbstractADTest { @@ -141,6 +149,7 @@ public class AnomalyResultTests extends AbstractADTest { private String featureName; private ADCircuitBreakerService adCircuitBreakerService; private ADStats adStats; + private DetectorStateHandler detectorInfoHandler; @BeforeClass public static void setUpBeforeClass() { @@ -204,10 +213,10 @@ public void setUp() throws Exception { doAnswer(invocation -> { ActionListener listener = invocation.getArgument(3); - listener.onResponse(new RcfResult(0.2, 0, 100)); + listener.onResponse(new RcfResult(0.2, 0, 100, 1000)); return null; }).when(normalModelManager).getRcfResult(any(String.class), any(String.class), any(double[].class), any(ActionListener.class)); - when(normalModelManager.combineRcfResults(any())).thenReturn(new CombinedRcfResult(0, 1.0d)); + 
when(normalModelManager.combineRcfResults(any())).thenReturn(new CombinedRcfResult(0, 1.0d, 1000)); adID = "123"; rcfModelID = "123-rcf-1"; when(normalModelManager.getRcfModelId(any(String.class), anyInt())).thenReturn(rcfModelID); @@ -242,7 +251,7 @@ public void setUp() throws Exception { Throttler throttler = new Throttler(clock); ThreadPool threadpool = mock(ThreadPool.class); ClientUtil clientUtil = new ClientUtil(Settings.EMPTY, client, throttler, threadpool); - IndexUtils indexUtils = new IndexUtils(client, clientUtil, clusterService); + IndexUtils indexUtils = new IndexUtils(client, clientUtil, clusterService, indexNameResolver); Map> statsMap = new HashMap>() { { @@ -252,6 +261,37 @@ public void setUp() throws Exception { }; adStats = new ADStats(indexUtils, normalModelManager, statsMap); + + AnomalyDetectionIndices anomalyDetectionIndices = mock(AnomalyDetectionIndices.class); + detectorInfoHandler = new DetectorStateHandler( + client, + settings, + threadPool, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initDetectorStateIndex), + anomalyDetectionIndices::doesDetectorStateIndexExist, + clientUtil, + indexUtils, + clusterService + ); + + doAnswer(invocation -> { + Object[] args = invocation.getArguments(); + GetRequest request = (GetRequest) args[0]; + ActionListener listener = (ActionListener) args[1]; + + if (request.index().equals(DetectorInternalState.DETECTOR_STATE_INDEX)) { + + DetectorInternalState.Builder result = new DetectorInternalState.Builder().lastUpdateTime(Instant.now()); + + result.rcfUpdates(1000); + + listener + .onResponse(TestHelpers.createGetResponse(result.build(), detector.getDetectorId(), DetectorInternalState.DETECTOR_STATE_INDEX)); + + } + + return null; + }).when(client).get(any(), any()); } @Override @@ -297,7 +337,8 @@ public void testNormal() throws IOException { clusterService, indexNameResolver, adCircuitBreakerService, - adStats + adStats, + detectorInfoHandler ); AnomalyResultRequest request 
= new AnomalyResultRequest(adID, 100, 200); @@ -349,7 +390,8 @@ public Throwable noModelExceptionTemplate( clusterService, indexNameResolver, adCircuitBreakerService, - adStats + adStats, + detectorInfoHandler ); AnomalyResultRequest request = new AnomalyResultRequest(adID, 100, 200); @@ -430,7 +472,8 @@ public void testInsufficientCapacityExceptionDuringColdStart() { clusterService, indexNameResolver, adCircuitBreakerService, - adStats + adStats, + detectorInfoHandler ); AnomalyResultRequest request = new AnomalyResultRequest(adID, 100, 200); @@ -464,7 +507,8 @@ public void testInsufficientCapacityExceptionDuringRestoringModel() { clusterService, indexNameResolver, adCircuitBreakerService, - adStats + adStats, + detectorInfoHandler ); AnomalyResultRequest request = new AnomalyResultRequest(adID, 100, 200); @@ -502,7 +546,8 @@ public void testThresholdException() { clusterService, indexNameResolver, adCircuitBreakerService, - adStats + adStats, + detectorInfoHandler ); AnomalyResultRequest request = new AnomalyResultRequest(adID, 100, 200); @@ -533,7 +578,8 @@ public void testCircuitBreaker() { clusterService, indexNameResolver, breakerService, - adStats + adStats, + detectorInfoHandler ); AnomalyResultRequest request = new AnomalyResultRequest(adID, 100, 200); @@ -597,7 +643,8 @@ private void nodeNotConnectedExceptionTemplate(boolean isRCF, boolean temporary, hackedClusterService, indexNameResolver, adCircuitBreakerService, - adStats + adStats, + detectorInfoHandler ); AnomalyResultRequest request = new AnomalyResultRequest(adID, 100, 200); @@ -662,7 +709,8 @@ public void testMute() { clusterService, indexNameResolver, adCircuitBreakerService, - adStats + adStats, + detectorInfoHandler ); AnomalyResultRequest request = new AnomalyResultRequest(adID, 100, 200); PlainActionFuture listener = new PlainActionFuture<>(); @@ -694,7 +742,8 @@ public void alertingRequestTemplate(boolean anomalyResultIndexExists) throws IOE clusterService, indexNameResolver, 
adCircuitBreakerService, - adStats + adStats, + detectorInfoHandler ); TransportRequestOptions option = TransportRequestOptions @@ -846,7 +895,8 @@ public void testOnFailureNull() throws IOException { clusterService, indexNameResolver, adCircuitBreakerService, - adStats + adStats, + detectorInfoHandler ); AnomalyResultTransportAction.RCFActionListener listener = action.new RCFActionListener( null, null, null, null, null, null, null, null, null, 0, new AtomicInteger(), null @@ -868,7 +918,8 @@ public void testColdStartNoTrainingData() throws Exception { clusterService, indexNameResolver, adCircuitBreakerService, - adStats + adStats, + detectorInfoHandler ); AnomalyResultTransportAction.ColdStartJob job = action.new ColdStartJob(detector); @@ -893,7 +944,8 @@ public void testColdStartTimeoutPutCheckpoint() throws Exception { clusterService, indexNameResolver, adCircuitBreakerService, - adStats + adStats, + detectorInfoHandler ); AnomalyResultTransportAction.ColdStartJob job = action.new ColdStartJob(detector); @@ -916,7 +968,8 @@ public void testColdStartIllegalArgumentException() throws Exception { clusterService, indexNameResolver, adCircuitBreakerService, - adStats + adStats, + detectorInfoHandler ); AnomalyResultTransportAction.ColdStartJob job = action.new ColdStartJob(detector); @@ -959,7 +1012,8 @@ public void featureTestTemplate(FeatureTestMode mode) { clusterService, indexNameResolver, adCircuitBreakerService, - adStats + adStats, + detectorInfoHandler ); AnomalyResultRequest request = new AnomalyResultRequest(adID, 100, 200); @@ -1042,7 +1096,8 @@ private void globalBlockTemplate(BlockType type, String errLogMsg, Settings inde hackedClusterService, indexNameResolver, adCircuitBreakerService, - adStats + adStats, + detectorInfoHandler ); AnomalyResultRequest request = new AnomalyResultRequest(adID, 100, 200); @@ -1086,7 +1141,8 @@ public void testNullRCFResult() { clusterService, indexNameResolver, adCircuitBreakerService, - adStats + adStats, + 
detectorInfoHandler ); AnomalyResultTransportAction.RCFActionListener listener = action.new RCFActionListener( null, "123-rcf-0", null, "123", null, null, null, null, null, 0, new AtomicInteger(), null @@ -1118,7 +1174,8 @@ public void testAllFeaturesDisabled() { clusterService, indexNameResolver, adCircuitBreakerService, - adStats + adStats, + detectorInfoHandler ); AnomalyResultRequest request = new AnomalyResultRequest(adID, 100, 200); @@ -1159,7 +1216,8 @@ public void testEndRunDueToNoTrainingData() { clusterService, indexNameResolver, adCircuitBreakerService, - adStats + adStats, + detectorInfoHandler ); AnomalyResultRequest request = new AnomalyResultRequest(adID, 100, 200); diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFResultTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFResultTests.java index a11ca716..5df91388 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFResultTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFResultTests.java @@ -80,7 +80,7 @@ public void testNormal() { ); doAnswer(invocation -> { ActionListener listener = invocation.getArgument(3); - listener.onResponse(new RcfResult(0, 0, 25)); + listener.onResponse(new RcfResult(0, 0, 25, 1000)); return null; }).when(manager).getRcfResult(any(String.class), any(String.class), any(double[].class), any(ActionListener.class)); @@ -128,7 +128,7 @@ public void testExecutionException() { } public void testSerialzationResponse() throws IOException { - RCFResultResponse response = new RCFResultResponse(0.3, 0, 26); + RCFResultResponse response = new RCFResultResponse(0.3, 0, 26, 1000); BytesStreamOutput output = new BytesStreamOutput(); response.writeTo(output); @@ -139,7 +139,7 @@ public void testSerialzationResponse() throws IOException { } public void testJsonResponse() throws IOException, JsonPathNotFoundException { - RCFResultResponse response = new 
RCFResultResponse(0.3, 0, 26); + RCFResultResponse response = new RCFResultResponse(0.3, 0, 26, 1000); XContentBuilder builder = jsonBuilder(); response.toXContent(builder, ToXContent.EMPTY_PARAMS); @@ -205,7 +205,7 @@ public void testCircuitBreaker() { ); doAnswer(invocation -> { ActionListener listener = invocation.getArgument(3); - listener.onResponse(new RcfResult(0, 0, 25)); + listener.onResponse(new RcfResult(0, 0, 25, 1000)); return null; }).when(manager).getRcfResult(any(String.class), any(String.class), any(double[].class), any(ActionListener.class)); when(breakerService.isOpen()).thenReturn(true); diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandlerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandlerTests.java index 65565bf1..075e704a 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandlerTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandlerTests.java @@ -41,6 +41,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; +import org.elasticsearch.threadpool.ThreadPool; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; @@ -57,6 +58,10 @@ import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; import com.amazon.opendistroforelasticsearch.ad.transport.AnomalyResultTests; +import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; +import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils; +import com.amazon.opendistroforelasticsearch.ad.util.Throttler; +import com.amazon.opendistroforelasticsearch.ad.util.ThrowingConsumerWrapper; public class AnomalyResultHandlerTests extends 
AbstractADTest { private static Settings settings; @@ -66,11 +71,22 @@ public class AnomalyResultHandlerTests extends AbstractADTest { @Mock private Client client; + private ClientUtil clientUtil; + + @Mock + private IndexNameExpressionResolver indexNameResolver; + @Mock private AnomalyDetectionIndices anomalyDetectionIndices; + private String detectorId = "123"; + @Mock - private IndexNameExpressionResolver indexNameResolver; + private Throttler throttler; + + private ThreadPool context; + + private IndexUtils indexUtil; @BeforeClass public static void setUpBeforeClass() { @@ -88,9 +104,12 @@ public static void tearDownAfterClass() { @Before public void setUp() throws Exception { super.setUp(); - super.setUpLog4jForJUnit(AnomalyResultHandler.class); + super.setUpLog4jForJUnit(AnomalyIndexHandler.class); MockitoAnnotations.initMocks(this); setWriteBlockAdResultIndex(false); + context = TestHelpers.createThreadPool(); + clientUtil = new ClientUtil(settings, client, throttler, context); + indexUtil = new IndexUtils(client, clientUtil, clusterService, indexNameResolver); } @Override @@ -114,25 +133,29 @@ public void testSavingAdResult() throws IOException { listener.onResponse(mock(IndexResponse.class)); return null; }).when(client).index(any(IndexRequest.class), ArgumentMatchers.>any()); - AnomalyResultHandler handler = new AnomalyResultHandler( + AnomalyIndexHandler handler = new AnomalyIndexHandler( client, settings, - clusterService, - indexNameResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + clientUtil, + indexUtil, + clusterService ); - handler.indexAnomalyResult(TestHelpers.randomAnomalyDetectResult()); - assertEquals(1, testAppender.countMessage((AnomalyResultHandler.SUCCESS_SAVING_MSG))); + 
handler.index(TestHelpers.randomAnomalyDetectResult(), detectorId); + assertEquals(1, testAppender.countMessage(AnomalyIndexHandler.SUCCESS_SAVING_MSG, true)); } @Test public void testSavingFailureNotRetry() throws InterruptedException, IOException { savingFailureTemplate(false, 1, true); - assertEquals(1, testAppender.countMessage((AnomalyResultHandler.FAIL_TO_SAVE_ERR_MSG))); - assertTrue(!testAppender.containsMessage(AnomalyResultHandler.SUCCESS_SAVING_MSG)); - assertTrue(!testAppender.containsMessage(AnomalyResultHandler.RETRY_SAVING_ERR_MSG)); + assertEquals(1, testAppender.countMessage(AnomalyIndexHandler.FAIL_TO_SAVE_ERR_MSG, true)); + assertTrue(!testAppender.containsMessage(AnomalyIndexHandler.SUCCESS_SAVING_MSG, true)); + assertTrue(!testAppender.containsMessage(AnomalyIndexHandler.RETRY_SAVING_ERR_MSG, true)); } @Test @@ -140,57 +163,69 @@ public void testSavingFailureRetry() throws InterruptedException, IOException { setWriteBlockAdResultIndex(false); savingFailureTemplate(true, 3, true); - assertEquals(2, testAppender.countMessage((AnomalyResultHandler.RETRY_SAVING_ERR_MSG))); - assertEquals(1, testAppender.countMessage((AnomalyResultHandler.FAIL_TO_SAVE_ERR_MSG))); - assertTrue(!testAppender.containsMessage(AnomalyResultHandler.SUCCESS_SAVING_MSG)); + assertEquals(2, testAppender.countMessage(AnomalyIndexHandler.RETRY_SAVING_ERR_MSG, true)); + assertEquals(1, testAppender.countMessage(AnomalyIndexHandler.FAIL_TO_SAVE_ERR_MSG, true)); + assertTrue(!testAppender.containsMessage(AnomalyIndexHandler.SUCCESS_SAVING_MSG, true)); } @Test public void testIndexWriteBlock() { setWriteBlockAdResultIndex(true); - AnomalyResultHandler handler = new AnomalyResultHandler( + AnomalyIndexHandler handler = new AnomalyIndexHandler( client, settings, - clusterService, - indexNameResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + 
ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + clientUtil, + indexUtil, + clusterService ); - handler.indexAnomalyResult(TestHelpers.randomAnomalyDetectResult()); + handler.index(TestHelpers.randomAnomalyDetectResult(), detectorId); - assertTrue(testAppender.containsMessage(AnomalyResultHandler.CANNOT_SAVE_ERR_MSG)); + assertTrue(testAppender.containsMessage(AnomalyIndexHandler.CANNOT_SAVE_ERR_MSG, true)); } @Test public void testAdResultIndexExist() throws IOException { setInitAnomalyResultIndexException(true); - AnomalyResultHandler handler = new AnomalyResultHandler( + AnomalyIndexHandler handler = new AnomalyIndexHandler( client, settings, - clusterService, - indexNameResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + clientUtil, + indexUtil, + clusterService ); - handler.indexAnomalyResult(TestHelpers.randomAnomalyDetectResult()); + handler.index(TestHelpers.randomAnomalyDetectResult(), detectorId); verify(client, times(1)).index(any(), any()); } @Test public void testAdResultIndexOtherException() throws IOException { expectedEx.expect(AnomalyDetectionException.class); - expectedEx.expectMessage("Error in saving anomaly index for ID"); + expectedEx.expectMessage("Error in saving .opendistro-anomaly-results for detector " + detectorId); setInitAnomalyResultIndexException(false); - AnomalyResultHandler handler = new AnomalyResultHandler( + AnomalyIndexHandler handler = new AnomalyIndexHandler( client, settings, - clusterService, - indexNameResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + 
ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + clientUtil, + indexUtil, + clusterService ); - handler.indexAnomalyResult(TestHelpers.randomAnomalyDetectResult()); + handler.index(TestHelpers.randomAnomalyDetectResult(), detectorId); verify(client, never()).index(any(), any()); } @@ -257,16 +292,20 @@ private void savingFailureTemplate(boolean throwEsRejectedExecutionException, in .put("opendistro.anomaly_detection.backoff_initial_delay", TimeValue.timeValueMillis(1)) .build(); - AnomalyResultHandler handler = new AnomalyResultHandler( + AnomalyIndexHandler handler = new AnomalyIndexHandler( client, backoffSettings, - clusterService, - indexNameResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + clientUtil, + indexUtil, + clusterService ); - handler.indexAnomalyResult(TestHelpers.randomAnomalyDetectResult()); + handler.index(TestHelpers.randomAnomalyDetectResult(), detectorId); backoffLatch.await(); } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectorInfoHandlerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectorInfoHandlerTests.java new file mode 100644 index 00000000..16325235 --- /dev/null +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectorInfoHandlerTests.java @@ -0,0 +1,65 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.transport.handler; + +import static org.mockito.Mockito.mock; + +import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.threadpool.ThreadPool; + +import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; +import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; +import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils; +import com.amazon.opendistroforelasticsearch.ad.util.ThrowingConsumerWrapper; + +public class DetectorInfoHandlerTests extends ESTestCase { + private DetectorStateHandler detectorInfoHandler; + + @Override + public void setUp() throws Exception { + super.setUp(); + AnomalyDetectionIndices anomalyDetectionIndices = mock(AnomalyDetectionIndices.class); + Client client = mock(Client.class); + Settings settings = Settings.EMPTY; + ClientUtil clientUtil = mock(ClientUtil.class); + IndexUtils indexUtils = mock(IndexUtils.class); + ClusterService clusterService = mock(ClusterService.class); + ThreadPool threadPool = mock(ThreadPool.class); + detectorInfoHandler = new DetectorStateHandler( + client, + settings, + threadPool, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initDetectorStateIndex), + anomalyDetectionIndices::doesDetectorStateIndexExist, + clientUtil, + indexUtils, + clusterService + ); + } + + public void 
testRcfUpdates() { + long updates = 10; + DetectorStateHandler.TotalRcfUpdatesStrategy rcfStrategy = detectorInfoHandler.new TotalRcfUpdatesStrategy(updates); + DetectorInternalState info = rcfStrategy.createNewInfo(null); + assertTrue(null == info.getError()); + assertEquals(updates, info.getRcfUpdates()); + assertTrue(info.getLastUpdateTime() != null); + } +} diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtilsTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtilsTests.java index fc250f3f..d9386da4 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtilsTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtilsTests.java @@ -21,6 +21,7 @@ import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.threadpool.ThreadPool; @@ -33,6 +34,8 @@ public class IndexUtilsTests extends ESIntegTestCase { private ClientUtil clientUtil; + private IndexNameExpressionResolver indexNameResolver; + @Before public void setup() { Client client = client(); @@ -40,11 +43,12 @@ public void setup() { Throttler throttler = new Throttler(clock); ThreadPool context = TestHelpers.createThreadPool(); clientUtil = new ClientUtil(Settings.EMPTY, client, throttler, context); + indexNameResolver = mock(IndexNameExpressionResolver.class); } @Test public void testGetIndexHealth_NoIndex() { - IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService()); + IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService(), indexNameResolver); String output = indexUtils.getIndexHealthStatus("test"); assertEquals(IndexUtils.NONEXISTENT_INDEX_STATUS, output); } @@ -54,7 +58,7 @@ public void testGetIndexHealth_Index() { 
String indexName = "test-2"; createIndex(indexName); flush(); - IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService()); + IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService(), indexNameResolver); String status = indexUtils.getIndexHealthStatus(indexName); assertTrue(status.equals("green") || status.equals("yellow")); } @@ -67,14 +71,14 @@ public void testGetIndexHealth_Alias() { flush(); AcknowledgedResponse response = client().admin().indices().prepareAliases().addAlias(indexName, aliasName).execute().actionGet(); assertTrue(response.isAcknowledged()); - IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService()); + IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService(), indexNameResolver); String status = indexUtils.getIndexHealthStatus(aliasName); assertTrue(status.equals("green") || status.equals("yellow")); } @Test public void testGetNumberOfDocumentsInIndex_NonExistentIndex() { - IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService()); + IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService(), indexNameResolver); assertEquals((Long) 0L, indexUtils.getNumberOfDocumentsInIndex("index")); } @@ -89,7 +93,7 @@ public void testGetNumberOfDocumentsInIndex_RegularIndex() { index(indexName, "_doc", String.valueOf(i), "{}"); } flushAndRefresh(indexName); - IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService()); + IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService(), indexNameResolver); assertEquals((Long) count, indexUtils.getNumberOfDocumentsInIndex(indexName)); } } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListenerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListenerTests.java new file mode 100644 index 00000000..7bd905ca --- /dev/null +++ 
b/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListenerTests.java @@ -0,0 +1,48 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.util; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.test.ESTestCase; + +import com.amazon.opendistroforelasticsearch.ad.model.DetectorProfile; + +public class MultiResponsesDelegateActionListenerTests extends ESTestCase { + + public void testEmptyResponse() throws InterruptedException { + final CountDownLatch inProgressLatch = new CountDownLatch(1); + ActionListener actualListener = ActionListener.wrap(response -> { + assertTrue("Should not reach here", false); + inProgressLatch.countDown(); + }, exception -> { + String exceptionMsg = exception.getMessage(); + assertTrue(exceptionMsg, exceptionMsg.contains(MultiResponsesDelegateActionListener.NO_RESPONSE)); + inProgressLatch.countDown(); + }); + + MultiResponsesDelegateActionListener multiListener = new MultiResponsesDelegateActionListener( + actualListener, + 2, + "blah" + ); + multiListener.onResponse(null); + multiListener.onResponse(null); + assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); + } +}