diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunner.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunner.java index 05099c48..abc22f38 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunner.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunner.java @@ -56,7 +56,8 @@ import com.amazon.opendistroforelasticsearch.ad.transport.AnomalyResultRequest; import com.amazon.opendistroforelasticsearch.ad.transport.AnomalyResultResponse; import com.amazon.opendistroforelasticsearch.ad.transport.AnomalyResultTransportAction; -import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyResultHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyIndexHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.DetectionStateHandler; import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.JobExecutionContext; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.LockModel; @@ -77,8 +78,9 @@ public class AnomalyDetectorJobRunner implements ScheduledJobRunner { private Client client; private ClientUtil clientUtil; private ThreadPool threadPool; - private AnomalyResultHandler anomalyResultHandler; + private AnomalyIndexHandler anomalyResultHandler; private ConcurrentHashMap detectorEndRunExceptionCount; + private DetectionStateHandler detectionStateHandler; public static AnomalyDetectorJobRunner getJobRunnerInstance() { if (INSTANCE != null) { @@ -110,7 +112,7 @@ public void setThreadPool(ThreadPool threadPool) { this.threadPool = threadPool; } - public void setAnomalyResultHandler(AnomalyResultHandler anomalyResultHandler) { + public void setAnomalyResultHandler(AnomalyIndexHandler anomalyResultHandler) { this.anomalyResultHandler = anomalyResultHandler; } @@ -119,6 +121,10 @@ public void 
setSettings(Settings settings) { this.maxRetryForEndRunException = AnomalyDetectorSettings.MAX_RETRY_FOR_END_RUN_EXCEPTION.get(settings); } + public void setDetectionStateHandler(DetectionStateHandler detectionStateHandler) { + this.detectionStateHandler = detectionStateHandler; + } + @Override public void runJob(ScheduledJobParameter jobParameter, JobExecutionContext context) { String detectorId = jobParameter.getName(); @@ -436,7 +442,8 @@ private void indexAnomalyResult( Instant.now(), response.getError() ); - anomalyResultHandler.indexAnomalyResult(anomalyResult); + anomalyResultHandler.index(anomalyResult, detectorId); + detectionStateHandler.saveError(response.getError(), detectorId); } catch (Exception e) { log.error("Failed to index anomaly result for " + detectorId, e); } finally { @@ -490,7 +497,8 @@ private void indexAnomalyResultException( Instant.now(), errorMessage ); - anomalyResultHandler.indexAnomalyResult(anomalyResult); + anomalyResultHandler.index(anomalyResult, detectorId); + detectionStateHandler.saveError(errorMessage, detectorId); } catch (Exception e) { log.error("Failed to index anomaly result for " + detectorId, e); } finally { diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorPlugin.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorPlugin.java index 0acff42e..ff1ee2a3 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorPlugin.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorPlugin.java @@ -21,12 +21,10 @@ import java.security.PrivilegedAction; import java.time.Clock; import java.util.Arrays; -import java.util.Calendar; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.TimeZone; import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -83,6 +81,7 @@ import 
com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.amazon.opendistroforelasticsearch.ad.rest.RestAnomalyDetectorJobAction; import com.amazon.opendistroforelasticsearch.ad.rest.RestDeleteAnomalyDetectorAction; import com.amazon.opendistroforelasticsearch.ad.rest.RestExecuteAnomalyDetectorAction; @@ -100,7 +99,6 @@ import com.amazon.opendistroforelasticsearch.ad.stats.suppliers.IndexStatusSupplier; import com.amazon.opendistroforelasticsearch.ad.stats.suppliers.ModelsOnNodeSupplier; import com.amazon.opendistroforelasticsearch.ad.stats.suppliers.SettableSupplier; -import com.amazon.opendistroforelasticsearch.ad.transport.ADStateManager; import com.amazon.opendistroforelasticsearch.ad.transport.ADStatsNodesAction; import com.amazon.opendistroforelasticsearch.ad.transport.ADStatsNodesTransportAction; import com.amazon.opendistroforelasticsearch.ad.transport.AnomalyResultAction; @@ -111,18 +109,23 @@ import com.amazon.opendistroforelasticsearch.ad.transport.DeleteModelTransportAction; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileAction; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileTransportAction; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingAction; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingTransportAction; import com.amazon.opendistroforelasticsearch.ad.transport.RCFResultAction; import com.amazon.opendistroforelasticsearch.ad.transport.RCFResultTransportAction; import com.amazon.opendistroforelasticsearch.ad.transport.StopDetectorAction; import com.amazon.opendistroforelasticsearch.ad.transport.StopDetectorTransportAction; import com.amazon.opendistroforelasticsearch.ad.transport.ThresholdResultAction; import 
com.amazon.opendistroforelasticsearch.ad.transport.ThresholdResultTransportAction; -import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyResultHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.TransportStateManager; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyIndexHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.DetectionStateHandler; import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; import com.amazon.opendistroforelasticsearch.ad.util.ColdStartRunner; import com.amazon.opendistroforelasticsearch.ad.util.DiscoveryNodeFilterer; import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils; import com.amazon.opendistroforelasticsearch.ad.util.Throttler; +import com.amazon.opendistroforelasticsearch.ad.util.ThrowingConsumerWrapper; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.JobSchedulerExtension; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.ScheduledJobParser; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.ScheduledJobRunner; @@ -150,6 +153,8 @@ public class AnomalyDetectorPlugin extends Plugin implements ActionPlugin, Scrip private NamedXContentRegistry xContentRegistry; private ClientUtil clientUtil; private DiscoveryNodeFilterer nodeFilter; + private IndexUtils indexUtils; + private DetectionStateHandler detectorStateHandler; static { SpecialPermission.check(); @@ -170,28 +175,34 @@ public List getRestHandlers( IndexNameExpressionResolver indexNameExpressionResolver, Supplier nodesInCluster ) { - AnomalyResultHandler anomalyResultHandler = new AnomalyResultHandler( + + AnomalyIndexHandler anomalyResultHandler; + anomalyResultHandler = new AnomalyIndexHandler( client, settings, - clusterService, - indexNameExpressionResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + 
ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + this.clientUtil, + this.indexUtils, + clusterService ); + AnomalyDetectorJobRunner jobRunner = AnomalyDetectorJobRunner.getJobRunnerInstance(); jobRunner.setClient(client); jobRunner.setClientUtil(clientUtil); jobRunner.setThreadPool(threadPool); jobRunner.setAnomalyResultHandler(anomalyResultHandler); + jobRunner.setDetectionStateHandler(detectorStateHandler); jobRunner.setSettings(settings); AnomalyDetectorProfileRunner profileRunner = new AnomalyDetectorProfileRunner( client, this.xContentRegistry, this.nodeFilter, - indexNameExpressionResolver, - clusterService, - Calendar.getInstance(TimeZone.getTimeZone("UTC")) + AnomalyDetectorSettings.NUM_MIN_SAMPLES ); RestGetAnomalyDetectorAction restGetAnomalyDetectorAction = new RestGetAnomalyDetectorAction(profileRunner); RestIndexAnomalyDetectorAction restIndexAnomalyDetectorAction = new RestIndexAnomalyDetectorAction( @@ -257,7 +268,7 @@ public Collection createComponents( Clock clock = Clock.systemUTC(); Throttler throttler = new Throttler(clock); this.clientUtil = new ClientUtil(settings, client, throttler, threadPool); - IndexUtils indexUtils = new IndexUtils(client, clientUtil, clusterService); + this.indexUtils = new IndexUtils(client, clientUtil, clusterService, indexNameExpressionResolver); anomalyDetectionIndices = new AnomalyDetectionIndices(client, clusterService, threadPool, settings); this.clusterService = clusterService; this.xContentRegistry = xContentRegistry; @@ -301,7 +312,7 @@ public Collection createComponents( ); HashRing hashRing = new HashRing(nodeFilter, clock, settings); - ADStateManager stateManager = new ADStateManager( + TransportStateManager stateManager = new TransportStateManager( client, xContentRegistry, modelManager, @@ -350,6 +361,18 @@ public Collection createComponents( adStats = new ADStats(indexUtils, 
modelManager, stats); ADCircuitBreakerService adCircuitBreakerService = new ADCircuitBreakerService(jvmService).init(); + this.detectorStateHandler = new DetectionStateHandler( + client, + settings, + threadPool, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initDetectorStateIndex), + anomalyDetectionIndices::doesDetectorStateIndexExist, + this.clientUtil, + this.indexUtils, + clusterService, + xContentRegistry, + stateManager + ); return ImmutableList .of( @@ -370,7 +393,8 @@ public Collection createComponents( adCircuitBreakerService, adStats, new MasterEventListener(clusterService, threadPool, client, clock, clientUtil, nodeFilter), - nodeFilter + nodeFilter, + detectorStateHandler ); } @@ -415,7 +439,13 @@ public List> getSettings() { @Override public List getNamedXContent() { - return ImmutableList.of(AnomalyDetector.XCONTENT_REGISTRY, AnomalyResult.XCONTENT_REGISTRY); + return ImmutableList + .of( + AnomalyDetector.XCONTENT_REGISTRY, + AnomalyResult.XCONTENT_REGISTRY, + DetectorInternalState.XCONTENT_REGISTRY, + AnomalyDetectorJob.XCONTENT_REGISTRY + ); } /* @@ -432,7 +462,8 @@ public List getNamedXContent() { new ActionHandler<>(AnomalyResultAction.INSTANCE, AnomalyResultTransportAction.class), new ActionHandler<>(CronAction.INSTANCE, CronTransportAction.class), new ActionHandler<>(ADStatsNodesAction.INSTANCE, ADStatsNodesTransportAction.class), - new ActionHandler<>(ProfileAction.INSTANCE, ProfileTransportAction.class) + new ActionHandler<>(ProfileAction.INSTANCE, ProfileTransportAction.class), + new ActionHandler<>(RCFPollingAction.INSTANCE, RCFPollingTransportAction.class) ); } @@ -468,7 +499,8 @@ public Collection getSystemIndexDescriptors(Settings sett new SystemIndexDescriptor(AnomalyDetectionIndices.ALL_AD_RESULTS_INDEX_PATTERN, "anomaly result"), new SystemIndexDescriptor(AnomalyDetector.ANOMALY_DETECTORS_INDEX, "detector definition"), new SystemIndexDescriptor(AnomalyDetectorJob.ANOMALY_DETECTOR_JOB_INDEX, "detector 
job"), - new SystemIndexDescriptor(CommonName.CHECKPOINT_INDEX_NAME, "model checkpoint") + new SystemIndexDescriptor(CommonName.CHECKPOINT_INDEX_NAME, "model checkpoint"), + new SystemIndexDescriptor(DetectorInternalState.DETECTOR_STATE_INDEX, "detector information like total rcf updates") ) ); } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunner.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunner.java index 943a1a4f..aa92d855 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunner.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunner.java @@ -20,54 +20,42 @@ import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; import java.io.IOException; -import java.time.Instant; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.List; -import java.util.Map; import java.util.Set; -import java.util.TimeZone; -import java.util.TreeMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.core.util.Throwables; +import org.apache.logging.log4j.message.ParameterizedMessage; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.get.GetRequest; import org.elasticsearch.action.get.GetResponse; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.action.support.IndicesOptions; import org.elasticsearch.client.Client; -import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; import org.elasticsearch.common.xcontent.NamedXContentRegistry; 
import org.elasticsearch.common.xcontent.XContentParseException; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.IndexNotFoundException; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.index.query.RangeQueryBuilder; -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.SearchHits; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.elasticsearch.search.sort.FieldSortBuilder; -import org.elasticsearch.search.sort.SortOrder; - -import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; + +import com.amazon.opendistroforelasticsearch.ad.common.exception.ResourceNotFoundException; +import com.amazon.opendistroforelasticsearch.ad.constant.CommonName; +import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; -import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.amazon.opendistroforelasticsearch.ad.model.DetectorProfile; import com.amazon.opendistroforelasticsearch.ad.model.DetectorState; +import com.amazon.opendistroforelasticsearch.ad.model.InitProgressProfile; +import com.amazon.opendistroforelasticsearch.ad.model.IntervalTimeConfiguration; import com.amazon.opendistroforelasticsearch.ad.model.ProfileName; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileAction; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileRequest; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileResponse; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingAction; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingRequest; +import 
com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingResponse; import com.amazon.opendistroforelasticsearch.ad.util.DiscoveryNodeFilterer; +import com.amazon.opendistroforelasticsearch.ad.util.ExceptionUtil; import com.amazon.opendistroforelasticsearch.ad.util.MultiResponsesDelegateActionListener; public class AnomalyDetectorProfileRunner { @@ -75,31 +63,25 @@ public class AnomalyDetectorProfileRunner { private Client client; private NamedXContentRegistry xContentRegistry; private DiscoveryNodeFilterer nodeFilter; - private final IndexNameExpressionResolver indexNameExpressionResolver; static String FAIL_TO_FIND_DETECTOR_MSG = "Fail to find detector with id: "; static String FAIL_TO_GET_PROFILE_MSG = "Fail to get profile for detector "; - private final ClusterService clusterService; - private Calendar calendar; + private long requiredSamples; public AnomalyDetectorProfileRunner( Client client, NamedXContentRegistry xContentRegistry, DiscoveryNodeFilterer nodeFilter, - IndexNameExpressionResolver indexNameExpressionResolver, - ClusterService clusterService, - Calendar calendar + long requiredSamples ) { this.client = client; this.xContentRegistry = xContentRegistry; this.nodeFilter = nodeFilter; - this.indexNameExpressionResolver = indexNameExpressionResolver; - this.clusterService = clusterService; - this.calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + this.requiredSamples = requiredSamples; } - public void profile(String detectorId, ActionListener listener, Set profiles) { + public void profile(String detectorId, ActionListener listener, Set profilesToCollect) { - if (profiles.isEmpty()) { + if (profilesToCollect.isEmpty()) { listener.onFailure(new RuntimeException("Unsupported profile types.")); return; } @@ -108,18 +90,22 @@ public void profile(String detectorId, ActionListener listener, // and return to users int totalListener = 0; - if (profiles.contains(ProfileName.STATE)) { + if (profilesToCollect.contains(ProfileName.STATE)) { + 
totalListener++; + } + + if (profilesToCollect.contains(ProfileName.ERROR)) { totalListener++; } - if (profiles.contains(ProfileName.ERROR)) { + if (profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { totalListener++; } - if (profiles.contains(ProfileName.COORDINATING_NODE) - || profiles.contains(ProfileName.SHINGLE_SIZE) - || profiles.contains(ProfileName.TOTAL_SIZE_IN_BYTES) - || profiles.contains(ProfileName.MODELS)) { + if (profilesToCollect.contains(ProfileName.COORDINATING_NODE) + || profilesToCollect.contains(ProfileName.SHINGLE_SIZE) + || profilesToCollect.contains(ProfileName.TOTAL_SIZE_IN_BYTES) + || profilesToCollect.contains(ProfileName.MODELS)) { totalListener++; } @@ -129,13 +115,13 @@ public void profile(String detectorId, ActionListener listener, "Fail to fetch profile for " + detectorId ); - prepareProfile(detectorId, delegateListener, profiles); + prepareProfile(detectorId, delegateListener, profilesToCollect); } private void prepareProfile( String detectorId, MultiResponsesDelegateActionListener listener, - Set profiles + Set profilesToCollect ) { GetRequest getRequest = new GetRequest(ANOMALY_DETECTOR_JOB_INDEX, detectorId); client.get(getRequest, ActionListener.wrap(getResponse -> { @@ -149,18 +135,20 @@ private void prepareProfile( AnomalyDetectorJob job = AnomalyDetectorJob.parse(parser); long enabledTimeMs = job.getEnabledTime().toEpochMilli(); - if (profiles.contains(ProfileName.STATE)) { - profileState(detectorId, enabledTimeMs, listener, job.isEnabled()); + if (profilesToCollect.contains(ProfileName.ERROR)) { + GetRequest getStateRequest = new GetRequest(DetectorInternalState.DETECTOR_STATE_INDEX, detectorId); + client.get(getStateRequest, onGetDetectorState(listener, detectorId, enabledTimeMs)); } - if (profiles.contains(ProfileName.ERROR)) { - profileError(detectorId, enabledTimeMs, job.getDisabledTime(), listener); + + if (profilesToCollect.contains(ProfileName.STATE) || profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { + 
profileStateRelated(detectorId, listener, job.isEnabled(), profilesToCollect); } - if (profiles.contains(ProfileName.COORDINATING_NODE) - || profiles.contains(ProfileName.SHINGLE_SIZE) - || profiles.contains(ProfileName.TOTAL_SIZE_IN_BYTES) - || profiles.contains(ProfileName.MODELS)) { - profileModels(detectorId, profiles, listener); + if (profilesToCollect.contains(ProfileName.COORDINATING_NODE) + || profilesToCollect.contains(ProfileName.SHINGLE_SIZE) + || profilesToCollect.contains(ProfileName.TOTAL_SIZE_IN_BYTES) + || profilesToCollect.contains(ProfileName.MODELS)) { + profileModels(detectorId, profilesToCollect, listener); } } catch (IOException | XContentParseException | NullPointerException e) { logger.error(e); @@ -168,13 +156,13 @@ private void prepareProfile( } } else { GetRequest getDetectorRequest = new GetRequest(ANOMALY_DETECTORS_INDEX, detectorId); - client.get(getDetectorRequest, onGetDetectorResponse(listener, detectorId, profiles)); + client.get(getDetectorRequest, onGetDetectorForPrepare(listener, detectorId, profilesToCollect)); } }, exception -> { if (exception instanceof IndexNotFoundException) { logger.info(exception.getMessage()); GetRequest getDetectorRequest = new GetRequest(ANOMALY_DETECTORS_INDEX, detectorId); - client.get(getDetectorRequest, onGetDetectorResponse(listener, detectorId, profiles)); + client.get(getDetectorRequest, onGetDetectorForPrepare(listener, detectorId, profilesToCollect)); } else { logger.error(FAIL_TO_GET_PROFILE_MSG + detectorId); listener.onFailure(exception); @@ -182,18 +170,18 @@ private void prepareProfile( })); } - private ActionListener onGetDetectorResponse( + private ActionListener onGetDetectorForPrepare( MultiResponsesDelegateActionListener listener, String detectorId, Set profiles ) { return ActionListener.wrap(getResponse -> { if (getResponse != null && getResponse.isExists()) { - DetectorProfile profile = new DetectorProfile(); + DetectorProfile.Builder profileBuilder = new DetectorProfile.Builder(); 
if (profiles.contains(ProfileName.STATE)) { - profile.setState(DetectorState.DISABLED); + profileBuilder.state(DetectorState.DISABLED); } - listener.respondImmediately(profile); + listener.respondImmediately(profileBuilder.build()); } else { listener.failImmediately(FAIL_TO_FIND_DETECTOR_MSG + detectorId); } @@ -203,242 +191,121 @@ private ActionListener onGetDetectorResponse( /** * We expect three kinds of states: * -Disabled: if get ad job api says the job is disabled; - * -Init: if anomaly score after the last update time of the detector is larger than 0 + * -Init: if rcf model's total updates is less than required * -Running: if neither of the above applies and no exceptions. * @param detectorId detector id - * @param enabledTime the time when AD job is enabled in milliseconds * @param listener listener to process the returned state or exception * @param enabled whether the detector job is enabled or not + * @param profilesToCollect target profiles to fetch */ - private void profileState( + private void profileStateRelated( String detectorId, - long enabledTime, MultiResponsesDelegateActionListener listener, - boolean enabled + boolean enabled, + Set profilesToCollect ) { if (enabled) { - SearchRequest searchLatestResult = createInittedEverRequest(detectorId, enabledTime); - client.search(searchLatestResult, onInittedEver(listener, detectorId, enabledTime)); + RCFPollingRequest request = new RCFPollingRequest(detectorId); + client.execute(RCFPollingAction.INSTANCE, request, onPollRCFUpdates(detectorId, profilesToCollect, listener)); } else { - DetectorProfile profile = new DetectorProfile(); - profile.setState(DetectorState.DISABLED); - listener.onResponse(profile); - } - } - - private ActionListener onInittedEver( - MultiResponsesDelegateActionListener listener, - String detectorId, - long lastUpdateTimeMs - ) { - return ActionListener.wrap(searchResponse -> { - SearchHits hits = searchResponse.getHits(); - DetectorProfile profile = new DetectorProfile(); - if 
(hits.getHits().length == 0L) { - profile.setState(DetectorState.INIT); - } else { - profile.setState(DetectorState.RUNNING); + if (profilesToCollect.contains(ProfileName.STATE)) { + listener.onResponse(new DetectorProfile.Builder().state(DetectorState.DISABLED).build()); } - - listener.onResponse(profile); - - }, exception -> { - if (exception instanceof IndexNotFoundException) { - DetectorProfile profile = new DetectorProfile(); - // anomaly result index is not created yet - profile.setState(DetectorState.INIT); - listener.onResponse(profile); - } else { - logger - .error( - "Fail to find any anomaly result with anomaly score larger than 0 after AD job enabled time for detector {}", - detectorId - ); - listener.onFailure(new RuntimeException("Fail to find detector state: " + detectorId, exception)); + if (profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { + listener.onResponse(new DetectorProfile.Builder().build()); } - }); + } } /** - * Precondition: - * 1. Index are rotated with name pattern ".opendistro-anomaly-results-history-{now/d}-1" and now is using UTC. - * 2. Latest entry with error is recorded within enabled and disabled time. Note disabled time can be null. - * - * Error is populated if error of the latest anomaly result is not empty. - * - * Two optimization to avoid scanning all anomaly result indices to get a detector's most recent error - * - * First, when a detector is running, we only need to scan the current index, not all of the rolled over ones - * since we are interested in the latest error. - * Second, when a detector is disabled, we only need to scan the latest anomaly result indices created before the - * detector's enable time. 
- * + * Action listener that processes the detector state document and reports its error, if any, recorded after the AD job was enabled + * @param listener listener to consolidate results and return a final response * @param detectorId detector id - * @param enabledTimeMillis the time when AD job is enabled in milliseconds - * @param listener listener to process the returned error or exception + * @param enabledTimeMs AD job enabled time + * @return the listener that handles the get response for the detector state document */ - private void profileError( + private ActionListener onGetDetectorState( + MultiResponsesDelegateActionListener listener, String detectorId, - long enabledTimeMillis, - Instant disabledTime, - MultiResponsesDelegateActionListener listener + long enabledTimeMs ) { - String[] latestIndex = null; - - long disabledTimeMillis = 0; - if (disabledTime != null) { - disabledTimeMillis = disabledTime.toEpochMilli(); - } - if (enabledTimeMillis > disabledTimeMillis) { - // detector is still running - latestIndex = new String[1]; - latestIndex[0] = AnomalyResult.ANOMALY_RESULT_INDEX; - } else { - String[] concreteIndices = indexNameExpressionResolver - .concreteIndexNames( - clusterService.state(), - IndicesOptions.lenientExpandOpen(), - AnomalyDetectionIndices.ALL_AD_RESULTS_INDEX_PATTERN - ); - - // find the latest from result indices such as .opendistro-anomaly-results-history-2020.04.06-1 and - // /.opendistro-anomaly-results-history-2020.04.07-000002 - long maxTimestamp = -1; - TreeMap> candidateIndices = new TreeMap<>(); - for (String indexName : concreteIndices) { - Matcher m = Pattern.compile("\\.opendistro-anomaly-results-history-(\\d{4})\\.(\\d{2})\\.(\\d{2})-\\d+").matcher(indexName); - if (m.matches()) { - int year = Integer.parseInt(m.group(1)); - int month = Integer.parseInt(m.group(2)); - int date = Integer.parseInt(m.group(3)); - // month starts with 0 - calendar.clear(); - calendar.set(year, month - 1, date); - // 2020.05.08 is translated to 1588896000000 - long timestamp = calendar.getTimeInMillis(); - - // a candidate index can be 
created before or after enabled time, but the index is definitely created before disabled - // time - if (timestamp <= disabledTimeMillis && maxTimestamp <= timestamp) { - maxTimestamp = timestamp; - // we can have two rotations on the same day and we don't know which one has our data, so we keep all - List indexList = candidateIndices.computeIfAbsent(timestamp, k -> new ArrayList()); - indexList.add(indexName); - } - } - } - List candidates = new ArrayList(); - List latestCandidate = candidateIndices.get(maxTimestamp); - - if (latestCandidate != null) { - candidates.addAll(latestCandidate); - } - - // look back one more index for an edge case: - // Suppose detector interval is 1 minute. Detector last run is at 2020-05-07, 11:59:50 PM, - // then AD result indices rolled over as .opendistro-anomaly-results-history-2020.05.07-001 - // Detector next run will be 2020-05-08, 00:00:50 AM. If a user stop the detector at - // 2020-05-08 00:00:10 AM, detector will not have AD result on 2020-05-08. - // We check AD result indices one day earlier to make sure we can always get AD result. - Map.Entry> earlierCandidate = candidateIndices.lowerEntry(maxTimestamp); - if (earlierCandidate != null) { - candidates.addAll(earlierCandidate.getValue()); - } - latestIndex = candidates.toArray(new String[0]); - } - - if (latestIndex == null || latestIndex.length == 0) { - // no result index found: can be due to anomaly result is not created yet or result indices for the detector have been deleted. 
- listener.onResponse(new DetectorProfile()); - return; - } - SearchRequest searchLatestResult = createLatestAnomalyResultRequest(detectorId, enabledTimeMillis, disabledTimeMillis, latestIndex); - client.search(searchLatestResult, onGetLatestAnomalyResult(listener, detectorId)); - } - - private ActionListener onGetLatestAnomalyResult(ActionListener listener, String detectorId) { - return ActionListener.wrap(searchResponse -> { - SearchHits hits = searchResponse.getHits(); - if (hits.getHits().length == 0L) { - listener.onResponse(new DetectorProfile()); - } else { - SearchHit hit = hits.getAt(0); - + return ActionListener.wrap(getResponse -> { + DetectorProfile.Builder profileBuilder = new DetectorProfile.Builder(); + if (getResponse != null && getResponse.isExists()) { try ( XContentParser parser = XContentType.JSON .xContent() - .createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, hit.getSourceAsString()) + .createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, getResponse.getSourceAsString()) ) { ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser::getTokenLocation); - AnomalyResult result = parser.namedObject(AnomalyResult.class, AnomalyResult.PARSE_FIELD_NAME, null); - DetectorProfile profile = new DetectorProfile(); - if (result.getError() != null) { - profile.setError(result.getError()); + DetectorInternalState detectorState = DetectorInternalState.parse(parser); + long lastUpdateTimeMs = detectorState.getLastUpdateTime().toEpochMilli(); + + // if state index hasn't been updated, we should not use the error field + // For example, before a detector is enabled, if the error message contains + // the phrase "stopped due to blah", we should not show this when the detector + // is enabled. 
+ if (lastUpdateTimeMs > enabledTimeMs && detectorState.getError() != null) { + profileBuilder.error(detectorState.getError()); } - listener.onResponse(profile); + + listener.onResponse(profileBuilder.build()); } catch (IOException | XContentParseException | NullPointerException e) { - logger.error("Fail to parse anomaly result with " + hit.toString()); - listener.onFailure(new RuntimeException("Fail to find detector error: " + detectorId, e)); + logger.error(e); + listener.failImmediately(FAIL_TO_GET_PROFILE_MSG, e); } + } else { + // detector state for this detector does not exist + listener.onResponse(profileBuilder.build()); } }, exception -> { if (exception instanceof IndexNotFoundException) { - listener.onResponse(new DetectorProfile()); + // detector state index is not created yet + listener.onResponse(new DetectorProfile.Builder().build()); } else { - logger.error("Fail to find any anomaly result after AD job enabled time for detector {}", detectorId); - listener.onFailure(new RuntimeException("Fail to find detector error: " + detectorId, exception)); + logger.error("Fail to find any detector info for detector {}", detectorId); + listener.onFailure(exception); } }); } - /** - * Create search request to check if we have at least 1 anomaly score larger than 0 after AD job enabled time - * @param detectorId detector id - * @param enabledTime the time when AD job is enabled in milliseconds - * @return the search request - */ - private SearchRequest createInittedEverRequest(String detectorId, long enabledTime) { - BoolQueryBuilder filterQuery = new BoolQueryBuilder(); - filterQuery.filter(QueryBuilders.termQuery(AnomalyResult.DETECTOR_ID_FIELD, detectorId)); - filterQuery.filter(QueryBuilders.rangeQuery(AnomalyResult.EXECUTION_END_TIME_FIELD).gte(enabledTime)); - filterQuery.filter(QueryBuilders.rangeQuery(AnomalyResult.ANOMALY_SCORE_FIELD).gt(0)); - - // I am only looking for last 1 occurrence and have no interest in the total number of documents that match the 
query. - // ES will not try to count the number of documents and will be able to terminate the query as soon as 1 document - // have been collected per segment. - SearchSourceBuilder source = new SearchSourceBuilder().query(filterQuery).size(1).trackTotalHits(false); - - SearchRequest request = new SearchRequest(AnomalyResult.ANOMALY_RESULT_INDEX); - request.source(source); - return request; - } - - /** - * Create search request to get the latest anomaly result after AD job enabled time - * @param detectorId detector id - * @param enabledTime the time when AD job is enabled in milliseconds - * @return the search request - */ - private SearchRequest createLatestAnomalyResultRequest(String detectorId, long enabledTime, long disabledTime, String[] index) { - BoolQueryBuilder filterQuery = new BoolQueryBuilder(); - filterQuery.filter(QueryBuilders.termQuery(AnomalyResult.DETECTOR_ID_FIELD, detectorId)); - RangeQueryBuilder rangeBuilder = QueryBuilders.rangeQuery(AnomalyResult.EXECUTION_END_TIME_FIELD).gte(enabledTime); - if (disabledTime >= enabledTime) { - rangeBuilder.lte(disabledTime); - } - filterQuery.filter(rangeBuilder); - - FieldSortBuilder sortQuery = new FieldSortBuilder(AnomalyResult.EXECUTION_END_TIME_FIELD).order(SortOrder.DESC); - - // I am only looking for last 1 occurrence and have no interest in the total number of documents that match the query. - // ES will not try to count the number of documents and will be able to terminate the query as soon as 1 document - // have been collected per segment. 
- SearchSourceBuilder source = new SearchSourceBuilder().query(filterQuery).size(1).sort(sortQuery).trackTotalHits(false); + private ActionListener onGetDetectorForInitProgress( + MultiResponsesDelegateActionListener listener, + String detectorId, + Set profilesToCollect, + long totalUpdates, + long requiredSamples + ) { + return ActionListener.wrap(getResponse -> { + if (getResponse != null && getResponse.isExists()) { + try ( + XContentParser parser = XContentType.JSON + .xContent() + .createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, getResponse.getSourceAsString()) + ) { + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser::getTokenLocation); + AnomalyDetector detector = AnomalyDetector.parse(parser, detectorId); + long intervalMins = ((IntervalTimeConfiguration) detector.getDetectionInterval()).toDuration().toMinutes(); + float percent = (100.0f * totalUpdates) / requiredSamples; + int neededPoints = (int) (requiredSamples - totalUpdates); + InitProgressProfile initProgress = new InitProgressProfile( + // rounding: 93.456 => 93%, 93.556 => 94% + String.format("%.0f%%", percent), + intervalMins * neededPoints, + neededPoints + ); - SearchRequest request = new SearchRequest(index); - request.source(source); - return request; + listener.onResponse(new DetectorProfile.Builder().initProgress(initProgress).build()); + } catch (Exception t) { + logger.error("Fail to parse detector {}", detectorId); + logger.error("Stack trace:", t); + listener.failImmediately(FAIL_TO_FIND_DETECTOR_MSG + detectorId, t); + } + } else { + listener.failImmediately(FAIL_TO_FIND_DETECTOR_MSG + detectorId); + } + }, exception -> { listener.failImmediately(FAIL_TO_FIND_DETECTOR_MSG + detectorId, exception); }); } private void profileModels( @@ -457,21 +324,86 @@ private ActionListener onModelResponse( MultiResponsesDelegateActionListener listener ) { return ActionListener.wrap(profileResponse -> { - DetectorProfile profile = new 
DetectorProfile(); + DetectorProfile.Builder profile = new DetectorProfile.Builder(); if (profiles.contains(ProfileName.COORDINATING_NODE)) { - profile.setCoordinatingNode(profileResponse.getCoordinatingNode()); + profile.coordinatingNode(profileResponse.getCoordinatingNode()); } if (profiles.contains(ProfileName.SHINGLE_SIZE)) { - profile.setShingleSize(profileResponse.getShingleSize()); + profile.shingleSize(profileResponse.getShingleSize()); } if (profiles.contains(ProfileName.TOTAL_SIZE_IN_BYTES)) { - profile.setTotalSizeInBytes(profileResponse.getTotalSizeInBytes()); + profile.totalSizeInBytes(profileResponse.getTotalSizeInBytes()); } if (profiles.contains(ProfileName.MODELS)) { - profile.setModelProfile(profileResponse.getModelProfile()); + profile.modelProfile(profileResponse.getModelProfile()); } - listener.onResponse(profile); + listener.onResponse(profile.build()); }, listener::onFailure); } + + /** + * Listener for polling rcf updates through transport messaging + * @param detectorId detector Id + * @param profilesToCollect profiles to collect like state + * @param listener delegate listener + * @return Listener for polling rcf updates through transport messaging + */ + private ActionListener onPollRCFUpdates( + String detectorId, + Set profilesToCollect, + MultiResponsesDelegateActionListener listener + ) { + return ActionListener.wrap(rcfPollResponse -> { + long totalUpdates = rcfPollResponse.getTotalUpdates(); + if (totalUpdates < requiredSamples) { + processInitResponse(detectorId, profilesToCollect, listener, totalUpdates); + } else { + if (profilesToCollect.contains(ProfileName.STATE)) { + listener.onResponse(new DetectorProfile.Builder().state(DetectorState.RUNNING).build()); + } + + if (profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { + InitProgressProfile initProgress = new InitProgressProfile("100%", 0, 0); + listener.onResponse(new DetectorProfile.Builder().initProgress(initProgress).build()); + } + } + }, exception -> { + // we will 
get an AnomalyDetectionException wrapping the real exception inside + Throwable cause = Throwables.getRootCause(exception); + + // exception can be a RemoteTransportException + Exception causeException = (Exception) cause; + if (ExceptionUtil + .isException(causeException, ResourceNotFoundException.class, ExceptionUtil.RESOURCE_NOT_FOUND_EXCEPTION_NAME_UNDERSCORE) + || (causeException instanceof IndexNotFoundException + && causeException.getMessage().contains(CommonName.CHECKPOINT_INDEX_NAME))) { + // cannot find checkpoint + processInitResponse(detectorId, profilesToCollect, listener, 0L); + } else { + logger.error(new ParameterizedMessage("Fail to get init progress through messaging for {}", detectorId), exception); + listener.failImmediately(FAIL_TO_GET_PROFILE_MSG + detectorId, exception); + } + }); + } + + private void processInitResponse( + String detectorId, + Set profilesToCollect, + MultiResponsesDelegateActionListener listener, + long totalUpdates + ) { + if (profilesToCollect.contains(ProfileName.STATE)) { + listener.onResponse(new DetectorProfile.Builder().state(DetectorState.INIT).build()); + } + + if (profilesToCollect.contains(ProfileName.INIT_PROGRESS)) { + GetRequest getDetectorRequest = new GetRequest(ANOMALY_DETECTORS_INDEX, detectorId); + client + .get( + getDetectorRequest, + onGetDetectorForInitProgress(listener, detectorId, profilesToCollect, totalUpdates, requiredSamples) + ); + } + } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/constant/CommonName.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/constant/CommonName.java index 8f730ccb..c6336fd6 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/constant/CommonName.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/constant/CommonName.java @@ -54,4 +54,5 @@ public class CommonName { public static final String SHINGLE_SIZE = "shingle_size"; public static final String TOTAL_SIZE_IN_BYTES = "total_size_in_bytes"; public 
static final String MODELS = "models"; + public static final String INIT_PROGRESS = "init_progress"; } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/indices/AnomalyDetectionIndices.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/indices/AnomalyDetectionIndices.java index 61e13825..3cc730dd 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/indices/AnomalyDetectionIndices.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/indices/AnomalyDetectionIndices.java @@ -18,6 +18,7 @@ import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.AD_RESULT_HISTORY_MAX_DOCS; import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.AD_RESULT_HISTORY_RETENTION_PERIOD; import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.AD_RESULT_HISTORY_ROLLOVER_PERIOD; +import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.ANOMALY_DETECTION_STATE_INDEX_MAPPING_FILE; import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.ANOMALY_DETECTORS_INDEX_MAPPING_FILE; import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.ANOMALY_DETECTOR_JOBS_INDEX_MAPPING_FILE; import static com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings.ANOMALY_RESULTS_INDEX_MAPPING_FILE; @@ -56,12 +57,13 @@ import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.carrotsearch.hppc.cursors.ObjectCursor; import com.google.common.base.Charsets; import com.google.common.io.Resources; /** - * This class manages creation of anomaly detector index. 
+ * This class provides utility methods for various anomaly detection indices. */ public class AnomalyDetectionIndices implements LocalNodeMasterListener { @@ -148,6 +150,17 @@ private String getAnomalyDetectorJobMappings() throws IOException { return Resources.toString(url, Charsets.UTF_8); } + /** + * Get anomaly detector state index mapping json content. + * + * @return anomaly detector state index mapping + * @throws IOException IOException if mapping file can't be read correctly + */ + private String getDetectorStateMappings() throws IOException { + URL url = AnomalyDetectionIndices.class.getClassLoader().getResource(ANOMALY_DETECTION_STATE_INDEX_MAPPING_FILE); + return Resources.toString(url, Charsets.UTF_8); + } + /** * Anomaly detector index exist or not. * @@ -175,6 +188,15 @@ public boolean doesAnomalyResultIndexExist() { return clusterService.state().metadata().hasAlias(AnomalyResult.ANOMALY_RESULT_INDEX); } + /** + * Detector state index exists or not. + * + * @return true if detector state index exists + */ + public boolean doesDetectorStateIndexExist() { + return clusterService.state().getRoutingTable().hasIndex(DetectorInternalState.DETECTOR_STATE_INDEX); + } + /** * Create anomaly detector index if not exist. * @@ -238,6 +260,18 @@ public void initAnomalyDetectorJobIndex(ActionListener acti adminClient.indices().create(request, actionListener); } + /** + * Create the detector state index. 
+ * + * @param actionListener action called after create index + * @throws IOException IOException from {@link AnomalyDetectionIndices#getDetectorStateMappings} + */ + public void initDetectorStateIndex(ActionListener actionListener) throws IOException { + CreateIndexRequest request = new CreateIndexRequest(DetectorInternalState.DETECTOR_STATE_INDEX) + .mapping(AnomalyDetector.TYPE, getDetectorStateMappings(), XContentType.JSON); + adminClient.indices().create(request, actionListener); + } + @Override public void onMaster() { try { diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/ModelManager.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/ModelManager.java index cfe6ce0c..8fd81864 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/ModelManager.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/ml/ModelManager.java @@ -397,6 +397,13 @@ private void getRcfResult(ModelState modelState, double[] point listener.onResponse(new RcfResult(score, confidence, forestSize)); } + private Optional> restoreCheckpoint(Optional rcfCheckpoint, String modelId, String detectorId) { + return rcfCheckpoint + .map(checkpoint -> AccessController.doPrivileged((PrivilegedAction) () -> rcfSerde.fromJson(checkpoint))) + .filter(rcf -> isHostingAllowed(detectorId, rcf)) + .map(rcf -> new ModelState<>(rcf, modelId, detectorId, ModelType.RCF.getName(), clock.instant())); + } + private void processRcfCheckpoint( Optional rcfCheckpoint, String modelId, @@ -404,10 +411,7 @@ private void processRcfCheckpoint( double[] point, ActionListener listener ) { - Optional> model = rcfCheckpoint - .map(checkpoint -> AccessController.doPrivileged((PrivilegedAction) () -> rcfSerde.fromJson(checkpoint))) - .filter(rcf -> isHostingAllowed(detectorId, rcf)) - .map(rcf -> new ModelState<>(rcf, modelId, detectorId, ModelType.RCF.getName(), clock.instant())); + Optional> model = restoreCheckpoint(rcfCheckpoint, modelId, detectorId); if 
(model.isPresent()) { forests.put(modelId, model.get()); getRcfResult(model.get(), point, listener); @@ -416,6 +420,24 @@ private void processRcfCheckpoint( } } + /** + * Process rcf checkpoint for total rcf updates polling + * @param rcfCheckpoint rcf checkpoint json string + * @param modelId model Id + * @param detectorId detector Id + * @param listener listener to return total updates of rcf + */ + private void processRcfCheckpoint(Optional rcfCheckpoint, String modelId, String detectorId, ActionListener listener) { + logger.info("Restoring checkpoint for {}", modelId); + Optional> model = restoreCheckpoint(rcfCheckpoint, modelId, detectorId); + if (model.isPresent()) { + forests.put(modelId, model.get()); + listener.onResponse(model.get().getModel().getTotalUpdates()); + } else { + listener.onFailure(new ResourceNotFoundException(detectorId, CommonErrorMessages.NO_CHECKPOINT_ERR_MSG + modelId)); + } + } + /** * Gets the result using the specified thresholding model. * @@ -1045,4 +1067,24 @@ public Map getModelSize(String detectorId) { .forEach(entry -> { res.put(entry.getKey(), 0L); }); return res; } + + /** + * Get a RCF model's total updates. 
+ * @param modelId the RCF model's id + * @param detectorId detector Id + * @param listener listener to return the result + */ + public void getTotalUpdates(String modelId, String detectorId, ActionListener listener) { + ModelState model = forests.get(modelId); + if (model != null) { + listener.onResponse(model.getModel().getTotalUpdates()); + } else { + checkpointDao + .getModelCheckpoint( + modelId, + ActionListener.wrap(checkpoint -> processRcfCheckpoint(checkpoint, modelId, detectorId, listener), listener::onFailure) + ); + } + + } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/AnomalyDetectorJob.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/AnomalyDetectorJob.java index 30f36939..9a99e60c 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/AnomalyDetectorJob.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/AnomalyDetectorJob.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.time.Instant; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; @@ -36,6 +38,13 @@ */ public class AnomalyDetectorJob implements ToXContentObject, ScheduledJobParameter { + public static final String PARSE_FIELD_NAME = "AnomalyDetectorJob"; + public static final NamedXContentRegistry.Entry XCONTENT_REGISTRY = new NamedXContentRegistry.Entry( + AnomalyDetectorJob.class, + new ParseField(PARSE_FIELD_NAME), + it -> parse(it) + ); + public static final String ANOMALY_DETECTOR_JOB_INDEX = ".opendistro-anomaly-detector-jobs"; public static final String NAME_FIELD = "name"; public static final String LAST_UPDATE_TIME_FIELD = "last_update_time"; diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorInternalState.java 
b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorInternalState.java new file mode 100644 index 00000000..03633c79 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorInternalState.java @@ -0,0 +1,160 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.model; + +import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; + +import java.io.IOException; +import java.time.Instant; + +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; + +import com.amazon.opendistroforelasticsearch.ad.annotation.Generated; +import com.amazon.opendistroforelasticsearch.ad.util.ParseUtils; +import com.google.common.base.Objects; + +/** + * Include anomaly detector's state + */ +public class DetectorInternalState implements ToXContentObject, Cloneable { + + public static final String PARSE_FIELD_NAME = "DetectorInternalState"; + public static final NamedXContentRegistry.Entry XCONTENT_REGISTRY = new NamedXContentRegistry.Entry( + DetectorInternalState.class, + new ParseField(PARSE_FIELD_NAME), + it -> parse(it) + ); + + public static final String DETECTOR_STATE_INDEX = 
".opendistro-anomaly-detection-state"; + + public static final String LAST_UPDATE_TIME_FIELD = "last_update_time"; + public static final String ERROR_FIELD = "error"; + + private Instant lastUpdateTime = null; + private String error = null; + + private DetectorInternalState() {} + + public static class Builder { + private Instant lastUpdateTime = null; + private String error = null; + + public Builder() {} + + public Builder lastUpdateTime(Instant lastUpdateTime) { + this.lastUpdateTime = lastUpdateTime; + return this; + } + + public Builder error(String error) { + this.error = error; + return this; + } + + public DetectorInternalState build() { + DetectorInternalState state = new DetectorInternalState(); + state.lastUpdateTime = this.lastUpdateTime; + state.error = this.error; + + return state; + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + XContentBuilder xContentBuilder = builder.startObject(); + + if (lastUpdateTime != null) { + xContentBuilder.field(LAST_UPDATE_TIME_FIELD, lastUpdateTime.toEpochMilli()); + } + if (error != null) { + xContentBuilder.field(ERROR_FIELD, error); + } + return xContentBuilder.endObject(); + } + + public static DetectorInternalState parse(XContentParser parser) throws IOException { + Instant lastUpdateTime = null; + String error = null; + + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser::getTokenLocation); + while (parser.nextToken() != XContentParser.Token.END_OBJECT) { + String fieldName = parser.currentName(); + parser.nextToken(); + + switch (fieldName) { + case LAST_UPDATE_TIME_FIELD: + lastUpdateTime = ParseUtils.toInstant(parser); + break; + case ERROR_FIELD: + error = parser.text(); + break; + default: + parser.skipChildren(); + break; + } + } + return new DetectorInternalState.Builder().lastUpdateTime(lastUpdateTime).error(error).build(); + } + + @Generated + @Override + public boolean equals(Object o) { + if (this 
== o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + DetectorInternalState that = (DetectorInternalState) o; + return Objects.equal(getLastUpdateTime(), that.getLastUpdateTime()) && Objects.equal(getError(), that.getError()); + } + + @Generated + @Override + public int hashCode() { + return Objects.hashCode(lastUpdateTime, error); + } + + @Override + public Object clone() { + DetectorInternalState state = null; + try { + state = (DetectorInternalState) super.clone(); + } catch (CloneNotSupportedException e) { + state = new DetectorInternalState.Builder().lastUpdateTime(lastUpdateTime).error(error).build(); + } + return state; + } + + public Instant getLastUpdateTime() { + return lastUpdateTime; + } + + public void setLastUpdateTime(Instant lastUpdateTime) { + this.lastUpdateTime = lastUpdateTime; + } + + public String getError() { + return error; + } + + public void setError(String error) { + this.error = error; + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorProfile.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorProfile.java index 8ee3efd4..6b066491 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorProfile.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/DetectorProfile.java @@ -33,18 +33,72 @@ public class DetectorProfile implements ToXContentObject, Mergeable { private int shingleSize; private String coordinatingNode; private long totalSizeInBytes; + private InitProgressProfile initProgress; public XContentBuilder toXContent(XContentBuilder builder) throws IOException { return toXContent(builder, ToXContent.EMPTY_PARAMS); } - public DetectorProfile() { - state = null; - error = null; - modelProfile = null; - shingleSize = -1; - coordinatingNode = null; - totalSizeInBytes = -1; + private DetectorProfile() {} + + public static class Builder { + private DetectorState state = null; + private String error = 
null; + private ModelProfile[] modelProfile = null; + private int shingleSize = -1; + private String coordinatingNode = null; + private long totalSizeInBytes = -1; + private InitProgressProfile initProgress = null; + + public Builder() {} + + public Builder state(DetectorState state) { + this.state = state; + return this; + } + + public Builder error(String error) { + this.error = error; + return this; + } + + public Builder modelProfile(ModelProfile[] modelProfile) { + this.modelProfile = modelProfile; + return this; + } + + public Builder shingleSize(int shingleSize) { + this.shingleSize = shingleSize; + return this; + } + + public Builder coordinatingNode(String coordinatingNode) { + this.coordinatingNode = coordinatingNode; + return this; + } + + public Builder totalSizeInBytes(long totalSizeInBytes) { + this.totalSizeInBytes = totalSizeInBytes; + return this; + } + + public Builder initProgress(InitProgressProfile initProgress) { + this.initProgress = initProgress; + return this; + } + + public DetectorProfile build() { + DetectorProfile profile = new DetectorProfile(); + profile.state = this.state; + profile.error = this.error; + profile.modelProfile = modelProfile; + profile.shingleSize = shingleSize; + profile.coordinatingNode = coordinatingNode; + profile.totalSizeInBytes = totalSizeInBytes; + profile.initProgress = initProgress; + + return profile; + } } @Override @@ -73,7 +127,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (totalSizeInBytes != -1) { xContentBuilder.field(CommonName.TOTAL_SIZE_IN_BYTES, totalSizeInBytes); } - + if (initProgress != null) { + xContentBuilder.field(CommonName.INIT_PROGRESS, initProgress); + } return xContentBuilder.endObject(); } @@ -125,6 +181,14 @@ public void setTotalSizeInBytes(long totalSizeInBytes) { this.totalSizeInBytes = totalSizeInBytes; } + public InitProgressProfile getInitProgress() { + return initProgress; + } + + public void setInitProgress(InitProgressProfile 
initProgress) { + this.initProgress = initProgress; + } + @Override public void merge(Mergeable other) { if (this == other || other == null || getClass() != other.getClass()) { @@ -149,6 +213,9 @@ public void merge(Mergeable other) { if (otherProfile.getTotalSizeInBytes() != -1) { this.totalSizeInBytes = otherProfile.getTotalSizeInBytes(); } + if (otherProfile.getInitProgress() != null) { + this.initProgress = otherProfile.getInitProgress(); + } } @Override @@ -162,18 +229,71 @@ public boolean equals(Object obj) { if (obj instanceof DetectorProfile) { DetectorProfile other = (DetectorProfile) obj; - return new EqualsBuilder().append(state, other.state).append(error, other.error).isEquals(); + EqualsBuilder equalsBuilder = new EqualsBuilder(); + if (state != null) { + equalsBuilder.append(state, other.state); + } + if (error != null) { + equalsBuilder.append(error, other.error); + } + if (modelProfile != null && modelProfile.length > 0) { + equalsBuilder.append(modelProfile, other.modelProfile); + } + if (shingleSize != -1) { + equalsBuilder.append(shingleSize, other.shingleSize); + } + if (coordinatingNode != null) { + equalsBuilder.append(coordinatingNode, other.coordinatingNode); + } + if (totalSizeInBytes != -1) { + equalsBuilder.append(totalSizeInBytes, other.totalSizeInBytes); + } + if (initProgress != null) { + equalsBuilder.append(initProgress, other.initProgress); + } + return equalsBuilder.isEquals(); } return false; } @Override public int hashCode() { - return new HashCodeBuilder().append(state).append(error).toHashCode(); + return new HashCodeBuilder() + .append(state) + .append(error) + .append(modelProfile) + .append(shingleSize) + .append(coordinatingNode) + .append(totalSizeInBytes) + .append(initProgress) + .toHashCode(); } @Override public String toString() { - return new ToStringBuilder(this).append("state", state).append("error", error).toString(); + ToStringBuilder toStringBuilder = new ToStringBuilder(this); + + if (state != null) { + 
toStringBuilder.append(CommonName.STATE, state); + } + if (error != null) { + toStringBuilder.append(CommonName.ERROR, error); + } + if (modelProfile != null && modelProfile.length > 0) { + toStringBuilder.append(modelProfile); + } + if (shingleSize != -1) { + toStringBuilder.append(CommonName.SHINGLE_SIZE, shingleSize); + } + if (coordinatingNode != null) { + toStringBuilder.append(CommonName.COORDINATING_NODE, coordinatingNode); + } + if (totalSizeInBytes != -1) { + toStringBuilder.append(CommonName.TOTAL_SIZE_IN_BYTES, totalSizeInBytes); + } + if (initProgress != null) { + toStringBuilder.append(CommonName.INIT_PROGRESS, initProgress); + } + return toStringBuilder.toString(); } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/InitProgressProfile.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/InitProgressProfile.java new file mode 100644 index 00000000..2047439f --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/InitProgressProfile.java @@ -0,0 +1,146 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.model; + +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +import java.io.IOException; + +import org.apache.commons.lang.builder.EqualsBuilder; +import org.apache.commons.lang.builder.HashCodeBuilder; +import org.apache.commons.lang.builder.ToStringBuilder; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; + +/** + * Profile output for detector initialization progress. When the new detector is created, it is possible that + * there hasn’t been enough continuous data in the index. We need to use live data to initialize. + * During initialization, we need to tell users progress (using a percentage), how many more + * shingles to go, and approximately how many minutes before the detector becomes operational + * if they keep their data stream continuous. 
+ * + */ +public class InitProgressProfile implements Writeable, ToXContent { + // field name in toXContent + public static final String PERCENTAGE = "percentage"; + public static final String ESTIMATED_MINUTES_LEFT = "estimated_minutes_left"; + public static final String NEEDED_SHINGLES = "needed_shingles"; + + private final String percentage; + private final long estimatedMinutesLeft; + private final int neededShingles; + + public InitProgressProfile(String percentage, long estimatedMinutesLeft, int neededDataPoints) { + super(); + this.percentage = percentage; + this.estimatedMinutesLeft = estimatedMinutesLeft; + this.neededShingles = neededDataPoints; + } + + public InitProgressProfile(StreamInput in) throws IOException { + percentage = in.readString(); + estimatedMinutesLeft = in.readVLong(); + neededShingles = in.readVInt(); + } + + public String getPercentage() { + return percentage; + } + + public long getEstimatedMinutesLeft() { + return estimatedMinutesLeft; + } + + public int getNeededDataPoints() { + return neededShingles; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(PERCENTAGE, percentage); + if (estimatedMinutesLeft > 0) { + builder.field(ESTIMATED_MINUTES_LEFT, estimatedMinutesLeft); + } + if (neededShingles > 0) { + builder.field(NEEDED_SHINGLES, neededShingles); + } + builder.endObject(); + return builder; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(percentage); + out.writeVLong(estimatedMinutesLeft); + out.writeVInt(neededShingles); + } + + @Override + public String toString() { + ToStringBuilder builder = new ToStringBuilder(this); + builder.append(PERCENTAGE, percentage); + if (estimatedMinutesLeft > 0) { + builder.append(ESTIMATED_MINUTES_LEFT, estimatedMinutesLeft); + } + if (neededShingles > 0) { + builder.append(NEEDED_SHINGLES, neededShingles); + } + return builder.toString(); + 
} + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + if (obj instanceof InitProgressProfile) { + InitProgressProfile other = (InitProgressProfile) obj; + + EqualsBuilder equalsBuilder = new EqualsBuilder(); + equalsBuilder.append(percentage, other.percentage); + equalsBuilder.append(estimatedMinutesLeft, other.estimatedMinutesLeft); + equalsBuilder.append(neededShingles, other.neededShingles); + + return equalsBuilder.isEquals(); + } + return false; + } + + @Override + public int hashCode() { + return new HashCodeBuilder().append(percentage).append(estimatedMinutesLeft).append(neededShingles).toHashCode(); + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ModelProfile.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ModelProfile.java index 71d61530..f0c8b9e6 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ModelProfile.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ModelProfile.java @@ -32,6 +32,7 @@ import java.io.IOException; +import org.apache.commons.lang.builder.ToStringBuilder; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; @@ -91,4 +92,15 @@ public void writeTo(StreamOutput out) throws IOException { out.writeVLong(modelSizeInBytes); out.writeString(nodeId); } + + @Override + public String toString() { + ToStringBuilder builder = new ToStringBuilder(this); + builder.append(MODEL_ID, modelId); + if (modelSizeInBytes > 0) { + builder.append(MODEL_SIZE_IN_BYTES, modelSizeInBytes); + } + builder.append(NODE_ID, nodeId); + return builder.toString(); + } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ProfileName.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ProfileName.java index 
3c3fa93b..1ab1c19d 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ProfileName.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/model/ProfileName.java @@ -27,7 +27,8 @@ public enum ProfileName { COORDINATING_NODE(CommonName.COORDINATING_NODE), SHINGLE_SIZE(CommonName.SHINGLE_SIZE), TOTAL_SIZE_IN_BYTES(CommonName.TOTAL_SIZE_IN_BYTES), - MODELS(CommonName.MODELS); + MODELS(CommonName.MODELS), + INIT_PROGRESS(CommonName.INIT_PROGRESS); private String name; @@ -58,6 +59,8 @@ public static ProfileName getName(String name) { return TOTAL_SIZE_IN_BYTES; case CommonName.MODELS: return MODELS; + case CommonName.INIT_PROGRESS: + return INIT_PROGRESS; default: throw new IllegalArgumentException("Unsupported profile types"); } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestAnomalyDetectorJobAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestAnomalyDetectorJobAction.java index 5410b532..a418a81f 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestAnomalyDetectorJobAction.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestAnomalyDetectorJobAction.java @@ -19,7 +19,6 @@ import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.DETECTOR_ID; import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.IF_PRIMARY_TERM; import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.IF_SEQ_NO; -import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.REFRESH; import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.START_JOB; import static com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils.STOP_JOB; @@ -27,7 +26,6 @@ import java.util.List; import java.util.Locale; -import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.client.node.NodeClient; import org.elasticsearch.cluster.service.ClusterService; import 
org.elasticsearch.common.settings.Settings; @@ -51,12 +49,10 @@ public class RestAnomalyDetectorJobAction extends BaseRestHandler { public static final String AD_JOB_ACTION = "anomaly_detector_job_action"; private volatile TimeValue requestTimeout; private final AnomalyDetectionIndices anomalyDetectionIndices; - private final ClusterService clusterService; public RestAnomalyDetectorJobAction(Settings settings, ClusterService clusterService, AnomalyDetectionIndices anomalyDetectionIndices) { this.anomalyDetectionIndices = anomalyDetectionIndices; this.requestTimeout = REQUEST_TIMEOUT.get(settings); - this.clusterService = clusterService; clusterService.getClusterSettings().addSettingsUpdateConsumer(REQUEST_TIMEOUT, it -> requestTimeout = it); } @@ -76,19 +72,14 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli return channel -> { long seqNo = request.paramAsLong(IF_SEQ_NO, SequenceNumbers.UNASSIGNED_SEQ_NO); long primaryTerm = request.paramAsLong(IF_PRIMARY_TERM, SequenceNumbers.UNASSIGNED_PRIMARY_TERM); - WriteRequest.RefreshPolicy refreshPolicy = request.hasParam(REFRESH) - ? 
WriteRequest.RefreshPolicy.parse(request.param(REFRESH)) - : WriteRequest.RefreshPolicy.IMMEDIATE; IndexAnomalyDetectorJobActionHandler handler = new IndexAnomalyDetectorJobActionHandler( - clusterService, client, channel, anomalyDetectionIndices, detectorId, seqNo, primaryTerm, - refreshPolicy, requestTimeout ); diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestDeleteAnomalyDetectorAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestDeleteAnomalyDetectorAction.java index b7b2f17c..a4b8bd70 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestDeleteAnomalyDetectorAction.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/RestDeleteAnomalyDetectorAction.java @@ -40,6 +40,7 @@ import com.amazon.opendistroforelasticsearch.ad.constant.CommonErrorMessages; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.amazon.opendistroforelasticsearch.ad.rest.handler.AnomalyDetectorActionHandler; import com.amazon.opendistroforelasticsearch.ad.settings.EnabledSetting; import com.google.common.collect.ImmutableList; @@ -91,13 +92,13 @@ private void deleteAnomalyDetectorJobDoc(NodeClient client, String detectorId, R .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); client.delete(deleteRequest, ActionListener.wrap(response -> { if (response.getResult() == DocWriteResponse.Result.DELETED || response.getResult() == DocWriteResponse.Result.NOT_FOUND) { - deleteAnomalyDetectorDoc(client, detectorId, channel); + deleteDetectorStateDoc(client, detectorId, channel); } else { logger.error("Fail to delete anomaly detector job {}", detectorId); } }, exception -> { if (exception instanceof IndexNotFoundException) { - deleteAnomalyDetectorDoc(client, detectorId, channel); + deleteDetectorStateDoc(client, detectorId, 
channel); } else { logger.error("Failed to delete anomaly detector job", exception); try { @@ -109,6 +110,34 @@ private void deleteAnomalyDetectorJobDoc(NodeClient client, String detectorId, R })); } + private void deleteDetectorStateDoc(NodeClient client, String detectorId, RestChannel channel) { + logger.info("Delete detector info {}", detectorId); + DeleteRequest deleteRequest = new DeleteRequest(DetectorInternalState.DETECTOR_STATE_INDEX, detectorId); + client + .delete( + deleteRequest, + ActionListener + .wrap( + response -> { + // whether deleted state doc or not, continue as state doc may not exist + deleteAnomalyDetectorDoc(client, detectorId, channel); + }, + exception -> { + if (exception instanceof IndexNotFoundException) { + deleteAnomalyDetectorDoc(client, detectorId, channel); + } else { + logger.error("Failed to delete detector state", exception); + try { + channel.sendResponse(new BytesRestResponse(channel, exception)); + } catch (IOException e) { + logger.error("Failed to send response of deletedetector state", e); + } + } + } + ) + ); + } + private void deleteAnomalyDetectorDoc(NodeClient client, String detectorId, RestChannel channel) { logger.info("Delete anomaly detector {}", detectorId); DeleteRequest deleteRequest = new DeleteRequest(AnomalyDetector.ANOMALY_DETECTORS_INDEX, detectorId) diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorActionHandler.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorActionHandler.java index d712967c..5f6fd438 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorActionHandler.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorActionHandler.java @@ -93,6 +93,8 @@ public class IndexAnomalyDetectorActionHandler extends AbstractActionHandler { * @param refreshPolicy refresh policy * @param anomalyDetector anomaly detector 
instance * @param requestTimeout request time out configuration + * @param maxAnomalyDetectors max anomaly detector allowed + * @param maxAnomalyFeatures max features allowed per detector */ public IndexAnomalyDetectorActionHandler( Settings settings, diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorJobActionHandler.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorJobActionHandler.java index be72172a..4cf0894c 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorJobActionHandler.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/rest/handler/IndexAnomalyDetectorJobActionHandler.java @@ -37,7 +37,6 @@ import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.client.node.NodeClient; -import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; @@ -65,8 +64,6 @@ public class IndexAnomalyDetectorJobActionHandler extends AbstractActionHandler private final String detectorId; private final Long seqNo; private final Long primaryTerm; - private final WriteRequest.RefreshPolicy refreshPolicy; - private final ClusterService clusterService; private final Logger logger = LogManager.getLogger(IndexAnomalyDetectorJobActionHandler.class); private final TimeValue requestTimeout; @@ -74,34 +71,28 @@ public class IndexAnomalyDetectorJobActionHandler extends AbstractActionHandler /** * Constructor function. 
* - * @param clusterService ClusterService * @param client ES node client that executes actions on the local node * @param channel ES channel used to construct bytes / builder based outputs, and send responses * @param anomalyDetectionIndices anomaly detector index manager * @param detectorId detector identifier * @param seqNo sequence number of last modification * @param primaryTerm primary term of last modification - * @param refreshPolicy refresh policy * @param requestTimeout request time out configuration */ public IndexAnomalyDetectorJobActionHandler( - ClusterService clusterService, NodeClient client, RestChannel channel, AnomalyDetectionIndices anomalyDetectionIndices, String detectorId, Long seqNo, Long primaryTerm, - WriteRequest.RefreshPolicy refreshPolicy, TimeValue requestTimeout ) { super(client, channel); - this.clusterService = clusterService; this.anomalyDetectionIndices = anomalyDetectionIndices; this.detectorId = detectorId; this.seqNo = seqNo; this.primaryTerm = primaryTerm; - this.refreshPolicy = refreshPolicy; this.requestTimeout = requestTimeout; } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/settings/AnomalyDetectorSettings.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/settings/AnomalyDetectorSettings.java index a0067e93..805e5d6a 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/settings/AnomalyDetectorSettings.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/settings/AnomalyDetectorSettings.java @@ -141,6 +141,7 @@ private AnomalyDetectorSettings() {} public static final String ANOMALY_DETECTORS_INDEX_MAPPING_FILE = "mappings/anomaly-detectors.json"; public static final String ANOMALY_DETECTOR_JOBS_INDEX_MAPPING_FILE = "mappings/anomaly-detector-jobs.json"; public static final String ANOMALY_RESULTS_INDEX_MAPPING_FILE = "mappings/anomaly-results.json"; + public static final String ANOMALY_DETECTION_STATE_INDEX_MAPPING_FILE = "mappings/anomaly-detection-state.json"; 
public static final Duration HOURLY_MAINTENANCE = Duration.ofHours(1); diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTransportAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTransportAction.java index 08afa578..4a4a200a 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTransportAction.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTransportAction.java @@ -42,7 +42,6 @@ import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.common.io.stream.NotSerializableExceptionWrapper; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexNotFoundException; import org.elasticsearch.node.NodeClosedException; @@ -75,6 +74,7 @@ import com.amazon.opendistroforelasticsearch.ad.stats.ADStats; import com.amazon.opendistroforelasticsearch.ad.stats.StatNames; import com.amazon.opendistroforelasticsearch.ad.util.ColdStartRunner; +import com.amazon.opendistroforelasticsearch.ad.util.ExceptionUtil; public class AnomalyResultTransportAction extends HandledTransportAction { @@ -88,12 +88,10 @@ public class AnomalyResultTransportAction extends HandledTransportAction onFeatureResponse( } if (!featureOptional.getProcessedFeatures().isPresent()) { + stateManager.getDetectorCheckpoint(adID, ActionListener.wrap(checkpointExists -> { + if (!checkpointExists) { + LOG.info("Trigger cold start for {}", adID); + globalRunner.compute(new ColdStartJob(detector)); + } + }, exception -> { + Throwable cause = ExceptionsHelper.unwrapCause(exception); + if (cause instanceof IndexNotFoundException) { + LOG.info("Trigger cold start for {}", adID); + globalRunner.compute(new ColdStartJob(detector)); + } else { + LOG.error(String.format("Fail to get checkpoint state for %s", adID), 
exception); + } + })); if (!featureOptional.getUnprocessedFeatures().isPresent()) { // Feature not available is common when we have data holes. Respond empty response // so that alerting will not print stack trace to avoid bloating our logs. @@ -403,7 +415,7 @@ private boolean coldStartIfNoModel(AtomicReference fa AnomalyDetectionException exp = failure.get(); if (exp != null) { if (exp instanceof ResourceNotFoundException) { - LOG.info("Cold start for {}", detector.getDetectorId()); + LOG.info("Trigger cold start for {}", detector.getDetectorId()); globalRunner.compute(new ColdStartJob(detector)); return true; } else { @@ -421,11 +433,12 @@ private void findException(Throwable cause, String adID, AtomicReference expected, String expectedErrorName) { - if (exception == null) { - return false; - } - - if (expected.isAssignableFrom(exception.getClass())) { - return true; - } - - // all exception that has not been registered to sent over wire can be wrapped - // inside NotSerializableExceptionWrapper. - // see StreamOutput.writeException - // ElasticsearchException.getExceptionName(exception) returns exception - // separated by underscore. For example, ResourceNotFoundException is converted - // to "resource_not_found_exception". 
- if (exception instanceof NotSerializableExceptionWrapper && exception.getMessage().trim().startsWith(expectedErrorName)) { - return true; - } - return false; - } - private CombinedRcfResult getCombinedResult(List rcfResults) { List rcfResultLib = new ArrayList<>(); for (RCFResultResponse result : rcfResults) { @@ -799,13 +780,15 @@ class ColdStartJob implements Callable { @Override public Boolean call() { + String detectorId = detector.getDetectorId(); try { Optional traingData = featureManager.getColdStartData(detector); if (traingData.isPresent()) { - modelManager.trainModel(detector, traingData.get()); + double[][] trainingPoints = traingData.get(); + modelManager.trainModel(detector, trainingPoints); return true; } else { - throw new EndRunException(detector.getDetectorId(), "Cannot get training data", false); + throw new EndRunException(detectorId, "Cannot get training data", false); } } catch (ElasticsearchTimeoutException timeoutEx) { diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/CronTransportAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/CronTransportAction.java index 3d63f77c..190f15d7 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/CronTransportAction.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/CronTransportAction.java @@ -32,7 +32,7 @@ public class CronTransportAction extends TransportNodesAction { - private ADStateManager transportStateManager; + private TransportStateManager transportStateManager; private ModelManager modelManager; private FeatureManager featureManager; @@ -42,7 +42,7 @@ public CronTransportAction( ClusterService clusterService, TransportService transportService, ActionFilters actionFilters, - ADStateManager tarnsportStatemanager, + TransportStateManager tarnsportStatemanager, ModelManager modelManager, FeatureManager featureManager ) { diff --git 
a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/DeleteModelTransportAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/DeleteModelTransportAction.java index 3256b70e..409c6400 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/DeleteModelTransportAction.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/DeleteModelTransportAction.java @@ -35,7 +35,7 @@ public class DeleteModelTransportAction extends TransportNodesAction { private static final Logger LOG = LogManager.getLogger(DeleteModelTransportAction.class); - private ADStateManager transportStateManager; + private TransportStateManager transportStateManager; private ModelManager modelManager; private FeatureManager featureManager; @@ -45,7 +45,7 @@ public DeleteModelTransportAction( ClusterService clusterService, TransportService transportService, ActionFilters actionFilters, - ADStateManager tarnsportStatemanager, + TransportStateManager tarnsportStatemanager, ModelManager modelManager, FeatureManager featureManager ) { @@ -85,8 +85,6 @@ protected DeleteModelNodeResponse newNodeResponse(StreamInput in) throws IOExcep } /** - * Precondition: - * associated alerting monitors have been deleted * * Delete checkpoint document (including both RCF and thresholding model), in-memory models, * buffered shingle data, transport state, and anomaly result diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingAction.java new file mode 100644 index 00000000..103aa2e3 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingAction.java @@ -0,0 +1,28 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import org.elasticsearch.action.ActionType; + +public class RCFPollingAction extends ActionType { + public static final RCFPollingAction INSTANCE = new RCFPollingAction(); + public static final String NAME = "cluster:admin/ad/rcfpolling"; + + private RCFPollingAction() { + super(NAME, RCFPollingResponse::new); + } + +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingRequest.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingRequest.java new file mode 100644 index 00000000..fb5e7ea6 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingRequest.java @@ -0,0 +1,72 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import static org.elasticsearch.action.ValidateActions.addValidationError; + +import java.io.IOException; + +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import com.amazon.opendistroforelasticsearch.ad.constant.CommonErrorMessages; +import com.amazon.opendistroforelasticsearch.ad.constant.CommonMessageAttributes; + +public class RCFPollingRequest extends ActionRequest implements ToXContentObject { + private String adID; + + public RCFPollingRequest(StreamInput in) throws IOException { + super(in); + adID = in.readString(); + } + + public RCFPollingRequest(String adID) { + super(); + this.adID = adID; + } + + public String getAdID() { + return adID; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(adID); + } + + @Override + public ActionRequestValidationException validate() { + ActionRequestValidationException validationException = null; + if (Strings.isEmpty(adID)) { + validationException = addValidationError(CommonErrorMessages.AD_ID_MISSING_MSG, validationException); + } + return validationException; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(CommonMessageAttributes.ID_JSON_KEY, adID); + builder.endObject(); + return builder; + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingResponse.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingResponse.java new file mode 100644 index 00000000..52fbd0f7 --- 
/dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingResponse.java @@ -0,0 +1,56 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import java.io.IOException; + +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; + +public class RCFPollingResponse extends ActionResponse implements ToXContentObject { + public static final String TOTAL_UPDATES_KEY = "totalUpdates"; + + private final long totalUpdates; + + public RCFPollingResponse(long totalUpdates) { + this.totalUpdates = totalUpdates; + } + + public RCFPollingResponse(StreamInput in) throws IOException { + super(in); + totalUpdates = in.readVLong(); + } + + public long getTotalUpdates() { + return totalUpdates; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(totalUpdates); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(TOTAL_UPDATES_KEY, totalUpdates); + builder.endObject(); + return builder; + } +} diff --git 
a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTransportAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTransportAction.java new file mode 100644 index 00000000..f288534f --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTransportAction.java @@ -0,0 +1,144 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import java.io.IOException; +import java.util.Optional; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.HandledTransportAction; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportException; +import org.elasticsearch.transport.TransportRequestOptions; +import org.elasticsearch.transport.TransportResponseHandler; +import org.elasticsearch.transport.TransportService; + +import 
com.amazon.opendistroforelasticsearch.ad.cluster.HashRing; +import com.amazon.opendistroforelasticsearch.ad.common.exception.AnomalyDetectionException; +import com.amazon.opendistroforelasticsearch.ad.ml.ModelManager; +import com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings; + +/** + * Transport action to get total rcf updates from hosted models or checkpoint + * + */ +public class RCFPollingTransportAction extends HandledTransportAction { + + private static final Logger LOG = LogManager.getLogger(RCFPollingTransportAction.class); + static final String NO_NODE_FOUND_MSG = "Cannot find model hosting node"; + static final String FAIL_TO_GET_RCF_UPDATE_MSG = "Cannot find hosted model or related checkpoint"; + + private final TransportService transportService; + private final ModelManager modelManager; + private final HashRing hashRing; + private final TransportRequestOptions option; + private final ClusterService clusterService; + + @Inject + public RCFPollingTransportAction( + ActionFilters actionFilters, + TransportService transportService, + Settings settings, + ModelManager modelManager, + HashRing hashRing, + ClusterService clusterService + ) { + super(RCFPollingAction.NAME, transportService, actionFilters, RCFPollingRequest::new); + this.transportService = transportService; + this.modelManager = modelManager; + this.hashRing = hashRing; + this.option = TransportRequestOptions + .builder() + .withType(TransportRequestOptions.Type.REG) + .withTimeout(AnomalyDetectorSettings.REQUEST_TIMEOUT.get(settings)) + .build(); + this.clusterService = clusterService; + } + + @Override + protected void doExecute(Task task, RCFPollingRequest request, ActionListener listener) { + + String adID = request.getAdID(); + + String rcfModelID = modelManager.getRcfModelId(adID, 0); + + Optional rcfNode = hashRing.getOwningNode(rcfModelID.toString()); + if (!rcfNode.isPresent()) { + listener.onFailure(new AnomalyDetectionException(adID, NO_NODE_FOUND_MSG)); + 
return; + } + + String rcfNodeId = rcfNode.get().getId(); + + DiscoveryNode localNode = clusterService.localNode(); + + if (localNode.getId().equals(rcfNodeId)) { + modelManager + .getTotalUpdates( + rcfModelID, + adID, + ActionListener + .wrap( + totalUpdates -> listener.onResponse(new RCFPollingResponse(totalUpdates)), + e -> listener.onFailure(new AnomalyDetectionException(adID, FAIL_TO_GET_RCF_UPDATE_MSG, e)) + ) + ); + } else { + // redirect + LOG.info("Sending RCF polling request to {} for model {}", rcfNodeId, rcfModelID); + + try { + transportService + .sendRequest(rcfNode.get(), RCFPollingAction.NAME, request, option, new TransportResponseHandler() { + + @Override + public RCFPollingResponse read(StreamInput in) throws IOException { + return new RCFPollingResponse(in); + } + + @Override + public void handleResponse(RCFPollingResponse response) { + listener.onResponse(response); + } + + @Override + public void handleException(TransportException exp) { + listener.onFailure(exp); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + + }); + } catch (Exception e) { + LOG.error(String.format("Fail to poll RCF models for {}", adID), e); + listener.onFailure(new AnomalyDetectionException(adID, FAIL_TO_GET_RCF_UPDATE_MSG, e)); + } + + } + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportState.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportState.java new file mode 100644 index 00000000..18d0e6ab --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportState.java @@ -0,0 +1,100 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import java.time.Duration; +import java.time.Instant; +import java.util.Map.Entry; + +import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; + +public class TransportState { + private String detectorId; + // detector definition and the definition fetch time + private Entry detectorDef; + // number of partitions and the number's fetch time + private Entry partitonNumber; + // checkpoint fetch time + private Instant checkpoint; + // last error. Used by DetectorStateHandler to check if the error for a + // detector has changed or not. If changed, trigger indexing. 
+ private Entry lastError; + + public TransportState(String detectorId) { + this.detectorId = detectorId; + detectorDef = null; + partitonNumber = null; + checkpoint = null; + lastError = null; + } + + public String getDetectorId() { + return detectorId; + } + + public Entry getDetectorDef() { + return detectorDef; + } + + public void setDetectorDef(Entry detectorDef) { + this.detectorDef = detectorDef; + } + + public Entry getPartitonNumber() { + return partitonNumber; + } + + public void setPartitonNumber(Entry partitonNumber) { + this.partitonNumber = partitonNumber; + } + + public Instant getCheckpoint() { + return checkpoint; + } + + public void setCheckpoint(Instant checkpoint) { + this.checkpoint = checkpoint; + } + + public Entry getLastError() { + return lastError; + } + + public void setLastError(Entry lastError) { + this.lastError = lastError; + } + + public boolean expired(Duration stateTtl, Instant now) { + boolean ans = true; + if (detectorDef != null) { + ans = ans && expired(stateTtl, now, detectorDef.getValue()); + } + if (partitonNumber != null) { + ans = ans && expired(stateTtl, now, partitonNumber.getValue()); + } + if (checkpoint != null) { + ans = ans && expired(stateTtl, now, checkpoint); + } + if (lastError != null) { + ans = ans && expired(stateTtl, now, lastError.getValue()); + } + return ans; + } + + private boolean expired(Duration stateTtl, Instant now, Instant toCheck) { + return toCheck.plus(stateTtl).isBefore(now); + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManager.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateManager.java similarity index 60% rename from src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManager.java rename to src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateManager.java index 9eb5bbcc..ce06fc8c 100644 ---
a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManager.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateManager.java @@ -39,6 +39,7 @@ import org.elasticsearch.common.xcontent.XContentType; import com.amazon.opendistroforelasticsearch.ad.common.exception.LimitExceededException; +import com.amazon.opendistroforelasticsearch.ad.constant.CommonName; import com.amazon.opendistroforelasticsearch.ad.ml.ModelManager; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; @@ -48,10 +49,9 @@ * and the number of partitions for a detector id. * */ -public class ADStateManager { - private static final Logger LOG = LogManager.getLogger(ADStateManager.class); - private ConcurrentHashMap> currentDetectors; - private ConcurrentHashMap> partitionNumber; +public class TransportStateManager { + private static final Logger LOG = LogManager.getLogger(TransportStateManager.class); + private ConcurrentHashMap transportStates; private Client client; private ModelManager modelManager; private NamedXContentRegistry xContentRegistry; @@ -62,7 +62,9 @@ public class ADStateManager { private final Settings settings; private final Duration stateTtl; - public ADStateManager( + public static final String NO_ERROR = "no_error"; + + public TransportStateManager( Client client, NamedXContentRegistry xContentRegistry, ModelManager modelManager, @@ -71,11 +73,10 @@ public ADStateManager( Clock clock, Duration stateTtl ) { - this.currentDetectors = new ConcurrentHashMap<>(); + this.transportStates = new ConcurrentHashMap<>(); this.client = client; this.modelManager = modelManager; this.xContentRegistry = xContentRegistry; - this.partitionNumber = new ConcurrentHashMap<>(); this.clientUtil = clientUtil; this.backpressureMuter = new ConcurrentHashMap<>(); this.clock = clock; @@ -91,31 +92,31 @@ public ADStateManager( * @throws LimitExceededException when 
there is no sufficient resource available */ public int getPartitionNumber(String adID, AnomalyDetector detector) { - Entry partitonAndTime = partitionNumber.get(adID); - if (partitonAndTime != null) { + if (transportStates.containsKey(adID) && transportStates.get(adID).getPartitonNumber() != null) { + Entry partitonAndTime = transportStates.get(adID).getPartitonNumber(); partitonAndTime.setValue(clock.instant()); return partitonAndTime.getKey(); } int partitionNum = modelManager.getPartitionedForestSizes(detector).getKey(); - partitionNumber.putIfAbsent(adID, new SimpleEntry<>(partitionNum, clock.instant())); + TransportState state = transportStates.computeIfAbsent(adID, id -> new TransportState(id)); + state.setPartitonNumber(new SimpleEntry<>(partitionNum, clock.instant())); + return partitionNum; } public void getAnomalyDetector(String adID, ActionListener> listener) { - Entry detectorAndTime = currentDetectors.get(adID); - if (detectorAndTime != null) { + if (transportStates.containsKey(adID) && transportStates.get(adID).getDetectorDef() != null) { + Entry detectorAndTime = transportStates.get(adID).getDetectorDef(); detectorAndTime.setValue(clock.instant()); listener.onResponse(Optional.of(detectorAndTime.getKey())); - return; + } else { + GetRequest request = new GetRequest(AnomalyDetector.ANOMALY_DETECTORS_INDEX, adID); + clientUtil.asyncRequest(request, client::get, onGetDetectorResponse(adID, listener)); } - - GetRequest request = new GetRequest(AnomalyDetector.ANOMALY_DETECTORS_INDEX, adID); - - clientUtil.asyncRequest(request, client::get, onGetResponse(adID, listener)); } - private ActionListener onGetResponse(String adID, ActionListener> listener) { + private ActionListener onGetDetectorResponse(String adID, ActionListener> listener) { return ActionListener.wrap(response -> { if (response == null || !response.isExists()) { listener.onResponse(Optional.empty()); @@ -130,7 +131,9 @@ private ActionListener onGetResponse(String adID, 
ActionListener(detector, clock.instant())); + TransportState state = transportStates.computeIfAbsent(adID, id -> new TransportState(id)); + state.setDetectorDef(new SimpleEntry<>(detector, clock.instant())); + listener.onResponse(Optional.of(detector)); } catch (Exception t) { LOG.error("Fail to parse detector {}", adID); @@ -140,35 +143,61 @@ private ActionListener onGetResponse(String adID, ActionListener listener) { + if (transportStates.containsKey(adID) && transportStates.get(adID).getCheckpoint() != null) { + transportStates.get(adID).setCheckpoint(clock.instant()); + listener.onResponse(Boolean.TRUE); + return; + } + + GetRequest request = new GetRequest(CommonName.CHECKPOINT_INDEX_NAME, modelManager.getRcfModelId(adID, 0)); + + clientUtil.asyncRequest(request, client::get, onGetCheckpointResponse(adID, listener)); + } + + private ActionListener onGetCheckpointResponse(String adID, ActionListener listener) { + return ActionListener.wrap(response -> { + if (response == null || !response.isExists()) { + listener.onResponse(Boolean.FALSE); + } else { + TransportState state = transportStates.get(adID); + if (state == null) { + state = new TransportState(adID); + transportStates.put(adID, state); + } + state.setCheckpoint(clock.instant()); + listener.onResponse(Boolean.TRUE); + } + }, listener::onFailure); + } + /** * Used in delete workflow * * @param adID detector ID */ public void clear(String adID) { - currentDetectors.remove(adID); - partitionNumber.remove(adID); - } - - public void maintenance() { - maintenance(currentDetectors); - maintenance(partitionNumber); + transportStates.remove(adID); } /** - * Clean states if it is older than our stateTtl. The input has to be a + * Clean states if it is older than our stateTtl. transportState has to be a * ConcurrentHashMap otherwise we will have * java.util.ConcurrentModificationException. 
* - * @param states states to be maintained */ - void maintenance(ConcurrentHashMap> states) { - states.entrySet().stream().forEach(entry -> { + public void maintenance() { + transportStates.entrySet().stream().forEach(entry -> { String detectorId = entry.getKey(); try { - Entry stateAndTime = entry.getValue(); - if (stateAndTime.getValue().plus(stateTtl).isBefore(clock.instant())) { - states.remove(detectorId); + TransportState state = entry.getValue(); + if (state.expired(stateTtl, clock.instant())) { + transportStates.remove(detectorId); } } catch (Exception e) { LOG.warn("Failed to finish maintenance for detector id " + detectorId, e); @@ -204,4 +233,29 @@ public void resetBackpressureCounter(String nodeId) { public boolean hasRunningQuery(AnomalyDetector detector) { return clientUtil.hasRunningQuery(detector); } + + /** + * Get last error of a detector + * @param adID detector id + * @return last error for the detector + */ + public String getLastError(String adID) { + if (transportStates.containsKey(adID) && transportStates.get(adID).getLastError() != null) { + Entry errorAndTime = transportStates.get(adID).getLastError(); + errorAndTime.setValue(clock.instant()); + return errorAndTime.getKey(); + } + + return NO_ERROR; + } + + /** + * Set last error of a detector + * @param adID detector id + * @param error error, can be null + */ + public void setLastError(String adID, String error) { + TransportState state = transportStates.computeIfAbsent(adID, id -> new TransportState(id)); + state.setLastError(new SimpleEntry<>(error, clock.instant())); + } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyIndexHandler.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyIndexHandler.java new file mode 100644 index 00000000..95be0690 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyIndexHandler.java @@ -0,0 +1,188 @@ +/* + * Copyright 2020 
Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.transport.handler; + +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; + +import java.util.Iterator; +import java.util.Locale; +import java.util.function.BooleanSupplier; +import java.util.function.Consumer; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.ResourceAlreadyExistsException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; +import org.elasticsearch.action.bulk.BackoffPolicy; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.index.IndexResponse; +import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.threadpool.ThreadPool; + +import com.amazon.opendistroforelasticsearch.ad.common.exception.AnomalyDetectionException; +import 
com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings; +import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; +import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils; +import com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils; + +public class AnomalyIndexHandler { + private static final Logger LOG = LogManager.getLogger(AnomalyIndexHandler.class); + + static final String CANNOT_SAVE_ERR_MSG = "Cannot save %s due to write block."; + static final String FAIL_TO_SAVE_ERR_MSG = "Fail to save %s: "; + static final String RETRY_SAVING_ERR_MSG = "Retry in saving %s: "; + static final String SUCCESS_SAVING_MSG = "Succeed in saving %s"; + + protected final Client client; + + private final ThreadPool threadPool; + private final BackoffPolicy savingBackoffPolicy; + protected final String indexName; + private final Consumer> createIndex; + private final BooleanSupplier indexExists; + // whether save to a specific doc id or not + private final boolean fixedDoc; + protected final ClientUtil clientUtil; + private final IndexUtils indexUtils; + private final ClusterService clusterService; + + public AnomalyIndexHandler( + Client client, + Settings settings, + ThreadPool threadPool, + String indexName, + Consumer> createIndex, + BooleanSupplier indexExists, + boolean fixedDoc, + ClientUtil clientUtil, + IndexUtils indexUtils, + ClusterService clusterService + ) { + this.client = client; + this.threadPool = threadPool; + this.savingBackoffPolicy = BackoffPolicy + .exponentialBackoff( + AnomalyDetectorSettings.BACKOFF_INITIAL_DELAY.get(settings), + AnomalyDetectorSettings.MAX_RETRY_FOR_BACKOFF.get(settings) + ); + this.indexName = indexName; + this.createIndex = createIndex; + this.indexExists = indexExists; + this.fixedDoc = fixedDoc; + this.clientUtil = clientUtil; + this.indexUtils = indexUtils; + this.clusterService = clusterService; + } + + public void index(T toSave, String detectorId) { + if 
(indexUtils.checkIndicesBlocked(clusterService.state(), ClusterBlockLevel.WRITE, this.indexName)) { + LOG.warn(String.format(Locale.ROOT, CANNOT_SAVE_ERR_MSG, detectorId)); + return; + } + + try { + if (!indexExists.getAsBoolean()) { + createIndex + .accept(ActionListener.wrap(initResponse -> onCreateIndexResponse(initResponse, toSave, detectorId), exception -> { + if (ExceptionsHelper.unwrapCause(exception) instanceof ResourceAlreadyExistsException) { + // It is possible the index has been created while we sending the create request + save(toSave, detectorId); + } else { + throw new AnomalyDetectionException( + detectorId, + String.format("Unexpected error creating index %s", indexName), + exception + ); + } + })); + } else { + save(toSave, detectorId); + } + } catch (Exception e) { + throw new AnomalyDetectionException( + detectorId, + String.format(Locale.ROOT, "Error in saving %s for detector %s", indexName, detectorId), + e + ); + } + } + + private void onCreateIndexResponse(CreateIndexResponse response, T toSave, String detectorId) { + if (response.isAcknowledged()) { + save(toSave, detectorId); + } else { + throw new AnomalyDetectionException(detectorId, "Creating %s with mappings call not acknowledged."); + } + } + + protected void save(T toSave, String detectorId) { + try (XContentBuilder builder = jsonBuilder()) { + IndexRequest indexRequest = new IndexRequest(indexName).source(toSave.toXContent(builder, RestHandlerUtils.XCONTENT_WITH_TYPE)); + if (fixedDoc) { + indexRequest.id(detectorId); + } + + saveIteration(indexRequest, detectorId, savingBackoffPolicy.iterator()); + } catch (Exception e) { + LOG.error(String.format("Failed to save %s", indexName), e); + throw new AnomalyDetectionException(detectorId, String.format("Cannot save %s", indexName)); + } + } + + void saveIteration(IndexRequest indexRequest, String detectorId, Iterator backoff) { + clientUtil + .asyncRequest( + indexRequest, + client::index, + ActionListener.wrap(response -> { 
LOG.debug(String.format(SUCCESS_SAVING_MSG, detectorId)); }, exception -> { + // Elasticsearch has a thread pool and a queue for write per node. A thread + // pool will have N number of workers ready to handle the requests. When a + // request comes and if a worker is free , this is handled by the worker. Now by + // default the number of workers is equal to the number of cores on that CPU. + // When the workers are full and there are more write requests, the request + // will go to queue. The size of queue is also limited. If by default size is, + // say, 200 and if there happens more parallel requests than this, then those + // requests would be rejected as you can see EsRejectedExecutionException. + // So EsRejectedExecutionException is the way that Elasticsearch tells us that + // it cannot keep up with the current indexing rate. + // When it happens, we should pause indexing a bit before trying again, ideally + // with randomized exponential backoff. + Throwable cause = ExceptionsHelper.unwrapCause(exception); + if (!(cause instanceof EsRejectedExecutionException) || !backoff.hasNext()) { + LOG.error(String.format(FAIL_TO_SAVE_ERR_MSG, detectorId), cause); + } else { + TimeValue nextDelay = backoff.next(); + LOG.warn(String.format(RETRY_SAVING_ERR_MSG, detectorId), cause); + // copy original request's source without other information like autoGeneratedTimestamp + // otherwise, an exception will be thrown indicating autoGeneratedTimestamp should not be set + // while request id is already set (id is set because we have already sent the request before). 
+ IndexRequest newReuqest = new IndexRequest(indexRequest.index()); + newReuqest.source(indexRequest.source(), indexRequest.getContentType()); + threadPool.schedule(() -> saveIteration(newReuqest, detectorId, backoff), nextDelay, ThreadPool.Names.SAME); + } + }) + ); + } +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandler.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandler.java deleted file mode 100644 index 670503c6..00000000 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandler.java +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. 
- */ - -package com.amazon.opendistroforelasticsearch.ad.transport.handler; - -import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; - -import java.util.Iterator; -import java.util.Locale; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.elasticsearch.ExceptionsHelper; -import org.elasticsearch.ResourceAlreadyExistsException; -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; -import org.elasticsearch.action.bulk.BackoffPolicy; -import org.elasticsearch.action.index.IndexRequest; -import org.elasticsearch.action.index.IndexResponse; -import org.elasticsearch.action.support.IndicesOptions; -import org.elasticsearch.client.Client; -import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.block.ClusterBlockLevel; -import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; -import org.elasticsearch.cluster.service.ClusterService; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.threadpool.ThreadPool; - -import com.amazon.opendistroforelasticsearch.ad.common.exception.AnomalyDetectionException; -import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; -import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; -import com.amazon.opendistroforelasticsearch.ad.settings.AnomalyDetectorSettings; -import com.amazon.opendistroforelasticsearch.ad.util.RestHandlerUtils; - -public class AnomalyResultHandler { - private static final Logger LOG = LogManager.getLogger(AnomalyResultHandler.class); - - static final String CANNOT_SAVE_ERR_MSG = "Cannot save anomaly result due to write block."; - static final String FAIL_TO_SAVE_ERR_MSG = "Fail to save anomaly 
index: "; - static final String RETRY_SAVING_ERR_MSG = "Retry in saving anomaly index: "; - static final String SUCCESS_SAVING_MSG = "SSUCCESS_SAVING_MSGuccess in saving anomaly index: "; - - private final Client client; - private final ClusterService clusterService; - private final IndexNameExpressionResolver indexNameExpressionResolver; - private final AnomalyDetectionIndices anomalyDetectionIndices; - private final ThreadPool threadPool; - private final BackoffPolicy resultSavingBackoffPolicy; - - public AnomalyResultHandler( - Client client, - Settings settings, - ClusterService clusterService, - IndexNameExpressionResolver indexNameExpressionResolver, - AnomalyDetectionIndices anomalyDetectionIndices, - ThreadPool threadPool - ) { - this.client = client; - this.clusterService = clusterService; - this.indexNameExpressionResolver = indexNameExpressionResolver; - this.anomalyDetectionIndices = anomalyDetectionIndices; - this.threadPool = threadPool; - this.resultSavingBackoffPolicy = BackoffPolicy - .exponentialBackoff( - AnomalyDetectorSettings.BACKOFF_INITIAL_DELAY.get(settings), - AnomalyDetectorSettings.MAX_RETRY_FOR_BACKOFF.get(settings) - ); - } - - public void indexAnomalyResult(AnomalyResult anomalyResult) { - try { - if (checkIndicesBlocked(clusterService.state(), ClusterBlockLevel.WRITE, AnomalyResult.ANOMALY_RESULT_INDEX)) { - LOG.warn(CANNOT_SAVE_ERR_MSG); - return; - } - if (!anomalyDetectionIndices.doesAnomalyResultIndexExist()) { - anomalyDetectionIndices - .initAnomalyResultIndexDirectly( - ActionListener.wrap(initResponse -> onCreateAnomalyResultIndexResponse(initResponse, anomalyResult), exception -> { - if (ExceptionsHelper.unwrapCause(exception) instanceof ResourceAlreadyExistsException) { - // It is possible the index has been created while we sending the create request - saveDetectorResult(anomalyResult); - } else { - throw new AnomalyDetectionException( - anomalyResult.getDetectorId(), - "Unexpected error creating anomaly result index", - 
exception - ); - } - }) - ); - } else { - saveDetectorResult(anomalyResult); - } - } catch (Exception e) { - throw new AnomalyDetectionException( - anomalyResult.getDetectorId(), - String - .format( - Locale.ROOT, - "Error in saving anomaly index for ID %s from %s to %s", - anomalyResult.getDetectorId(), - anomalyResult.getDataStartTime(), - anomalyResult.getDataEndTime() - ), - e - ); - } - } - - /** - * Similar to checkGlobalBlock, we check block on the indices level. - * - * @param state Cluster state - * @param level block level - * @param indices the indices on which to check block - * @return whether any of the index has block on the level. - */ - private boolean checkIndicesBlocked(ClusterState state, ClusterBlockLevel level, String... indices) { - // the original index might be an index expression with wildcards like "log*", - // so we need to expand the expression to concrete index name - String[] concreteIndices = indexNameExpressionResolver.concreteIndexNames(state, IndicesOptions.lenientExpandOpen(), indices); - - return state.blocks().indicesBlockedException(level, concreteIndices) != null; - } - - private void onCreateAnomalyResultIndexResponse(CreateIndexResponse response, AnomalyResult anomalyResult) { - if (response.isAcknowledged()) { - saveDetectorResult(anomalyResult); - } else { - throw new AnomalyDetectionException( - anomalyResult.getDetectorId(), - "Creating anomaly result index with mappings call not acknowledged." 
- ); - } - } - - private void saveDetectorResult(AnomalyResult anomalyResult) { - try (XContentBuilder builder = jsonBuilder()) { - IndexRequest indexRequest = new IndexRequest(AnomalyResult.ANOMALY_RESULT_INDEX) - .source(anomalyResult.toXContent(builder, RestHandlerUtils.XCONTENT_WITH_TYPE)); - saveDetectorResult( - indexRequest, - String - .format( - Locale.ROOT, - "ID %s from %s to %s", - anomalyResult.getDetectorId(), - anomalyResult.getDataStartTime(), - anomalyResult.getDataEndTime() - ), - resultSavingBackoffPolicy.iterator() - ); - } catch (Exception e) { - LOG.error("Failed to save anomaly result", e); - throw new AnomalyDetectionException(anomalyResult.getDetectorId(), "Cannot save result"); - } - } - - void saveDetectorResult(IndexRequest indexRequest, String context, Iterator backoff) { - client.index(indexRequest, ActionListener.wrap(response -> LOG.debug(SUCCESS_SAVING_MSG + context), exception -> { - // Elasticsearch has a thread pool and a queue for write per node. A thread - // pool will have N number of workers ready to handle the requests. When a - // request comes and if a worker is free , this is handled by the worker. Now by - // default the number of workers is equal to the number of cores on that CPU. - // When the workers are full and there are more write requests, the request - // will go to queue. The size of queue is also limited. If by default size is, - // say, 200 and if there happens more parallel requests than this, then those - // requests would be rejected as you can see EsRejectedExecutionException. - // So EsRejectedExecutionException is the way that Elasticsearch tells us that - // it cannot keep up with the current indexing rate. - // When it happens, we should pause indexing a bit before trying again, ideally - // with randomized exponential backoff. 
- Throwable cause = ExceptionsHelper.unwrapCause(exception); - if (!(cause instanceof EsRejectedExecutionException) || !backoff.hasNext()) { - LOG.error(FAIL_TO_SAVE_ERR_MSG + context, cause); - } else { - TimeValue nextDelay = backoff.next(); - LOG.warn(RETRY_SAVING_ERR_MSG + context, cause); - // copy original request's source without other information like autoGeneratedTimestamp - // otherwise, an exception will be thrown indicating autoGeneratedTimestamp should not be set - // while request id is already set (id is set because we have already sent the request before). - IndexRequest newReuqest = new IndexRequest(AnomalyResult.ANOMALY_RESULT_INDEX); - newReuqest.source(indexRequest.source(), indexRequest.getContentType()); - threadPool.schedule(() -> saveDetectorResult(newReuqest, context, backoff), nextDelay, ThreadPool.Names.SAME); - } - })); - } -} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectionStateHandler.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectionStateHandler.java new file mode 100644 index 00000000..d1aebbb8 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectionStateHandler.java @@ -0,0 +1,165 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */

package com.amazon.opendistroforelasticsearch.ad.transport.handler;

import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;

import java.io.IOException;
import java.time.Instant;
import java.util.function.BooleanSupplier;
import java.util.function.Consumer;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexNotFoundException;
import org.elasticsearch.threadpool.ThreadPool;

import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState;
import com.amazon.opendistroforelasticsearch.ad.transport.TransportStateManager;
import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil;
import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils;
import com.google.common.base.Objects;

/**
 * Saves a detector's internal state (currently its last error) into the
 * detector state index, one document per detector keyed by detector id.
 */
public class DetectionStateHandler extends AnomalyIndexHandler<DetectorInternalState> {
    interface GetStateStrategy {
        /**
         * Strategy to create new state to save. Return null if state does not change and don't need to save.
         * @param state old state
         * @return new state or null if state does not change
         */
        DetectorInternalState createNewState(DetectorInternalState state);
    }

    /**
     * Produces a state carrying the given error, or null when the stored
     * state already has the same error.
     */
    class ErrorStrategy implements GetStateStrategy {
        private String error;

        ErrorStrategy(String error) {
            this.error = error;
        }

        @Override
        public DetectorInternalState createNewState(DetectorInternalState state) {
            DetectorInternalState newState = null;
            if (state == null) {
                // nothing stored yet: build a fresh state
                newState = new DetectorInternalState.Builder().error(error).lastUpdateTime(Instant.now()).build();
            } else if (!Objects.equal(state.getError(), error)) {
                // error changed: copy the stored state and overwrite error and update time
                newState = (DetectorInternalState) state.clone();
                newState.setError(error);
                newState.setLastUpdateTime(Instant.now());
            }

            return newState;
        }
    }

    private static final Logger LOG = LogManager.getLogger(DetectionStateHandler.class);
    private final NamedXContentRegistry xContentRegistry;
    private final TransportStateManager adStateManager;

    /**
     * Arguments other than the last two are passed to {@link AnomalyIndexHandler}
     * with the detector state index name and fixedDoc set to true.
     *
     * @param xContentRegistry registry used to parse the stored state
     * @param adStateManager holds the last error seen per detector
     */
    public DetectionStateHandler(
        Client client,
        Settings settings,
        ThreadPool threadPool,
        Consumer<ActionListener<CreateIndexResponse>> createIndex,
        BooleanSupplier indexExists,
        ClientUtil clientUtil,
        IndexUtils indexUtils,
        ClusterService clusterService,
        NamedXContentRegistry xContentRegistry,
        TransportStateManager adStateManager
    ) {
        super(
            client,
            settings,
            threadPool,
            DetectorInternalState.DETECTOR_STATE_INDEX,
            createIndex,
            indexExists,
            true,
            clientUtil,
            indexUtils,
            clusterService
        );
        this.xContentRegistry = xContentRegistry;
        this.adStateManager = adStateManager;
    }

    /**
     * Records {@code error} for the detector if it differs from the last error
     * held by the state manager.
     *
     * @param error error to record
     * @param detectorId detector id
     */
    public void saveError(String error, String detectorId) {
        // trigger indexing if no error recorded (e.g., this detector got enabled just now)
        // or the recorded error is different than this one.
        if (!Objects.equal(adStateManager.getLastError(detectorId), error)) {
            update(detectorId, new ErrorStrategy(error));
            adStateManager.setLastError(detectorId, error);
        }
    }

    /**
     * Updates a detector's state according to the given GetStateStrategy
     * @param detectorId detector id
     * @param handler specify how to convert from existing state object to an object we want to save
     */
    private void update(String detectorId, GetStateStrategy handler) {
        try {
            GetRequest getRequest = new GetRequest(this.indexName).id(detectorId);

            clientUtil.asyncRequest(getRequest, client::get, ActionListener.<GetResponse>wrap(response -> {
                DetectorInternalState newState = null;
                if (response.isExists()) {
                    try (
                        XContentParser parser = XContentType.JSON
                            .xContent()
                            .createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, response.getSourceAsString())
                    ) {
                        ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser::getTokenLocation);
                        DetectorInternalState state = DetectorInternalState.parse(parser);
                        newState = handler.createNewState(state);
                    } catch (IOException e) {
                        LOG.error("Failed to update AD state for " + detectorId, e);
                        return;
                    }
                } else {
                    newState = handler.createNewState(null);
                }

                // null means the state did not change, so there is nothing to save
                if (newState != null) {
                    super.index(newState, detectorId);
                }

            }, exception -> {
                Throwable cause = ExceptionsHelper.unwrapCause(exception);
                if (cause instanceof IndexNotFoundException) {
                    // no state index yet: treat as "no stored state"
                    DetectorInternalState initialState = handler.createNewState(null);
                    // honor the strategy contract: null means nothing to save
                    if (initialState != null) {
                        super.index(initialState, detectorId);
                    }
                } else {
                    LOG.error("Failed to get detector state " + detectorId, exception);
                }
            }));
        } catch (Exception e) {
            LOG.error("Failed to update AD state for " + detectorId, e);
        }
    }
}
diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ExceptionUtil.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ExceptionUtil.java new file mode 100644 index 00000000..7ae1ffaa --- /dev/null +++
b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ExceptionUtil.java @@ -0,0 +1,60 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.util; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.io.stream.NotSerializableExceptionWrapper; + +import com.amazon.opendistroforelasticsearch.ad.common.exception.ResourceNotFoundException; + +public class ExceptionUtil { + public static final String RESOURCE_NOT_FOUND_EXCEPTION_NAME_UNDERSCORE = ElasticsearchException + .getExceptionName(new ResourceNotFoundException("", "")); + + /** + * Elasticsearch restricts the kind of exceptions can be thrown over the wire + * (See ElasticsearchException.ElasticsearchExceptionHandle). Since we cannot + * add our own exception like ResourceNotFoundException without modifying + * Elasticsearch's code, we have to unwrap the remote transport exception and + * check its root cause message. 
+ * + * @param exception exception thrown locally or over the wire + * @param expected expected root cause + * @param expectedExceptionName expected exception name + * @return whether the exception wraps the expected exception as the cause + */ + public static boolean isException(Throwable exception, Class expected, String expectedExceptionName) { + if (exception == null) { + return false; + } + + if (expected.isAssignableFrom(exception.getClass())) { + return true; + } + + // all exceptions that have not been registered to be sent over the wire can be wrapped + // inside NotSerializableExceptionWrapper. + // see StreamOutput.writeException + // ElasticsearchException.getExceptionName(exception) returns exception + // separated by underscore. For example, ResourceNotFoundException is converted + // to "resource_not_found_exception". + if (exception instanceof NotSerializableExceptionWrapper && exception.getMessage().trim().startsWith(expectedExceptionName)) { + return true; + } + return false; + } + +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtils.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtils.java index b54d9b86..82b881c4 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtils.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtils.java @@ -22,9 +22,13 @@ import org.apache.logging.log4j.Logger; import org.elasticsearch.action.admin.indices.stats.IndicesStatsRequest; import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse; +import org.elasticsearch.action.support.IndicesOptions; import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.block.ClusterBlockLevel; import org.elasticsearch.cluster.health.ClusterIndexHealth; import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import
org.elasticsearch.cluster.service.ClusterService; public class IndexUtils { @@ -44,6 +48,7 @@ public class IndexUtils { private Client client; private ClientUtil clientUtil; private ClusterService clusterService; + private final IndexNameExpressionResolver indexNameExpressionResolver; /** * Constructor @@ -51,11 +56,18 @@ public class IndexUtils { * @param client Client to make calls to ElasticSearch * @param clientUtil AD Client utility * @param clusterService ES ClusterService + * @param indexNameExpressionResolver index name resolver */ - public IndexUtils(Client client, ClientUtil clientUtil, ClusterService clusterService) { + public IndexUtils( + Client client, + ClientUtil clientUtil, + ClusterService clusterService, + IndexNameExpressionResolver indexNameExpressionResolver + ) { this.client = client; this.clientUtil = clientUtil; this.clusterService = clusterService; + this.indexNameExpressionResolver = indexNameExpressionResolver; } /** @@ -117,4 +129,20 @@ public Long getNumberOfDocumentsInIndex(String indexName) { Optional response = clientUtil.timedRequest(indicesStatsRequest, logger, client.admin().indices()::stats); return response.map(r -> r.getIndex(indexName).getPrimaries().docs.getCount()).orElse(-1L); } + + /** + * Similar to checkGlobalBlock, we check block on the indices level. + * + * @param state Cluster state + * @param level block level + * @param indices the indices on which to check block + * @return whether any of the index has block on the level. + */ + public boolean checkIndicesBlocked(ClusterState state, ClusterBlockLevel level, String... 
indices) { + // the original index might be an index expression with wildcards like "log*", + // so we need to expand the expression to concrete index name + String[] concreteIndices = indexNameExpressionResolver.concreteIndexNames(state, IndicesOptions.lenientExpandOpen(), indices); + + return state.blocks().indicesBlockedException(level, concreteIndices) != null; + } } diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListener.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListener.java index 3f42a18c..39c829a9 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListener.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListener.java @@ -32,6 +32,8 @@ */ public class MultiResponsesDelegateActionListener implements ActionListener { private static final Logger LOG = LogManager.getLogger(MultiResponsesDelegateActionListener.class); + static final String NO_RESPONSE = "No response collected"; + private final ActionListener delegate; private final AtomicInteger collectedResponseCount; private final int maxResponseCount; @@ -81,7 +83,7 @@ public void onFailure(Exception e) { private void finish() { if (this.exceptions.size() == 0) { if (savedResponses.size() == 0) { - this.delegate.onFailure(new RuntimeException("No response collected")); + this.delegate.onFailure(new RuntimeException(NO_RESPONSE)); } else { T response0 = savedResponses.get(0); for (int i = 1; i < savedResponses.size(); i++) { diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumer.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumer.java new file mode 100644 index 00000000..afb1b551 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumer.java @@ -0,0 +1,27 @@ +/* + * Copyright 2020 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.util; + +/** + * A consumer that can throw checked exception + * + * @param method parameter type + * @param Exception type + */ +@FunctionalInterface +public interface ThrowingConsumer { + void accept(T t) throws E; +} diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumerWrapper.java b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumerWrapper.java new file mode 100644 index 00000000..2facdc92 --- /dev/null +++ b/src/main/java/com/amazon/opendistroforelasticsearch/ad/util/ThrowingConsumerWrapper.java @@ -0,0 +1,41 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.opendistroforelasticsearch.ad.util; + +import java.util.function.Consumer; + +public class ThrowingConsumerWrapper { + /** + * Utility method to use a method throwing checked exception inside a function + * that does not throw the corresponding checked exception. This happens + * when we are in an ES function whose signature we have no control over. + * Convert the checked exception thrown by throwingConsumer to a RuntimeException + * so that the compiler won't complain. + * @param the method's parameter type + * @param throwingConsumer the method reference that can throw checked exception + * @return converted method reference + */ + public static Consumer throwingConsumerWrapper(ThrowingConsumer throwingConsumer) { + + return i -> { + try { + throwingConsumer.accept(i); + } catch (Exception ex) { + throw new RuntimeException(ex); + } + }; + } +} diff --git a/src/main/resources/mappings/anomaly-detection-state.json b/src/main/resources/mappings/anomaly-detection-state.json new file mode 100644 index 00000000..dcb0f7c0 --- /dev/null +++ b/src/main/resources/mappings/anomaly-detection-state.json @@ -0,0 +1,18 @@ +{ + "dynamic": false, + "_meta": { + "schema_version": 1 + }, + "properties": { + "schema_version": { + "type": "integer" + }, + "last_update_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "error": { + "type": "text" + } + } +} \ No newline at end of file diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AbstractADTest.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AbstractADTest.java index 9cb23a24..dc60acf9 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AbstractADTest.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AbstractADTest.java @@ -15,9 +15,13 @@ package com.amazon.opendistroforelasticsearch.ad; +import static org.hamcrest.Matchers.containsString; + import java.util.ArrayList; import java.util.List; import
java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; @@ -26,10 +30,14 @@ import org.apache.logging.log4j.core.appender.AbstractAppender; import org.apache.logging.log4j.core.layout.PatternLayout; import org.apache.logging.log4j.util.StackLocatorUtil; +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.TestThreadPool; import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportInterceptor; +import org.elasticsearch.transport.TransportService; import test.com.amazon.opendistroforelasticsearch.ad.util.FakeNode; @@ -52,31 +60,70 @@ protected TestAppender(String name) { public List messages = new ArrayList(); - public boolean containsMessage(String msg) { + public boolean containsMessage(String msg, boolean formatString) { + Pattern p = null; + if (formatString) { + String regex = convertToRegex(msg); + p = Pattern.compile(regex); + } for (String logMsg : messages) { LOG.info(logMsg); - if (logMsg.contains(msg)) { + if (p != null) { + Matcher m = p.matcher(logMsg); + if (m.matches()) { + return true; + } + } else if (logMsg.contains(msg)) { return true; } } return false; } - public int countMessage(String msg) { + public boolean containsMessage(String msg) { + return containsMessage(msg, false); + } + + public int countMessage(String msg, boolean formatString) { + Pattern p = null; + if (formatString) { + String regex = convertToRegex(msg); + p = Pattern.compile(regex); + } int count = 0; for (String logMsg : messages) { LOG.info(logMsg); - if (logMsg.contains(msg)) { + if (p != null) { + Matcher m = p.matcher(logMsg); + if (m.matches()) { + count++; + } + } else if (logMsg.contains(msg)) { count++; } } return count; } + public 
int countMessage(String msg) { + return countMessage(msg, false); + } + @Override public void append(LogEvent event) { messages.add(event.getMessage().getFormattedMessage()); } + + /** + * Convert a string with format like "Cannot save %s due to write block." + * to a regex with .* like "Cannot save .* due to write block." + * @return converted regex + */ + private String convertToRegex(String formattedStr) { + int percentIndex = formattedStr.indexOf("%"); + return formattedStr.substring(0, percentIndex) + ".*" + formattedStr.substring(percentIndex + 2); + } + } protected static ThreadPool threadPool; @@ -130,19 +177,32 @@ protected static void tearDownThreadPool() { threadPool = null; } - public void setupTestNodes(Settings settings) { + public void setupTestNodes(Settings settings, TransportInterceptor transportInterceptor) { nodesCount = randomIntBetween(2, 10); testNodes = new FakeNode[nodesCount]; for (int i = 0; i < testNodes.length; i++) { - testNodes[i] = new FakeNode("node" + i, threadPool, settings); + testNodes[i] = new FakeNode("node" + i, threadPool, settings, transportInterceptor); } FakeNode.connectNodes(testNodes); } + public void setupTestNodes(Settings settings) { + setupTestNodes(settings, TransportService.NOOP_TRANSPORT_INTERCEPTOR); + } + public void tearDownTestNodes() { for (FakeNode testNode : testNodes) { testNode.close(); } testNodes = null; } + + public void assertException( + PlainActionFuture listener, + Class exceptionType, + String msg + ) { + Exception e = expectThrows(exceptionType, () -> listener.actionGet()); + assertThat(e.getMessage(), containsString(msg)); + } } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunnerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunnerTests.java index 8be5cdb5..35dd8a65 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunnerTests.java +++ 
b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorJobRunnerTests.java @@ -40,12 +40,14 @@ import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.common.util.concurrent.ThreadContext; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.index.Index; import org.elasticsearch.index.get.GetResult; @@ -62,10 +64,16 @@ import org.mockito.MockitoAnnotations; import com.amazon.opendistroforelasticsearch.ad.common.exception.EndRunException; +import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; +import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; import com.amazon.opendistroforelasticsearch.ad.model.IntervalTimeConfiguration; -import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyResultHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.TransportStateManager; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.AnomalyIndexHandler; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.DetectionStateHandler; import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; +import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils; +import com.amazon.opendistroforelasticsearch.ad.util.ThrowingConsumerWrapper; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.JobExecutionContext; import 
com.amazon.opendistroforelasticsearch.jobscheduler.spi.LockModel; import com.amazon.opendistroforelasticsearch.jobscheduler.spi.ScheduledJobParameter; @@ -103,7 +111,9 @@ public class AnomalyDetectorJobRunnerTests extends AbstractADTest { private Iterator backoff; @Mock - private AnomalyResultHandler anomalyResultHandler; + private AnomalyIndexHandler anomalyResultHandler; + + private DetectionStateHandler detectorStateHandler; @BeforeClass public static void setUpBeforeClass() { @@ -129,17 +139,33 @@ public void setup() throws Exception { runner.setClientUtil(clientUtil); runner.setAnomalyResultHandler(anomalyResultHandler); + Settings settings = Settings + .builder() + .put("opendistro.anomaly_detection.max_retry_for_backoff", 2) + .put("opendistro.anomaly_detection.backoff_initial_delay", TimeValue.timeValueMillis(1)) + .put("opendistro.anomaly_detection.max_retry_for_end_run_exception", 3) + .build(); setUpJobParameter(); - runner - .setSettings( - Settings - .builder() - .put("opendistro.anomaly_detection.max_retry_for_backoff", 2) - .put("opendistro.anomaly_detection.backoff_initial_delay", TimeValue.timeValueMillis(1)) - .put("opendistro.anomaly_detection.max_retry_for_end_run_exception", 3) - .build() - ); + runner.setSettings(settings); + + AnomalyDetectionIndices anomalyDetectionIndices = mock(AnomalyDetectionIndices.class); + IndexNameExpressionResolver indexNameResolver = mock(IndexNameExpressionResolver.class); + IndexUtils indexUtils = new IndexUtils(client, clientUtil, clusterService, indexNameResolver); + TransportStateManager stateManager = mock(TransportStateManager.class); + detectorStateHandler = new DetectionStateHandler( + client, + settings, + threadPool, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initDetectorStateIndex), + anomalyDetectionIndices::doesDetectorStateIndexExist, + this.clientUtil, + indexUtils, + clusterService, + NamedXContentRegistry.EMPTY, + stateManager + ); + 
runner.setDetectionStateHandler(detectorStateHandler); lockService = new LockService(client, clusterService); doReturn(lockService).when(context).getLockService(); @@ -215,13 +241,13 @@ public void testRunAdJobWithEndRunExceptionNow() { LockModel lock = new LockModel("indexName", "jobId", Instant.now(), 10, false); Exception exception = new EndRunException(jobParameter.getName(), randomAlphaOfLength(5), true); runner.handleAdException(jobParameter, lockService, lock, Instant.now().minusMillis(1000 * 60), Instant.now(), exception); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); } @Test public void testRunAdJobWithEndRunExceptionNowAndExistingAdJob() { testRunAdJobWithEndRunExceptionNowAndStopAdJob(true, true, true); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); verify(clientUtil).asyncRequest(any(IndexRequest.class), any(), any()); assertTrue(testAppender.containsMessage("AD Job was disabled by JobRunner for")); } @@ -229,7 +255,7 @@ public void testRunAdJobWithEndRunExceptionNowAndExistingAdJob() { @Test public void testRunAdJobWithEndRunExceptionNowAndExistingAdJobAndIndexException() { testRunAdJobWithEndRunExceptionNowAndStopAdJob(true, true, false); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); verify(clientUtil).asyncRequest(any(IndexRequest.class), any(), any()); assertTrue(testAppender.containsMessage("Failed to disable AD job for")); } @@ -237,7 +263,7 @@ public void testRunAdJobWithEndRunExceptionNowAndExistingAdJobAndIndexException( @Test public void testRunAdJobWithEndRunExceptionNowAndNotExistingEnabledAdJob() { testRunAdJobWithEndRunExceptionNowAndStopAdJob(false, true, true); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); verify(client, never()).index(any(), any()); 
assertFalse(testAppender.containsMessage("AD Job was disabled by JobRunner for")); assertFalse(testAppender.containsMessage("Failed to disable AD job for")); @@ -246,7 +272,7 @@ public void testRunAdJobWithEndRunExceptionNowAndNotExistingEnabledAdJob() { @Test public void testRunAdJobWithEndRunExceptionNowAndExistingDisabledAdJob() { testRunAdJobWithEndRunExceptionNowAndStopAdJob(true, false, true); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); verify(client, never()).index(any(), any()); assertFalse(testAppender.containsMessage("AD Job was disabled by JobRunner for")); } @@ -254,7 +280,7 @@ public void testRunAdJobWithEndRunExceptionNowAndExistingDisabledAdJob() { @Test public void testRunAdJobWithEndRunExceptionNowAndNotExistingDisabledAdJob() { testRunAdJobWithEndRunExceptionNowAndStopAdJob(false, false, true); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); verify(client, never()).index(any(), any()); assertFalse(testAppender.containsMessage("AD Job was disabled by JobRunner for")); } @@ -323,7 +349,7 @@ public void testRunAdJobWithEndRunExceptionNowAndGetJobException() { }).when(clientUtil).asyncRequest(any(GetRequest.class), any(), any()); runner.handleAdException(jobParameter, lockService, lock, Instant.now().minusMillis(1000 * 60), Instant.now(), exception); - verify(anomalyResultHandler).indexAnomalyResult(any()); + verify(anomalyResultHandler).index(any(), any()); assertEquals(1, testAppender.countMessage("JobRunner failed to get detector job")); } @@ -335,7 +361,7 @@ public void testRunAdJobWithEndRunExceptionNowAndFailToGetJob() { doThrow(new RuntimeException("fail to get AD job")).when(clientUtil).asyncRequest(any(GetRequest.class), any(), any()); runner.handleAdException(jobParameter, lockService, lock, Instant.now().minusMillis(1000 * 60), Instant.now(), exception); - verify(anomalyResultHandler).indexAnomalyResult(any()); + 
verify(anomalyResultHandler).index(any(), any()); assertEquals(1, testAppender.countMessage("JobRunner failed to stop AD job")); } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunnerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunnerTests.java index 1f135bba..73142fc4 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunnerTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/AnomalyDetectorProfileRunnerTests.java @@ -26,8 +26,8 @@ import java.io.IOException; import java.time.Instant; +import java.time.temporal.ChronoUnit; import java.util.Arrays; -import java.util.Calendar; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -37,81 +37,98 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.elasticsearch.Version; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.FailedNodeException; import org.elasticsearch.action.get.GetRequest; import org.elasticsearch.action.get.GetResponse; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.Client; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.io.stream.NotSerializableExceptionWrapper; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.index.IndexNotFoundException; import 
org.elasticsearch.search.SearchModule; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.transport.RemoteTransportException; import org.junit.Before; import org.junit.BeforeClass; -import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; +import com.amazon.opendistroforelasticsearch.ad.common.exception.AnomalyDetectionException; +import com.amazon.opendistroforelasticsearch.ad.common.exception.ResourceNotFoundException; +import com.amazon.opendistroforelasticsearch.ad.constant.CommonName; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.amazon.opendistroforelasticsearch.ad.model.DetectorProfile; import com.amazon.opendistroforelasticsearch.ad.model.DetectorState; +import com.amazon.opendistroforelasticsearch.ad.model.InitProgressProfile; +import com.amazon.opendistroforelasticsearch.ad.model.IntervalTimeConfiguration; import com.amazon.opendistroforelasticsearch.ad.model.ModelProfile; import com.amazon.opendistroforelasticsearch.ad.model.ProfileName; +import com.amazon.opendistroforelasticsearch.ad.transport.ProfileAction; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileNodeResponse; import com.amazon.opendistroforelasticsearch.ad.transport.ProfileResponse; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingAction; +import com.amazon.opendistroforelasticsearch.ad.transport.RCFPollingResponse; import com.amazon.opendistroforelasticsearch.ad.util.DiscoveryNodeFilterer; public class AnomalyDetectorProfileRunnerTests extends ESTestCase { - private static final Logger LOG = LogManager.getLogger(AnomalyDetectorProfileRunnerTests.class); private AnomalyDetectorProfileRunner runner; private Client client; private DiscoveryNodeFilterer nodeFilter; 
private AnomalyDetector detector; - private IndexNameExpressionResolver resolver; private ClusterService clusterService; private static Set stateOnly; private static Set stateNError; private static Set modelProfile; + private static Set stateInitProgress; private static String noFullShingleError = "No full shingle in current detection window"; private static String stoppedError = "Stopped detector as job failed consecutively for more than 3 times: Having trouble querying data." + " Maybe all of your features have been disabled."; - private Calendar calendar; - private String indexWithRequiredError1 = ".opendistro-anomaly-results-history-2020.04.06-1"; - private String indexWithRequiredError2 = ".opendistro-anomaly-results-history-2020.04.07-000002"; + + private int requiredSamples; + private int neededSamples; // profile model related - String node1; - String nodeName1; - DiscoveryNode discoveryNode1; + private String node1; + private String nodeName1; + private DiscoveryNode discoveryNode1; + + private String node2; + private String nodeName2; + private DiscoveryNode discoveryNode2; - String node2; - String nodeName2; - DiscoveryNode discoveryNode2; + private long modelSize; + private String model1Id; + private String model0Id; - long modelSize; - String model1Id; - String model0Id; + private int shingleSize; - int shingleSize; + private int detectorIntervalMin; + private GetResponse detectorGetReponse; + private String messaingExceptionError = "blah"; @Override protected NamedXContentRegistry xContentRegistry() { SearchModule searchModule = new SearchModule(Settings.EMPTY, false, Collections.emptyList()); List entries = searchModule.getNamedXContents(); - entries.addAll(Arrays.asList(AnomalyDetector.XCONTENT_REGISTRY, AnomalyResult.XCONTENT_REGISTRY)); + entries + .addAll( + Arrays + .asList( + AnomalyDetector.XCONTENT_REGISTRY, + AnomalyResult.XCONTENT_REGISTRY, + DetectorInternalState.XCONTENT_REGISTRY, + AnomalyDetectorJob.XCONTENT_REGISTRY + ) + ); return new 
NamedXContentRegistry(entries); } @@ -122,6 +139,9 @@ public static void setUpOnce() { stateNError = new HashSet(); stateNError.add(ProfileName.ERROR); stateNError.add(ProfileName.STATE); + stateInitProgress = new HashSet(); + stateInitProgress.add(ProfileName.INIT_PROGRESS); + stateInitProgress.add(ProfileName.STATE); modelProfile = new HashSet( Arrays.asList(ProfileName.SHINGLE_SIZE, ProfileName.MODELS, ProfileName.COORDINATING_NODE, ProfileName.TOTAL_SIZE_IN_BYTES) ); @@ -133,57 +153,81 @@ public void setUp() throws Exception { super.setUp(); client = mock(Client.class); nodeFilter = mock(DiscoveryNodeFilterer.class); - calendar = mock(Calendar.class); - resolver = mock(IndexNameExpressionResolver.class); clusterService = mock(ClusterService.class); - when(resolver.concreteIndexNames(any(), any(), any())) - .thenReturn( - new String[] { indexWithRequiredError1, indexWithRequiredError2, ".opendistro-anomaly-results-history-2020.04.08-000003" } - ); when(clusterService.state()).thenReturn(ClusterState.builder(new ClusterName("test cluster")).build()); - runner = new AnomalyDetectorProfileRunner(client, xContentRegistry(), nodeFilter, resolver, clusterService, calendar); + requiredSamples = 128; + neededSamples = 5; + + runner = new AnomalyDetectorProfileRunner(client, xContentRegistry(), nodeFilter, requiredSamples); + + detectorIntervalMin = 3; + detectorGetReponse = mock(GetResponse.class); + } + + enum DetectorStatus { + INDEX_NOT_EXIST, + NO_DOC, + EXIST } enum JobStatus { INDEX_NOT_EXIT, DISABLED, - ENABLED, - DISABLED_ROTATED_1, - DISABLED_ROTATED_2, - DISABLED_ROTATED_3 + ENABLED } - enum InittedEverResultStatus { - INDEX_NOT_EXIT, - GREATER_THAN_ZERO, + enum RCFPollingStatus { + INIT_NOT_EXIT, + REMOTE_INIT_NOT_EXIT, + INDEX_NOT_FOUND, + REMOTE_INDEX_NOT_FOUND, + INIT_DONE, EMPTY, - EXCEPTION + EXCEPTION, + INITTING } enum ErrorResultStatus { INDEX_NOT_EXIT, NO_ERROR, SHINGLE_ERROR, - STOPPED_ERROR_1, - STOPPED_ERROR_2 + STOPPED_ERROR } 
@SuppressWarnings("unchecked") - private void setUpClientGet(boolean detectorExists, JobStatus jobStatus) throws IOException { - detector = TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), Instant.now()); + private void setUpClientGet( + DetectorStatus detectorStatus, + JobStatus jobStatus, + RCFPollingStatus rcfPollingStatus, + ErrorResultStatus errorResultStatus + ) throws IOException { + detector = TestHelpers.randomAnomalyDetectorWithInterval(new IntervalTimeConfiguration(detectorIntervalMin, ChronoUnit.MINUTES)); doAnswer(invocation -> { Object[] args = invocation.getArguments(); GetRequest request = (GetRequest) args[0]; ActionListener listener = (ActionListener) args[1]; if (request.index().equals(ANOMALY_DETECTORS_INDEX)) { - if (detectorExists) { - listener.onResponse(TestHelpers.createGetResponse(detector, detector.getDetectorId())); - } else { - listener.onFailure(new IndexNotFoundException(ANOMALY_DETECTORS_INDEX)); + switch (detectorStatus) { + case EXIST: + listener + .onResponse( + TestHelpers.createGetResponse(detector, detector.getDetectorId(), AnomalyDetector.ANOMALY_DETECTORS_INDEX) + ); + break; + case INDEX_NOT_EXIST: + listener.onFailure(new IndexNotFoundException(ANOMALY_DETECTORS_INDEX)); + break; + case NO_DOC: + when(detectorGetReponse.isExists()).thenReturn(false); + listener.onResponse(detectorGetReponse); + break; + default: + assertTrue("should not reach here", false); + break; } - } else { + } else if (request.index().equals(ANOMALY_DETECTOR_JOB_INDEX)) { AnomalyDetectorJob job = null; switch (jobStatus) { case INDEX_NOT_EXIT: @@ -191,116 +235,57 @@ private void setUpClientGet(boolean detectorExists, JobStatus jobStatus) throws break; case DISABLED: job = TestHelpers.randomAnomalyDetectorJob(false); - listener.onResponse(TestHelpers.createGetResponse(job, detector.getDetectorId())); + listener + .onResponse( + TestHelpers.createGetResponse(job, detector.getDetectorId(), AnomalyDetectorJob.ANOMALY_DETECTOR_JOB_INDEX) + 
); break; case ENABLED: job = TestHelpers.randomAnomalyDetectorJob(true); - listener.onResponse(TestHelpers.createGetResponse(job, detector.getDetectorId())); - break; - case DISABLED_ROTATED_1: - // enabled time is smaller than 1586217600000, while disabled time is larger than 1586217600000 - // which is April 7, 2020 12:00:00 AM. - job = TestHelpers - .randomAnomalyDetectorJob(false, Instant.ofEpochMilli(1586217500000L), Instant.ofEpochMilli(1586227600000L)); - listener.onResponse(TestHelpers.createGetResponse(job, detector.getDetectorId())); - break; - case DISABLED_ROTATED_2: - // both enabled and disabled time are larger than 1586217600000, - // which is April 7, 2020 12:00:00 AM. - job = TestHelpers - .randomAnomalyDetectorJob(false, Instant.ofEpochMilli(1586217500000L), Instant.ofEpochMilli(1586227600000L)); - listener.onResponse(TestHelpers.createGetResponse(job, detector.getDetectorId())); - break; - case DISABLED_ROTATED_3: - // both enabled and disabled time are larger than 1586131200000, - // which is April 6, 2020 12:00:00 AM. 
- job = TestHelpers - .randomAnomalyDetectorJob(false, Instant.ofEpochMilli(1586131300000L), Instant.ofEpochMilli(1586131400000L)); - listener.onResponse(TestHelpers.createGetResponse(job, detector.getDetectorId())); - break; - default: - assertTrue("should not reach here", false); - break; - } - } - - return null; - }).when(client).get(any(), any()); - } - - @SuppressWarnings("unchecked") - private void setUpClientSearch(InittedEverResultStatus inittedEverResultStatus, ErrorResultStatus errorResultStatus) { - doAnswer(invocation -> { - Object[] args = invocation.getArguments(); - SearchRequest request = (SearchRequest) args[0]; - ActionListener listener = (ActionListener) args[1]; - if (errorResultStatus == ErrorResultStatus.INDEX_NOT_EXIT - || inittedEverResultStatus == InittedEverResultStatus.INDEX_NOT_EXIT) { - listener.onFailure(new IndexNotFoundException(AnomalyResult.ANOMALY_RESULT_INDEX)); - return null; - } - AnomalyResult result = null; - if (request.source().query().toString().contains(AnomalyResult.ANOMALY_SCORE_FIELD)) { - switch (inittedEverResultStatus) { - case GREATER_THAN_ZERO: - result = TestHelpers.randomAnomalyDetectResult(0.87); - listener.onResponse(TestHelpers.createSearchResponse(result)); - break; - case EMPTY: - listener.onResponse(TestHelpers.createEmptySearchResponse()); - break; - case EXCEPTION: - listener.onFailure(new RuntimeException()); + listener + .onResponse( + TestHelpers.createGetResponse(job, detector.getDetectorId(), AnomalyDetectorJob.ANOMALY_DETECTOR_JOB_INDEX) + ); break; default: assertTrue("should not reach here", false); break; } } else { + if (errorResultStatus == ErrorResultStatus.INDEX_NOT_EXIT) { + listener.onFailure(new IndexNotFoundException(DetectorInternalState.DETECTOR_STATE_INDEX)); + return null; + } + DetectorInternalState.Builder result = new DetectorInternalState.Builder().lastUpdateTime(Instant.now()); + switch (errorResultStatus) { case NO_ERROR: - result = TestHelpers.randomAnomalyDetectResult(null); 
- listener.onResponse(TestHelpers.createSearchResponse(result)); break; case SHINGLE_ERROR: - result = TestHelpers.randomAnomalyDetectResult(noFullShingleError); - listener.onResponse(TestHelpers.createSearchResponse(result)); - break; - case STOPPED_ERROR_2: - if (request.indices().length == 2) { - for (int i = 0; i < 2; i++) { - assertTrue( - request.indices()[i].equals(indexWithRequiredError1) - || request.indices()[i].equals(indexWithRequiredError2) - ); - } - result = TestHelpers.randomAnomalyDetectResult(stoppedError); - listener.onResponse(TestHelpers.createSearchResponse(result)); - } else { - assertTrue("should not reach here", false); - } + result.error(noFullShingleError); break; - case STOPPED_ERROR_1: - if (request.indices().length == 1 && request.indices()[0].equals(indexWithRequiredError1)) { - result = TestHelpers.randomAnomalyDetectResult(stoppedError); - listener.onResponse(TestHelpers.createSearchResponse(result)); - } else { - assertTrue("should not reach here", false); - } + case STOPPED_ERROR: + result.error(stoppedError); break; default: assertTrue("should not reach here", false); break; } + listener + .onResponse( + TestHelpers.createGetResponse(result.build(), detector.getDetectorId(), DetectorInternalState.DETECTOR_STATE_INDEX) + ); + } return null; - }).when(client).search(any(), any()); + }).when(client).get(any(), any()); + setUpClientExecuteRCFPollingAction(rcfPollingStatus); } public void testDetectorNotExist() throws IOException, InterruptedException { - setUpClientGet(false, JobStatus.INDEX_NOT_EXIT); + setUpClientGet(DetectorStatus.INDEX_NOT_EXIST, JobStatus.INDEX_NOT_EXIT, RCFPollingStatus.EMPTY, ErrorResultStatus.NO_ERROR); final CountDownLatch inProgressLatch = new CountDownLatch(1); runner.profile("x123", ActionListener.wrap(response -> { @@ -314,9 +299,8 @@ public void testDetectorNotExist() throws IOException, InterruptedException { } public void testDisabledJobIndexTemplate(JobStatus status) throws IOException, 
InterruptedException { - setUpClientGet(true, status); - DetectorProfile expectedProfile = new DetectorProfile(); - expectedProfile.setState(DetectorState.DISABLED); + setUpClientGet(DetectorStatus.EXIST, status, RCFPollingStatus.EMPTY, ErrorResultStatus.NO_ERROR); + DetectorProfile expectedProfile = new DetectorProfile.Builder().state(DetectorState.DISABLED).build(); final CountDownLatch inProgressLatch = new CountDownLatch(1); runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { @@ -337,18 +321,20 @@ public void testJobDisabled() throws IOException, InterruptedException { testDisabledJobIndexTemplate(JobStatus.DISABLED); } - public void testInitOrRunningStateTemplate(InittedEverResultStatus status, DetectorState expectedState) throws IOException, + public void testInitOrRunningStateTemplate(RCFPollingStatus status, DetectorState expectedState) throws IOException, InterruptedException { - setUpClientGet(true, JobStatus.ENABLED); - setUpClientSearch(status, ErrorResultStatus.NO_ERROR); - DetectorProfile expectedProfile = new DetectorProfile(); - expectedProfile.setState(expectedState); + setUpClientGet(DetectorStatus.EXIST, JobStatus.ENABLED, status, ErrorResultStatus.NO_ERROR); + DetectorProfile expectedProfile = new DetectorProfile.Builder().state(expectedState).build(); final CountDownLatch inProgressLatch = new CountDownLatch(1); runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { assertEquals(expectedProfile, response); inProgressLatch.countDown(); }, exception -> { + logger.error(exception); + for (StackTraceElement ste : exception.getStackTrace()) { + logger.info(ste); + } assertTrue("Should not reach here ", false); inProgressLatch.countDown(); }), stateOnly); @@ -356,76 +342,122 @@ public void testInitOrRunningStateTemplate(InittedEverResultStatus status, Detec } public void testResultNotExist() throws IOException, InterruptedException { - testInitOrRunningStateTemplate(InittedEverResultStatus.INDEX_NOT_EXIT, 
DetectorState.INIT); + testInitOrRunningStateTemplate(RCFPollingStatus.INIT_NOT_EXIT, DetectorState.INIT); + } + + public void testRemoteResultNotExist() throws IOException, InterruptedException { + testInitOrRunningStateTemplate(RCFPollingStatus.REMOTE_INIT_NOT_EXIT, DetectorState.INIT); + } + + public void testCheckpointIndexNotExist() throws IOException, InterruptedException { + testInitOrRunningStateTemplate(RCFPollingStatus.INDEX_NOT_FOUND, DetectorState.INIT); + } + + public void testRemoteCheckpointIndexNotExist() throws IOException, InterruptedException { + testInitOrRunningStateTemplate(RCFPollingStatus.REMOTE_INDEX_NOT_FOUND, DetectorState.INIT); } public void testResultEmpty() throws IOException, InterruptedException { - testInitOrRunningStateTemplate(InittedEverResultStatus.EMPTY, DetectorState.INIT); + testInitOrRunningStateTemplate(RCFPollingStatus.EMPTY, DetectorState.INIT); } public void testResultGreaterThanZero() throws IOException, InterruptedException { - testInitOrRunningStateTemplate(InittedEverResultStatus.GREATER_THAN_ZERO, DetectorState.RUNNING); + testInitOrRunningStateTemplate(RCFPollingStatus.INIT_DONE, DetectorState.RUNNING); } - public void testErrorStateTemplate(InittedEverResultStatus initStatus, ErrorResultStatus status, DetectorState state, String error) - throws IOException, + public void testErrorStateTemplate( + RCFPollingStatus initStatus, + ErrorResultStatus status, + DetectorState state, + String error, + JobStatus jobStatus, + Set profilesToCollect + ) throws IOException, InterruptedException { - setUpClientGet(true, JobStatus.ENABLED); - setUpClientSearch(initStatus, status); - DetectorProfile expectedProfile = new DetectorProfile(); - expectedProfile.setState(state); - expectedProfile.setError(error); + setUpClientExecuteRCFPollingAction(initStatus); + setUpClientGet(DetectorStatus.EXIST, jobStatus, initStatus, status); + DetectorProfile.Builder builder = new DetectorProfile.Builder(); + if 
(profilesToCollect.contains(ProfileName.STATE)) { + builder.state(state); + } + if (profilesToCollect.contains(ProfileName.ERROR)) { + builder.error(error); + } + DetectorProfile expectedProfile = builder.build(); final CountDownLatch inProgressLatch = new CountDownLatch(1); runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { assertEquals(expectedProfile, response); inProgressLatch.countDown(); }, exception -> { - assertTrue("Should not reach here ", false); + logger.info(exception); + for (StackTraceElement ste : exception.getStackTrace()) { + logger.info(ste); + } + assertTrue("Should not reach here", false); inProgressLatch.countDown(); - }), stateNError); + }), profilesToCollect); assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); } - public void testInitNoError() throws IOException, InterruptedException { - testErrorStateTemplate(InittedEverResultStatus.INDEX_NOT_EXIT, ErrorResultStatus.INDEX_NOT_EXIT, DetectorState.INIT, null); + public void testErrorStateTemplate( + RCFPollingStatus initStatus, + ErrorResultStatus status, + DetectorState state, + String error, + JobStatus jobStatus + ) throws IOException, + InterruptedException { + testErrorStateTemplate(initStatus, status, state, error, jobStatus, stateNError); } public void testRunningNoError() throws IOException, InterruptedException { - testErrorStateTemplate(InittedEverResultStatus.GREATER_THAN_ZERO, ErrorResultStatus.NO_ERROR, DetectorState.RUNNING, null); + testErrorStateTemplate(RCFPollingStatus.INIT_DONE, ErrorResultStatus.NO_ERROR, DetectorState.RUNNING, null, JobStatus.ENABLED); } public void testRunningWithError() throws IOException, InterruptedException { testErrorStateTemplate( - InittedEverResultStatus.GREATER_THAN_ZERO, + RCFPollingStatus.INIT_DONE, ErrorResultStatus.SHINGLE_ERROR, DetectorState.RUNNING, - noFullShingleError + noFullShingleError, + JobStatus.ENABLED ); } - public void testInitWithError() throws IOException, InterruptedException { - 
testErrorStateTemplate(InittedEverResultStatus.EMPTY, ErrorResultStatus.SHINGLE_ERROR, DetectorState.INIT, noFullShingleError); + public void testDisabledForStateError() throws IOException, InterruptedException { + testErrorStateTemplate( + RCFPollingStatus.INITTING, + ErrorResultStatus.STOPPED_ERROR, + DetectorState.DISABLED, + stoppedError, + JobStatus.DISABLED + ); } - public void testExceptionOnStateFetching() throws IOException, InterruptedException { - setUpClientGet(true, JobStatus.ENABLED); - setUpClientSearch(InittedEverResultStatus.EXCEPTION, ErrorResultStatus.NO_ERROR); - - final CountDownLatch inProgressLatch = new CountDownLatch(1); + public void testDisabledForStateInit() throws IOException, InterruptedException { + testErrorStateTemplate( + RCFPollingStatus.INITTING, + ErrorResultStatus.STOPPED_ERROR, + DetectorState.DISABLED, + stoppedError, + JobStatus.DISABLED, + stateInitProgress + ); + } - runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { - assertTrue("Should not reach here ", false); - inProgressLatch.countDown(); - }, exception -> { - assertTrue("Unexcpeted exception " + exception.getMessage(), exception instanceof RuntimeException); - inProgressLatch.countDown(); - }), stateOnly); - assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); + public void testInitWithError() throws IOException, InterruptedException { + testErrorStateTemplate( + RCFPollingStatus.EMPTY, + ErrorResultStatus.SHINGLE_ERROR, + DetectorState.INIT, + noFullShingleError, + JobStatus.ENABLED + ); } @SuppressWarnings("unchecked") - private void setUpClientExecute() { + private void setUpClientExecuteProfileAction() { doAnswer(invocation -> { Object[] args = invocation.getArguments(); ActionListener listener = (ActionListener) args[2]; @@ -472,7 +504,6 @@ private void setUpClientExecute() { } }; - LOG.info("hello"); ProfileNodeResponse profileNodeResponse1 = new ProfileNodeResponse(discoveryNode1, modelSizeMap1, shingleSize); ProfileNodeResponse 
profileNodeResponse2 = new ProfileNodeResponse(discoveryNode2, modelSizeMap2, -1); List profileNodeResponses = Arrays.asList(profileNodeResponse1, profileNodeResponse2); @@ -482,13 +513,71 @@ private void setUpClientExecute() { listener.onResponse(profileResponse); return null; - }).when(client).execute(any(), any(), any()); + }).when(client).execute(any(ProfileAction.class), any(), any()); + + } + + @SuppressWarnings("unchecked") + private void setUpClientExecuteRCFPollingAction(RCFPollingStatus inittedEverResultStatus) { + doAnswer(invocation -> { + Object[] args = invocation.getArguments(); + ActionListener listener = (ActionListener) args[2]; + + Exception cause = null; + String detectorId = "123"; + if (inittedEverResultStatus == RCFPollingStatus.INIT_NOT_EXIT + || inittedEverResultStatus == RCFPollingStatus.REMOTE_INIT_NOT_EXIT + || inittedEverResultStatus == RCFPollingStatus.INDEX_NOT_FOUND + || inittedEverResultStatus == RCFPollingStatus.REMOTE_INDEX_NOT_FOUND) { + switch (inittedEverResultStatus) { + case INIT_NOT_EXIT: + case REMOTE_INIT_NOT_EXIT: + cause = new ResourceNotFoundException(detectorId, messaingExceptionError); + break; + case INDEX_NOT_FOUND: + case REMOTE_INDEX_NOT_FOUND: + cause = new IndexNotFoundException(detectorId, CommonName.CHECKPOINT_INDEX_NAME); + break; + default: + assertTrue("should not reach here", false); + break; + } + cause = new AnomalyDetectionException(detectorId, cause); + if (inittedEverResultStatus == RCFPollingStatus.REMOTE_INIT_NOT_EXIT + || inittedEverResultStatus == RCFPollingStatus.REMOTE_INDEX_NOT_FOUND) { + cause = new RemoteTransportException(RCFPollingAction.NAME, new NotSerializableExceptionWrapper(cause)); + } + listener.onFailure(cause); + } else { + RCFPollingResponse result = null; + switch (inittedEverResultStatus) { + case INIT_DONE: + result = new RCFPollingResponse(requiredSamples + 1); + break; + case INITTING: + result = new RCFPollingResponse(requiredSamples - neededSamples); + break; + case EMPTY: 
+ result = new RCFPollingResponse(0); + break; + case EXCEPTION: + listener.onFailure(new RuntimeException()); + break; + default: + assertTrue("should not reach here", false); + break; + } + + listener.onResponse(result); + } + return null; + }).when(client).execute(any(RCFPollingAction.class), any(), any()); } public void testProfileModels() throws InterruptedException, IOException { - setUpClientGet(true, JobStatus.ENABLED); - setUpClientExecute(); + setUpClientGet(DetectorStatus.EXIST, JobStatus.ENABLED, RCFPollingStatus.EMPTY, ErrorResultStatus.NO_ERROR); + setUpClientExecuteProfileAction(); final CountDownLatch inProgressLatch = new CountDownLatch(1); @@ -515,21 +604,13 @@ public void testProfileModels() throws InterruptedException, IOException { assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); } - /** - * A detector's error message can be on a rotated index. This test makes sure we get error info - * from .opendistro-anomaly-results index that has been rolled over. - * @param state expected detector state - * @param jobStatus job status to config in the test case - * @throws IOException when profile API throws it - * @throws InterruptedException when our CountDownLatch has been interruptted - */ - private void stoppedDetectorErrorTemplate(DetectorState state, JobStatus jobStatus, ErrorResultStatus errorStatus) throws IOException, - InterruptedException { - setUpClientGet(true, jobStatus); - setUpClientSearch(InittedEverResultStatus.GREATER_THAN_ZERO, errorStatus); - DetectorProfile expectedProfile = new DetectorProfile(); - expectedProfile.setState(state); - expectedProfile.setError(stoppedError); + public void testInitProgress() throws IOException, InterruptedException { + setUpClientGet(DetectorStatus.EXIST, JobStatus.ENABLED, RCFPollingStatus.INITTING, ErrorResultStatus.NO_ERROR); + DetectorProfile expectedProfile = new DetectorProfile.Builder().state(DetectorState.INIT).build(); + + // 123 / 128 rounded to 96% + InitProgressProfile profile = new 
InitProgressProfile("96%", neededSamples * detectorIntervalMin, neededSamples); + expectedProfile.setInitProgress(profile); final CountDownLatch inProgressLatch = new CountDownLatch(1); runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { @@ -538,36 +619,48 @@ private void stoppedDetectorErrorTemplate(DetectorState state, JobStatus jobStat }, exception -> { assertTrue("Should not reach here ", false); inProgressLatch.countDown(); - }), stateNError); + }), stateInitProgress); assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); } - /** - * Job enabled time is earlier than and disabled time is later than index 2 creation date, we expect to search 2 indices - */ - public void testDetectorStoppedEnabledTimeLtIndex2Date() throws IOException, InterruptedException { - stoppedDetectorErrorTemplate(DetectorState.DISABLED, JobStatus.DISABLED_ROTATED_1, ErrorResultStatus.STOPPED_ERROR_2); - } + public void testInitProgressFailImmediately() throws IOException, InterruptedException { + setUpClientGet(DetectorStatus.NO_DOC, JobStatus.ENABLED, RCFPollingStatus.INITTING, ErrorResultStatus.NO_ERROR); + DetectorProfile expectedProfile = new DetectorProfile.Builder().state(DetectorState.INIT).build(); - /** - * Both job enabled and disabled time are later than index 2 creation date, we expect to search 2 indices - */ - public void testDetectorStoppedEnabledTimeGtIndex2Date() throws IOException, InterruptedException { - stoppedDetectorErrorTemplate(DetectorState.DISABLED, JobStatus.DISABLED_ROTATED_2, ErrorResultStatus.STOPPED_ERROR_2); - } + // 123 / 128 rounded to 96% + InitProgressProfile profile = new InitProgressProfile("96%", neededSamples * detectorIntervalMin, neededSamples); + expectedProfile.setInitProgress(profile); + final CountDownLatch inProgressLatch = new CountDownLatch(1); - /** - * Both job enabled and disabled time are earlier than index 2 creation date, we expect to search 1 indices - */ - public void 
testDetectorStoppedEnabledTimeGtIndex1Date() throws IOException, InterruptedException { - stoppedDetectorErrorTemplate(DetectorState.DISABLED, JobStatus.DISABLED_ROTATED_3, ErrorResultStatus.STOPPED_ERROR_1); + runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { + assertTrue("Should not reach here ", false); + inProgressLatch.countDown(); + }, exception -> { + assertTrue(exception.getMessage().contains(AnomalyDetectorProfileRunner.FAIL_TO_FIND_DETECTOR_MSG)); + inProgressLatch.countDown(); + }), stateInitProgress); + assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); } - public void testAssumption() { - assertEquals( - "profileError depends on this assumption.", - ".opendistro-anomaly-results*", - AnomalyDetectionIndices.ALL_AD_RESULTS_INDEX_PATTERN - ); + public void testInitNoUpdateNoIndex() throws IOException, InterruptedException { + setUpClientGet(DetectorStatus.EXIST, JobStatus.ENABLED, RCFPollingStatus.EMPTY, ErrorResultStatus.NO_ERROR); + DetectorProfile expectedProfile = new DetectorProfile.Builder() + .state(DetectorState.INIT) + .initProgress(new InitProgressProfile("0%", detectorIntervalMin * requiredSamples, requiredSamples)) + .build(); + final CountDownLatch inProgressLatch = new CountDownLatch(1); + + runner.profile(detector.getDetectorId(), ActionListener.wrap(response -> { + assertEquals(expectedProfile, response); + inProgressLatch.countDown(); + }, exception -> { + logger.error(exception); + for (StackTraceElement ste : exception.getStackTrace()) { + logger.info(ste); + } + assertTrue("Should not reach here ", false); + inProgressLatch.countDown(); + }), stateInitProgress); + assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); } } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/TestHelpers.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/TestHelpers.java index c56ccdd6..1cc9167d 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/TestHelpers.java +++ 
b/src/test/java/com/amazon/opendistroforelasticsearch/ad/TestHelpers.java @@ -99,6 +99,7 @@ import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorExecutionInput; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetectorJob; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.amazon.opendistroforelasticsearch.ad.model.Feature; import com.amazon.opendistroforelasticsearch.ad.model.FeatureData; import com.amazon.opendistroforelasticsearch.ad.model.IntervalTimeConfiguration; @@ -230,6 +231,24 @@ public static AnomalyDetector randomAnomalyDetectorWithEmptyFeature() throws IOE ); } + public static AnomalyDetector randomAnomalyDetectorWithInterval(TimeConfiguration interval) throws IOException { + return new AnomalyDetector( + randomAlphaOfLength(10), + randomLong(), + randomAlphaOfLength(20), + randomAlphaOfLength(30), + randomAlphaOfLength(5), + ImmutableList.of(randomAlphaOfLength(10).toLowerCase()), + ImmutableList.of(randomFeature()), + randomQuery(), + interval, + randomIntervalTimeConfiguration(), + null, + randomInt(), + Instant.now().truncatedTo(ChronoUnit.SECONDS) + ); + } + public static SearchSourceBuilder randomFeatureQuery() throws IOException { String query = "{\"query\":{\"match\":{\"user\":{\"query\":\"kimchy\",\"operator\":\"OR\",\"prefix_length\":0," + "\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\"," @@ -455,12 +474,11 @@ public static void createIndex(RestClient client, String indexName, HttpEntity d ); } - public static GetResponse createGetResponse(ToXContentObject o, String id) throws IOException { + public static GetResponse createGetResponse(ToXContentObject o, String id, String indexName) throws IOException { XContentBuilder content = o.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS); - return new GetResponse( new GetResult( - 
AnomalyDetector.ANOMALY_DETECTORS_INDEX, + indexName, MapperService.SINGLE_MAPPING_NAME, id, UNASSIGNED_SEQ_NO, @@ -520,4 +538,20 @@ public static SearchResponse createEmptySearchResponse() throws IOException { SearchResponse.Clusters.EMPTY ); } + + public static AnomalyResult randomDetectState() { + return randomAnomalyDetectResult(randomDouble(), randomAlphaOfLength(5)); + } + + public static DetectorInternalState randomDetectState(String error) { + return randomDetectState(error, Instant.now()); + } + + public static DetectorInternalState randomDetectState(Instant lastUpdateTime) { + return randomDetectState(randomAlphaOfLength(5), lastUpdateTime); + } + + public static DetectorInternalState randomDetectState(String error, Instant lastUpdateTime) { + return new DetectorInternalState.Builder().lastUpdateTime(lastUpdateTime).error(error).build(); + } } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/feature/FeatureManagerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/feature/FeatureManagerTests.java index 0d14105d..e7533a16 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/feature/FeatureManagerTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/feature/FeatureManagerTests.java @@ -59,7 +59,7 @@ import com.amazon.opendistroforelasticsearch.ad.dataprocessor.SingleFeatureLinearUniformInterpolator; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.IntervalTimeConfiguration; -import com.amazon.opendistroforelasticsearch.ad.transport.ADStateManager; +import com.amazon.opendistroforelasticsearch.ad.transport.TransportStateManager; import com.amazon.opendistroforelasticsearch.ad.util.ArrayEqMatcher; @RunWith(JUnitParamsRunner.class) @@ -91,7 +91,7 @@ public class FeatureManagerTests { private Clock clock; @Mock - private ADStateManager stateManager; + private TransportStateManager stateManager; private FeatureManager 
featureManager; diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/feature/SearchFeatureDaoTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/feature/SearchFeatureDaoTests.java index 610bc7da..c6976c2d 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/feature/SearchFeatureDaoTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/feature/SearchFeatureDaoTests.java @@ -89,7 +89,7 @@ import com.amazon.opendistroforelasticsearch.ad.dataprocessor.SingleFeatureLinearUniformInterpolator; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.IntervalTimeConfiguration; -import com.amazon.opendistroforelasticsearch.ad.transport.ADStateManager; +import com.amazon.opendistroforelasticsearch.ad.transport.TransportStateManager; import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; import com.amazon.opendistroforelasticsearch.ad.util.ParseUtils; @@ -128,7 +128,7 @@ public class SearchFeatureDaoTests { @Mock private Max max; @Mock - private ADStateManager stateManager; + private TransportStateManager stateManager; @Mock private AnomalyDetector detector; diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStatsNodesTransportActionTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStatsNodesTransportActionTests.java index 49fd78c8..8c66031b 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStatsNodesTransportActionTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStatsNodesTransportActionTests.java @@ -26,6 +26,7 @@ import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.ESIntegTestCase; import 
org.elasticsearch.threadpool.ThreadPool; @@ -51,6 +52,7 @@ public class ADStatsNodesTransportActionTests extends ESIntegTestCase { private String clusterStatName1, clusterStatName2; private String nodeStatName1, nodeStatName2; + @Override @Before public void setUp() throws Exception { super.setUp(); @@ -59,7 +61,13 @@ public void setUp() throws Exception { Clock clock = mock(Clock.class); Throttler throttler = new Throttler(clock); ThreadPool threadPool = mock(ThreadPool.class); - IndexUtils indexUtils = new IndexUtils(client, new ClientUtil(Settings.EMPTY, client, throttler, threadPool), clusterService()); + IndexNameExpressionResolver indexNameResolver = mock(IndexNameExpressionResolver.class); + IndexUtils indexUtils = new IndexUtils( + client, + new ClientUtil(Settings.EMPTY, client, throttler, threadPool), + clusterService(), + indexNameResolver + ); ModelManager modelManager = mock(ModelManager.class); clusterStatName1 = "clusterStat1"; diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTests.java index f8ebc54b..faa552b3 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/AnomalyResultTests.java @@ -42,6 +42,7 @@ import java.io.IOException; import java.time.Clock; +import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -55,6 +56,8 @@ import org.elasticsearch.ElasticsearchTimeoutException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.action.get.GetRequest; +import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.action.support.ActionFilters; 
@@ -92,6 +95,7 @@ import test.com.amazon.opendistroforelasticsearch.ad.util.JsonDeserializer; import com.amazon.opendistroforelasticsearch.ad.AbstractADTest; +import com.amazon.opendistroforelasticsearch.ad.TestHelpers; import com.amazon.opendistroforelasticsearch.ad.breaker.ADCircuitBreakerService; import com.amazon.opendistroforelasticsearch.ad.cluster.HashRing; import com.amazon.opendistroforelasticsearch.ad.common.exception.AnomalyDetectionException; @@ -111,6 +115,7 @@ import com.amazon.opendistroforelasticsearch.ad.ml.rcf.CombinedRcfResult; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyDetector; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; import com.amazon.opendistroforelasticsearch.ad.model.FeatureData; import com.amazon.opendistroforelasticsearch.ad.stats.ADStat; import com.amazon.opendistroforelasticsearch.ad.stats.ADStats; @@ -126,7 +131,7 @@ public class AnomalyResultTests extends AbstractADTest { private static Settings settings = Settings.EMPTY; private TransportService transportService; private ClusterService clusterService; - private ADStateManager stateManager; + private TransportStateManager stateManager; private ColdStartRunner runner; private FeatureManager featureQuery; private ModelManager normalModelManager; @@ -163,7 +168,7 @@ public void setUp() throws Exception { runner = new ColdStartRunner(); transportService = testNodes[0].transportService; clusterService = testNodes[0].clusterService; - stateManager = mock(ADStateManager.class); + stateManager = mock(TransportStateManager.class); // return 2 RCF partitions when(stateManager.getPartitionNumber(any(String.class), any(AnomalyDetector.class))).thenReturn(2); when(stateManager.isMuted(any(String.class))).thenReturn(false); @@ -242,7 +247,7 @@ public void setUp() throws Exception { Throttler throttler = new Throttler(clock); ThreadPool threadpool = mock(ThreadPool.class); 
ClientUtil clientUtil = new ClientUtil(Settings.EMPTY, client, throttler, threadpool); - IndexUtils indexUtils = new IndexUtils(client, clientUtil, clusterService); + IndexUtils indexUtils = new IndexUtils(client, clientUtil, clusterService, indexNameResolver); Map> statsMap = new HashMap>() { { @@ -252,6 +257,25 @@ public void setUp() throws Exception { }; adStats = new ADStats(indexUtils, normalModelManager, statsMap); + + doAnswer(invocation -> { + Object[] args = invocation.getArguments(); + GetRequest request = (GetRequest) args[0]; + ActionListener listener = (ActionListener) args[1]; + + if (request.index().equals(DetectorInternalState.DETECTOR_STATE_INDEX)) { + + DetectorInternalState.Builder result = new DetectorInternalState.Builder().lastUpdateTime(Instant.now()); + + listener + .onResponse( + TestHelpers.createGetResponse(result.build(), detector.getDetectorId(), DetectorInternalState.DETECTOR_STATE_INDEX) + ); + + } + + return null; + }).when(client).get(any(), any()); } @Override @@ -269,11 +293,6 @@ private Throwable assertException(PlainActionFuture liste return expectThrows(exceptionType, () -> listener.actionGet()); } - private void assertException(PlainActionFuture listener, Class exceptionType, String msg) { - Exception e = expectThrows(exceptionType, () -> listener.actionGet()); - assertThat(e.getMessage(), containsString(msg)); - } - public void testNormal() throws IOException { // These constructors register handler in transport service @@ -643,7 +662,7 @@ public void testTemporaryThresholdNodeNotConnectedException() { @SuppressWarnings("unchecked") public void testMute() { - ADStateManager muteStateManager = mock(ADStateManager.class); + TransportStateManager muteStateManager = mock(TransportStateManager.class); when(muteStateManager.isMuted(any(String.class))).thenReturn(true); doAnswer(invocation -> { ActionListener> listener = invocation.getArgument(1); diff --git 
a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/CronTransportActionTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/CronTransportActionTests.java index 38ce9fa6..172df264 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/CronTransportActionTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/CronTransportActionTests.java @@ -64,7 +64,7 @@ public void setUp() throws Exception { TransportService transportService = mock(TransportService.class); ActionFilters actionFilters = mock(ActionFilters.class); - ADStateManager tarnsportStatemanager = mock(ADStateManager.class); + TransportStateManager tarnsportStatemanager = mock(TransportStateManager.class); ModelManager modelManager = mock(ModelManager.class); FeatureManager featureManager = mock(FeatureManager.class); diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/DeleteModelTransportActionTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/DeleteModelTransportActionTests.java index e5b1dd55..8defa7b4 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/DeleteModelTransportActionTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/DeleteModelTransportActionTests.java @@ -69,7 +69,7 @@ public void setUp() throws Exception { TransportService transportService = mock(TransportService.class); ActionFilters actionFilters = mock(ActionFilters.class); - ADStateManager tarnsportStatemanager = mock(ADStateManager.class); + TransportStateManager tarnsportStatemanager = mock(TransportStateManager.class); ModelManager modelManager = mock(ModelManager.class); FeatureManager featureManager = mock(FeatureManager.class); diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTests.java new file mode 100644 
index 00000000..cc8b8799 --- /dev/null +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/RCFPollingTests.java @@ -0,0 +1,354 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.util.Collections; +import java.util.Optional; + +import org.elasticsearch.Version; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.transport.TransportAddress; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.transport.ConnectTransportException; +import org.elasticsearch.transport.Transport; +import org.elasticsearch.transport.TransportException; +import org.elasticsearch.transport.TransportInterceptor; +import 
org.elasticsearch.transport.TransportRequest; +import org.elasticsearch.transport.TransportRequestOptions; +import org.elasticsearch.transport.TransportResponse; +import org.elasticsearch.transport.TransportResponseHandler; +import org.elasticsearch.transport.TransportService; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; + +import test.com.amazon.opendistroforelasticsearch.ad.util.FakeNode; +import test.com.amazon.opendistroforelasticsearch.ad.util.JsonDeserializer; + +import com.amazon.opendistroforelasticsearch.ad.AbstractADTest; +import com.amazon.opendistroforelasticsearch.ad.TestHelpers; +import com.amazon.opendistroforelasticsearch.ad.cluster.HashRing; +import com.amazon.opendistroforelasticsearch.ad.common.exception.AnomalyDetectionException; +import com.amazon.opendistroforelasticsearch.ad.common.exception.JsonPathNotFoundException; +import com.amazon.opendistroforelasticsearch.ad.constant.CommonMessageAttributes; +import com.amazon.opendistroforelasticsearch.ad.ml.ModelManager; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; + +public class RCFPollingTests extends AbstractADTest { + Gson gson = new GsonBuilder().create(); + private String detectorId = "jqIG6XIBEyaF3zCMZfcB"; + private String model0Id = detectorId + "_rcf_0"; + private long totalUpdates = 3L; + private String nodeId = "abc"; + private ClusterService clusterService; + private HashRing hashRing; + private TransportAddress transportAddress1; + private ModelManager manager; + private TransportService transportService; + private PlainActionFuture future; + private RCFPollingTransportAction action; + private RCFPollingRequest request; + private TransportInterceptor normalTransportInterceptor, failureTransportInterceptor; + + @BeforeClass + public static void setUpBeforeClass() { + setUpThreadPool(RCFPollingTests.class.getSimpleName()); + + } + + @AfterClass + public static void tearDownAfterClass() { + tearDownThreadPool(); + } + + 
private void registerHandler(FakeNode node) { + new RCFPollingTransportAction( + new ActionFilters(Collections.emptySet()), + node.transportService, + Settings.EMPTY, + manager, + hashRing, + node.clusterService + ); + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + clusterService = mock(ClusterService.class); + hashRing = mock(HashRing.class); + transportAddress1 = new TransportAddress(new InetSocketAddress(InetAddress.getByName("1.2.3.4"), 9300)); + manager = mock(ModelManager.class); + transportService = new TransportService( + Settings.EMPTY, + mock(Transport.class), + null, + TransportService.NOOP_TRANSPORT_INTERCEPTOR, + x -> null, + null, + Collections.emptySet() + ); + future = new PlainActionFuture<>(); + + request = new RCFPollingRequest(detectorId); + when(manager.getRcfModelId(any(String.class), anyInt())).thenReturn(model0Id); + + doAnswer(invocation -> { + Object[] args = invocation.getArguments(); + @SuppressWarnings("unchecked") + ActionListener listener = (ActionListener) args[2]; + listener.onResponse(totalUpdates); + return null; + }).when(manager).getTotalUpdates(any(String.class), any(String.class), any()); + + normalTransportInterceptor = new TransportInterceptor() { + @Override + public AsyncSender interceptSender(AsyncSender sender) { + return new AsyncSender() { + @Override + public void sendRequest( + Transport.Connection connection, + String action, + TransportRequest request, + TransportRequestOptions options, + TransportResponseHandler handler + ) { + if (RCFPollingAction.NAME.equals(action)) { + sender.sendRequest(connection, action, request, options, rcfRollingHandler(handler)); + } else { + sender.sendRequest(connection, action, request, options, handler); + } + } + }; + } + }; + + failureTransportInterceptor = new TransportInterceptor() { + @Override + public AsyncSender interceptSender(AsyncSender sender) { + return new AsyncSender() { + @Override + public void sendRequest( + 
Transport.Connection connection, + String action, + TransportRequest request, + TransportRequestOptions options, + TransportResponseHandler handler + ) { + if (RCFPollingAction.NAME.equals(action)) { + sender.sendRequest(connection, action, request, options, rcfFailureRollingHandler(handler)); + } else { + sender.sendRequest(connection, action, request, options, handler); + } + } + }; + } + }; + } + + public void testNormal() { + DiscoveryNode localNode = new DiscoveryNode(nodeId, transportAddress1, Version.CURRENT.minimumCompatibilityVersion()); + when(hashRing.getOwningNode(any(String.class))).thenReturn(Optional.of(localNode)); + + when(clusterService.localNode()).thenReturn(localNode); + + action = new RCFPollingTransportAction( + mock(ActionFilters.class), + transportService, + Settings.EMPTY, + manager, + hashRing, + clusterService + ); + action.doExecute(mock(Task.class), request, future); + + RCFPollingResponse response = future.actionGet(); + assertEquals(totalUpdates, response.getTotalUpdates()); + } + + public void testNoNodeFoundForModel() { + when(manager.getRcfModelId(any(String.class), anyInt())).thenReturn(model0Id); + when(hashRing.getOwningNode(any(String.class))).thenReturn(Optional.empty()); + action = new RCFPollingTransportAction( + mock(ActionFilters.class), + transportService, + Settings.EMPTY, + manager, + hashRing, + clusterService + ); + action.doExecute(mock(Task.class), request, future); + assertException(future, AnomalyDetectionException.class, RCFPollingTransportAction.NO_NODE_FOUND_MSG); + } + + /** + * Precondition: receiver's model manager respond with a response. See + * manager.getRcfModelId mocked output in setUp method. + * When receiving a response, respond back with totalUpdates. 
+ * @param handler handler for receiver + * @return handler for request sender + */ + private TransportResponseHandler rcfRollingHandler(TransportResponseHandler handler) { + return new TransportResponseHandler() { + @Override + public T read(StreamInput in) throws IOException { + return handler.read(in); + } + + @Override + @SuppressWarnings("unchecked") + public void handleResponse(T response) { + handler.handleResponse((T) new RCFPollingResponse(totalUpdates)); + } + + @Override + public void handleException(TransportException exp) { + handler.handleException(exp); + } + + @Override + public String executor() { + return handler.executor(); + } + }; + } + + /** + * Precondition: receiver's model manager responds with a response. See + * manager.getRcfModelId mocked output in setUp method. + * Create handler that would return a connection failure + * @param handler callback handler + * @return handler that would return a connection failure + */ + private TransportResponseHandler rcfFailureRollingHandler(TransportResponseHandler handler) { + return new TransportResponseHandler() { + @Override + public T read(StreamInput in) throws IOException { + return handler.read(in); + } + + @Override + public void handleResponse(T response) { + handler + .handleException( + new ConnectTransportException( + new DiscoveryNode(nodeId, transportAddress1, Version.CURRENT.minimumCompatibilityVersion()), + RCFPollingAction.NAME + ) + ); + } + + @Override + public void handleException(TransportException exp) { + handler.handleException(exp); + } + + @Override + public String executor() { + return handler.executor(); + } + }; + } + + public void testGetRemoteNormalResponse() { + setupTestNodes(Settings.EMPTY, normalTransportInterceptor); + try { + TransportService realTransportService = testNodes[0].transportService; + clusterService = testNodes[0].clusterService; + + action = new RCFPollingTransportAction( + new ActionFilters(Collections.emptySet()), + realTransportService, +
Settings.EMPTY, + manager, + hashRing, + clusterService + ); + + when(hashRing.getOwningNode(any(String.class))).thenReturn(Optional.of(testNodes[1].discoveryNode())); + registerHandler(testNodes[1]); + + action.doExecute(null, request, future); + + RCFPollingResponse response = future.actionGet(); + assertEquals(totalUpdates, response.getTotalUpdates()); + } finally { + tearDownTestNodes(); + } + } + + public void testGetRemoteFailureResponse() { + setupTestNodes(Settings.EMPTY, failureTransportInterceptor); + try { + TransportService realTransportService = testNodes[0].transportService; + clusterService = testNodes[0].clusterService; + + action = new RCFPollingTransportAction( + new ActionFilters(Collections.emptySet()), + realTransportService, + Settings.EMPTY, + manager, + hashRing, + clusterService + ); + + when(hashRing.getOwningNode(any(String.class))).thenReturn(Optional.of(testNodes[1].discoveryNode())); + registerHandler(testNodes[1]); + + action.doExecute(null, request, future); + + expectThrows(ConnectTransportException.class, () -> future.actionGet()); + } finally { + tearDownTestNodes(); + } + } + + public void testResponseToXContent() throws IOException, JsonPathNotFoundException { + RCFPollingResponse response = new RCFPollingResponse(totalUpdates); + String json = TestHelpers.xContentBuilderToString(response.toXContent(TestHelpers.builder(), ToXContent.EMPTY_PARAMS)); + assertEquals(totalUpdates, JsonDeserializer.getLongValue(json, RCFPollingResponse.TOTAL_UPDATES_KEY)); + } + + public void testRequestToXContent() throws IOException, JsonPathNotFoundException { + RCFPollingRequest response = new RCFPollingRequest(detectorId); + String json = TestHelpers.xContentBuilderToString(response.toXContent(TestHelpers.builder(), ToXContent.EMPTY_PARAMS)); + assertEquals(detectorId, JsonDeserializer.getTextValue(json, CommonMessageAttributes.ID_JSON_KEY)); + } + + public void testNullDetectorId() { + String nullDetectorId = null; + RCFPollingRequest 
emptyRequest = new RCFPollingRequest(nullDetectorId); + assertTrue(emptyRequest.validate() != null); + } +} diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManagerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateManagerTests.java similarity index 57% rename from src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManagerTests.java rename to src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateManagerTests.java index a088ec39..eeacd5d5 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/ADStateManagerTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateManagerTests.java @@ -15,10 +15,11 @@ package com.amazon.opendistroforelasticsearch.ad.transport; -import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; import static org.mockito.Matchers.any; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyZeroInteractions; import static org.mockito.Mockito.when; @@ -29,8 +30,6 @@ import java.util.AbstractMap.SimpleImmutableEntry; import java.util.Arrays; import java.util.Collections; -import java.util.Map.Entry; -import java.util.concurrent.ConcurrentHashMap; import java.util.stream.IntStream; import org.elasticsearch.action.ActionListener; @@ -38,15 +37,9 @@ import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.client.Client; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.NamedXContentRegistry; -import org.elasticsearch.common.xcontent.ToXContent; -import 
org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.index.get.GetResult; -import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.search.SearchModule; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.ThreadPool; @@ -60,8 +53,8 @@ import com.amazon.opendistroforelasticsearch.ad.util.Throttler; import com.google.common.collect.ImmutableMap; -public class ADStateManagerTests extends ESTestCase { - private ADStateManager stateManager; +public class TransportStateManagerTests extends ESTestCase { + private TransportStateManager stateManager; private ModelManager modelManager; private Client client; private ClientUtil clientUtil; @@ -71,6 +64,9 @@ public class ADStateManagerTests extends ESTestCase { private ThreadPool context; private AnomalyDetector detectorToCheck; private Settings settings; + private String adId = "123"; + + private GetResponse checkpointResponse; @Override protected NamedXContentRegistry xContentRegistry() { @@ -85,7 +81,6 @@ public void setUp() throws Exception { modelManager = mock(ModelManager.class); when(modelManager.getPartitionedForestSizes(any(AnomalyDetector.class))).thenReturn(new SimpleImmutableEntry<>(2, 20)); client = mock(Client.class); - clientUtil = mock(ClientUtil.class); settings = Settings .builder() .put("opendistro.anomaly_detection.max_retry_for_unresponsive_node", 3) @@ -96,8 +91,10 @@ public void setUp() throws Exception { context = TestHelpers.createThreadPool(); throttler = new Throttler(clock); - stateManager = new ADStateManager(client, xContentRegistry(), modelManager, settings, clientUtil, clock, duration); + clientUtil = new ClientUtil(Settings.EMPTY, client, throttler, mock(ThreadPool.class)); + stateManager = new TransportStateManager(client, xContentRegistry(), modelManager, settings, clientUtil, clock, duration); + checkpointResponse = mock(GetResponse.class); } @Override @@ -112,9 
+109,8 @@ public void tearDown() throws Exception { } @SuppressWarnings("unchecked") - private String setupDetector(boolean responseExists) throws IOException { + private String setupDetector() throws IOException { detectorToCheck = TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null); - XContentBuilder content = detectorToCheck.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS); doAnswer(invocation -> { Object[] args = invocation.getArguments(); @@ -125,39 +121,63 @@ private String setupDetector(boolean responseExists) throws IOException { if (args[0] instanceof GetRequest) { request = (GetRequest) args[0]; } - if (args[2] instanceof ActionListener) { - listener = (ActionListener) args[2]; + if (args[1] instanceof ActionListener) { + listener = (ActionListener) args[1]; } assertTrue(request != null && listener != null); listener .onResponse( - new GetResponse( - new GetResult( - AnomalyDetector.ANOMALY_DETECTORS_INDEX, - MapperService.SINGLE_MAPPING_NAME, - detectorToCheck.getDetectorId(), - UNASSIGNED_SEQ_NO, - 0, - -1, - responseExists, - BytesReference.bytes(content), - Collections.emptyMap(), - Collections.emptyMap() - ) - ) + TestHelpers.createGetResponse(detectorToCheck, detectorToCheck.getDetectorId(), AnomalyDetector.ANOMALY_DETECTORS_INDEX) ); return null; - }).when(clientUtil).asyncRequest(any(GetRequest.class), any(), any(ActionListener.class)); + }).when(client).get(any(), any(ActionListener.class)); return detectorToCheck.getDetectorId(); } + @SuppressWarnings("unchecked") + private void setupCheckpoint(boolean responseExists) throws IOException { + when(checkpointResponse.isExists()).thenReturn(responseExists); + + doAnswer(invocation -> { + Object[] args = invocation.getArguments(); + assertTrue(String.format("The size of args is %d. 
Its content is %s", args.length, Arrays.toString(args)), args.length >= 2); + + GetRequest request = null; + ActionListener listener = null; + if (args[0] instanceof GetRequest) { + request = (GetRequest) args[0]; + } + if (args[1] instanceof ActionListener) { + listener = (ActionListener) args[1]; + } + + assertTrue(request != null && listener != null); + listener.onResponse(checkpointResponse); + + return null; + }).when(client).get(any(), any(ActionListener.class)); + } + public void testGetPartitionNumber() throws IOException, InterruptedException { - String detectorId = setupDetector(true); - int partitionNumber = stateManager - .getPartitionNumber(detectorId, TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null)); - assertEquals(2, partitionNumber); + String detectorId = setupDetector(); + AnomalyDetector detector = TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null); + for (int i = 0; i < 2; i++) { + // call two times should return the same result + int partitionNumber = stateManager.getPartitionNumber(detectorId, detector); + assertEquals(2, partitionNumber); + } + + // the 2nd call should directly fetch cached result + verify(modelManager, times(1)).getPartitionedForestSizes(any()); + } + + public void testGetLastError() throws IOException, InterruptedException { + String error = "blah"; + assertEquals(TransportStateManager.NO_ERROR, stateManager.getLastError(adId)); + stateManager.setLastError(adId, error); + assertEquals(error, stateManager.getLastError(adId)); } public void testShouldMute() { @@ -186,26 +206,8 @@ public void testMaintenanceDoNothing() { verifyZeroInteractions(clock); } - public void testMaintenanceNotRemove() throws IOException { - ConcurrentHashMap> states = new ConcurrentHashMap<>(); - when(clock.instant()).thenReturn(Instant.MIN); - states.put("123", new SimpleImmutableEntry<>(TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null), Instant.MAX)); - 
stateManager.maintenance(states); - assertEquals(1, states.size()); - - } - - public void testMaintenancRemove() throws IOException { - ConcurrentHashMap> states = new ConcurrentHashMap<>(); - when(clock.instant()).thenReturn(Instant.MAX); - states.put("123", new SimpleImmutableEntry<>(TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null), Instant.MIN)); - stateManager.maintenance(states); - assertEquals(0, states.size()); - - } - public void testHasRunningQuery() throws IOException { - stateManager = new ADStateManager( + stateManager = new TransportStateManager( client, xContentRegistry(), modelManager, @@ -223,11 +225,64 @@ public void testHasRunningQuery() throws IOException { } public void testGetAnomalyDetector() throws IOException { - String detectorId = setupDetector(true); + String detectorId = setupDetector(); stateManager .getAnomalyDetector( detectorId, ActionListener.wrap(asDetector -> { assertEquals(detectorToCheck, asDetector.get()); }, exception -> assertTrue(false)) ); } + + public void getCheckpointTestTemplate(boolean exists) throws IOException { + setupCheckpoint(exists); + when(clock.instant()).thenReturn(Instant.MIN); + stateManager + .getDetectorCheckpoint(adId, ActionListener.wrap(checkpointExists -> { assertEquals(exists, checkpointExists); }, exception -> { + for (StackTraceElement ste : exception.getStackTrace()) { + logger.info(ste); + } + assertTrue(false); + })); + } + + public void testCheckpointExists() throws IOException { + getCheckpointTestTemplate(true); + } + + public void testCheckpointNotExists() throws IOException { + getCheckpointTestTemplate(false); + } + + public void testMaintenanceNotRemove() throws IOException { + setupCheckpoint(true); + when(clock.instant()).thenReturn(Instant.ofEpochMilli(1)); + stateManager + .getDetectorCheckpoint( + adId, + ActionListener.wrap(gotCheckpoint -> { assertTrue(gotCheckpoint); }, exception -> assertTrue(false)) + ); + 
when(clock.instant()).thenReturn(Instant.ofEpochMilli(1)); + stateManager.maintenance(); + stateManager + .getDetectorCheckpoint(adId, ActionListener.wrap(gotCheckpoint -> assertTrue(gotCheckpoint), exception -> assertTrue(false))); + verify(client, times(1)).get(any(), any()); + } + + public void testMaintenanceRemove() throws IOException { + setupCheckpoint(true); + when(clock.instant()).thenReturn(Instant.ofEpochMilli(1)); + stateManager + .getDetectorCheckpoint( + adId, + ActionListener.wrap(gotCheckpoint -> { assertTrue(gotCheckpoint); }, exception -> assertTrue(false)) + ); + when(clock.instant()).thenReturn(Instant.ofEpochSecond(7200L)); + stateManager.maintenance(); + stateManager + .getDetectorCheckpoint( + adId, + ActionListener.wrap(gotCheckpoint -> { assertTrue(gotCheckpoint); }, exception -> assertTrue(false)) + ); + verify(client, times(2)).get(any(), any()); + } } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateTests.java new file mode 100644 index 00000000..88087d77 --- /dev/null +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/TransportStateTests.java @@ -0,0 +1,96 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.opendistroforelasticsearch.ad.transport; + +import java.io.IOException; +import java.time.Duration; +import java.time.Instant; +import java.util.AbstractMap.SimpleImmutableEntry; + +import org.elasticsearch.test.ESTestCase; + +import com.amazon.opendistroforelasticsearch.ad.TestHelpers; + +public class TransportStateTests extends ESTestCase { + private TransportState state; + + @Override + public void setUp() throws Exception { + super.setUp(); + state = new TransportState("123"); + } + + private Duration duration = Duration.ofHours(1); + + public void testMaintenanceNotRemoveSingle() throws IOException { + state + .setDetectorDef( + new SimpleImmutableEntry<>( + TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null), + Instant.ofEpochMilli(1000) + ) + ); + + assertTrue(!state.expired(duration, Instant.MIN)); + } + + public void testMaintenanceNotRemove() throws IOException { + state + .setDetectorDef( + new SimpleImmutableEntry<>( + TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null), + Instant.ofEpochSecond(1000) + ) + ); + state.setLastError(new SimpleImmutableEntry<>(null, Instant.ofEpochMilli(1000))); + + assertTrue(!state.expired(duration, Instant.ofEpochSecond(3700))); + } + + public void testMaintenanceRemoveLastError() throws IOException { + state + .setDetectorDef( + new SimpleImmutableEntry<>( + TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null), + Instant.ofEpochMilli(1000) + ) + ); + state.setLastError(new SimpleImmutableEntry<>(null, Instant.ofEpochMilli(1000))); + + assertTrue(state.expired(duration, Instant.ofEpochSecond(3700))); + } + + public void testMaintenancRemoveDetector() throws IOException { + state + .setDetectorDef( + new SimpleImmutableEntry<>(TestHelpers.randomAnomalyDetector(TestHelpers.randomUiMetadata(), null), Instant.MIN) + ); + assertTrue(state.expired(duration, Instant.MAX)); + + } + + public void testMaintenanceFlagNotRemove() throws IOException 
{ + state.setCheckpoint(Instant.ofEpochMilli(1000)); + assertTrue(!state.expired(duration, Instant.MIN)); + + } + + public void testMaintenancFlagRemove() throws IOException { + state.setCheckpoint(Instant.MIN); + assertTrue(!state.expired(duration, Instant.MIN)); + + } +} diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandlerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandlerTests.java index 65565bf1..075e704a 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandlerTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/AnomalyResultHandlerTests.java @@ -41,6 +41,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; +import org.elasticsearch.threadpool.ThreadPool; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; @@ -57,6 +58,10 @@ import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; import com.amazon.opendistroforelasticsearch.ad.model.AnomalyResult; import com.amazon.opendistroforelasticsearch.ad.transport.AnomalyResultTests; +import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; +import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils; +import com.amazon.opendistroforelasticsearch.ad.util.Throttler; +import com.amazon.opendistroforelasticsearch.ad.util.ThrowingConsumerWrapper; public class AnomalyResultHandlerTests extends AbstractADTest { private static Settings settings; @@ -66,11 +71,22 @@ public class AnomalyResultHandlerTests extends AbstractADTest { @Mock private Client client; + private ClientUtil clientUtil; + + @Mock + private IndexNameExpressionResolver indexNameResolver; + @Mock private AnomalyDetectionIndices anomalyDetectionIndices; + private String 
detectorId = "123"; + @Mock - private IndexNameExpressionResolver indexNameResolver; + private Throttler throttler; + + private ThreadPool context; + + private IndexUtils indexUtil; @BeforeClass public static void setUpBeforeClass() { @@ -88,9 +104,12 @@ public static void tearDownAfterClass() { @Before public void setUp() throws Exception { super.setUp(); - super.setUpLog4jForJUnit(AnomalyResultHandler.class); + super.setUpLog4jForJUnit(AnomalyIndexHandler.class); MockitoAnnotations.initMocks(this); setWriteBlockAdResultIndex(false); + context = TestHelpers.createThreadPool(); + clientUtil = new ClientUtil(settings, client, throttler, context); + indexUtil = new IndexUtils(client, clientUtil, clusterService, indexNameResolver); } @Override @@ -114,25 +133,29 @@ public void testSavingAdResult() throws IOException { listener.onResponse(mock(IndexResponse.class)); return null; }).when(client).index(any(IndexRequest.class), ArgumentMatchers.>any()); - AnomalyResultHandler handler = new AnomalyResultHandler( + AnomalyIndexHandler handler = new AnomalyIndexHandler( client, settings, - clusterService, - indexNameResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + clientUtil, + indexUtil, + clusterService ); - handler.indexAnomalyResult(TestHelpers.randomAnomalyDetectResult()); - assertEquals(1, testAppender.countMessage((AnomalyResultHandler.SUCCESS_SAVING_MSG))); + handler.index(TestHelpers.randomAnomalyDetectResult(), detectorId); + assertEquals(1, testAppender.countMessage(AnomalyIndexHandler.SUCCESS_SAVING_MSG, true)); } @Test public void testSavingFailureNotRetry() throws InterruptedException, IOException { savingFailureTemplate(false, 1, true); - assertEquals(1, testAppender.countMessage((AnomalyResultHandler.FAIL_TO_SAVE_ERR_MSG))); - 
assertTrue(!testAppender.containsMessage(AnomalyResultHandler.SUCCESS_SAVING_MSG)); - assertTrue(!testAppender.containsMessage(AnomalyResultHandler.RETRY_SAVING_ERR_MSG)); + assertEquals(1, testAppender.countMessage(AnomalyIndexHandler.FAIL_TO_SAVE_ERR_MSG, true)); + assertTrue(!testAppender.containsMessage(AnomalyIndexHandler.SUCCESS_SAVING_MSG, true)); + assertTrue(!testAppender.containsMessage(AnomalyIndexHandler.RETRY_SAVING_ERR_MSG, true)); } @Test @@ -140,57 +163,69 @@ public void testSavingFailureRetry() throws InterruptedException, IOException { setWriteBlockAdResultIndex(false); savingFailureTemplate(true, 3, true); - assertEquals(2, testAppender.countMessage((AnomalyResultHandler.RETRY_SAVING_ERR_MSG))); - assertEquals(1, testAppender.countMessage((AnomalyResultHandler.FAIL_TO_SAVE_ERR_MSG))); - assertTrue(!testAppender.containsMessage(AnomalyResultHandler.SUCCESS_SAVING_MSG)); + assertEquals(2, testAppender.countMessage(AnomalyIndexHandler.RETRY_SAVING_ERR_MSG, true)); + assertEquals(1, testAppender.countMessage(AnomalyIndexHandler.FAIL_TO_SAVE_ERR_MSG, true)); + assertTrue(!testAppender.containsMessage(AnomalyIndexHandler.SUCCESS_SAVING_MSG, true)); } @Test public void testIndexWriteBlock() { setWriteBlockAdResultIndex(true); - AnomalyResultHandler handler = new AnomalyResultHandler( + AnomalyIndexHandler handler = new AnomalyIndexHandler( client, settings, - clusterService, - indexNameResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + clientUtil, + indexUtil, + clusterService ); - handler.indexAnomalyResult(TestHelpers.randomAnomalyDetectResult()); + handler.index(TestHelpers.randomAnomalyDetectResult(), detectorId); - assertTrue(testAppender.containsMessage(AnomalyResultHandler.CANNOT_SAVE_ERR_MSG)); + 
assertTrue(testAppender.containsMessage(AnomalyIndexHandler.CANNOT_SAVE_ERR_MSG, true)); } @Test public void testAdResultIndexExist() throws IOException { setInitAnomalyResultIndexException(true); - AnomalyResultHandler handler = new AnomalyResultHandler( + AnomalyIndexHandler handler = new AnomalyIndexHandler( client, settings, - clusterService, - indexNameResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + clientUtil, + indexUtil, + clusterService ); - handler.indexAnomalyResult(TestHelpers.randomAnomalyDetectResult()); + handler.index(TestHelpers.randomAnomalyDetectResult(), detectorId); verify(client, times(1)).index(any(), any()); } @Test public void testAdResultIndexOtherException() throws IOException { expectedEx.expect(AnomalyDetectionException.class); - expectedEx.expectMessage("Error in saving anomaly index for ID"); + expectedEx.expectMessage("Error in saving .opendistro-anomaly-results for detector " + detectorId); setInitAnomalyResultIndexException(false); - AnomalyResultHandler handler = new AnomalyResultHandler( + AnomalyIndexHandler handler = new AnomalyIndexHandler( client, settings, - clusterService, - indexNameResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + clientUtil, + indexUtil, + clusterService ); - handler.indexAnomalyResult(TestHelpers.randomAnomalyDetectResult()); + handler.index(TestHelpers.randomAnomalyDetectResult(), detectorId); verify(client, never()).index(any(), any()); } @@ -257,16 +292,20 @@ private void savingFailureTemplate(boolean throwEsRejectedExecutionException, in 
.put("opendistro.anomaly_detection.backoff_initial_delay", TimeValue.timeValueMillis(1)) .build(); - AnomalyResultHandler handler = new AnomalyResultHandler( + AnomalyIndexHandler handler = new AnomalyIndexHandler( client, backoffSettings, - clusterService, - indexNameResolver, - anomalyDetectionIndices, - threadPool + threadPool, + AnomalyResult.ANOMALY_RESULT_INDEX, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initAnomalyResultIndexDirectly), + anomalyDetectionIndices::doesAnomalyResultIndexExist, + false, + clientUtil, + indexUtil, + clusterService ); - handler.indexAnomalyResult(TestHelpers.randomAnomalyDetectResult()); + handler.index(TestHelpers.randomAnomalyDetectResult(), detectorId); backoffLatch.await(); } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectorStateHandlerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectorStateHandlerTests.java new file mode 100644 index 00000000..8a52b2d3 --- /dev/null +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/transport/handler/DetectorStateHandlerTests.java @@ -0,0 +1,164 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.opendistroforelasticsearch.ad.transport.handler; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.time.Clock; +import java.time.Instant; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.get.GetResponse; +import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.threadpool.ThreadPool; + +import com.amazon.opendistroforelasticsearch.ad.TestHelpers; +import com.amazon.opendistroforelasticsearch.ad.indices.AnomalyDetectionIndices; +import com.amazon.opendistroforelasticsearch.ad.model.DetectorInternalState; +import com.amazon.opendistroforelasticsearch.ad.transport.TransportStateManager; +import com.amazon.opendistroforelasticsearch.ad.transport.handler.DetectionStateHandler.ErrorStrategy; +import com.amazon.opendistroforelasticsearch.ad.util.ClientUtil; +import com.amazon.opendistroforelasticsearch.ad.util.IndexUtils; +import com.amazon.opendistroforelasticsearch.ad.util.Throttler; +import com.amazon.opendistroforelasticsearch.ad.util.ThrowingConsumerWrapper; + +public class DetectorStateHandlerTests extends ESTestCase { + private DetectionStateHandler detectorStateHandler; + private String detectorId = "123"; + private Client client; + private String error = "Stopped due to blah"; + private IndexUtils indexUtils; + private TransportStateManager stateManager; + + @Override + public void setUp() throws Exception { + super.setUp(); + AnomalyDetectionIndices 
anomalyDetectionIndices = mock(AnomalyDetectionIndices.class); + client = mock(Client.class); + Settings settings = Settings.EMPTY; + Clock clock = mock(Clock.class); + Throttler throttler = new Throttler(clock); + ThreadPool threadpool = mock(ThreadPool.class); + ClientUtil clientUtil = new ClientUtil(Settings.EMPTY, client, throttler, threadpool); + indexUtils = mock(IndexUtils.class); + ClusterService clusterService = mock(ClusterService.class); + ThreadPool threadPool = mock(ThreadPool.class); + stateManager = mock(TransportStateManager.class); + detectorStateHandler = new DetectionStateHandler( + client, + settings, + threadPool, + ThrowingConsumerWrapper.throwingConsumerWrapper(anomalyDetectionIndices::initDetectorStateIndex), + anomalyDetectionIndices::doesDetectorStateIndexExist, + clientUtil, + indexUtils, + clusterService, + NamedXContentRegistry.EMPTY, + stateManager + ); + } + + public void testNullState() { + ErrorStrategy errorStrategy = detectorStateHandler.new ErrorStrategy(error); + DetectorInternalState state = errorStrategy.createNewState(null); + assertEquals(error, state.getError()); + assertTrue(state.getLastUpdateTime() != null); + } + + public void testNonNullState() { + String error = "blah"; + DetectorInternalState oldState = new DetectorInternalState.Builder().error(error).lastUpdateTime(Instant.ofEpochSecond(1L)).build(); + ErrorStrategy errorStrategy = detectorStateHandler.new ErrorStrategy(error); + DetectorInternalState state = errorStrategy.createNewState(oldState); + assertEquals(null, state); + } + + public void testOldErrorNull() { + ErrorStrategy errorStrategy = detectorStateHandler.new ErrorStrategy(error); + // old state's error is null + DetectorInternalState state = errorStrategy.createNewState(new DetectorInternalState.Builder().build()); + assertEquals(error, state.getError()); + assertTrue(state.getLastUpdateTime() != null); + } + + public void testBothErrorNull() { + ErrorStrategy errorStrategy = detectorStateHandler.new 
ErrorStrategy(null); + // old state's error is null + DetectorInternalState state = errorStrategy.createNewState(new DetectorInternalState.Builder().build()); + assertEquals(null, state); + } + + public void testNoUpdateWitoutErrorChange() { + when(stateManager.getLastError(anyString())).thenReturn(error); + doAnswer(invocation -> { + Object[] args = invocation.getArguments(); + @SuppressWarnings("unchecked") + ActionListener listener = (ActionListener) args[1]; + DetectorInternalState.Builder result = new DetectorInternalState.Builder().lastUpdateTime(Instant.now()).error(error); + listener.onResponse(TestHelpers.createGetResponse(result.build(), detectorId, DetectorInternalState.DETECTOR_STATE_INDEX)); + return null; + }).when(client).get(any(), any()); + + detectorStateHandler.saveError(error, detectorId); + + verify(indexUtils, never()).checkIndicesBlocked(any(), any(), any()); + } + + public void testUpdateWithErrorChange() { + when(stateManager.getLastError(anyString())).thenReturn("blah"); + doAnswer(invocation -> { + Object[] args = invocation.getArguments(); + @SuppressWarnings("unchecked") + ActionListener listener = (ActionListener) args[1]; + DetectorInternalState.Builder result = new DetectorInternalState.Builder().lastUpdateTime(Instant.now()).error("blah"); + listener.onResponse(TestHelpers.createGetResponse(result.build(), detectorId, DetectorInternalState.DETECTOR_STATE_INDEX)); + return null; + }).when(client).get(any(), any()); + + detectorStateHandler.saveError(error, detectorId); + + verify(indexUtils, times(1)).checkIndicesBlocked(any(), any(), any()); + } + + public void testUpdateWithFirstChange() { + when(stateManager.getLastError(anyString())).thenReturn(TransportStateManager.NO_ERROR); + doAnswer(invocation -> { + Object[] args = invocation.getArguments(); + @SuppressWarnings("unchecked") + ActionListener listener = (ActionListener) args[1]; + DetectorInternalState.Builder result = new DetectorInternalState.Builder() + 
.lastUpdateTime(Instant.ofEpochMilli(1)) + .error("blah"); + listener.onResponse(TestHelpers.createGetResponse(result.build(), detectorId, DetectorInternalState.DETECTOR_STATE_INDEX)); + return null; + }).when(client).get(any(), any()); + + detectorStateHandler.saveError(error, detectorId); + + verify(indexUtils, times(1)).checkIndicesBlocked(any(), any(), any()); + } +} diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtilsTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtilsTests.java index fc250f3f..d9386da4 100644 --- a/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtilsTests.java +++ b/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/IndexUtilsTests.java @@ -21,6 +21,7 @@ import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.threadpool.ThreadPool; @@ -33,6 +34,8 @@ public class IndexUtilsTests extends ESIntegTestCase { private ClientUtil clientUtil; + private IndexNameExpressionResolver indexNameResolver; + @Before public void setup() { Client client = client(); @@ -40,11 +43,12 @@ public void setup() { Throttler throttler = new Throttler(clock); ThreadPool context = TestHelpers.createThreadPool(); clientUtil = new ClientUtil(Settings.EMPTY, client, throttler, context); + indexNameResolver = mock(IndexNameExpressionResolver.class); } @Test public void testGetIndexHealth_NoIndex() { - IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService()); + IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService(), indexNameResolver); String output = indexUtils.getIndexHealthStatus("test"); assertEquals(IndexUtils.NONEXISTENT_INDEX_STATUS, output); } @@ -54,7 +58,7 @@ public void 
testGetIndexHealth_Index() { String indexName = "test-2"; createIndex(indexName); flush(); - IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService()); + IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService(), indexNameResolver); String status = indexUtils.getIndexHealthStatus(indexName); assertTrue(status.equals("green") || status.equals("yellow")); } @@ -67,14 +71,14 @@ public void testGetIndexHealth_Alias() { flush(); AcknowledgedResponse response = client().admin().indices().prepareAliases().addAlias(indexName, aliasName).execute().actionGet(); assertTrue(response.isAcknowledged()); - IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService()); + IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService(), indexNameResolver); String status = indexUtils.getIndexHealthStatus(aliasName); assertTrue(status.equals("green") || status.equals("yellow")); } @Test public void testGetNumberOfDocumentsInIndex_NonExistentIndex() { - IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService()); + IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService(), indexNameResolver); assertEquals((Long) 0L, indexUtils.getNumberOfDocumentsInIndex("index")); } @@ -89,7 +93,7 @@ public void testGetNumberOfDocumentsInIndex_RegularIndex() { index(indexName, "_doc", String.valueOf(i), "{}"); } flushAndRefresh(indexName); - IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService()); + IndexUtils indexUtils = new IndexUtils(client(), clientUtil, clusterService(), indexNameResolver); assertEquals((Long) count, indexUtils.getNumberOfDocumentsInIndex(indexName)); } } diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListenerTests.java b/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListenerTests.java new file mode 100644 index 00000000..7bd905ca --- /dev/null +++ 
b/src/test/java/com/amazon/opendistroforelasticsearch/ad/util/MultiResponsesDelegateActionListenerTests.java @@ -0,0 +1,48 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.opendistroforelasticsearch.ad.util; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.test.ESTestCase; + +import com.amazon.opendistroforelasticsearch.ad.model.DetectorProfile; + +public class MultiResponsesDelegateActionListenerTests extends ESTestCase { + + public void testEmptyResponse() throws InterruptedException { + final CountDownLatch inProgressLatch = new CountDownLatch(1); + ActionListener actualListener = ActionListener.wrap(response -> { + assertTrue("Should not reach here", false); + inProgressLatch.countDown(); + }, exception -> { + String exceptionMsg = exception.getMessage(); + assertTrue(exceptionMsg, exceptionMsg.contains(MultiResponsesDelegateActionListener.NO_RESPONSE)); + inProgressLatch.countDown(); + }); + + MultiResponsesDelegateActionListener multiListener = new MultiResponsesDelegateActionListener( + actualListener, + 2, + "blah" + ); + multiListener.onResponse(null); + multiListener.onResponse(null); + assertTrue(inProgressLatch.await(100, TimeUnit.SECONDS)); + } +} diff --git a/src/test/java/test/com/amazon/opendistroforelasticsearch/ad/util/FakeNode.java 
b/src/test/java/test/com/amazon/opendistroforelasticsearch/ad/util/FakeNode.java index 7e0fe2b1..1dffbeda 100644 --- a/src/test/java/test/com/amazon/opendistroforelasticsearch/ad/util/FakeNode.java +++ b/src/test/java/test/com/amazon/opendistroforelasticsearch/ad/util/FakeNode.java @@ -23,7 +23,9 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.function.Function; import org.apache.lucene.util.SetOnce; @@ -40,16 +42,19 @@ import org.elasticsearch.common.network.NetworkService; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.BoundTransportAddress; +import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.util.PageCacheRecycler; import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; import org.elasticsearch.tasks.TaskManager; +import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.tasks.MockTaskManager; import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportInterceptor; import org.elasticsearch.transport.TransportService; import org.elasticsearch.transport.nio.MockNioTransport; public class FakeNode implements Releasable { - public FakeNode(String name, ThreadPool threadPool, Settings settings) { + public FakeNode(String name, ThreadPool threadPool, Settings settings, TransportInterceptor transportInterceptor) { final Function boundTransportAddressDiscoveryNodeFunction = address -> { discoveryNode.set(new DiscoveryNode(name, address.publishAddress(), emptyMap(), emptySet(), Version.CURRENT)); return discoveryNode.get(); @@ -64,9 +69,14 @@ public FakeNode(String name, ThreadPool threadPool, Settings settings) { PageCacheRecycler.NON_RECYCLING_INSTANCE, new NamedWriteableRegistry(ClusterModule.getNamedWriteables()), new NoneCircuitBreakerService() - ), + ) { + @Override + public 
TransportAddress[] addressesFromString(String address) { + return new TransportAddress[] { dns.getOrDefault(address, ESTestCase.buildNewFakeTransportAddress()) }; + } + }, threadPool, - TransportService.NOOP_TRANSPORT_INTERCEPTOR, + transportInterceptor, boundTransportAddressDiscoveryNodeFunction, null, Collections.emptySet() @@ -80,6 +90,7 @@ protected TaskManager createTaskManager(Settings settings, ThreadPool threadPool } } }; + transportService.start(); clusterService = createClusterService(threadPool, discoveryNode.get()); clusterService.addStateApplier(transportService.getTaskManager()); @@ -89,11 +100,16 @@ protected TaskManager createTaskManager(Settings settings, ThreadPool threadPool transportService.acceptIncomingRequests(); } + public FakeNode(String name, ThreadPool threadPool, Settings settings) { + this(name, threadPool, settings, TransportService.NOOP_TRANSPORT_INTERCEPTOR); + } + public final ClusterService clusterService; public final TransportService transportService; private final SetOnce discoveryNode = new SetOnce<>(); public final TransportListTasksAction transportListTasksAction; public final TransportCancelTasksAction transportCancelTasksAction; + private final Map dns = new ConcurrentHashMap<>(); @Override public void close() {